diff --git a/CHANGELOG.md b/CHANGELOG.md
index e1a70f961..94db8bcdc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,21 @@
 # Release History
 
+# 3.7.2 (2025-01-31)
+
+- Updated the `retry_delay_max` and `retry_timeout` values (databricks/databricks-sql-python#497 by @jprakash-db)
+
+# 3.7.1 (2025-01-07)
+
+- Relaxed the number of HTTP retry attempts (databricks/databricks-sql-python#486 by @jprakash-db)
+
+# 3.7.0 (2024-12-23)
+
+- Fix: Incorrect number of rows fetched in inline results when fetching results with FETCH_NEXT orientation (databricks/databricks-sql-python#479 by @jprakash-db)
+- Updated the docs to specify that native parameters are not supported in the PUT operation (databricks/databricks-sql-python#477 by @jprakash-db)
+- Relaxed the `pyarrow` and `numpy` pins (databricks/databricks-sql-python#452 by @arredond)
+- Feature: Support for async execute has been added (databricks/databricks-sql-python#463 by @jprakash-db)
+- Updated the HTTP retry logic to be similar to the other Databricks drivers (databricks/databricks-sql-python#467 by @jprakash-db)
+
 # 3.6.0 (2024-10-25)
 
 - Support encryption headers in the cloud fetch request (https://github.com/databricks/databricks-sql-python/pull/460 by @jackyhu-db)
diff --git a/docs/parameters.md b/docs/parameters.md
index a538af1a6..f9f4c5ff9 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -17,6 +17,7 @@ See `examples/parameters.py` in this repository for a runnable demo.
 
 - A query executed with native parameters can contain at most 255 parameter markers
 - The maximum size of all parameterized values cannot exceed 1MB
+- For volume operations such as PUT, native parameters are not supported
 
 ## SQL Syntax
 
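Because PUT cannot carry native parameter markers, any local and volume paths have to be inlined into the SQL text. A minimal sketch of a volume upload without native parameters — the hostname, credentials, and `/Volumes/...` path below are placeholders, not real values:

```python
from databricks import sql

# All connection details below are hypothetical placeholders.
with sql.connect(
    server_hostname="...",
    http_path="...",
    access_token="...",
    staging_allowed_local_path="/tmp",  # required for PUT/GET staging operations
) as conn:
    with conn.cursor() as cursor:
        # Native parameter markers (e.g. :path) are NOT supported here,
        # so both paths are written directly into the statement.
        cursor.execute(
            "PUT '/tmp/data.csv' INTO '/Volumes/main/default/my_volume/data.csv' OVERWRITE"
        )
```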
diff --git a/poetry.lock b/poetry.lock
index 9fe49690f..576adbd35 100755
--- a/poetry.lock
+++ b/poetry.lock
@@ -1202,4 +1202,4 @@ sqlalchemy = ["sqlalchemy"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.0"
-content-hash = "31066a85f646d0009d6fe9ffc833a64fcb4b6923c2e7f2652e7aa8540acba298"
+content-hash = "9d8a91369fc79f9ca9f7502e2ed284b66531c954ae59a723e465a76073966998"
diff --git a/pyproject.toml b/pyproject.toml
index 1747d21b1..6a38e33b1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "databricks-sql-connector"
-version = "3.6.0"
+version = "3.7.2"
 description = "Databricks SQL Connector for Python"
 authors = ["Databricks "]
 license = "Apache-2.0"
@@ -14,14 +14,14 @@ thrift = ">=0.16.0,<0.21.0"
 pandas = [
     { version = ">=1.2.5,<2.3.0", python = ">=3.8" }
 ]
-pyarrow = ">=14.0.1,<17"
+pyarrow = ">=14.0.1"
 lz4 = "^4.0.2"
 requests = "^2.18.1"
 oauthlib = "^3.1.0"
 numpy = [
-    { version = "^1.16.6", python = ">=3.8,<3.11" },
-    { version = "^1.23.4", python = ">=3.11" },
+    { version = ">=1.16.6", python = ">=3.8,<3.11" },
+    { version = ">=1.23.4", python = ">=3.11" },
 ]
 sqlalchemy = { version = ">=2.0.21", optional = true }
 openpyxl = "^3.0.10"
diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py
index 42167b008..931e5c0af 100644
--- a/src/databricks/sql/__init__.py
+++ b/src/databricks/sql/__init__.py
@@ -68,7 +68,7 @@ def __repr__(self):
 DATE = DBAPITypeObject("date")
 ROWID = DBAPITypeObject()
 
-__version__ = "3.6.0"
+__version__ = "3.7.2"
 USER_AGENT_NAME = "PyDatabricksSqlConnector"
 
 # These two functions are pyhive legacy
diff --git a/src/databricks/sql/auth/retry.py b/src/databricks/sql/auth/retry.py
index 0c6547cb4..eedcc773f 100755
--- a/src/databricks/sql/auth/retry.py
+++ b/src/databricks/sql/auth/retry.py
@@ -1,4 +1,5 @@
 import logging
+import random
 import time
 import typing
 from enum import Enum
@@ -285,25 +286,31 @@ def sleep_for_retry(self, response: BaseHTTPResponse) -> bool:
         """
         retry_after = self.get_retry_after(response)
         if retry_after:
-            backoff = self.get_backoff_time()
-            proposed_wait = max(backoff, retry_after)
-            self.check_proposed_wait(proposed_wait)
-            time.sleep(proposed_wait)
-            return True
+            proposed_wait = retry_after
+        else:
+            proposed_wait = self.get_backoff_time()
 
-        return False
+        proposed_wait = max(proposed_wait, self.delay_max)
+        self.check_proposed_wait(proposed_wait)
+        logger.debug(f"Retrying after {proposed_wait} seconds")
+        time.sleep(proposed_wait)
+        return True
 
     def get_backoff_time(self) -> float:
-        """Calls urllib3's built-in get_backoff_time.
+        """
+        Calculate the delay between retries using exponential backoff.
 
         Never returns a value larger than self.delay_max
         A MaxRetryDurationError will be raised if the calculated backoff would exceed self.max_attempts_duration
 
-        Note: within urllib3, a backoff is only calculated in cases where a Retry-After header is not present
-        in the previous unsuccessful request and `self.respect_retry_after_header` is True (which is always true)
+        :return: the proposed backoff delay, in seconds
         """
-        proposed_backoff = super().get_backoff_time()
+        current_attempt = self.stop_after_attempts_count - int(self.total or 0)
+        proposed_backoff = (2**current_attempt) * self.delay_min
+        if self.backoff_jitter != 0.0:
+            proposed_backoff += random.random() * self.backoff_jitter
+        proposed_backoff = min(proposed_backoff, self.delay_max)
 
         self.check_proposed_wait(proposed_backoff)
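The new `get_backoff_time` replaces urllib3's built-in calculation with an exponential ramp from `delay_min`, capped at `delay_max`, plus optional jitter. A self-contained sketch of the same arithmetic, using illustrative values rather than the connector's defaults:

```python
import random


def backoff_delay(attempt: int, delay_min: float, delay_max: float, jitter: float = 0.0) -> float:
    """Exponential backoff capped at delay_max, mirroring DatabricksRetryPolicy.get_backoff_time."""
    delay = (2**attempt) * delay_min
    if jitter != 0.0:
        delay += random.random() * jitter  # spread simultaneous retries apart
    return min(delay, delay_max)


# Illustrative values: delay_min=1s, delay_max=30s -> 2, 4, 8, 16, 30, 30, ...
print([backoff_delay(n, delay_min=1, delay_max=30) for n in range(1, 7)])
```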
diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py
index 4e0ab941b..aefed1ef0 100755
--- a/src/databricks/sql/client.py
+++ b/src/databricks/sql/client.py
@@ -1,3 +1,4 @@
+import time
 from typing import Dict, Tuple, List, Optional, Any, Union, Sequence
 
 import pandas
@@ -47,6 +48,7 @@
 from databricks.sql.thrift_api.TCLIService.ttypes import (
     TSparkParameter,
+    TOperationState,
 )
@@ -430,6 +432,8 @@ def __init__(
         self.escaper = ParamEscaper()
         self.lastrowid = None
 
+        self.ASYNC_DEFAULT_POLLING_INTERVAL = 2
+
     # The ideal return type for this method is perhaps Self, but that was not added until 3.11, and we support pre-3.11 pythons, currently.
     def __enter__(self) -> "Cursor":
         return self
@@ -796,6 +800,7 @@ def execute(
             cursor=self,
             use_cloud_fetch=self.connection.use_cloud_fetch,
             parameters=prepared_params,
+            async_op=False,
         )
         self.active_result_set = ResultSet(
             self.connection,
@@ -803,6 +808,7 @@
             self.thrift_backend,
             self.buffer_size_bytes,
             self.arraysize,
+            self.connection.use_cloud_fetch,
         )
 
         if execute_response.is_staging_operation:
@@ -812,6 +818,106 @@
         return self
 
+    def execute_async(
+        self,
+        operation: str,
+        parameters: Optional[TParameterCollection] = None,
+    ) -> "Cursor":
+        """
+        Execute a query asynchronously and return immediately, without waiting for it to complete.
+
+        :param operation: The SQL statement to execute, as in execute()
+        :param parameters: Optional query parameters, as in execute()
+        :return: self
+        """
+        param_approach = self._determine_parameter_approach(parameters)
+        if param_approach == ParameterApproach.NONE:
+            prepared_params = NO_NATIVE_PARAMS
+            prepared_operation = operation
+
+        elif param_approach == ParameterApproach.INLINE:
+            prepared_operation, prepared_params = self._prepare_inline_parameters(
+                operation, parameters
+            )
+        elif param_approach == ParameterApproach.NATIVE:
+            normalized_parameters = self._normalize_tparametercollection(parameters)
+            param_structure = self._determine_parameter_structure(normalized_parameters)
+            transformed_operation = transform_paramstyle(
+                operation, normalized_parameters, param_structure
+            )
+            prepared_operation, prepared_params = self._prepare_native_parameters(
+                transformed_operation, normalized_parameters, param_structure
+            )
+
+        self._check_not_closed()
+        self._close_and_clear_active_result_set()
+        self.thrift_backend.execute_command(
+            operation=prepared_operation,
+            session_handle=self.connection._session_handle,
+            max_rows=self.arraysize,
+            max_bytes=self.buffer_size_bytes,
+            lz4_compression=self.connection.lz4_compression,
+            cursor=self,
+            use_cloud_fetch=self.connection.use_cloud_fetch,
+            parameters=prepared_params,
+            async_op=True,
+        )
+
+        return self
+
+    def get_query_state(self) -> "TOperationState":
+        """
+        Poll the server for the current state of the asynchronously executing query.
+
+        :return: the current TOperationState of the query
+        """
+        self._check_not_closed()
+        return self.thrift_backend.get_query_state(self.active_op_handle)
+
+    def get_async_execution_result(self):
+        """
+        Poll the asynchronously executing query until it leaves the pending/running states,
+        then fetch its result if it finished successfully.
+
+        Raises an Error if the query ends in any state other than FINISHED_STATE.
+        :return: self
+        """
+        self._check_not_closed()
+
+        def is_executing(operation_state) -> bool:
+            return not operation_state or operation_state in [
+                ttypes.TOperationState.RUNNING_STATE,
+                ttypes.TOperationState.PENDING_STATE,
+            ]
+
+        while is_executing(self.get_query_state()):
+            # Poll at the default interval until the query leaves a pending/running state
+            time.sleep(self.ASYNC_DEFAULT_POLLING_INTERVAL)
+
+        operation_state = self.get_query_state()
+        if operation_state == ttypes.TOperationState.FINISHED_STATE:
+            execute_response = self.thrift_backend.get_execution_result(
+                self.active_op_handle, self
+            )
+            self.active_result_set = ResultSet(
+                self.connection,
+                execute_response,
+                self.thrift_backend,
+                self.buffer_size_bytes,
+                self.arraysize,
+            )
+
+            if execute_response.is_staging_operation:
+                self._handle_staging_operation(
+                    staging_allowed_local_path=self.thrift_backend.staging_allowed_local_path
+                )
+
+            return self
+        else:
+            raise Error(
+                f"get_execution_result failed with operation state {operation_state}"
+            )
+
     def executemany(self, operation, seq_of_parameters):
         """
         Execute the operation once for every set of passed in parameters.
@@ -1097,6 +1203,7 @@ def __init__(
         thrift_backend: ThriftBackend,
         result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES,
         arraysize: int = 10000,
+        use_cloud_fetch: bool = True,
     ):
         """
         A ResultSet manages the results of a single command.
@@ -1118,6 +1225,7 @@
         self.description = execute_response.description
         self._arrow_schema_bytes = execute_response.arrow_schema_bytes
         self._next_row_index = 0
+        self._use_cloud_fetch = use_cloud_fetch
 
         if execute_response.arrow_queue:
             # In this case the server has taken the fast path and returned an initial batch of
@@ -1145,6 +1253,7 @@ def _fill_results_buffer(self):
             lz4_compressed=self.lz4_compressed,
             arrow_schema_bytes=self._arrow_schema_bytes,
             description=self.description,
+            use_cloud_fetch=self._use_cloud_fetch,
         )
         self.results = results
         self.has_more_rows = has_more_rows
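Taken together, the new cursor surface works the way the e2e test further down exercises it: submit with `execute_async`, poll `get_query_state` until the operation leaves the pending/running states, then collect rows with `get_async_execution_result` (which also polls internally). A minimal sketch; hostname, path, and token are placeholders:

```python
import time

from databricks import sql
from databricks.sql.thrift_api.TCLIService import ttypes


def still_executing(state) -> bool:
    # Mirrors the connector's own polling helper: no state yet, pending, or running.
    return not state or state in (
        ttypes.TOperationState.PENDING_STATE,
        ttypes.TOperationState.RUNNING_STATE,
    )


# Hypothetical connection details.
with sql.connect(server_hostname="...", http_path="...", access_token="...") as conn:
    with conn.cursor() as cursor:
        # Returns immediately; the query keeps running server-side.
        cursor.execute_async("SELECT COUNT(*) FROM range(10000)")

        while still_executing(cursor.get_query_state()):
            time.sleep(2)  # same cadence as ASYNC_DEFAULT_POLLING_INTERVAL

        # Raises if the query ended in any state other than FINISHED_STATE.
        cursor.get_async_execution_result()
        print(cursor.fetchall())
```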
diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py
index cf5cd906b..9beab0371 100644
--- a/src/databricks/sql/thrift_backend.py
+++ b/src/databricks/sql/thrift_backend.py
@@ -7,6 +7,8 @@
 import threading
 from typing import List, Union
 
+from databricks.sql.thrift_api.TCLIService.ttypes import TOperationState
+
 try:
     import pyarrow
 except ImportError:
@@ -319,7 +321,7 @@ def _handle_request_error(self, error_info, attempt, elapsed):
 
     # FUTURE: Consider moving to https://github.com/litl/backoff or
     # https://github.com/jd/tenacity for retry logic.
-    def make_request(self, method, request):
+    def make_request(self, method, request, retryable=True):
         """Execute given request, attempting retries when
             1. Receiving HTTP 429/503 from server
             2. OSError is raised during a GetOperationStatus
@@ -458,7 +460,7 @@ def attempt_request(attempt):
         # return on success
         # if available: bounded delay and retry
         # if not: raise error
-        max_attempts = self._retry_stop_after_attempts_count
+        max_attempts = self._retry_stop_after_attempts_count if retryable else 1
 
         # use index-1 counting for logging/human consistency
         for attempt in range(1, max_attempts + 1):
@@ -769,6 +771,63 @@ def _results_message_to_execute_response(self, resp, operation_state):
             arrow_schema_bytes=schema_bytes,
         )
 
+    def get_execution_result(self, op_handle, cursor):
+        assert op_handle is not None
+
+        req = ttypes.TFetchResultsReq(
+            operationHandle=ttypes.TOperationHandle(
+                op_handle.operationId,
+                op_handle.operationType,
+                False,
+                op_handle.modifiedRowCount,
+            ),
+            maxRows=cursor.arraysize,
+            maxBytes=cursor.buffer_size_bytes,
+            orientation=ttypes.TFetchOrientation.FETCH_NEXT,
+            includeResultSetMetadata=True,
+        )
+
+        resp = self.make_request(self._client.FetchResults, req)
+
+        t_result_set_metadata_resp = resp.resultSetMetadata
+
+        lz4_compressed = t_result_set_metadata_resp.lz4Compressed
+        is_staging_operation = t_result_set_metadata_resp.isStagingOperation
+        has_more_rows = resp.hasMoreRows
+        description = self._hive_schema_to_description(
+            t_result_set_metadata_resp.schema
+        )
+
+        schema_bytes = (
+            t_result_set_metadata_resp.arrowSchema
+            or self._hive_schema_to_arrow_schema(t_result_set_metadata_resp.schema)
+            .serialize()
+            .to_pybytes()
+        )
+
+        queue = ResultSetQueueFactory.build_queue(
+            row_set_type=resp.resultSetMetadata.resultFormat,
+            t_row_set=resp.results,
+            arrow_schema_bytes=schema_bytes,
+            max_download_threads=self.max_download_threads,
+            lz4_compressed=lz4_compressed,
+            description=description,
+            ssl_options=self._ssl_options,
+        )
+
+        return ExecuteResponse(
+            arrow_queue=queue,
+            status=resp.status,
+            has_been_closed_server_side=False,
+            has_more_rows=has_more_rows,
+            lz4_compressed=lz4_compressed,
+            is_staging_operation=is_staging_operation,
+            command_handle=op_handle,
+            description=description,
+            arrow_schema_bytes=schema_bytes,
+        )
+
     def _wait_until_command_done(self, op_handle, initial_operation_status_resp):
         if initial_operation_status_resp:
             self._check_command_not_in_error_or_closed_state(
@@ -787,6 +846,12 @@ def _wait_until_command_done(self, op_handle, initial_operation_status_resp):
             self._check_command_not_in_error_or_closed_state(op_handle, poll_resp)
         return operation_state
 
+    def get_query_state(self, op_handle) -> "TOperationState":
+        poll_resp = self._poll_for_status(op_handle)
+        operation_state = poll_resp.operationState
+        self._check_command_not_in_error_or_closed_state(op_handle, poll_resp)
+        return operation_state
+
     @staticmethod
     def _check_direct_results_for_error(t_spark_direct_results):
         if t_spark_direct_results:
@@ -817,6 +882,7 @@ def execute_command(
         cursor,
         use_cloud_fetch=True,
         parameters=[],
+        async_op=False,
     ):
         assert session_handle is not None
 
@@ -846,7 +912,11 @@ def execute_command(
             parameters=parameters,
         )
         resp = self.make_request(self._client.ExecuteStatement, req)
-        return self._handle_execute_response(resp, cursor)
+
+        if async_op:
+            self._handle_execute_response_async(resp, cursor)
+        else:
+            return self._handle_execute_response(resp, cursor)
 
     def get_catalogs(self, session_handle, max_rows, max_bytes, cursor):
         assert session_handle is not None
@@ -945,6 +1015,10 @@ def _handle_execute_response(self, resp, cursor):
 
         return self._results_message_to_execute_response(resp, final_operation_state)
 
+    def _handle_execute_response_async(self, resp, cursor):
+        cursor.active_op_handle = resp.operationHandle
+        self._check_direct_results_for_error(resp.directResults)
+
     def fetch_results(
         self,
         op_handle,
@@ -954,6 +1028,7 @@
         lz4_compressed,
         arrow_schema_bytes,
         description,
+        use_cloud_fetch=True,
     ):
         assert op_handle is not None
 
@@ -970,10 +1045,11 @@ def fetch_results(
             includeResultSetMetadata=True,
         )
 
-        resp = self.make_request(self._client.FetchResults, req)
+        # Inline fetches with FETCH_NEXT orientation are not idempotent and hence are not retried
+        resp = self.make_request(self._client.FetchResults, req, use_cloud_fetch)
 
         if resp.results.startRowOffset > expected_row_start_offset:
-            logger.warning(
-                "Expected results to start from {} but they instead start at {}".format(
+            raise DataError(
+                "fetch_results failed due to inconsistency in the state between the client and the server. Expected results to start at row {} but they instead start at row {}; some result batches must have been skipped".format(
                     expected_row_start_offset, resp.results.startRowOffset
                 )
             )
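With inline FETCH_NEXT fetches no longer retried, an offset mismatch now surfaces as a `DataError` rather than a warning, so a caller that hits one has to re-run the statement instead of silently continuing past skipped batches. A hedged sketch of one way to handle that — the helper below is hypothetical, not part of the connector, and assumes `databricks.sql.exc.DataError` (the connector's DB-API error class):

```python
from databricks.sql.exc import DataError


def fetch_all_or_retry(cursor, query: str, max_attempts: int = 2):
    """Re-run the whole query if the client/server row offsets fall out of sync."""
    for attempt in range(max_attempts):
        cursor.execute(query)
        try:
            return cursor.fetchall()
        except DataError:
            # Result batches may have been skipped; the safe recovery is a full re-run.
            if attempt == max_attempts - 1:
                raise
```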
diff --git a/tests/e2e/common/retry_test_mixins.py b/tests/e2e/common/retry_test_mixins.py
index 7dd5f7450..b5d01a45d 100755
--- a/tests/e2e/common/retry_test_mixins.py
+++ b/tests/e2e/common/retry_test_mixins.py
@@ -121,9 +121,9 @@ class PySQLRetryTestsMixin:
     # For testing purposes
     _retry_policy = {
         "_retry_delay_min": 0.1,
-        "_retry_delay_max": 5,
+        "_retry_delay_max": 3,
         "_retry_stop_after_attempts_count": 5,
-        "_retry_stop_after_attempts_duration": 10,
+        "_retry_stop_after_attempts_duration": 30,
         "_retry_delay_default": 0.5,
     }
 
@@ -135,7 +135,7 @@ def test_retry_urllib3_settings_are_honored(self):
         urllib3_config = {"connect": 10, "read": 11, "redirect": 12}
         rp = DatabricksRetryPolicy(
             delay_min=0.1,
-            delay_max=10.0,
+            delay_max=3,
             stop_after_attempts_count=10,
             stop_after_attempts_duration=10.0,
             delay_default=1.0,
@@ -174,14 +174,14 @@ def test_retry_max_count_not_exceeded(self):
     def test_retry_exponential_backoff(self):
         """GIVEN the retry policy is configured for reasonable exponential backoff
         WHEN the server sends nothing but 429 responses with retry-afters
-        THEN the connector will use those retry-afters as a floor
+        THEN the connector will use those Retry-After values as a floor
         """
         retry_policy = self._retry_policy.copy()
         retry_policy["_retry_delay_min"] = 1
 
         time_start = time.time()
         with mocked_server_response(
-            status=429, headers={"Retry-After": "3"}
+            status=429, headers={"Retry-After": "8"}
         ) as mock_obj:
             with pytest.raises(RequestError) as cm:
                 with self.connection(extra_params=retry_policy) as conn:
@@ -191,14 +191,14 @@
         assert isinstance(cm.value.args[1], MaxRetryDurationError)
 
         # With setting delay_min to 1, the expected retry delays should be:
-        # 3, 3, 4
-        # The first 2 retries are allowed, the 3rd retry puts the total duration over the limit
-        # of 10 seconds
-        assert mock_obj.return_value.getresponse.call_count == 3
-        assert duration > 6
-
-        # Should be less than 7, but this is a safe margin for CI/CD slowness
-        assert duration < 10
+        # 8, 8, 8, 8
+        # The first 3 retries are allowed; the 4th retry puts the total duration over the limit
+        # of 30 seconds
+        assert mock_obj.return_value.getresponse.call_count == 4
+        assert duration > 24
+
+        # Should be less than 26, but this is a safe margin for CI/CD slowness
+        assert duration < 30
 
     def test_retry_max_duration_not_exceeded(self):
         """GIVEN the max attempt duration of 10 seconds
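The `_retry_*` keys in the mixin above are the same keyword arguments an application can pass to `sql.connect` (the tests forward them via `extra_params`), so the tuned policy can be reproduced on a real connection. A sketch with illustrative values, not the connector's defaults:

```python
from databricks import sql

# Hypothetical workspace details; the _retry_* values are illustrative only.
connection = sql.connect(
    server_hostname="...",
    http_path="...",
    access_token="...",
    _retry_delay_min=0.1,
    _retry_delay_max=3,
    _retry_stop_after_attempts_count=5,
    _retry_stop_after_attempts_duration=30,
    _retry_delay_default=0.5,
)
```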
diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py
index cfd1e9699..2f0881cda 100644
--- a/tests/e2e/test_driver.py
+++ b/tests/e2e/test_driver.py
@@ -36,6 +36,7 @@
     compare_dbr_versions,
     is_thrift_v5_plus,
 )
+from databricks.sql.thrift_api.TCLIService import ttypes
 from tests.e2e.common.core_tests import CoreTestMixin, SmokeTestMixin
 from tests.e2e.common.large_queries_mixin import LargeQueriesMixin
 from tests.e2e.common.timestamp_tests import TimestampTestsMixin
@@ -78,6 +79,7 @@ class PySQLPytestTestCase:
     }
     arraysize = 1000
     buffer_size_bytes = 104857600
+    POLLING_INTERVAL = 2
 
     @pytest.fixture(autouse=True)
     def get_details(self, connection_details):
@@ -175,6 +177,27 @@ def test_cloud_fetch(self):
         for i in range(len(cf_result)):
             assert cf_result[i] == noop_result[i]
 
+    def test_execute_async(self):
+        def is_executing(operation_state):
+            return not operation_state or operation_state in [
+                ttypes.TOperationState.RUNNING_STATE,
+                ttypes.TOperationState.PENDING_STATE,
+            ]
+
+        long_running_query = "SELECT COUNT(*) FROM RANGE(10000 * 16) x JOIN RANGE(10000) y ON FROM_UNIXTIME(x.id * y.id, 'yyyy-MM-dd') LIKE '%not%a%date%'"
+        with self.cursor() as cursor:
+            cursor.execute_async(long_running_query)
+
+            # Poll every POLLING_INTERVAL seconds
+            while is_executing(cursor.get_query_state()):
+                time.sleep(self.POLLING_INTERVAL)
+                log.info("Polling the status in test_execute_async")
+
+            cursor.get_async_execution_result()
+            result = cursor.fetchall()
+
+        assert result[0].asDict() == {"count(1)": 0}
+
 
 # Exclude Retry tests because they require specific setups, and LargeQueries too slow for core
 # tests
diff --git a/tests/unit/test_fetches.py b/tests/unit/test_fetches.py
index e9a58acdd..89cedcfae 100644
--- a/tests/unit/test_fetches.py
+++ b/tests/unit/test_fetches.py
@@ -66,6 +66,7 @@ def fetch_results(
             lz4_compressed,
             arrow_schema_bytes,
             description,
+            use_cloud_fetch=True,
         ):
             nonlocal batch_index
             results = FetchTests.make_arrow_queue(batch_list[batch_index])
diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py
index 2108af4fa..897a1d111 100644
--- a/tests/unit/test_retry.py
+++ b/tests/unit/test_retry.py
@@ -1,10 +1,9 @@
-from os import error
 import time
-from unittest.mock import Mock, patch
+from unittest.mock import patch, call
 import pytest
-from requests import Request
 from urllib3 import HTTPResponse
-from databricks.sql.auth.retry import DatabricksRetryPolicy, RequestHistory
+from databricks.sql.auth.retry import DatabricksRetryPolicy, RequestHistory, CommandType
+from urllib3.exceptions import MaxRetryError
 
 
 class TestRetry:
@@ -25,32 +24,62 @@ def error_history(self) -> RequestHistory:
             method="POST", url=None, error=None, status=503, redirect_location=None
         )
 
+    def calculate_backoff_time(self, attempt, delay_min, delay_max):
+        exponential_backoff_time = (2**attempt) * delay_min
+        return min(exponential_backoff_time, delay_max)
+
     @patch("time.sleep")
     def test_sleep__no_retry_after(self, t_mock, retry_policy, error_history):
         retry_policy._retry_start_time = time.time()
         retry_policy.history = [error_history, error_history]
         retry_policy.sleep(HTTPResponse(status=503))
-        t_mock.assert_called_with(2)
 
-    @patch("time.sleep")
-    def test_sleep__retry_after_is_binding(self, t_mock, retry_policy, error_history):
-        retry_policy._retry_start_time = time.time()
-        retry_policy.history = [error_history, error_history]
-        retry_policy.sleep(HTTPResponse(status=503, headers={"Retry-After": "3"}))
-        t_mock.assert_called_with(3)
+        expected_backoff_time = max(
+            self.calculate_backoff_time(
+                0, retry_policy.delay_min, retry_policy.delay_max
+            ),
+            retry_policy.delay_max,
+        )
+        t_mock.assert_called_with(expected_backoff_time)
 
     @patch("time.sleep")
-    def test_sleep__retry_after_present_but_not_binding(
-        self, t_mock, retry_policy, error_history
-    ):
+    def test_sleep__no_retry_after_header__multiple_retries(self, t_mock, retry_policy):
+        num_attempts = retry_policy.stop_after_attempts_count
+
         retry_policy._retry_start_time = time.time()
-        retry_policy.history = [error_history, error_history]
-        retry_policy.sleep(HTTPResponse(status=503, headers={"Retry-After": "1"}))
-        t_mock.assert_called_with(2)
+        retry_policy.command_type = CommandType.OTHER
+
+        for attempt in range(num_attempts):
+            retry_policy.sleep(HTTPResponse(status=503))
+            # Internally urllib3 calls the increment function, generating a new policy instance for every retry
+            retry_policy = retry_policy.increment()
+
+        expected_backoff_times = []
+        for attempt in range(num_attempts):
+            expected_backoff_times.append(
+                max(
+                    self.calculate_backoff_time(
+                        attempt, retry_policy.delay_min, retry_policy.delay_max
+                    ),
+                    retry_policy.delay_max,
+                )
+            )
+
+        # Assert that the sleep values were passed in the expected order
+        t_mock.assert_has_calls(
+            [call(expected_time) for expected_time in expected_backoff_times]
+        )
 
     @patch("time.sleep")
-    def test_sleep__retry_after_surpassed(self, t_mock, retry_policy, error_history):
+    def test_excessive_retry_attempts_error(self, t_mock, retry_policy):
+        # Attempting more than stop_after_attempts_count retries should raise MaxRetryError
+        num_attempts = retry_policy.stop_after_attempts_count + 1
+
         retry_policy._retry_start_time = time.time()
-        retry_policy.history = [error_history, error_history, error_history]
-        retry_policy.sleep(HTTPResponse(status=503, headers={"Retry-After": "3"}))
-        t_mock.assert_called_with(4)
+        retry_policy.command_type = CommandType.OTHER
+
+        with pytest.raises(MaxRetryError):
+            for attempt in range(num_attempts):
+                retry_policy.sleep(HTTPResponse(status=503))
+                # Internally urllib3 calls the increment function, generating a new policy instance for every retry
+                retry_policy = retry_policy.increment()