From 0fbaa47d282847b45f1eb3590ddaf4e40a321707 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 13 Jun 2023 13:38:01 -0500 Subject: [PATCH 1/2] Retry GetOperationStatus for HTTPErrors too Signed-off-by: Jesse Whitehouse --- src/databricks/sql/thrift_backend.py | 12 +++++++ tests/unit/test_thrift_backend.py | 48 +++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index c61dc99e4..d5eec9749 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -14,6 +14,8 @@ import thrift.transport.TSocket import thrift.transport.TTransport +import urllib3.exceptions + import databricks.sql.auth.thrift_http_client from databricks.sql.auth.authenticators import AuthProvider from databricks.sql.thrift_api.TCLIService import TCLIService, ttypes @@ -324,6 +326,16 @@ def attempt_request(attempt): logger.debug("Received response: {}".format(response)) return response + + except urllib3.exceptions.HTTPError as err: + # retry on timeout. Happens a lot in Azure and it is safe as data has not been sent to server yet + + gos_name = TCLIServiceClient.GetOperationStatus.__name__ + if method.__name__ == gos_name: + retry_delay = bound_retry_delay(attempt, self._retry_delay_default) + logger.info(f"GetOperationStatus failed with HTTP error and will be retried: {str(err)}") + else: + raise err except OSError as err: error = err error_message = str(err) diff --git a/tests/unit/test_thrift_backend.py b/tests/unit/test_thrift_backend.py index 347bce155..7ef0fa2ce 100644 --- a/tests/unit/test_thrift_backend.py +++ b/tests/unit/test_thrift_backend.py @@ -6,6 +6,7 @@ from ssl import CERT_NONE, CERT_REQUIRED import pyarrow +import urllib3 import databricks.sql from databricks.sql.thrift_api.TCLIService import ttypes @@ -1033,7 +1034,7 @@ def test_handle_execute_response_sets_active_op_handle(self): self.assertEqual(mock_resp.operationHandle, mock_cursor.active_op_handle) - @patch("thrift.transport.THttpClient.THttpClient") + @patch("databricks.sql.auth.thrift_http_client.THttpClient") @patch("databricks.sql.thrift_api.TCLIService.TCLIService.Client.GetOperationStatus") @patch("databricks.sql.thrift_backend._retry_policy", new_callable=retry_policy_factory) def test_make_request_will_retry_GetOperationStatus( @@ -1089,6 +1090,51 @@ def test_make_request_will_retry_GetOperationStatus( # The warnings should include this text self.assertIn(f"{this_gos_name} failed with code {errno.EEXIST} and will attempt to retry", cm.output[0]) + @patch("databricks.sql.thrift_api.TCLIService.TCLIService.Client.GetOperationStatus") + @patch("databricks.sql.thrift_backend._retry_policy", new_callable=retry_policy_factory) + def test_make_request_will_retry_GetOperationStatus_for_http_error( + self, mock_retry_policy, mock_gos): + + import urllib3.exceptions + mock_gos.side_effect = urllib3.exceptions.HTTPError("Read timed out") + + import thrift, errno + from databricks.sql.thrift_api.TCLIService.TCLIService import Client + from databricks.sql.exc import RequestError + from databricks.sql.utils import NoRetryReason + from databricks.sql.auth.thrift_http_client import THttpClient + + this_gos_name = "GetOperationStatus" + mock_gos.__name__ = this_gos_name + + protocol = thrift.protocol.TBinaryProtocol.TBinaryProtocol(THttpClient) + client = Client(protocol) + + req = ttypes.TGetOperationStatusReq( + operationHandle=self.operation_handle, + getProgressUpdate=False, + ) + + EXPECTED_RETRIES = 2 + + thrift_backend = ThriftBackend( + "foobar", + 443, + "path", [], + auth_provider=AuthProvider(), + _retry_stop_after_attempts_count=EXPECTED_RETRIES, + _retry_delay_default=1) + + + with self.assertRaises(RequestError) as cm: + thrift_backend.make_request(client.GetOperationStatus, req) + + + self.assertEqual(NoRetryReason.OUT_OF_ATTEMPTS.value, cm.exception.context["no-retry-reason"]) + self.assertEqual(f'{EXPECTED_RETRIES}/{EXPECTED_RETRIES}', cm.exception.context["attempt"]) + + + @patch("thrift.transport.THttpClient.THttpClient") def test_make_request_wont_retry_if_headers_not_present(self, t_transport_class): From 008161035270dbf4a490bcfb521bd1fe1b360373 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Tue, 13 Jun 2023 14:00:21 -0500 Subject: [PATCH 2/2] Black the code Signed-off-by: Jesse Whitehouse --- src/databricks/sql/thrift_backend.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index d5eec9749..c17da8778 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -326,14 +326,16 @@ def attempt_request(attempt): logger.debug("Received response: {}".format(response)) return response - + except urllib3.exceptions.HTTPError as err: # retry on timeout. Happens a lot in Azure and it is safe as data has not been sent to server yet - + gos_name = TCLIServiceClient.GetOperationStatus.__name__ if method.__name__ == gos_name: retry_delay = bound_retry_delay(attempt, self._retry_delay_default) - logger.info(f"GetOperationStatus failed with HTTP error and will be retried: {str(err)}") + logger.info( + f"GetOperationStatus failed with HTTP error and will be retried: {str(err)}" + ) else: raise err except OSError as err: