diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 8ea81e12b..aefed1ef0 100755 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -808,6 +808,7 @@ def execute( self.thrift_backend, self.buffer_size_bytes, self.arraysize, + self.connection.use_cloud_fetch, ) if execute_response.is_staging_operation: @@ -1202,6 +1203,7 @@ def __init__( thrift_backend: ThriftBackend, result_buffer_size_bytes: int = DEFAULT_RESULT_BUFFER_SIZE_BYTES, arraysize: int = 10000, + use_cloud_fetch: bool = True, ): """ A ResultSet manages the results of a single command. @@ -1223,6 +1225,7 @@ def __init__( self.description = execute_response.description self._arrow_schema_bytes = execute_response.arrow_schema_bytes self._next_row_index = 0 + self._use_cloud_fetch = use_cloud_fetch if execute_response.arrow_queue: # In this case the server has taken the fast path and returned an initial batch of @@ -1250,6 +1253,7 @@ def _fill_results_buffer(self): lz4_compressed=self.lz4_compressed, arrow_schema_bytes=self._arrow_schema_bytes, description=self.description, + use_cloud_fetch=self._use_cloud_fetch, ) self.results = results self.has_more_rows = has_more_rows diff --git a/src/databricks/sql/thrift_backend.py b/src/databricks/sql/thrift_backend.py index 8c212c554..5fbd9f749 100644 --- a/src/databricks/sql/thrift_backend.py +++ b/src/databricks/sql/thrift_backend.py @@ -321,7 +321,7 @@ def _handle_request_error(self, error_info, attempt, elapsed): # FUTURE: Consider moving to https://github.com/litl/backoff or # https://github.com/jd/tenacity for retry logic. - def make_request(self, method, request): + def make_request(self, method, request, retryable=True): """Execute given request, attempting retries when 1. Receiving HTTP 429/503 from server 2. 
OSError is raised during a GetOperationStatus @@ -460,7 +460,7 @@ def attempt_request(attempt): # return on success # if available: bounded delay and retry # if not: raise error - max_attempts = self._retry_stop_after_attempts_count + max_attempts = self._retry_stop_after_attempts_count if retryable else 1 # use index-1 counting for logging/human consistency for attempt in range(1, max_attempts + 1): @@ -1028,6 +1028,7 @@ def fetch_results( lz4_compressed, arrow_schema_bytes, description, + use_cloud_fetch=True, ): assert op_handle is not None @@ -1044,10 +1045,11 @@ def fetch_results( includeResultSetMetadata=True, ) - resp = self.make_request(self._client.FetchResults, req) + # FetchResults in inline mode with FETCH_NEXT orientation is not idempotent, so the request is retried only when use_cloud_fetch is true (it is passed as make_request's retryable flag) + resp = self.make_request(self._client.FetchResults, req, use_cloud_fetch) if resp.results.startRowOffset > expected_row_start_offset: - logger.warning( - "Expected results to start from {} but they instead start at {}".format( + raise DataError( + "fetch_results failed due to inconsistency in the state between the client and the server. Expected results to start from {} but they instead start at {}, some result batches must have been skipped".format( expected_row_start_offset, resp.results.startRowOffset ) ) diff --git a/tests/unit/test_fetches.py b/tests/unit/test_fetches.py index e9a58acdd..89cedcfae 100644 --- a/tests/unit/test_fetches.py +++ b/tests/unit/test_fetches.py @@ -66,6 +66,7 @@ def fetch_results( lz4_compressed, arrow_schema_bytes, description, + use_cloud_fetch=True, ): nonlocal batch_index results = FetchTests.make_arrow_queue(batch_list[batch_index])