From 8b841c702f9b77e4bba68f8777c9d9cb141513de Mon Sep 17 00:00:00 2001 From: Jothi Prakash Date: Tue, 24 Jun 2025 10:31:17 +0530 Subject: [PATCH 1/5] Updated tests (#614) --- tests/e2e/common/large_queries_mixin.py | 6 +++--- tests/e2e/common/staging_ingestion_tests.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/e2e/common/large_queries_mixin.py b/tests/e2e/common/large_queries_mixin.py index ed8ac4574..1181ef154 100644 --- a/tests/e2e/common/large_queries_mixin.py +++ b/tests/e2e/common/large_queries_mixin.py @@ -83,11 +83,11 @@ def test_query_with_large_narrow_result_set(self): assert row[0] == row_id def test_long_running_query(self): - """Incrementally increase query size until it takes at least 5 minutes, + """Incrementally increase query size until it takes at least 3 minutes, and asserts that the query completes successfully. """ minutes = 60 - min_duration = 5 * minutes + min_duration = 3 * minutes duration = -1 scale0 = 10000 @@ -113,5 +113,5 @@ def test_long_running_query(self): duration = time.time() - start current_fraction = duration / min_duration print("Took {} s with scale factor={}".format(duration, scale_factor)) - # Extrapolate linearly to reach 5 min and add 50% padding to push over the limit + # Extrapolate linearly to reach 3 min and add 50% padding to push over the limit scale_factor = math.ceil(1.5 * scale_factor / current_fraction) diff --git a/tests/e2e/common/staging_ingestion_tests.py b/tests/e2e/common/staging_ingestion_tests.py index 008055e33..825f830f3 100644 --- a/tests/e2e/common/staging_ingestion_tests.py +++ b/tests/e2e/common/staging_ingestion_tests.py @@ -46,7 +46,7 @@ def test_staging_ingestion_life_cycle(self, ingestion_user): ) as conn: cursor = conn.cursor() - query = f"PUT '{temp_path}' INTO 'stage://tmp/{ingestion_user}/tmp/11/15/file1.csv' OVERWRITE" + query = f"PUT '{temp_path}' INTO 'stage://tmp/{ingestion_user}/tmp/11/16/file1.csv' OVERWRITE" cursor.execute(query) # GET should succeed @@ -57,7 +57,7 @@ def test_staging_ingestion_life_cycle(self, ingestion_user): extra_params={"staging_allowed_local_path": new_temp_path} ) as conn: cursor = conn.cursor() - query = f"GET 'stage://tmp/{ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'" + query = f"GET 'stage://tmp/{ingestion_user}/tmp/11/16/file1.csv' TO '{new_temp_path}'" cursor.execute(query) with open(new_fh, "rb") as fp: @@ -67,7 +67,7 @@ def test_staging_ingestion_life_cycle(self, ingestion_user): # REMOVE should succeed - remove_query = f"REMOVE 'stage://tmp/{ingestion_user}/tmp/11/15/file1.csv'" + remove_query = f"REMOVE 'stage://tmp/{ingestion_user}/tmp/11/16/file1.csv'" with self.connection(extra_params={"staging_allowed_local_path": "/"}) as conn: cursor = conn.cursor() @@ -79,7 +79,7 @@ def test_staging_ingestion_life_cycle(self, ingestion_user): Error, match="Staging operation over HTTP was unsuccessful: 404" ): cursor = conn.cursor() - query = f"GET 'stage://tmp/{ingestion_user}/tmp/11/15/file1.csv' TO '{new_temp_path}'" + query = f"GET 'stage://tmp/{ingestion_user}/tmp/11/16/file1.csv' TO '{new_temp_path}'" cursor.execute(query) os.remove(temp_path) From 4cebc366aead43bede8ad78942dfc41eaa5b9505 Mon Sep 17 00:00:00 2001 From: Vikrant Puppala Date: Tue, 24 Jun 2025 11:34:52 +0530 Subject: [PATCH 2/5] Add test to check thrift field IDs (#602) * Add test to check thrift field IDs --------- Signed-off-by: Vikrant Puppala --- tests/unit/test_thrift_field_ids.py | 97 +++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 tests/unit/test_thrift_field_ids.py diff --git a/tests/unit/test_thrift_field_ids.py b/tests/unit/test_thrift_field_ids.py new file mode 100644 index 000000000..d4cd8168d --- /dev/null +++ b/tests/unit/test_thrift_field_ids.py @@ -0,0 +1,97 @@ +import inspect +import pytest + +from databricks.sql.thrift_api.TCLIService import ttypes + + +class TestThriftFieldIds: + """ + Unit test to validate that all Thrift-generated field IDs comply with the maximum limit. + + Field IDs in Thrift must stay below 3329 to avoid conflicts with reserved ranges + and ensure compatibility with various Thrift implementations and protocols. + """ + + MAX_ALLOWED_FIELD_ID = 3329 + + # Known exceptions that exceed the field ID limit + KNOWN_EXCEPTIONS = { + ('TExecuteStatementReq', 'enforceEmbeddedSchemaCorrectness'): 3353, + ('TSessionHandle', 'serverProtocolVersion'): 3329, + } + + def test_all_thrift_field_ids_are_within_allowed_range(self): + """ + Validates that all field IDs in Thrift-generated classes are within the allowed range. + + This test prevents field ID conflicts and ensures compatibility with different + Thrift implementations and protocols. + """ + violations = [] + + # Get all classes from the ttypes module + for name, obj in inspect.getmembers(ttypes): + if (inspect.isclass(obj) and + hasattr(obj, 'thrift_spec') and + obj.thrift_spec is not None): + + self._check_class_field_ids(obj, name, violations) + + if violations: + error_message = self._build_error_message(violations) + pytest.fail(error_message) + + def _check_class_field_ids(self, cls, class_name, violations): + """ + Checks all field IDs in a Thrift class and reports violations. + + Args: + cls: The Thrift class to check + class_name: Name of the class for error reporting + violations: List to append violation messages to + """ + thrift_spec = cls.thrift_spec + + if not isinstance(thrift_spec, (tuple, list)): + return + + for spec_entry in thrift_spec: + if spec_entry is None: + continue + + # Thrift spec format: (field_id, field_type, field_name, ...) + if isinstance(spec_entry, (tuple, list)) and len(spec_entry) >= 3: + field_id = spec_entry[0] + field_name = spec_entry[2] + + # Skip known exceptions + if (class_name, field_name) in self.KNOWN_EXCEPTIONS: + continue + + if isinstance(field_id, int) and field_id >= self.MAX_ALLOWED_FIELD_ID: + violations.append( + "{} field '{}' has field ID {} (exceeds maximum of {})".format( + class_name, field_name, field_id, self.MAX_ALLOWED_FIELD_ID - 1 + ) + ) + + def _build_error_message(self, violations): + """ + Builds a comprehensive error message for field ID violations. + + Args: + violations: List of violation messages + + Returns: + Formatted error message + """ + error_message = ( + "Found Thrift field IDs that exceed the maximum allowed value of {}.\n" + "This can cause compatibility issues and conflicts with reserved ID ranges.\n" + "Violations found:\n".format(self.MAX_ALLOWED_FIELD_ID - 1) + ) + + for violation in violations: + error_message += " - {}\n".format(violation) + + return error_message \ No newline at end of file From b3a6f55ab970d6d2a33dba2ebc68370a449241d7 Mon Sep 17 00:00:00 2001 From: Madhav Sainanee Date: Tue, 24 Jun 2025 14:22:01 +0530 Subject: [PATCH 3/5] =?UTF-8?q?Revert=20"Enhance=20Cursor=20close=20handli?= =?UTF-8?q?ng=20and=20context=20manager=20exception=20m=E2=80=A6=20(#613)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Revert "Enhance Cursor close handling and context manager exception management to prevent server side resource leaks (#554)" This reverts commit edfb283f932312e005d3749be30163c0e9982c73. * revert e2e --- src/databricks/sql/client.py | 33 +------- tests/e2e/test_driver.py | 142 +---------------------------------- tests/unit/test_client.py | 98 ------------------------ 3 files changed, 4 insertions(+), 269 deletions(-) diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py index 0c9a08a85..b81416e15 100755 --- a/src/databricks/sql/client.py +++ b/src/databricks/sql/client.py @@ -321,13 +321,7 @@ def __enter__(self) -> "Connection": return self def __exit__(self, exc_type, exc_value, traceback): - try: - self.close() - except BaseException as e: - logger.warning(f"Exception during connection close in __exit__: {e}") - if exc_type is None: - raise - return False + self.close() def __del__(self): if self.open: @@ -468,14 +462,7 @@ def __enter__(self) -> "Cursor": return self def __exit__(self, exc_type, exc_value, traceback): - try: - logger.debug("Cursor context manager exiting, calling close()") - self.close() - except BaseException as e: - logger.warning(f"Exception during cursor close in __exit__: {e}") - if exc_type is None: - raise - return False + self.close() def __iter__(self): if self.active_result_set: @@ -1185,21 +1172,7 @@ def cancel(self) -> None: def close(self) -> None: """Close cursor""" self.open = False - - # Close active operation handle if it exists - if self.active_op_handle: - try: - self.thrift_backend.close_command(self.active_op_handle) - except RequestError as e: - if isinstance(e.args[1], CursorAlreadyClosedError): - logger.info("Operation was canceled by a prior request") - else: - logging.warning(f"Error closing operation handle: {e}") - except Exception as e: - logging.warning(f"Error closing operation handle: {e}") - finally: - self.active_op_handle = None - + self.active_op_handle = None if self.active_result_set: self._close_and_clear_active_result_set() diff --git a/tests/e2e/test_driver.py b/tests/e2e/test_driver.py index d0c721109..042fcc10a 100644 --- a/tests/e2e/test_driver.py +++ b/tests/e2e/test_driver.py @@ -50,7 +50,7 @@ from tests.e2e.common.uc_volume_tests import PySQLUCVolumeTestSuiteMixin -from databricks.sql.exc import SessionAlreadyClosedError, CursorAlreadyClosedError +from databricks.sql.exc import SessionAlreadyClosedError log = logging.getLogger(__name__) @@ -808,146 +808,6 @@ def test_catalogs_returns_arrow_table(self): results = cursor.fetchall_arrow() assert isinstance(results, pyarrow.Table) - def test_close_connection_closes_cursors(self): - - from databricks.sql.thrift_api.TCLIService import ttypes - - with self.connection() as conn: - cursor = conn.cursor() - cursor.execute( - "SELECT id, id `id2`, id `id3` FROM RANGE(1000000) order by RANDOM()" - ) - ars = cursor.active_result_set - - # We must manually run this check because thrift_backend always forces `has_been_closed_server_side` to True - # Cursor op state should be open before connection is closed - status_request = ttypes.TGetOperationStatusReq( - operationHandle=ars.command_id, getProgressUpdate=False - ) - op_status_at_server = ars.thrift_backend._client.GetOperationStatus( - status_request - ) - assert ( - op_status_at_server.operationState - != ttypes.TOperationState.CLOSED_STATE - ) - - conn.close() - - # When connection closes, any cursor operations should no longer exist at the server - with pytest.raises(SessionAlreadyClosedError) as cm: - op_status_at_server = ars.thrift_backend._client.GetOperationStatus( - status_request - ) - - def test_closing_a_closed_connection_doesnt_fail(self, caplog): - caplog.set_level(logging.DEBUG) - # Second .close() call is when this context manager exits - with self.connection() as conn: - # First .close() call is explicit here - conn.close() - assert "Session appears to have been closed already" in caplog.text - - conn = None - try: - with pytest.raises(KeyboardInterrupt): - with self.connection() as c: - conn = c - raise KeyboardInterrupt("Simulated interrupt") - finally: - if conn is not None: - assert ( - not conn.open - ), "Connection should be closed after KeyboardInterrupt" - - def test_cursor_close_properly_closes_operation(self): - """Test that Cursor.close() properly closes the active operation handle on the server.""" - with self.connection() as conn: - cursor = conn.cursor() - try: - cursor.execute("SELECT 1 AS test") - assert cursor.active_op_handle is not None - cursor.close() - assert cursor.active_op_handle is None - assert not cursor.open - finally: - if cursor.open: - cursor.close() - - conn = None - cursor = None - try: - with self.connection() as c: - conn = c - with pytest.raises(KeyboardInterrupt): - with conn.cursor() as cur: - cursor = cur - raise KeyboardInterrupt("Simulated interrupt") - finally: - if cursor is not None: - assert ( - not cursor.open - ), "Cursor should be closed after KeyboardInterrupt" - - def test_nested_cursor_context_managers(self): - """Test that nested cursor context managers properly close operations on the server.""" - with self.connection() as conn: - with conn.cursor() as cursor1: - cursor1.execute("SELECT 1 AS test1") - assert cursor1.active_op_handle is not None - - with conn.cursor() as cursor2: - cursor2.execute("SELECT 2 AS test2") - assert cursor2.active_op_handle is not None - - # After inner context manager exit, cursor2 should be not open - assert not cursor2.open - assert cursor2.active_op_handle is None - - # After outer context manager exit, cursor1 should be not open - assert not cursor1.open - assert cursor1.active_op_handle is None - - def test_cursor_error_handling(self): - """Test that cursor close handles errors properly to prevent orphaned operations.""" - with self.connection() as conn: - cursor = conn.cursor() - - cursor.execute("SELECT 1 AS test") - - op_handle = cursor.active_op_handle - - assert op_handle is not None - - # Manually close the operation to simulate server-side closure - conn.thrift_backend.close_command(op_handle) - - cursor.close() - - assert not cursor.open - - def test_result_set_close(self): - """Test that ResultSet.close() properly closes operations on the server and handles state correctly.""" - with self.connection() as conn: - cursor = conn.cursor() - try: - cursor.execute("SELECT * FROM RANGE(10)") - - result_set = cursor.active_result_set - assert result_set is not None - - initial_op_state = result_set.op_state - - result_set.close() - - assert result_set.op_state == result_set.thrift_backend.CLOSED_OP_STATE - assert result_set.op_state != initial_op_state - - # Closing the result set again should be a no-op and not raise exceptions - result_set.close() - finally: - cursor.close() - # use a RetrySuite to encapsulate these tests which we'll typically want to run together; however keep # the 429/503 subsuites separate since they execute under different circumstances. diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 588b0d70e..91e426c64 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -21,7 +21,6 @@ import databricks.sql import databricks.sql.client as client from databricks.sql import InterfaceError, DatabaseError, Error, NotSupportedError -from databricks.sql.exc import RequestError, CursorAlreadyClosedError from databricks.sql.types import Row from databricks.sql.utils import ExecuteResponse @@ -342,15 +341,6 @@ def test_context_manager_closes_cursor(self): cursor.close = mock_close mock_close.assert_called_once_with() - cursor = client.Cursor(Mock(), Mock()) - cursor.close = Mock() - try: - with self.assertRaises(KeyboardInterrupt): - with cursor: - raise KeyboardInterrupt("Simulated interrupt") - finally: - cursor.close.assert_called() - @patch("%s.client.ThriftBackend" % PACKAGE_NAME) def test_context_manager_closes_connection(self, mock_client_class): instance = mock_client_class.return_value @@ -366,15 +356,6 @@ def test_context_manager_closes_connection(self, mock_client_class): close_session_id = instance.close_session.call_args[0][0].sessionId self.assertEqual(close_session_id, b"\x22") - connection = databricks.sql.connect(**self.DUMMY_CONNECTION_ARGS) - connection.close = Mock() - try: - with self.assertRaises(KeyboardInterrupt): - with connection: - raise KeyboardInterrupt("Simulated interrupt") - finally: - connection.close.assert_called() - def dict_product(self, dicts): """ Generate cartesion product of values in input dictionary, outputting a dictionary @@ -753,42 +734,6 @@ def test_access_current_query_id(self): cursor.close() self.assertIsNone(cursor.query_id) - def test_cursor_close_handles_exception(self): - """Test that Cursor.close() handles exceptions from close_command properly.""" - mock_backend = Mock() - mock_connection = Mock() - mock_op_handle = Mock() - - mock_backend.close_command.side_effect = Exception("Test error") - - cursor = client.Cursor(mock_connection, mock_backend) - cursor.active_op_handle = mock_op_handle - - cursor.close() - - mock_backend.close_command.assert_called_once_with(mock_op_handle) - - self.assertIsNone(cursor.active_op_handle) - - self.assertFalse(cursor.open) - - def test_cursor_context_manager_handles_exit_exception(self): - """Test that cursor's context manager handles exceptions during __exit__.""" - mock_backend = Mock() - mock_connection = Mock() - - cursor = client.Cursor(mock_connection, mock_backend) - original_close = cursor.close - cursor.close = Mock(side_effect=Exception("Test error during close")) - - try: - with cursor: - raise ValueError("Test error inside context") - except ValueError: - pass - - cursor.close.assert_called_once() - def test_connection_close_handles_cursor_close_exception(self): """Test that _close handles exceptions from cursor.close() properly.""" cursors_closed = [] @@ -824,49 +769,6 @@ def mock_close_normal(): cursors_closed, [1, 2], "Both cursors should have close called" ) - def test_resultset_close_handles_cursor_already_closed_error(self): - """Test that ResultSet.close() handles CursorAlreadyClosedError properly.""" - result_set = client.ResultSet.__new__(client.ResultSet) - result_set.thrift_backend = Mock() - result_set.thrift_backend.CLOSED_OP_STATE = "CLOSED" - result_set.connection = Mock() - result_set.connection.open = True - result_set.op_state = "RUNNING" - result_set.has_been_closed_server_side = False - result_set.command_id = Mock() - - class MockRequestError(Exception): - def __init__(self): - self.args = ["Error message", CursorAlreadyClosedError()] - - result_set.thrift_backend.close_command.side_effect = MockRequestError() - - original_close = client.ResultSet.close - try: - try: - if ( - result_set.op_state != result_set.thrift_backend.CLOSED_OP_STATE - and not result_set.has_been_closed_server_side - and result_set.connection.open - ): - result_set.thrift_backend.close_command(result_set.command_id) - except MockRequestError as e: - if isinstance(e.args[1], CursorAlreadyClosedError): - pass - finally: - result_set.has_been_closed_server_side = True - result_set.op_state = result_set.thrift_backend.CLOSED_OP_STATE - - result_set.thrift_backend.close_command.assert_called_once_with( - result_set.command_id - ) - - assert result_set.has_been_closed_server_side is True - - assert result_set.op_state == result_set.thrift_backend.CLOSED_OP_STATE - finally: - pass - if __name__ == "__main__": suite = unittest.TestLoader().loadTestsFromModule(sys.modules[__name__]) From 76963164ef3c0da091de9215d130521fde852812 Mon Sep 17 00:00:00 2001 From: Madhav Sainanee Date: Tue, 24 Jun 2025 23:09:04 +0530 Subject: [PATCH 4/5] Bump version to 4.0.5 (#615) * Release version 4.0.5: Reverted cursor close handling changes to fix user errors. Updated version numbers in pyproject.toml and __init__.py. * Update CHANGELOG.md to include reference to issue databricks/databricks-sql-python#613 for cursor close handling fix. --- CHANGELOG.md | 3 +++ pyproject.toml | 2 +- src/databricks/sql/__init__.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a20cce4eb..0bd22e3ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Release History +# 4.0.5 (2025-06-24) +- Fix: Reverted change in cursor close handling which led to errors impacting users (databricks/databricks-sql-python#613 by @madhav-db) + # 4.0.4 (2025-06-16) - Update thrift client library after cleaning up unused fields and structs (databricks/databricks-sql-python#553 by @vikrantpuppala) diff --git a/pyproject.toml b/pyproject.toml index 19edb7211..54fd263a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "databricks-sql-connector" -version = "4.0.4" +version = "4.0.5" description = "Databricks SQL Connector for Python" authors = ["Databricks "] license = "Apache-2.0" diff --git a/src/databricks/sql/__init__.py b/src/databricks/sql/__init__.py index c930d746c..d3af2f5c8 100644 --- a/src/databricks/sql/__init__.py +++ b/src/databricks/sql/__init__.py @@ -68,7 +68,7 @@ def __repr__(self): DATE = DBAPITypeObject("date") ROWID = DBAPITypeObject() -__version__ = "4.0.4" +__version__ = "4.0.5" USER_AGENT_NAME = "PyDatabricksSqlConnector" # These two functions are pyhive legacy From 90f0ac1e6902eff5432f9bc037a830f1bbac9099 Mon Sep 17 00:00:00 2001 From: Jothi Prakash Date: Thu, 3 Jul 2025 13:38:58 +0530 Subject: [PATCH 5/5] Removed Codeowners (#623) nit --- .github/CODEOWNERS | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index 11d5aeb0a..000000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1,5 +0,0 @@ -# These owners will be the default owners for everything in -# the repo. Unless a later match takes precedence, these -# users will be requested for review when someone opens a -# pull request. -* @deeksha-db @samikshya-db @jprakash-db @jackyhu-db @madhav-db @gopalldb @jayantsing-db @vikrantpuppala @shivam2680