From ff00eea86f429d3126d7fe27499cbcbeab2e1c35 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Wed, 24 Jan 2024 00:39:46 +0000 Subject: [PATCH 1/3] feat: Improve error message for drive based BQ table reads --- bigframes/session/__init__.py | 48 ++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 9e1e6b560a..ffa1ccc0e1 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -249,27 +249,33 @@ def read_gbq( query_or_table = self._filters_to_query(query_or_table, columns, filters) - if _is_query(query_or_table): - return self._read_gbq_query( - query_or_table, - index_col=index_col, - columns=columns, - max_results=max_results, - api_name="read_gbq", - use_cache=use_cache, - ) - else: - # TODO(swast): Query the snapshot table but mark it as a - # deterministic query so we can avoid serializing if we have a - # unique index. - return self._read_gbq_table( - query_or_table, - index_col=index_col, - columns=columns, - max_results=max_results, - api_name="read_gbq", - use_cache=use_cache, - ) + try: + + if _is_query(query_or_table): + return self._read_gbq_query( + query_or_table, + index_col=index_col, + columns=columns, + max_results=max_results, + api_name="read_gbq", + use_cache=use_cache, + ) + else: + # TODO(swast): Query the snapshot table but mark it as a + # deterministic query so we can avoid serializing if we have a + # unique index. + return self._read_gbq_table( + query_or_table, + index_col=index_col, + columns=columns, + max_results=max_results, + api_name="read_gbq", + use_cache=use_cache, + ) + except google.api_core.exceptions.Forbidden as ex: + if "Permission denied while getting Drive credentials." in ex.message: + ex.message += "\nCheck https://cloud.google.com/bigquery/docs/query-drive-data#Google_Drive_permissions." + raise def _filters_to_query(self, query_or_table, columns, filters): """Convert filters to query""" From 535f2afd6e8c897177c3783d3eff9105e2470b41 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 25 Jan 2024 20:37:57 +0000 Subject: [PATCH 2/3] move exception handling deeper to apply to read_gbq* --- bigframes/session/__init__.py | 69 +++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index ffa1ccc0e1..7c60b04218 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -249,33 +249,27 @@ def read_gbq( query_or_table = self._filters_to_query(query_or_table, columns, filters) - try: - - if _is_query(query_or_table): - return self._read_gbq_query( - query_or_table, - index_col=index_col, - columns=columns, - max_results=max_results, - api_name="read_gbq", - use_cache=use_cache, - ) - else: - # TODO(swast): Query the snapshot table but mark it as a - # deterministic query so we can avoid serializing if we have a - # unique index. - return self._read_gbq_table( - query_or_table, - index_col=index_col, - columns=columns, - max_results=max_results, - api_name="read_gbq", - use_cache=use_cache, - ) - except google.api_core.exceptions.Forbidden as ex: - if "Permission denied while getting Drive credentials." in ex.message: - ex.message += "\nCheck https://cloud.google.com/bigquery/docs/query-drive-data#Google_Drive_permissions." - raise + if _is_query(query_or_table): + return self._read_gbq_query( + query_or_table, + index_col=index_col, + columns=columns, + max_results=max_results, + api_name="read_gbq", + use_cache=use_cache, + ) + else: + # TODO(swast): Query the snapshot table but mark it as a + # deterministic query so we can avoid serializing if we have a + # unique index. + return self._read_gbq_table( + query_or_table, + index_col=index_col, + columns=columns, + max_results=max_results, + api_name="read_gbq", + use_cache=use_cache, + ) def _filters_to_query(self, query_or_table, columns, filters): """Convert filters to query""" @@ -603,9 +597,16 @@ def _get_snapshot_sql_and_primary_key( ).result() )[0][0] self._df_snapshot[table_ref] = snapshot_timestamp - table_expression = self.ibis_client.sql( - bigframes_io.create_snapshot_sql(table_ref, snapshot_timestamp) - ) + + try: + table_expression = self.ibis_client.sql( + bigframes_io.create_snapshot_sql(table_ref, snapshot_timestamp) + ) + except google.api_core.exceptions.Forbidden as ex: + if "Drive credentials." in ex.message: + ex.message += "\nCheck https://cloud.google.com/bigquery/docs/query-drive-data#Google_Drive_permissions." + raise + return table_expression, primary_keys def _read_gbq_table( @@ -1457,7 +1458,13 @@ def _start_query( job_config.labels = bigframes_io.create_job_configs_labels( job_configs_labels=job_config.labels, api_methods=api_methods ) - query_job = self.bqclient.query(sql, job_config=job_config) + + try: + query_job = self.bqclient.query(sql, job_config=job_config) + except google.api_core.exceptions.Forbidden as ex: + if "Drive credentials." in ex.message: + ex.message += "\nCheck https://cloud.google.com/bigquery/docs/query-drive-data#Google_Drive_permissions." + raise opts = bigframes.options.display if opts.progress_bar is not None and not query_job.configuration.dry_run: From ccebd059b56543b503cdf884937e965209074c81 Mon Sep 17 00:00:00 2001 From: Shobhit Singh Date: Thu, 25 Jan 2024 22:38:31 +0000 Subject: [PATCH 3/3] add unit tests --- bigframes/session/__init__.py | 4 ++-- tests/unit/session/test_session.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 7c60b04218..8275048e00 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -603,7 +603,7 @@ def _get_snapshot_sql_and_primary_key( bigframes_io.create_snapshot_sql(table_ref, snapshot_timestamp) ) except google.api_core.exceptions.Forbidden as ex: - if "Drive credentials." in ex.message: + if "Drive credentials" in ex.message: ex.message += "\nCheck https://cloud.google.com/bigquery/docs/query-drive-data#Google_Drive_permissions." raise @@ -1462,7 +1462,7 @@ def _start_query( try: query_job = self.bqclient.query(sql, job_config=job_config) except google.api_core.exceptions.Forbidden as ex: - if "Drive credentials." in ex.message: + if "Drive credentials" in ex.message: ex.message += "\nCheck https://cloud.google.com/bigquery/docs/query-drive-data#Google_Drive_permissions." raise diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py index d38a393f27..5fc8996993 100644 --- a/tests/unit/session/test_session.py +++ b/tests/unit/session/test_session.py @@ -35,7 +35,7 @@ def test_read_gbq_missing_parts(missing_parts_table_id): "not_found_table_id", [("unknown.dataset.table"), ("project.unknown.table"), ("project.dataset.unknown")], ) -def test_read_gdb_not_found_tables(not_found_table_id): +def test_read_gbq_not_found_tables(not_found_table_id): bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True) bqclient.project = "test-project" bqclient.get_table.side_effect = google.api_core.exceptions.NotFound( @@ -47,6 +47,34 @@ def test_read_gdb_not_found_tables(not_found_table_id): session.read_gbq(not_found_table_id) +@pytest.mark.parametrize( + ("api_name", "query_or_table"), + [ + ("read_gbq", "project.dataset.table"), + ("read_gbq_table", "project.dataset.table"), + ("read_gbq", "SELECT * FROM project.dataset.table"), + ("read_gbq_query", "SELECT * FROM project.dataset.table"), + ], + ids=[ + "read_gbq_on_table", + "read_gbq_table", + "read_gbq_on_query", + "read_gbq_query", + ], +) +def test_read_gbq_external_table_no_drive_access(api_name, query_or_table): + bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True) + bqclient.project = "test-project" + bqclient.get_table.side_effect = google.api_core.exceptions.Forbidden( + "Access Denied: BigQuery BigQuery: Permission denied while getting Drive credentials." + ) + session = resources.create_bigquery_session(bqclient=bqclient) + + api = getattr(session, api_name) + with pytest.raises(google.api_core.exceptions.Forbidden): + api(query_or_table) + + @mock.patch.dict(os.environ, {}, clear=True) def test_session_init_fails_with_no_project(): with pytest.raises(