From 0f4de8190de82cc723fca1f16ca28c78a06aef5f Mon Sep 17 00:00:00 2001
From: Ashley Xu
Date: Fri, 3 Nov 2023 22:51:27 +0000
Subject: [PATCH 1/5] fix: use random table when loading data for read_csv,
 read_json, read_parquet

---
 bigframes/session/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 12ee91a13a..368d7cac2e 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1026,7 +1026,7 @@ def read_csv(
         encoding: Optional[str] = None,
         **kwargs,
     ) -> dataframe.DataFrame:
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         if engine is not None and engine == "bigquery":
             if any(param is not None for param in (dtype, names)):
@@ -1140,7 +1140,7 @@ def read_parquet(
         # Note: "engine" is omitted because it is redundant. Loading a table
         # from a pandas DataFrame will just create another parquet file + load
         # job anyway.
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         job_config = bigquery.LoadJobConfig()
         job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED
@@ -1163,7 +1163,7 @@ def read_json(
         engine: Literal["ujson", "pyarrow", "bigquery"] = "ujson",
         **kwargs,
     ) -> dataframe.DataFrame:
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         if engine == "bigquery":
 
From b4687617facf8180b42c2dc9df20b03876791101 Mon Sep 17 00:00:00 2001
From: Ashley Xu
Date: Fri, 3 Nov 2023 22:51:27 +0000
Subject: [PATCH 2/5] fix: use random table when loading data for read_csv,
 read_json, read_parquet

---
 bigframes/session/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 12ee91a13a..3126e19ae0 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -790,7 +790,7 @@ def _read_gbq_with_ordering(
     def _read_bigquery_load_job(
         self,
         filepath_or_buffer: str | IO["bytes"],
-        table: bigquery.Table,
+        table: Union[bigquery.Table, bigquery.TableReference],
         *,
         job_config: bigquery.LoadJobConfig,
         index_col: Iterable[str] | str = (),
@@ -1026,7 +1026,7 @@ def read_csv(
         encoding: Optional[str] = None,
         **kwargs,
     ) -> dataframe.DataFrame:
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         if engine is not None and engine == "bigquery":
             if any(param is not None for param in (dtype, names)):
@@ -1140,7 +1140,7 @@ def read_parquet(
         # Note: "engine" is omitted because it is redundant. Loading a table
         # from a pandas DataFrame will just create another parquet file + load
         # job anyway.
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         job_config = bigquery.LoadJobConfig()
         job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED
@@ -1163,7 +1163,7 @@ def read_json(
         engine: Literal["ujson", "pyarrow", "bigquery"] = "ujson",
         **kwargs,
     ) -> dataframe.DataFrame:
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         if engine == "bigquery":
 
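Patches 1 and 2 point the `read_csv`, `read_json`, and `read_parquet` load
jobs at `bigframes_io.random_table(self._anonymous_dataset)` instead of a
`_SESSION` table. The helper itself is (re)introduced in patch 4 below; as a
minimal sketch of what it produces, assuming a hypothetical project and
anonymous dataset ID:

    import datetime
    import uuid

    import google.cloud.bigquery as bigquery

    TEMP_TABLE_PREFIX = "bqdf{date}_{random_id}"

    def random_table(dataset: bigquery.DatasetReference) -> bigquery.TableReference:
        # Builds an ID like "bqdf20231103_<32 hex chars>". This only creates a
        # reference; nothing exists server-side until a load job targets it.
        now = datetime.datetime.now(datetime.timezone.utc)
        table_id = TEMP_TABLE_PREFIX.format(
            date=now.strftime("%Y%m%d"), random_id=uuid.uuid4().hex
        )
        return dataset.table(table_id)

    ref = random_table(bigquery.DatasetReference("my-project", "_anon0123"))
    print(f"{ref.project}.{ref.dataset_id}.{ref.table_id}")
    # my-project._anon0123.bqdf20231103_...
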
From dfcc2d3f6918785a3048681c61e0f5f6c99d9d95 Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 3 Nov 2023 21:44:23 -0500
Subject: [PATCH 3/5] Revert "fix: use table clone instead of system time for
 `read_gbq_table` (#109)" (#171)

This reverts commit 031f253890f8a212309097554c3462980654201d.
---
 bigframes/constants.py                 |  2 +-
 bigframes/dataframe.py                 |  4 +-
 bigframes/session/__init__.py          | 59 ++++++++++-------
 bigframes/session/_io/bigquery.py      | 89 ++++++--------------------
 tests/system/small/test_session.py     |  3 +
 tests/unit/session/test_io_bigquery.py | 64 +++++++-----------
 6 files changed, 84 insertions(+), 137 deletions(-)

diff --git a/bigframes/constants.py b/bigframes/constants.py
index a1ffd2b755..82b48dc967 100644
--- a/bigframes/constants.py
+++ b/bigframes/constants.py
@@ -26,4 +26,4 @@
 ABSTRACT_METHOD_ERROR_MESSAGE = f"Abstract method. You have likely encountered a bug. Please share this stacktrace and how you reached it with the BigQuery DataFrames team. {FEEDBACK_LINK}"
 
-DEFAULT_EXPIRATION = datetime.timedelta(days=7)
+DEFAULT_EXPIRATION = datetime.timedelta(days=1)
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 40f12671ae..04a5456e26 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -16,7 +16,6 @@
 
 from __future__ import annotations
 
-import datetime
 import re
 import textwrap
 import typing
@@ -2328,8 +2327,7 @@ def to_gbq(
             self._session.bqclient,
             self._session._anonymous_dataset,
             # TODO(swast): allow custom expiration times, probably via session configuration.
-            datetime.datetime.now(datetime.timezone.utc)
-            + constants.DEFAULT_EXPIRATION,
+            constants.DEFAULT_EXPIRATION,
         )
 
         if if_exists is not None and if_exists != "replace":
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 12ee91a13a..a1eae69715 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -16,7 +16,6 @@
 
 from __future__ import annotations
 
-import datetime
 import logging
 import os
 import re
@@ -431,9 +430,7 @@ def _read_gbq_query(
             index_cols = list(index_col)
 
         destination, query_job = self._query_to_destination(
-            query,
-            index_cols,
-            api_name=api_name,
+            query, index_cols, api_name="read_gbq_query"
         )
 
         # If there was no destination table, that means the query must have
@@ -511,12 +508,6 @@ def _read_gbq_table_to_ibis_with_total_ordering(
         If we can get a total ordering from the table, such as via primary key
         column(s), then return those too so that ordering generation can be
         avoided.
-
-        For tables that aren't already read-only, this creates a table
-        clone so that any changes to the underlying table don't affect the
-        DataFrame and break our assumptions, especially with regards to unique
-        index and ordering. See:
-        https://cloud.google.com/bigquery/docs/table-clones-create
         """
         if table_ref.dataset_id.upper() == "_SESSION":
            # _SESSION tables aren't supported by the tables.get REST API.
@@ -527,24 +518,15 @@ def _read_gbq_table_to_ibis_with_total_ordering(
                 None,
             )
 
-        now = datetime.datetime.now(datetime.timezone.utc)
-        destination = bigframes_io.create_table_clone(
-            table_ref,
-            self._anonymous_dataset,
-            # TODO(swast): Allow the default expiration to be configured.
-            now + constants.DEFAULT_EXPIRATION,
-            self,
-            api_name,
-        )
         table_expression = self.ibis_client.table(
-            destination.table_id,
-            database=f"{destination.project}.{destination.dataset_id}",
+            table_ref.table_id,
+            database=f"{table_ref.project}.{table_ref.dataset_id}",
         )
 
         # If there are primary keys defined, the query engine assumes these
         # columns are unique, even if the constraint is not enforced. We make
         # the same assumption and use these columns as the total ordering keys.
-        table = self.bqclient.get_table(destination)
+        table = self.bqclient.get_table(table_ref)
 
         # TODO(b/305264153): Use public properties to fetch primary keys once
         # added to google-cloud-bigquery.
@@ -553,7 +535,23 @@ def _read_gbq_table_to_ibis_with_total_ordering(
             .get("primaryKey", {})
             .get("columns")
         )
-        return table_expression, primary_keys
+
+        if not primary_keys:
+            return table_expression, None
+        else:
+            # Read from a snapshot since we won't have to copy the table data to create a total ordering.
+            job_config = bigquery.QueryJobConfig()
+            job_config.labels["bigframes-api"] = api_name
+            current_timestamp = list(
+                self.bqclient.query(
+                    "SELECT CURRENT_TIMESTAMP() AS `current_timestamp`",
+                    job_config=job_config,
+                ).result()
+            )[0][0]
+            table_expression = self.ibis_client.sql(
+                bigframes_io.create_snapshot_sql(table_ref, current_timestamp)
+            )
+            return table_expression, primary_keys
 
     def _read_gbq_table(
         self,
@@ -664,7 +662,20 @@ def _read_gbq_table(
             total_ordering_columns=frozenset(index_cols),
         )
 
-        if not is_total_ordering:
+        # We have a total ordering, so query via "time travel" so that
+        # the underlying data doesn't mutate.
+        if is_total_ordering:
+            # Get the timestamp from the job metadata rather than the query
+            # text so that the query for determining uniqueness of the ID
+            # columns can be cached.
+            current_timestamp = query_job.started
+
+            # The job finished, so we should have a start time.
+            assert current_timestamp is not None
+            table_expression = self.ibis_client.sql(
+                bigframes_io.create_snapshot_sql(table_ref, current_timestamp)
+            )
+        else:
             # Make sure when we generate an ordering, the row_number()
             # corresponds to the index columns.
             table_expression = table_expression.order_by(index_cols)
diff --git a/bigframes/session/_io/bigquery.py b/bigframes/session/_io/bigquery.py
index fd3b1c59a7..d200a9a861 100644
--- a/bigframes/session/_io/bigquery.py
+++ b/bigframes/session/_io/bigquery.py
@@ -14,21 +14,14 @@
 
 """Private module: Helpers for I/O operations."""
 
-from __future__ import annotations
-
 import datetime
 import textwrap
 import types
-import typing
 from typing import Dict, Iterable, Union
 import uuid
 
 import google.cloud.bigquery as bigquery
 
-if typing.TYPE_CHECKING:
-    import bigframes.session
-
-
 IO_ORDERING_ID = "bqdf_row_nums"
 TEMP_TABLE_PREFIX = "bqdf{date}_{random_id}"
 
@@ -76,83 +69,43 @@ def create_export_data_statement(
     )
 
 
-def random_table(dataset: bigquery.DatasetReference) -> bigquery.TableReference:
-    """Generate a random table ID with BigQuery DataFrames prefix.
-
-    Args:
-        dataset (google.cloud.bigquery.DatasetReference):
-            The dataset to make the table reference in. Usually the anonymous
-            dataset for the session.
-
-    Returns:
-        google.cloud.bigquery.TableReference:
-            Fully qualified table ID of a table that doesn't exist.
-    """
-    now = datetime.datetime.now(datetime.timezone.utc)
-    random_id = uuid.uuid4().hex
-    table_id = TEMP_TABLE_PREFIX.format(
-        date=now.strftime("%Y%m%d"), random_id=random_id
-    )
-    return dataset.table(table_id)
-
-
-def table_ref_to_sql(table: bigquery.TableReference) -> str:
-    """Format a table reference as escaped SQL."""
-    return f"`{table.project}`.`{table.dataset_id}`.`{table.table_id}`"
+def create_snapshot_sql(
+    table_ref: bigquery.TableReference, current_timestamp: datetime.datetime
+) -> str:
+    """Query a table via 'time travel' for consistent reads."""
+    # If we have a _SESSION table, assume that it's already a copy. Nothing to do here.
+    if table_ref.dataset_id.upper() == "_SESSION":
+        return f"SELECT * FROM `_SESSION`.`{table_ref.table_id}`"
 
-
-def create_table_clone(
-    source: bigquery.TableReference,
-    dataset: bigquery.DatasetReference,
-    expiration: datetime.datetime,
-    session: bigframes.session.Session,
-    api_name: str,
-) -> bigquery.TableReference:
-    """Create a table clone for consistent reads."""
     # If we have an anonymous query results table, it can't be modified and
    # there isn't any BigQuery time travel.
-    if source.dataset_id.startswith("_"):
-        return source
-
-    fully_qualified_source_id = table_ref_to_sql(source)
-    destination = random_table(dataset)
-    fully_qualified_destination_id = table_ref_to_sql(destination)
-
-    # Include a label so that Dataplex Lineage can identify temporary
-    # tables that BigQuery DataFrames creates. Googlers: See internal issue
-    # 296779699.
-    ddl = textwrap.dedent(
+    if table_ref.dataset_id.startswith("_"):
+        return f"SELECT * FROM `{table_ref.project}`.`{table_ref.dataset_id}`.`{table_ref.table_id}`"
+
+    return textwrap.dedent(
         f"""
-        CREATE OR REPLACE TABLE
-        {fully_qualified_destination_id}
-        CLONE {fully_qualified_source_id}
-        OPTIONS(
-            expiration_timestamp=TIMESTAMP "{expiration.isoformat()}",
-            labels=[
-                ("source", "bigquery-dataframes-temp"),
-                ("bigframes-api", {repr(api_name)})
-            ]
-        )
+        SELECT *
+        FROM `{table_ref.project}`.`{table_ref.dataset_id}`.`{table_ref.table_id}`
+        FOR SYSTEM_TIME AS OF TIMESTAMP({repr(current_timestamp.isoformat())})
         """
     )
-    job_config = bigquery.QueryJobConfig()
-    job_config.labels = {
-        "source": "bigquery-dataframes-temp",
-        "bigframes-api": api_name,
-    }
-    session._start_query(ddl, job_config=job_config)
-    return destination
 
 
 def create_temp_table(
     bqclient: bigquery.Client,
     dataset: bigquery.DatasetReference,
-    expiration: datetime.datetime,
+    expiration: datetime.timedelta,
 ) -> str:
     """Create an empty table with an expiration in the desired dataset."""
-    table_ref = random_table(dataset)
+    now = datetime.datetime.now(datetime.timezone.utc)
+    random_id = uuid.uuid4().hex
+    table_id = TEMP_TABLE_PREFIX.format(
+        date=now.strftime("%Y%m%d"), random_id=random_id
+    )
+    table_ref = dataset.table(table_id)
     destination = bigquery.Table(table_ref)
-    destination.expires = expiration
+    destination.expires = now + expiration
     bqclient.create_table(destination)
     return f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}"
diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py
index 28486a1269..bf72e444eb 100644
--- a/tests/system/small/test_session.py
+++ b/tests/system/small/test_session.py
@@ -252,6 +252,9 @@
     sorted_result = result.sort_values(primary_keys)
     pd.testing.assert_frame_equal(result, sorted_result)
 
+    # 
Verify that we're working from a snapshot rather than a copy of the table.
+    assert "FOR SYSTEM_TIME AS OF TIMESTAMP" in df.sql
+
 
 @pytest.mark.parametrize(
     ("query_or_table", "max_results"),
diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py
index 7a8691232b..cb3003b1cc 100644
--- a/tests/unit/session/test_io_bigquery.py
+++ b/tests/unit/session/test_io_bigquery.py
@@ -19,63 +19,46 @@ import google.cloud.bigquery as bigquery
 import pytest
 
-import bigframes.session
 import bigframes.session._io.bigquery
 
 
-def test_create_table_clone_doesnt_clone_anonymous_datasets():
-    session = mock.create_autospec(bigframes.session.Session)
-    source = bigquery.TableReference.from_string(
+def test_create_snapshot_sql_doesnt_timetravel_anonymous_datasets():
+    table_ref = bigquery.TableReference.from_string(
         "my-test-project._e8166e0cdb.anonbb92cd"
     )
 
-    destination = bigframes.session._io.bigquery.create_table_clone(
-        source,
-        bigquery.DatasetReference("other-project", "other_dataset"),
-        datetime.datetime(2023, 11, 2, 15, 43, 21, tzinfo=datetime.timezone.utc),
-        session,
-        "test_api",
+    sql = bigframes.session._io.bigquery.create_snapshot_sql(
+        table_ref, datetime.datetime.now(datetime.timezone.utc)
     )
 
-    # Anonymous query results tables don't support CLONE
-    assert destination is source
-    session._start_query.assert_not_called()
+    # Anonymous query results tables don't support time travel.
+    assert "SYSTEM_TIME" not in sql
 
+    # Need fully-qualified table name.
+    assert "`my-test-project`.`_e8166e0cdb`.`anonbb92cd`" in sql
 
-def test_create_table_clone_sets_expiration():
-    session = mock.create_autospec(bigframes.session.Session)
-    source = bigquery.TableReference.from_string(
-        "my-test-project.test_dataset.some_table"
-    )
-    expiration = datetime.datetime(
-        2023, 11, 2, 15, 43, 21, tzinfo=datetime.timezone.utc
-    )
 
-    bigframes.session._io.bigquery.create_table_clone(
-        source,
-        bigquery.DatasetReference("other-project", "other_dataset"),
-        expiration,
-        session,
-        "test_api",
+def test_create_snapshot_sql_doesnt_timetravel_session_tables():
+    table_ref = bigquery.TableReference.from_string("my-test-project._session.abcdefg")
+
+    sql = bigframes.session._io.bigquery.create_snapshot_sql(
+        table_ref, datetime.datetime.now(datetime.timezone.utc)
     )
 
-    session._start_query.assert_called_once()
-    call_args = session._start_query.call_args
-    query = call_args.args[0]
-    assert "CREATE OR REPLACE TABLE" in query
-    assert "CLONE" in query
-    assert f'expiration_timestamp=TIMESTAMP "{expiration.isoformat()}"' in query
-    assert '("source", "bigquery-dataframes-temp")' in query
-    assert call_args.kwargs["job_config"].labels["bigframes-api"] == "test_api"
+    # We aren't modifying _SESSION tables, so don't use time travel.
+    assert "SYSTEM_TIME" not in sql
+
+    # Don't need the project ID for _SESSION tables.
+ assert "my-test-project" not in sql def test_create_temp_table_default_expiration(): """Make sure the created table has an expiration.""" bqclient = mock.create_autospec(bigquery.Client) dataset = bigquery.DatasetReference("test-project", "test_dataset") - expiration = datetime.datetime( - 2023, 11, 2, 13, 44, 55, 678901, datetime.timezone.utc - ) + now = datetime.datetime.now(datetime.timezone.utc) + expiration = datetime.timedelta(days=3) + expected_expires = now + expiration bigframes.session._io.bigquery.create_temp_table(bqclient, dataset, expiration) @@ -85,11 +68,10 @@ def test_create_temp_table_default_expiration(): assert table.project == "test-project" assert table.dataset_id == "test_dataset" assert table.table_id.startswith("bqdf") - # TODO(swast): Why isn't the expiration exactly what we set it to? assert ( - (expiration - datetime.timedelta(minutes=1)) + (expected_expires - datetime.timedelta(minutes=1)) < table.expires - < (expiration + datetime.timedelta(minutes=1)) + < (expected_expires + datetime.timedelta(minutes=1)) ) From 4ff26cdf862e9f9b91a3a1d2abfa7fbdf0af9c5b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 6 Nov 2023 12:35:06 -0600 Subject: [PATCH 4/5] fix: update default temp table expiration to 7 days (#174) --- bigframes/constants.py | 2 +- bigframes/dataframe.py | 4 ++- bigframes/session/__init__.py | 4 ++- bigframes/session/_io/bigquery.py | 36 ++++++++++++++++++++------ tests/unit/session/test_io_bigquery.py | 10 +++---- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/bigframes/constants.py b/bigframes/constants.py index 82b48dc967..a1ffd2b755 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -26,4 +26,4 @@ ABSTRACT_METHOD_ERROR_MESSAGE = f"Abstract method. You have likely encountered a bug. Please share this stacktrace and how you reached it with the BigQuery DataFrames team. {FEEDBACK_LINK}" -DEFAULT_EXPIRATION = datetime.timedelta(days=1) +DEFAULT_EXPIRATION = datetime.timedelta(days=7) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 04a5456e26..40f12671ae 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -16,6 +16,7 @@ from __future__ import annotations +import datetime import re import textwrap import typing @@ -2327,7 +2328,8 @@ def to_gbq( self._session.bqclient, self._session._anonymous_dataset, # TODO(swast): allow custom expiration times, probably via session configuration. 
From 4ff26cdf862e9f9b91a3a1d2abfa7fbdf0af9c5b Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Mon, 6 Nov 2023 12:35:06 -0600
Subject: [PATCH 4/5] fix: update default temp table expiration to 7 days
 (#174)

---
 bigframes/constants.py                 |  2 +-
 bigframes/dataframe.py                 |  4 ++-
 bigframes/session/__init__.py          |  4 ++-
 bigframes/session/_io/bigquery.py      | 36 ++++++++++++++++++++------
 tests/unit/session/test_io_bigquery.py | 10 +++----
 5 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/bigframes/constants.py b/bigframes/constants.py
index 82b48dc967..a1ffd2b755 100644
--- a/bigframes/constants.py
+++ b/bigframes/constants.py
@@ -26,4 +26,4 @@
 ABSTRACT_METHOD_ERROR_MESSAGE = f"Abstract method. You have likely encountered a bug. Please share this stacktrace and how you reached it with the BigQuery DataFrames team. {FEEDBACK_LINK}"
 
-DEFAULT_EXPIRATION = datetime.timedelta(days=1)
+DEFAULT_EXPIRATION = datetime.timedelta(days=7)
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index 04a5456e26..40f12671ae 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import datetime
 import re
 import textwrap
 import typing
@@ -2327,7 +2328,8 @@ def to_gbq(
             self._session.bqclient,
             self._session._anonymous_dataset,
             # TODO(swast): allow custom expiration times, probably via session configuration.
-            constants.DEFAULT_EXPIRATION,
+            datetime.datetime.now(datetime.timezone.utc)
+            + constants.DEFAULT_EXPIRATION,
         )
 
         if if_exists is not None and if_exists != "replace":
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index a1eae69715..2537e81e19 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -430,7 +430,9 @@ def _read_gbq_query(
             index_cols = list(index_col)
 
         destination, query_job = self._query_to_destination(
-            query, index_cols, api_name="read_gbq_query"
+            query,
+            index_cols,
+            api_name=api_name,
         )
 
         # If there was no destination table, that means the query must have
diff --git a/bigframes/session/_io/bigquery.py b/bigframes/session/_io/bigquery.py
index d200a9a861..06d240fec6 100644
--- a/bigframes/session/_io/bigquery.py
+++ b/bigframes/session/_io/bigquery.py
@@ -14,6 +14,8 @@
 
 """Private module: Helpers for I/O operations."""
 
+from __future__ import annotations
+
 import datetime
 import textwrap
 import types
@@ -69,6 +71,29 @@ def create_export_data_statement(
     )
 
 
+def random_table(dataset: bigquery.DatasetReference) -> bigquery.TableReference:
+    """Generate a random table ID with BigQuery DataFrames prefix.
+
+    Args:
+        dataset (google.cloud.bigquery.DatasetReference):
+            The dataset to make the table reference in. Usually the anonymous
+            dataset for the session.
+
+    Returns:
+        google.cloud.bigquery.TableReference:
+            Fully qualified table ID of a table that doesn't exist.
+    """
+    now = datetime.datetime.now(datetime.timezone.utc)
+    random_id = uuid.uuid4().hex
+    table_id = TEMP_TABLE_PREFIX.format(
+        date=now.strftime("%Y%m%d"), random_id=random_id
+    )
+    return dataset.table(table_id)
+
+
+def table_ref_to_sql(table: bigquery.TableReference) -> str:
+    """Format a table reference as escaped SQL."""
+    return f"`{table.project}`.`{table.dataset_id}`.`{table.table_id}`"
+
+
 def create_snapshot_sql(
     table_ref: bigquery.TableReference, current_timestamp: datetime.datetime
 ) -> str:
@@ -95,17 +120,12 @@ def create_snapshot_sql(
 def create_temp_table(
     bqclient: bigquery.Client,
     dataset: bigquery.DatasetReference,
-    expiration: datetime.timedelta,
+    expiration: datetime.datetime,
 ) -> str:
     """Create an empty table with an expiration in the desired dataset."""
-    now = datetime.datetime.now(datetime.timezone.utc)
-    random_id = uuid.uuid4().hex
-    table_id = TEMP_TABLE_PREFIX.format(
-        date=now.strftime("%Y%m%d"), random_id=random_id
-    )
-    table_ref = dataset.table(table_id)
+    table_ref = random_table(dataset)
     destination = bigquery.Table(table_ref)
-    destination.expires = now + expiration
+    destination.expires = expiration
     bqclient.create_table(destination)
     return f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}"
diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py
index cb3003b1cc..03470208e4 100644
--- a/tests/unit/session/test_io_bigquery.py
+++ b/tests/unit/session/test_io_bigquery.py
@@ -56,9 +56,9 @@ def test_create_temp_table_default_expiration():
     """Make sure the created table has an expiration."""
     bqclient = mock.create_autospec(bigquery.Client)
     dataset = bigquery.DatasetReference("test-project", "test_dataset")
-    now = datetime.datetime.now(datetime.timezone.utc)
-    expiration = datetime.timedelta(days=3)
-    expected_expires = now + expiration
+    expiration = datetime.datetime(
+        2023, 11, 2, 13, 44, 55, 678901, datetime.timezone.utc
+    )
 
     bigframes.session._io.bigquery.create_temp_table(bqclient, dataset, expiration)
 
@@ -69,9 +69,9 @@ def test_create_temp_table_default_expiration():
     assert table.project == "test-project"
     assert table.dataset_id == "test_dataset"
     assert table.table_id.startswith("bqdf")
     assert (
-        (expected_expires - datetime.timedelta(minutes=1))
+        (expiration - datetime.timedelta(minutes=1))
         < table.expires
-        < (expected_expires + datetime.timedelta(minutes=1))
+        < (expiration + datetime.timedelta(minutes=1))
     )
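After patch 4, `create_temp_table` takes an absolute expiry timestamp rather
than a timedelta, so callers such as `DataFrame.to_gbq` compute the deadline
up front. A minimal sketch of the call, assuming default credentials and a
hypothetical project/dataset:

    import datetime

    import google.cloud.bigquery as bigquery

    import bigframes.session._io.bigquery as bigframes_io

    bqclient = bigquery.Client()
    dataset = bigquery.DatasetReference("my-project", "_anon0123")

    # Absolute UTC expiry, seven days out (matches DEFAULT_EXPIRATION above).
    expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=7)

    # Returns the fully qualified ID, e.g. "my-project._anon0123.bqdf20231106_...".
    full_id = bigframes_io.create_temp_table(bqclient, dataset, expiration)
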
From b10df1e809e7726e10ec8ccee734270b6be1f3a2 Mon Sep 17 00:00:00 2001
From: Ashley Xu
Date: Fri, 3 Nov 2023 22:51:27 +0000
Subject: [PATCH 5/5] fix: use random table when loading data for read_csv,
 read_json, read_parquet

---
 bigframes/session/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 2537e81e19..f564cbf174 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -803,7 +803,7 @@ def _read_gbq_with_ordering(
     def _read_bigquery_load_job(
         self,
         filepath_or_buffer: str | IO["bytes"],
-        table: bigquery.Table,
+        table: Union[bigquery.Table, bigquery.TableReference],
         *,
         job_config: bigquery.LoadJobConfig,
         index_col: Iterable[str] | str = (),
@@ -1039,7 +1039,7 @@ def read_csv(
         encoding: Optional[str] = None,
         **kwargs,
     ) -> dataframe.DataFrame:
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         if engine is not None and engine == "bigquery":
             if any(param is not None for param in (dtype, names)):
@@ -1153,7 +1153,7 @@ def read_parquet(
         # Note: "engine" is omitted because it is redundant. Loading a table
         # from a pandas DataFrame will just create another parquet file + load
         # job anyway.
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         job_config = bigquery.LoadJobConfig()
         job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED
@@ -1176,7 +1176,7 @@ def read_json(
         engine: Literal["ujson", "pyarrow", "bigquery"] = "ujson",
         **kwargs,
     ) -> dataframe.DataFrame:
-        table = bigquery.Table(self._create_session_table())
+        table = bigframes_io.random_table(self._anonymous_dataset)
 
         if engine == "bigquery":
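Taken together, the series leaves `read_csv`/`read_json`/`read_parquet`
loading into a uniquely named table in the session's anonymous dataset. A
rough sketch of the resulting load-job flow, with a hypothetical bucket,
project, and dataset (the real code path is `Session._read_bigquery_load_job`):

    import google.cloud.bigquery as bigquery

    import bigframes.session._io.bigquery as bigframes_io

    client = bigquery.Client()
    anonymous_dataset = bigquery.DatasetReference("my-project", "_anon0123")

    # Pick a fresh, collision-resistant destination for this load job.
    table = bigframes_io.random_table(anonymous_dataset)

    job_config = bigquery.LoadJobConfig()
    job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED
    job_config.source_format = bigquery.SourceFormat.CSV
    job_config.autodetect = True

    load_job = client.load_table_from_uri(
        "gs://my-bucket/data.csv", table, job_config=job_config
    )
    load_job.result()  # block until the load completes
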