From 322899511dced2f1c9459dc564eabbb4fdead569 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 6 Nov 2023 17:11:00 +0000 Subject: [PATCH] fix: update default temp table expiration to 7 days --- bigframes/constants.py | 2 +- bigframes/dataframe.py | 4 ++- bigframes/session/__init__.py | 4 ++- bigframes/session/_io/bigquery.py | 36 ++++++++++++++++++++------ tests/unit/session/test_io_bigquery.py | 10 +++---- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/bigframes/constants.py b/bigframes/constants.py index 82b48dc967..a1ffd2b755 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -26,4 +26,4 @@ ABSTRACT_METHOD_ERROR_MESSAGE = f"Abstract method. You have likely encountered a bug. Please share this stacktrace and how you reached it with the BigQuery DataFrames team. {FEEDBACK_LINK}" -DEFAULT_EXPIRATION = datetime.timedelta(days=1) +DEFAULT_EXPIRATION = datetime.timedelta(days=7) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 04a5456e26..40f12671ae 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -16,6 +16,7 @@ from __future__ import annotations +import datetime import re import textwrap import typing @@ -2327,7 +2328,8 @@ def to_gbq( self._session.bqclient, self._session._anonymous_dataset, # TODO(swast): allow custom expiration times, probably via session configuration. - constants.DEFAULT_EXPIRATION, + datetime.datetime.now(datetime.timezone.utc) + + constants.DEFAULT_EXPIRATION, ) if if_exists is not None and if_exists != "replace": diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index a1eae69715..2537e81e19 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -430,7 +430,9 @@ def _read_gbq_query( index_cols = list(index_col) destination, query_job = self._query_to_destination( - query, index_cols, api_name="read_gbq_query" + query, + index_cols, + api_name=api_name, ) # If there was no destination table, that means the query must have diff --git a/bigframes/session/_io/bigquery.py b/bigframes/session/_io/bigquery.py index d200a9a861..06d240fec6 100644 --- a/bigframes/session/_io/bigquery.py +++ b/bigframes/session/_io/bigquery.py @@ -14,6 +14,8 @@ """Private module: Helpers for I/O operations.""" +from __future__ import annotations + import datetime import textwrap import types @@ -69,6 +71,29 @@ def create_export_data_statement( ) +def random_table(dataset: bigquery.DatasetReference) -> bigquery.TableReference: + """Generate a random table ID with BigQuery DataFrames prefix. + Args: + dataset (google.cloud.bigquery.DatasetReference): + The dataset to make the table reference in. Usually the anonymous + dataset for the session. + Returns: + google.cloud.bigquery.TableReference: + Fully qualified table ID of a table that doesn't exist. + """ + now = datetime.datetime.now(datetime.timezone.utc) + random_id = uuid.uuid4().hex + table_id = TEMP_TABLE_PREFIX.format( + date=now.strftime("%Y%m%d"), random_id=random_id + ) + return dataset.table(table_id) + + +def table_ref_to_sql(table: bigquery.TableReference) -> str: + """Format a table reference as escaped SQL.""" + return f"`{table.project}`.`{table.dataset_id}`.`{table.table_id}`" + + def create_snapshot_sql( table_ref: bigquery.TableReference, current_timestamp: datetime.datetime ) -> str: @@ -95,17 +120,12 @@ def create_snapshot_sql( def create_temp_table( bqclient: bigquery.Client, dataset: bigquery.DatasetReference, - expiration: datetime.timedelta, + expiration: datetime.datetime, ) -> str: """Create an empty table with an expiration in the desired dataset.""" - now = datetime.datetime.now(datetime.timezone.utc) - random_id = uuid.uuid4().hex - table_id = TEMP_TABLE_PREFIX.format( - date=now.strftime("%Y%m%d"), random_id=random_id - ) - table_ref = dataset.table(table_id) + table_ref = random_table(dataset) destination = bigquery.Table(table_ref) - destination.expires = now + expiration + destination.expires = expiration bqclient.create_table(destination) return f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}" diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index cb3003b1cc..03470208e4 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -56,9 +56,9 @@ def test_create_temp_table_default_expiration(): """Make sure the created table has an expiration.""" bqclient = mock.create_autospec(bigquery.Client) dataset = bigquery.DatasetReference("test-project", "test_dataset") - now = datetime.datetime.now(datetime.timezone.utc) - expiration = datetime.timedelta(days=3) - expected_expires = now + expiration + expiration = datetime.datetime( + 2023, 11, 2, 13, 44, 55, 678901, datetime.timezone.utc + ) bigframes.session._io.bigquery.create_temp_table(bqclient, dataset, expiration) @@ -69,9 +69,9 @@ def test_create_temp_table_default_expiration(): assert table.dataset_id == "test_dataset" assert table.table_id.startswith("bqdf") assert ( - (expected_expires - datetime.timedelta(minutes=1)) + (expiration - datetime.timedelta(minutes=1)) < table.expires - < (expected_expires + datetime.timedelta(minutes=1)) + < (expiration + datetime.timedelta(minutes=1)) )