diff --git a/bigframes/__init__.py b/bigframes/__init__.py index 8f41790072..bd1476957b 100644 --- a/bigframes/__init__.py +++ b/bigframes/__init__.py @@ -14,7 +14,7 @@ """BigQuery DataFrames provides a DataFrame API scaled by the BigQuery engine.""" -from bigframes._config import options +from bigframes._config import option_context, options from bigframes._config.bigquery_options import BigQueryOptions from bigframes.core.global_session import close_session, get_global_session from bigframes.session import connect, Session @@ -28,4 +28,5 @@ "connect", "Session", "__version__", + "option_context", ] diff --git a/bigframes/_config/__init__.py b/bigframes/_config/__init__.py index e26eaf8800..8dcebfce6a 100644 --- a/bigframes/_config/__init__.py +++ b/bigframes/_config/__init__.py @@ -18,8 +18,10 @@ """ import bigframes._config.bigquery_options as bigquery_options +import bigframes._config.compute_options as compute_options import bigframes._config.display_options as display_options import bigframes._config.sampling_options as sampling_options +import third_party.bigframes_vendored.pandas._config.config as pandas_config class Options: @@ -29,6 +31,7 @@ def __init__(self): self._bigquery_options = bigquery_options.BigQueryOptions() self._display_options = display_options.DisplayOptions() self._sampling_options = sampling_options.SamplingOptions() + self._compute_options = compute_options.ComputeOptions() @property def bigquery(self) -> bigquery_options.BigQueryOptions: @@ -49,6 +52,11 @@ def sampling(self) -> sampling_options.SamplingOptions: parameters in specific functions.""" return self._sampling_options + @property + def compute(self) -> compute_options.ComputeOptions: + """Options controlling object computation.""" + return self._compute_options + options = Options() """Global options for default session.""" @@ -58,3 +66,6 @@ def sampling(self) -> sampling_options.SamplingOptions: "Options", "options", ) + + +option_context = pandas_config.option_context diff 
@dataclasses.dataclass
class ComputeOptions:
    """
    Settings that govern how query jobs are computed.

    Attributes:
        maximum_bytes_billed (int, Optional):
            Upper bound on the bytes billed for a query job. A query whose
            billed bytes would exceed this bound fails (without incurring a
            charge). When left as ``None`` (the default), the limit is set
            to your project default.
            See ``maximum_bytes_billed`` on
            ``google.cloud.bigquery.QueryJobConfig``.

    """

    maximum_bytes_billed: Optional[int] = None
""" - original_max_cols = pd.options.display.max_columns - original_max_rows = pd.options.display.max_rows - original_show_dimensions = pd.options.display.show_dimensions - - pd.options.display.max_columns = display_options.max_columns - pd.options.display.max_rows = display_options.max_rows - pd.options.display.show_dimensions = True # type: ignore - - try: - yield - finally: - pd.options.display.max_columns = original_max_cols - pd.options.display.max_rows = original_max_rows - pd.options.display.show_dimensions = original_show_dimensions + with pd.option_context( + "display.max_columns", + display_options.max_columns, + "display.max_rows", + display_options.max_rows, + "display.show_dimensions", + True, + ) as pandas_context: + yield (pandas_context) diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 8d9726312f..0fab1109dc 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -462,6 +462,9 @@ def read_gbq_function(function_name: str): options = config.options """Global :class:`~bigframes._config.Options` to configure BigQuery DataFrames.""" +option_context = config.option_context +"""Global :class:`~bigframes._config.option_context` to configure BigQuery DataFrames.""" + # Session management APIs get_global_session = global_session.get_global_session close_session = global_session.close_session @@ -494,6 +497,7 @@ def read_gbq_function(function_name: str): # Other public pandas attributes "NamedAgg", "options", + "option_context", # Session management APIs "get_global_session", "close_session", diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 932a41f283..4858c7726a 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -1504,12 +1504,10 @@ def _start_query( max_results: Optional[int] = None, ) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]: """ - Starts query job and waits for results + Starts query job and waits for results. 
""" - if job_config is not None: - query_job = self.bqclient.query(sql, job_config=job_config) - else: - query_job = self.bqclient.query(sql) + job_config = self._prepare_job_config(job_config) + query_job = self.bqclient.query(sql, job_config=job_config) opts = bigframes.options.display if opts.progress_bar is not None and not query_job.configuration.dry_run: @@ -1538,6 +1536,17 @@ def _start_generic_job(self, job: formatting_helpers.GenericJob): else: job.result() + def _prepare_job_config( + self, job_config: Optional[bigquery.QueryJobConfig] = None + ) -> bigquery.QueryJobConfig: + if job_config is None: + job_config = self.bqclient.default_query_job_config + if bigframes.options.compute.maximum_bytes_billed is not None: + job_config.maximum_bytes_billed = ( + bigframes.options.compute.maximum_bytes_billed + ) + return job_config + def connect(context: Optional[bigquery_options.BigQueryOptions] = None) -> Session: return Session(context) diff --git a/docs/reference/bigframes/options.rst b/docs/reference/bigframes/options.rst index d831a519fe..991399eb88 100644 --- a/docs/reference/bigframes/options.rst +++ b/docs/reference/bigframes/options.rst @@ -12,3 +12,5 @@ Options and settings .. autoclass:: bigframes._config.display_options.DisplayOptions .. autoclass:: bigframes._config.sampling_options.SamplingOptions + +.. 
autoclass:: bigframes._config.compute_options.ComputeOptions diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml index 4fe2ec1a6a..9879721d28 100644 --- a/docs/templates/toc.yml +++ b/docs/templates/toc.yml @@ -13,6 +13,8 @@ uid: bigframes._config.display_options.DisplayOptions - name: SamplingOptions uid: bigframes._config.sampling_options.SamplingOptions + - name: ComputeOptions + uid: bigframes._config.compute_options.ComputeOptions name: Options and settings - items: - name: Session diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 8885b03d34..f9f69c6c8e 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -898,13 +898,6 @@ def usa_names_grouped_table( return session.bqclient.get_table(table_id) -@pytest.fixture() -def deferred_repr(): - bigframes.options.display.repr_mode = "deferred" - yield - bigframes.options.display.repr_mode = "head" - - @pytest.fixture() def restore_sampling_settings(): enable_downsampling = bigframes.options.sampling.enable_downsampling diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py index 084b723fba..30ea63b483 100644 --- a/tests/system/small/test_progress_bar.py +++ b/tests/system/small/test_progress_bar.py @@ -135,12 +135,11 @@ def test_query_job_repr(penguins_df_default_index: bf.dataframe.DataFrame): assert string in query_job_repr -def test_query_job_dry_run( - penguins_df_default_index: bf.dataframe.DataFrame, capsys, deferred_repr -): - repr(penguins_df_default_index) - repr(penguins_df_default_index["body_mass_g"]) - lines = capsys.readouterr().out.split("\n") - lines = filter(None, lines) - for line in lines: - assert "Computation deferred. 
def test_query_job_dry_run(penguins_df_default_index: bf.dataframe.DataFrame, capsys):
    """Check the deferred-repr message on stdout.

    With ``display.repr_mode`` temporarily set to ``"deferred"``, every
    non-empty line captured from stdout after the two ``repr`` calls must
    contain the deferred-computation message.
    """
    expected = "Computation deferred. Computation will process"
    with bf.option_context("display.repr_mode", "deferred"):
        repr(penguins_df_default_index)
        repr(penguins_df_default_index["body_mass_g"])
        captured = capsys.readouterr().out
        for line in captured.split("\n"):
            if not line:
                # Blank lines carry no message; skip them.
                continue
            assert expected in line
class option_context(contextlib.ContextDecorator):
    """
    Context manager to temporarily set options in the `with` statement context.

    You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.

    Each ``pat`` is a dotted attribute path resolved against
    ``bigframes.options`` (for example ``"display.max_rows"``). The values
    in effect on entry are captured and written back on exit. If applying
    one of the options fails, the options already applied are rolled back
    before the error propagates.

    Examples
    --------
    >>> import bigframes
    >>> with bigframes.option_context('display.max_rows', 10, 'display.max_columns', 5):
    ...     pass
    """

    def __init__(self, *args) -> None:
        """Pair up the alternating ``pat, val`` arguments.

        Raises:
            ValueError: If no arguments or an odd number of arguments is given.
        """
        if len(args) % 2 != 0 or len(args) < 2:
            raise ValueError(
                "Need to invoke as option_context(pat, val, [(pat, val), ...])."
            )

        self.ops = list(zip(args[::2], args[1::2]))

    def __enter__(self) -> None:
        """Snapshot the current values, then apply the requested ones."""
        # Read every original value before changing anything, so a pattern
        # listed twice still restores the true original.
        self.undo = [
            (pat, operator.attrgetter(pat)(bigframes.options)) for pat, _ in self.ops
        ]

        applied = 0
        try:
            for pat, val in self.ops:
                self._set_option(pat, val)
                applied += 1
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so undo the
            # options that were already changed before re-raising.
            for pat, val in self.undo[:applied]:
                self._set_option(pat, val)
            raise

    def __exit__(self, *args) -> None:
        """Write back the values captured by ``__enter__``."""
        if self.undo:
            for pat, val in self.undo:
                self._set_option(pat, val)

    def _set_option(self, pat, val):
        """Assign ``val`` to the attribute that ``pat`` names on ``bigframes.options``.

        Raises:
            ValueError: If ``pat`` has no ``"."`` separating the option group
                from the option name.
        """
        root, sep, attr = pat.rpartition(".")
        if not sep:
            raise ValueError(
                f"Option pattern {pat!r} must be of the form 'group.option'."
            )
        parent = operator.attrgetter(root)(bigframes.options)
        setattr(parent, attr, val)