From 542b9f4399a8a6395e5a6d0586f5c3159df59f5e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 19 Mar 2024 15:42:54 +0000 Subject: [PATCH] feat: set `force=True` by default in `DataFrame.peek()` --- bigframes/dataframe.py | 12 ++++++------ tests/system/small/test_dataframe.py | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index ad71c9b6e4..c10a0c2456 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1086,19 +1086,19 @@ def head(self, n: int = 5) -> DataFrame: def tail(self, n: int = 5) -> DataFrame: return typing.cast(DataFrame, self.iloc[-n:]) - def peek(self, n: int = 5, *, force: bool = False) -> pandas.DataFrame: + def peek(self, n: int = 5, *, force: bool = True) -> pandas.DataFrame: """ Preview n arbitrary rows from the dataframe. No guarantees about row selection or ordering. - DataFrame.peek(force=False) will always be very fast, but will not succeed if data requires - full data scanning. Using force=True will always succeed, but may be perform expensive - computations. + ``DataFrame.peek(force=False)`` will always be very fast, but will not succeed if data requires + full data scanning. Using ``force=True`` will always succeed, but may perform queries. + Query results will be cached so that future steps will benefit from these queries. Args: n (int, default 5): The number of rows to select from the dataframe. Which N rows are returned is non-deterministic. - force (bool, default False): + force (bool, default True): If the data cannot be peeked efficiently, the dataframe will instead be fully materialized as part - of the operation if force=True. If force=False, the operation will throw a ValueError. + of the operation if ``force=True``. If ``force=False``, the operation will throw a ValueError. Returns: pandas.DataFrame: A pandas DataFrame with n rows. 
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 2048ada44f..ee32fb25ac 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -429,14 +429,14 @@ def test_rename(scalars_dfs): def test_df_peek(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs - peek_result = scalars_df.peek(n=3) + peek_result = scalars_df.peek(n=3, force=False) pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns) assert len(peek_result) == 3 def test_df_peek_filtered(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs - peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3) + peek_result = scalars_df[scalars_df.int64_col != 0].peek(n=3, force=False) pd.testing.assert_index_equal(scalars_pandas_df.columns, peek_result.columns) assert len(peek_result) == 3 @@ -449,9 +449,9 @@ def test_df_peek_exception(scalars_dfs): scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3, force=False) -def test_df_peek_force(scalars_dfs): +def test_df_peek_force_default(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs - peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3, force=True) + peek_result = scalars_df[["int64_col", "int64_too"]].cumsum().peek(n=3) pd.testing.assert_index_equal( scalars_pandas_df[["int64_col", "int64_too"]].columns, peek_result.columns )