From f5e52a9b076dcf75efc13ec7b1f7757b9414a7a0 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Thu, 12 Oct 2023 18:50:45 +0000 Subject: [PATCH 1/3] feat: add AtIndexer getitems --- bigframes/core/indexers.py | 25 +++++++++++++++++++ bigframes/dataframe.py | 4 +++ bigframes/series.py | 4 +++ tests/system/small/test_dataframe.py | 23 +++++++++++++++++ tests/system/small/test_series.py | 10 ++++++++ .../bigframes_vendored/pandas/core/series.py | 7 +++++- 6 files changed, 72 insertions(+), 1 deletion(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index a74880041c..7f78364643 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -107,6 +107,16 @@ def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar: return self._series.iloc[key] +class AtSeriesIndexer: + def __init__(self, series: bigframes.series.Series): + self._series = series + + def __getitem__( + self, key: LocSingleKey + ) -> Union[bigframes.core.scalar.Scalar, bigframes.series.Series]: + return self._series.loc[key] + + class LocDataFrameIndexer: def __init__(self, dataframe: bigframes.dataframe.DataFrame): self._dataframe = dataframe @@ -220,6 +230,21 @@ def __getitem__(self, key: tuple) -> bigframes.core.scalar.Scalar: return column.iloc[key[0]] +class AtDataFrameIndexer: + def __init__(self, dataframe: bigframes.dataframe.DataFrame): + self._dataframe = dataframe + + def __getitem__( + self, key: tuple + ) -> Union[bigframes.core.scalar.Scalar, bigframes.series.Series]: + if not isinstance(key, tuple): + raise TypeError( + "DataFrame.at should be indexed by a (row label, column name) tuple." + ) + row_label, col = key + return self._dataframe[col].at[row_label] + + @typing.overload def _loc_getitem_series_or_dataframe( series_or_dataframe: bigframes.series.Series, key diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 015a7642f8..ea4bcc352e 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -259,6 +259,10 @@ def iloc(self) -> indexers.ILocDataFrameIndexer: def iat(self) -> indexers.IatDataFrameIndexer: return indexers.IatDataFrameIndexer(self) + @property + def at(self) -> indexers.AtDataFrameIndexer: + return indexers.AtDataFrameIndexer(self) + @property def dtypes(self) -> pandas.Series: return pandas.Series(data=self._block.dtypes, index=self._block.column_labels) diff --git a/bigframes/series.py b/bigframes/series.py index 56e1b43a03..4fac226b0f 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -91,6 +91,10 @@ def iloc(self) -> bigframes.core.indexers.IlocSeriesIndexer: def iat(self) -> bigframes.core.indexers.IatSeriesIndexer: return bigframes.core.indexers.IatSeriesIndexer(self) + @property + def at(self) -> bigframes.core.indexers.AtSeriesIndexer: + return bigframes.core.indexers.AtSeriesIndexer(self) + @property def name(self) -> blocks.Label: return self._name diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 9f1092d09d..ecafb7c1bf 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2196,6 +2196,29 @@ def test_loc_single_index_no_duplicate(scalars_df_index, scalars_pandas_df_index ) +def test_at_with_duplicate(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.set_index("string_col", drop=False) + scalars_pandas_df_index = scalars_pandas_df_index.set_index( + "string_col", drop=False + ) + index = "Hello, World!" + bf_result = scalars_df_index.at[index, "int64_too"] + pd_result = scalars_pandas_df_index.at[index, "int64_too"] + pd.testing.assert_series_equal( + bf_result.to_pandas(), + pd_result, + ) + + +def test_at_no_duplicate(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.set_index("int64_too", drop=False) + scalars_pandas_df_index = scalars_pandas_df_index.set_index("int64_too", drop=False) + index = -2345 + bf_result = scalars_df_index.at[index, "string_col"] + pd_result = scalars_pandas_df_index.at[index, "string_col"] + assert bf_result == pd_result + + def test_loc_setitem_bool_series_scalar_new_col(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_df = scalars_df.copy() diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 802425510a..f2ced841da 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1953,6 +1953,16 @@ def test_series_iloc(scalars_df_index, scalars_pandas_df_index, start, stop, ste ) +def test_at(scalars_df_index, scalars_pandas_df_index): + scalars_df_index = scalars_df_index.set_index("int64_too", drop=False) + scalars_pandas_df_index = scalars_pandas_df_index.set_index("int64_too", drop=False) + index = -2345 + bf_result = scalars_df_index["string_col"].at[index] + pd_result = scalars_pandas_df_index["string_col"].at[index] + + assert bf_result == pd_result + + def test_iat(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index["int64_too"].iat[3] pd_result = scalars_pandas_df_index["int64_too"].iat[3] diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 03729922d5..00557d421b 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1831,5 +1831,10 @@ def iloc(self): @property def iat(self): - """Access a single value for a row/column pair by integer position.""" + """Access a single value for a row/column label pair.""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def at(self): + """Access a single value for a row/column label pair.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From fa6008c6f0bf9f3761c552313e6aa8359dd09d12 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Thu, 12 Oct 2023 18:55:23 +0000 Subject: [PATCH 2/3] fix third party docstrings --- third_party/bigframes_vendored/pandas/core/frame.py | 5 +++++ third_party/bigframes_vendored/pandas/core/series.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index e54f984d59..621d052cb8 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2112,3 +2112,8 @@ def iloc(self): def iat(self): """Access a single value for a row/column pair by integer position.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def at(self): + """Access a single value for a row/column label pair.""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 00557d421b..ec3c07bfa8 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1831,7 +1831,7 @@ def iloc(self): @property def iat(self): - """Access a single value for a row/column label pair.""" + """Access a single value for a row/column pair by integer position.""" raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @property From 03a97b72697f85c300aed4b82d96a6ef227f2572 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Mon, 16 Oct 2023 23:15:19 +0000 Subject: [PATCH 3/3] use loc from at --- bigframes/core/indexers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 7f78364643..53747f51d3 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -241,8 +241,7 @@ def __getitem__( raise TypeError( "DataFrame.at should be indexed by a (row label, column name) tuple." ) - row_label, col = key - return self._dataframe[col].at[row_label] + return self._dataframe.loc[key] @typing.overload