From 53cfe29412fbc66ce8b43d1ec2a2907814b38115 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Thu, 5 Oct 2023 18:52:29 +0000 Subject: [PATCH 1/6] feat: add iat indexers --- bigframes/core/indexers.py | 34 ++++++++++++++++++++++++++++++++++ bigframes/dataframe.py | 4 ++++ bigframes/series.py | 4 ++++ 3 files changed, 42 insertions(+) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index a538c80711..e60a2d7c55 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -98,6 +98,18 @@ def __getitem__( return _iloc_getitem_series_or_dataframe(self._series, key) +class IatSeriesIndexer: + def __init__(self, series: bigframes.series.Series): + self._series = series + + def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar: + if not isinstance(key, int): + raise ValueError( + "ValueError: iAt based indexing can only have integer indexers" + ) + return self._series.iloc[key] + + class LocDataFrameIndexer: def __init__(self, dataframe: bigframes.dataframe.DataFrame): self._dataframe = dataframe @@ -185,6 +197,28 @@ def __getitem__(self, key) -> bigframes.dataframe.DataFrame | pd.Series: return _iloc_getitem_series_or_dataframe(self._dataframe, key) +class IatDataFrameIndexer: + def __init__(self, dataframe: bigframes.dataframe.DataFrame): + self._dataframe = dataframe + + def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar: + error_message = "DataFrame.iat should be indexed by a tuple of ints" + # we raise TypeError or ValueError under the same conditions that pandas does + if isinstance(key, int): + raise TypeError(error_message) + if not isinstance(key, tuple): + raise ValueError(error_message) + key_values_are_ints = [isinstance(key_value, int) for key_value in key] + if not all(key_values_are_ints): + raise ValueError(error_message) + if len(key) != 2: + raise TypeError(error_message) + block = self._dataframe._block + column_block = block.select_columns(block.value_columns[key[1]]) + column = bigframes.series.Series(column_block) + return column.iloc[key[0]] + + @typing.overload def _loc_getitem_series_or_dataframe( series_or_dataframe: bigframes.series.Series, key diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index ea06e28cdf..c22a269769 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -247,6 +247,10 @@ def loc(self) -> indexers.LocDataFrameIndexer: def iloc(self) -> indexers.ILocDataFrameIndexer: return indexers.ILocDataFrameIndexer(self) + @property + def iat(self) -> indexers.IatDataFrameIndexer: + return indexers.IatDataFrameIndexer(self) + @property def dtypes(self) -> pandas.Series: return pandas.Series(data=self._block.dtypes, index=self._block.column_labels) diff --git a/bigframes/series.py b/bigframes/series.py index 47298d59f5..96e24b32d6 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -86,6 +86,10 @@ def loc(self) -> bigframes.core.indexers.LocSeriesIndexer: def iloc(self) -> bigframes.core.indexers.IlocSeriesIndexer: return bigframes.core.indexers.IlocSeriesIndexer(self) + @property + def iat(self) -> bigframes.core.indexers.IatSeriesIndexer: + return bigframes.core.indexers.IatSeriesIndexer(self) + @property def name(self) -> blocks.Label: return self._name From 48dd5e981f50240ea69ad4a3c4df0480885785fb Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Thu, 5 Oct 2023 21:21:48 +0000 Subject: [PATCH 2/6] select_columns correction --- bigframes/core/indexers.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index 1028fbf87c..fcbb89d2f2 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -103,9 +103,7 @@ def __init__(self, series: bigframes.series.Series): def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar: if not isinstance(key, int): - raise ValueError( - "ValueError: iAt based indexing can only have integer indexers" - ) + raise ValueError("Series iAt based indexing can only have integer indexers") return self._series.iloc[key] @@ -217,7 +215,7 @@ def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar: if len(key) != 2: raise TypeError(error_message) block = self._dataframe._block - column_block = block.select_columns(block.value_columns[key[1]]) + column_block = block.select_columns([block.value_columns[key[1]]]) column = bigframes.series.Series(column_block) return column.iloc[key[0]] From 317ab48c03155e181f817146083e51e17b4819cf Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Thu, 5 Oct 2023 21:40:17 +0000 Subject: [PATCH 3/6] add iloc for tuples --- bigframes/core/indexers.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index fcbb89d2f2..a74880041c 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -202,8 +202,8 @@ class IatDataFrameIndexer: def __init__(self, dataframe: bigframes.dataframe.DataFrame): self._dataframe = dataframe - def __getitem__(self, key: int) -> bigframes.core.scalar.Scalar: - error_message = "DataFrame.iat should be indexed by a tuple of ints" + def __getitem__(self, key: tuple) -> bigframes.core.scalar.Scalar: + error_message = "DataFrame.iat should be indexed by a tuple of exactly 2 ints" # we raise TypeError or ValueError under the same conditions that pandas does if isinstance(key, int): raise TypeError(error_message) @@ -388,6 +388,18 @@ def _iloc_getitem_series_or_dataframe( return result_pd_df.iloc[0] elif isinstance(key, slice): return series_or_dataframe._slice(key.start, key.stop, key.step) + elif isinstance(key, tuple) and len(key) == 0: + return series_or_dataframe + elif isinstance(key, tuple) and len(key) == 1: + return _iloc_getitem_series_or_dataframe(series_or_dataframe, key[0]) + elif ( + isinstance(key, tuple) + and isinstance(series_or_dataframe, bigframes.dataframe.DataFrame) + and len(key) == 2 + ): + return series_or_dataframe.iat[key] + elif isinstance(key, tuple): + raise pd.errors.IndexingError("Too many indexers") elif pd.api.types.is_list_like(key): if len(key) == 0: return typing.cast( From 9a4178eb4c3c6f3711488cf8634f8e8d8b4743e3 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Thu, 5 Oct 2023 22:08:51 +0000 Subject: [PATCH 4/6] add dataframe tests --- tests/system/small/test_dataframe.py | 55 +++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 19ea9b8ae5..8f3dd1e765 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2077,7 +2077,7 @@ def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index): @pytest.mark.parametrize( "index", - [0, 5, -2], + [0, 5, -2, (2,)], ) def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index): bf_result = scalars_df_index.iloc[index] @@ -2089,6 +2089,59 @@ def test_iloc_single_integer(scalars_df_index, scalars_pandas_df_index, index): ) +@pytest.mark.parametrize( + "index", + [(2, 5), (5, 0), (0, 0)], +) +def test_iloc_tuple(scalars_df_index, scalars_pandas_df_index, index): + bf_result = scalars_df_index.iloc[index] + pd_result = scalars_pandas_df_index.iloc[index] + + assert bf_result == pd_result + + +@pytest.mark.parametrize( + ("index", "error"), + [ + ((1, 1, 1), pd.errors.IndexingError), + (("asd", "asd", "asd"), pd.errors.IndexingError), + (("asd"), TypeError), + ], +) +def test_iloc_tuple_errors(scalars_df_index, scalars_pandas_df_index, index, error): + with pytest.raises(error): + scalars_df_index.iloc[index] + with pytest.raises(error): + scalars_pandas_df_index.iloc[index] + + +@pytest.mark.parametrize( + "index", + [(2, 5), (5, 0), (0, 0)], +) +def test_iat(scalars_df_index, scalars_pandas_df_index, index): + bf_result = scalars_df_index.iat[index] + pd_result = scalars_pandas_df_index.iat[index] + + assert bf_result == pd_result + + +@pytest.mark.parametrize( + ("index", "error"), + [ + (0, TypeError), + ("asd", ValueError), + ((1, 2, 3), TypeError), + (("asd", "asd"), ValueError), + ], +) +def test_iat_errors(scalars_df_index, scalars_pandas_df_index, index, error): + with pytest.raises(error): + scalars_pandas_df_index.iat[index] + with pytest.raises(error): + scalars_df_index.iat[index] + + def test_iloc_single_integer_out_of_bound_error( scalars_df_index, scalars_pandas_df_index ): From 74725900c8549f8d9b9e53a32da2939a0fac3dd8 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Thu, 5 Oct 2023 22:12:30 +0000 Subject: [PATCH 5/6] add series tests --- tests/system/small/test_series.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 8c1c36720b..bcf3ce7d14 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1915,6 +1915,20 @@ def test_series_iloc(scalars_df_index, scalars_pandas_df_index, start, stop, ste ) +def test_iat(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index["int64_too"].iat[3] + pd_result = scalars_pandas_df_index["int64_too"].iat[3] + + assert bf_result == pd_result + + +def test_iat_error(scalars_df_index, scalars_pandas_df_index): + with pytest.raises(ValueError): + scalars_pandas_df_index["int64_too"].iat["asd"] + with pytest.raises(ValueError): + scalars_df_index["int64_too"].iat["asd"] + + def test_series_add_prefix(scalars_df_index, scalars_pandas_df_index): bf_result = scalars_df_index["int64_too"].add_prefix("prefix_").to_pandas() From 9442a136ae63ee45836551e10433eaf11fa6b057 Mon Sep 17 00:00:00 2001 From: Henry J Solberg Date: Thu, 5 Oct 2023 22:24:43 +0000 Subject: [PATCH 6/6] add third party comments --- third_party/bigframes_vendored/pandas/core/frame.py | 10 ++++++++++ third_party/bigframes_vendored/pandas/core/series.py | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 644e043e83..80a5428b36 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2097,3 +2097,13 @@ def fillna(self, value): DataFrame: Object with missing values filled """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def iloc(self): + """Purely integer-location based indexing for selection by position.""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def iat(self): + """Access a single value for a row/column pair by integer position.""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index 970ab1d8b4..03729922d5 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -1823,3 +1823,13 @@ def map( Series: Same index as caller. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def iloc(self): + """Purely integer-location based indexing for selection by position.""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def iat(self): + """Access a single value for a row/column pair by integer position.""" + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)