googleapis · Genesis929 · Apr 29, 2025 · Apr 29, 2025 · May 1, 2025
@@ -0,0 +1,20 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from bigframes.core.groupby import DataFrameGroupBy
+
+
+class Resampler(DataFrameGroupBy):
+    # Thin subclass of ``DataFrameGroupBy``: it adds no behavior of its own,
+    # and construction is delegated unchanged to the parent class.
+    # NOTE(review): nothing visible in this change instantiates this class —
+    # confirm whether ``resample`` is meant to return it.
+    def __init__(self, obj, by, **kwargs):
+        # ``obj`` and ``by`` are forwarded positionally as the first two
+        # arguments of ``DataFrameGroupBy.__init__``; keyword arguments pass
+        # through untouched.
+        super().__init__(obj, by, **kwargs)
@@ -3679,87 +3679,27 @@ def _split(
         blocks = self._block.split(ns=ns, fracs=fracs, random_state=random_state)
         return [DataFrame(block) for block in blocks]
 
-    @validations.requires_ordering()
-    def _resample(
+    def resample(
         self,
         rule: str,
         *,
         on: blocks.Label = None,
-        level: Optional[LevelsType] = None,
-        origin: Union[
-            Union[
-                pandas.Timestamp, datetime.datetime, numpy.datetime64, int, float, str
-            ],
-            Literal["epoch", "start", "start_day", "end", "end_day"],
-        ] = "start_day",
+        level: Optional[LevelType] = None,
+        origin: Literal["epoch", "start", "start_day"] = "start_day",
     ) -> bigframes.core.groupby.DataFrameGroupBy:
-        """Internal function to support resample. Resample time-series data.
-
-        **Examples:**
-
-        >>> import bigframes.pandas as bpd
-        >>> import pandas as pd
-        >>> bpd.options.display.progress_bar = None
-
-        >>> data = {
-        ...     "timestamp_col": pd.date_range(
-        ...         start="2021-01-01 13:00:00", periods=30, freq="1s"
-        ...     ),
-        ...     "int64_col": range(30),
-        ...     "int64_too": range(10, 40),
-        ... }
-
-        Resample on a DataFrame with index:
-
-        >>> df = bpd.DataFrame(data).set_index("timestamp_col")
-        >>> df._resample(rule="7s").min()
-                             int64_col  int64_too
-        2021-01-01 12:59:55          0         10
-        2021-01-01 13:00:02          2         12
-        2021-01-01 13:00:09          9         19
-        2021-01-01 13:00:16         16         26
-        2021-01-01 13:00:23         23         33
-        <BLANKLINE>
-        [5 rows x 2 columns]
-
-        Resample with column and origin set to 'start':
-
-        >>> df = bpd.DataFrame(data)
-        >>> df._resample(rule="7s", on = "timestamp_col", origin="start").min()
-                             int64_col  int64_too
-        2021-01-01 13:00:00          0         10
-        2021-01-01 13:00:07          7         17
-        2021-01-01 13:00:14         14         24
-        2021-01-01 13:00:21         21         31
-        2021-01-01 13:00:28         28         38
-        <BLANKLINE>
-        [5 rows x 2 columns]
-
-        Args:
-            rule (str):
-                The offset string representing target conversion.
-            on (str, default None):
-                For a DataFrame, column to use instead of index for resampling. Column
-                must be datetime-like.
-            level (str or int, default None):
-                For a MultiIndex, level (name or number) to use for resampling.
-                level must be datetime-like.
-            origin(str, default 'start_day'):
-                The timestamp on which to adjust the grouping. Must be one of the following:
-                'epoch': origin is 1970-01-01
-                'start': origin is the first value of the timeseries
-                'start_day': origin is the first day at midnight of the timeseries
-        Returns:
-            DataFrameGroupBy: DataFrameGroupBy object.
-        """
+        # NOTE(review): the docstring removed above appears to have moved to
+        # the vendored pandas ``resample`` stub — confirm this class inherits
+        # it so the public method stays documented.
+        # ``rule``, ``on``, ``level`` and ``origin`` are passed through
+        # unchanged to the block-level label generation.
         block = self._block._generate_resample_label(
             rule=rule,
             on=on,
             level=level,
             origin=origin,
         )
         df = DataFrame(block)
-        return df.groupby(level=0)
+        # Group on index level 0 of the frame built from the labelled block,
+        # constructing the groupby object directly rather than going through
+        # ``df.groupby(level=0)``.
+        return groupby.DataFrameGroupBy(
+            df._block,
+            by_col_ids=df._resolve_levels(0),
+            as_index=True,
+            dropna=True,
+        )
 
     @classmethod
     def from_dict(

@@ -2172,65 +2172,16 @@ def explode(self, *, ignore_index: Optional[bool] = False) -> Series:
             )
         )
 
-    @validations.requires_ordering()
-    def _resample(
+    def resample(
         self,
         rule: str,
         *,
-        closed: Optional[Literal["right", "left"]] = None,
-        label: Optional[Literal["right", "left"]] = None,
-        level: Optional[LevelsType] = None,
-        origin: Union[
-            Union[
-                pandas.Timestamp, datetime.datetime, numpy.datetime64, int, float, str
-            ],
-            Literal["epoch", "start", "start_day", "end", "end_day"],
-        ] = "start_day",
+        on: blocks.Label = None,
+        level: Optional[LevelType] = None,
+        origin: Literal["epoch", "start", "start_day"] = "start_day",
     ) -> bigframes.core.groupby.SeriesGroupBy:
-        """Internal function to support resample. Resample time-series data.
-
-        **Examples:**
-
-        >>> import bigframes.pandas as bpd
-        >>> import pandas as pd
-        >>> bpd.options.display.progress_bar = None
-
-        >>> data = {
-        ...     "timestamp_col": pd.date_range(
-        ...         start="2021-01-01 13:00:00", periods=30, freq="1s"
-        ...     ),
-        ...     "int64_col": range(30),
-        ... }
-        >>> s = bpd.DataFrame(data).set_index("timestamp_col")
-        >>> s._resample(rule="7s", origin="epoch").min()
-                             int64_col
-        2021-01-01 12:59:56          0
-        2021-01-01 13:00:03          3
-        2021-01-01 13:00:10         10
-        2021-01-01 13:00:17         17
-        2021-01-01 13:00:24         24
-        <BLANKLINE>
-        [5 rows x 1 columns]
-
-
-        Args:
-            rule (str):
-                The offset string representing target conversion.
-            level (str or int, default None):
-                For a MultiIndex, level (name or number) to use for resampling.
-                level must be datetime-like.
-            origin(str, default 'start_day'):
-                The timestamp on which to adjust the grouping. Must be one of the following:
-                'epoch': origin is 1970-01-01
-                'start': origin is the first value of the timeseries
-                'start_day': origin is the first day at midnight of the timeseries
-        Returns:
-            SeriesGroupBy: SeriesGroupBy object.
-        """
         block = self._block._generate_resample_label(
             rule=rule,
-            closed=closed,
-            label=label,
             on=None,
             level=level,
             origin=origin,

@@ -5418,13 +5418,13 @@ def test_dataframe_explode_xfail(col_names):
         ),
     ],
 )
-def test__resample_with_column(
+def test_resample_with_column(
     scalars_df_index, scalars_pandas_df_index, on, rule, origin
 ):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     bf_result = (
-        scalars_df_index._resample(rule=rule, on=on, origin=origin)[
+        scalars_df_index.resample(rule=rule, on=on, origin=origin)[
             ["int64_col", "int64_too"]
         ]
         .max()
@@ -5446,7 +5446,7 @@ def test__resample_with_column(
         pytest.param(False, None, "datetime_col", "100d"),
     ],
 )
-def test__resample_with_index(
+def test_resample_with_index(
     scalars_df_index, scalars_pandas_df_index, append, level, col, rule
 ):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
@@ -5455,7 +5455,7 @@ def test__resample_with_index(
     scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)
     bf_result = (
         scalars_df_index[["int64_col", "int64_too"]]
-        ._resample(rule=rule, level=level)
+        .resample(rule=rule, level=level)
         .min()
         .to_pandas()
     )
@@ -5505,15 +5505,15 @@ def test__resample_with_index(
         ),
     ],
 )
-def test__resample_start_time(rule, origin, data):
+def test_resample_start_time(rule, origin, data):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     col = "timestamp_col"
     scalars_df_index = bpd.DataFrame(data).set_index(col)
     scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
     scalars_pandas_df_index.index.name = None
 
-    bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
+    bf_result = scalars_df_index.resample(rule=rule, origin=origin).min().to_pandas()
 
     pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()
 

@@ -4398,14 +4398,14 @@ def test_series_explode_null(data):
         pytest.param(True, "timestamp_col", "timestamp_col", "1YE"),
     ],
 )
-def test__resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
+def test_resample(scalars_df_index, scalars_pandas_df_index, append, level, col, rule):
+    # Checks that ``Series.resample(...).min()`` on ``int64_col`` matches pandas
+    # after both frames are indexed by ``col`` (appended to the existing index
+    # when ``append`` is true).
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     scalars_df_index = scalars_df_index.set_index(col, append=append)["int64_col"]
     scalars_pandas_df_index = scalars_pandas_df_index.set_index(col, append=append)[
         "int64_col"
     ]
-    bf_result = scalars_df_index._resample(rule=rule, level=level).min().to_pandas()
+    bf_result = scalars_df_index.resample(rule=rule, level=level).min().to_pandas()
     pd_result = scalars_pandas_df_index.resample(rule=rule, level=level).min()
     pd.testing.assert_series_equal(bf_result, pd_result)
 

@@ -250,15 +250,15 @@ def test_unordered_mode_no_ambiguity_warning(unordered_session):
         ),
     ],
 )
-def test__resample_with_index(unordered_session, rule, origin, data):
+def test_resample_with_index(unordered_session, rule, origin, data):
     # TODO: supply a reason why this isn't compatible with pandas 1.x
     pytest.importorskip("pandas", minversion="2.0.0")
     col = "timestamp_col"
     scalars_df_index = bpd.DataFrame(data, session=unordered_session).set_index(col)
     scalars_pandas_df_index = pd.DataFrame(data).set_index(col)
     scalars_pandas_df_index.index.name = None
 
-    bf_result = scalars_df_index._resample(rule=rule, origin=origin).min().to_pandas()
+    bf_result = scalars_df_index.resample(rule=rule, origin=origin).min().to_pandas()
 
     pd_result = scalars_pandas_df_index.resample(rule=rule, origin=origin).min()
 

@@ -1,7 +1,7 @@
 # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/generic.py
 from __future__ import annotations
 
-from typing import Callable, Iterator, Literal, Optional, TYPE_CHECKING
+from typing import Callable, Hashable, Iterator, Literal, Optional, TYPE_CHECKING
 
 import bigframes_vendored.constants as constants
 from bigframes_vendored.pandas.core import indexing
@@ -1271,3 +1271,78 @@ def equals(self, other) -> bool:
             otherwise.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def resample(
+        self,
+        rule: str,
+        *,
+        on: Hashable = None,
+        level: Optional[Hashable] = None,
+        origin: Literal["epoch", "start", "start_day"] = "start_day",
+    ):
+        """
+        Resample time-series data.
+
+        Convenience method for frequency conversion and resampling of time
+        series. The object must have a datetime index or the caller must
+        pass the label of a datetime series/index to the on/level keyword
+        parameter.
+
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import pandas as pd
+        >>> bpd.options.display.progress_bar = None
+
+        >>> data = {
+        ...     "timestamp_col": pd.date_range(
+        ...         start="2021-01-01 13:00:00", periods=30, freq="1s"
+        ...     ),
+        ...     "int64_col": range(30),
+        ...     "int64_too": range(10, 40),
+        ... }
+
+        Resample on a DataFrame with index:
+
+        >>> df = bpd.DataFrame(data).set_index("timestamp_col")
+        >>> df.resample(rule="7s").min()
+                             int64_col  int64_too
+        2021-01-01 12:59:55          0         10
+        2021-01-01 13:00:02          2         12
+        2021-01-01 13:00:09          9         19
+        2021-01-01 13:00:16         16         26
+        2021-01-01 13:00:23         23         33
+        <BLANKLINE>
+        [5 rows x 2 columns]
+
+        Resample with column and origin set to 'start':
+
+        >>> df = bpd.DataFrame(data)
+        >>> df.resample(rule="7s", on = "timestamp_col", origin="start").min()
+                             int64_col  int64_too
+        2021-01-01 13:00:00          0         10
+        2021-01-01 13:00:07          7         17
+        2021-01-01 13:00:14         14         24
+        2021-01-01 13:00:21         21         31
+        2021-01-01 13:00:28         28         38
+        <BLANKLINE>
+        [5 rows x 2 columns]
+
+        Args:
+            rule (str):
+                The offset string representing target conversion.
+            on (str, default None):
+                For a DataFrame, column to use instead of index for resampling. Column
+                must be datetime-like.
+            level (str or int, default None):
+                For a MultiIndex, level (name or number) to use for resampling.
+                level must be datetime-like.
+            origin (str, default 'start_day'):
+                The timestamp on which to adjust the grouping. Must be one of the following:
+                'epoch': origin is 1970-01-01
+                'start': origin is the first value of the timeseries
+                'start_day': origin is the first day at midnight of the timeseries
+
+        Returns:
+            bigframes.core.groupby.DataFrameGroupBy | bigframes.core.groupby.SeriesGroupBy:
+                A DataFrameGroupBy object when called on a DataFrame, or a
+                SeriesGroupBy object when called on a Series.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)