Thanks to visit codestin.com
Credit goes to github.com

Skip to content

chore: allow PRECEDING and FOLLOWING to appear on both side of BETWEEN when windowing #1507

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 25, 2025
6 changes: 3 additions & 3 deletions bigframes/core/block_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ def _interpolate_column(
if interpolate_method not in ["linear", "nearest", "ffill"]:
raise ValueError("interpolate method not supported")
window_ordering = (ordering.OrderingExpression(ex.deref(x_values)),)
backwards_window = windows.rows(following=0, ordering=window_ordering)
forwards_window = windows.rows(preceding=0, ordering=window_ordering)
backwards_window = windows.rows(end=0, ordering=window_ordering)
forwards_window = windows.rows(start=0, ordering=window_ordering)

# Note, this method may
block, notnull = block.apply_unary_op(column, ops.notnull_op)
Expand Down Expand Up @@ -450,7 +450,7 @@ def rank(
)
if method == "dense"
else windows.rows(
following=0, ordering=window_ordering, grouping_keys=grouping_cols
end=0, ordering=window_ordering, grouping_keys=grouping_cols
),
skip_reproject_unsafe=(col != columns[-1]),
)
Expand Down
47 changes: 34 additions & 13 deletions bigframes/core/compile/compiled.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
import bigframes_vendored.ibis
import bigframes_vendored.ibis.backends.bigquery.backend as ibis_bigquery
import bigframes_vendored.ibis.common.deferred as ibis_deferred # type: ignore
from bigframes_vendored.ibis.expr import builders as ibis_expr_builders
import bigframes_vendored.ibis.expr.datatypes as ibis_dtypes
from bigframes_vendored.ibis.expr.operations import window as ibis_expr_window
import bigframes_vendored.ibis.expr.operations as ibis_ops
import bigframes_vendored.ibis.expr.types as ibis_types
import pandas
Expand Down Expand Up @@ -551,20 +553,9 @@ def _ibis_window_from_spec(self, window_spec: WindowSpec):
# Unbound grouping window. Suitable for aggregations but not for analytic function application.
order_by = None

bounds = window_spec.bounds
window = bigframes_vendored.ibis.window(order_by=order_by, group_by=group_by)
if bounds is not None:
if isinstance(bounds, RangeWindowBounds):
window = window.preceding_following(
bounds.preceding, bounds.following, how="range"
)
if isinstance(bounds, RowsWindowBounds):
if bounds.preceding is not None or bounds.following is not None:
window = window.preceding_following(
bounds.preceding, bounds.following, how="rows"
)
else:
raise ValueError(f"unrecognized window bounds {bounds}")
if window_spec.bounds is not None:
return _add_boundary(window_spec.bounds, window)
return window


Expand Down Expand Up @@ -681,3 +672,33 @@ def _as_groupable(value: ibis_types.Value):
return scalar_op_compiler.to_json_string(value)
else:
return value


def _to_ibis_boundary(
boundary: Optional[int],
) -> Optional[ibis_expr_window.WindowBoundary]:
if boundary is None:
return None
return ibis_expr_window.WindowBoundary(
abs(boundary), preceding=boundary <= 0 # type:ignore
)


def _add_boundary(
bounds: typing.Union[RowsWindowBounds, RangeWindowBounds],
ibis_window: ibis_expr_builders.LegacyWindowBuilder,
) -> ibis_expr_builders.LegacyWindowBuilder:
if isinstance(bounds, RangeWindowBounds):
return ibis_window.range(
start=_to_ibis_boundary(bounds.start),
end=_to_ibis_boundary(bounds.end),
)
if isinstance(bounds, RowsWindowBounds):
if bounds.start is not None or bounds.end is not None:
return ibis_window.rows(
start=_to_ibis_boundary(bounds.start),
end=_to_ibis_boundary(bounds.end),
)
return ibis_window
else:
raise ValueError(f"unrecognized window bounds {bounds}")
33 changes: 15 additions & 18 deletions bigframes/core/compile/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
import dataclasses
import functools
import itertools
from typing import cast, Sequence, Tuple, TYPE_CHECKING
from typing import cast, Optional, Sequence, Tuple, TYPE_CHECKING, Union

import bigframes.core
from bigframes.core import window_spec
import bigframes.core.expression as ex
import bigframes.core.guid as guid
import bigframes.core.nodes as nodes
Expand Down Expand Up @@ -366,23 +367,8 @@ def compile_window(self, node: nodes.WindowOpNode):
indexed_df = df.with_row_index(index_col_name)
if len(window.grouping_keys) == 0: # rolling-only window
# https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.rolling.html
finite = (
window.bounds.preceding is not None
and window.bounds.following is not None
)
offset_n = (
None
if window.bounds.preceding is None
else -window.bounds.preceding
)
# collecting height is a massive kludge
period_n = (
df.collect().height
if not finite
else cast(int, window.bounds.preceding)
+ cast(int, window.bounds.following)
+ 1
)
offset_n = window.bounds.start
period_n = _get_period(window.bounds) or df.collect().height
results = indexed_df.rolling(
index_column=index_col_name,
period=f"{period_n}i",
Expand All @@ -395,3 +381,14 @@ def compile_window(self, node: nodes.WindowOpNode):
# polars is columnar, so this is efficient
# TODO: why can't just add columns?
return pl.concat([df, results], how="horizontal")


def _get_period(
bounds: Union[window_spec.RowsWindowBounds, window_spec.RangeWindowBounds]
) -> Optional[int]:
"""Returns None if the boundary is infinite."""
if bounds.start is None or bounds.end is None:
return None

# collecting height is a massive kludge
return bounds.end - bounds.start + 1
8 changes: 4 additions & 4 deletions bigframes/core/groupby/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,8 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
# To get n size window, need current row and n-1 preceding rows.
window_spec = window_specs.rows(
grouping_keys=tuple(self._by_col_ids),
preceding=window - 1,
following=0,
start=-(window - 1),
end=0,
min_periods=min_periods or window,
)
block = self._block.order_by(
Expand Down Expand Up @@ -742,8 +742,8 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
# To get n size window, need current row and n-1 preceding rows.
window_spec = window_specs.rows(
grouping_keys=tuple(self._by_col_ids),
preceding=window - 1,
following=0,
start=-(window - 1),
end=0,
min_periods=min_periods or window,
)
block = self._block.order_by(
Expand Down
52 changes: 37 additions & 15 deletions bigframes/core/window_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ def unbound(
### Rows-based Windows
def rows(
grouping_keys: Tuple[str, ...] = (),
preceding: Optional[int] = None,
following: Optional[int] = None,
start: Optional[int] = None,
end: Optional[int] = None,
min_periods: int = 0,
ordering: Tuple[orderings.OrderingExpression, ...] = (),
) -> WindowSpec:
Expand All @@ -63,18 +63,23 @@ def rows(
Args:
grouping_keys:
Columns ids of grouping keys
preceding:
number of preceding rows to include. If None, include all preceding rows
start:
The window's starting boundary relative to the current row. For example, "-1" means one row prior
"1" means one row after, and "0" means the current row. If None, the window is unbounded from the start.
following:
number of following rows to include. If None, include all following rows
The window's ending boundary relative to the current row. For example, "-1" means one row prior
"1" means one row after, and "0" means the current row. If None, the window is unbounded until the end.
min_periods (int, default 0):
Minimum number of input rows to generate output.
ordering:
Ordering to apply on top of based dataframe ordering
Returns:
WindowSpec
"""
bounds = RowsWindowBounds(preceding=preceding, following=following)
bounds = RowsWindowBounds(
start=start,
end=end,
)
return WindowSpec(
grouping_keys=tuple(map(ex.deref, grouping_keys)),
bounds=bounds,
Expand All @@ -97,7 +102,7 @@ def cumulative_rows(
Returns:
WindowSpec
"""
bounds = RowsWindowBounds(following=0)
bounds = RowsWindowBounds(end=0)
return WindowSpec(
grouping_keys=tuple(map(ex.deref, grouping_keys)),
bounds=bounds,
Expand All @@ -119,7 +124,7 @@ def inverse_cumulative_rows(
Returns:
WindowSpec
"""
bounds = RowsWindowBounds(preceding=0)
bounds = RowsWindowBounds(start=0)
return WindowSpec(
grouping_keys=tuple(map(ex.deref, grouping_keys)),
bounds=bounds,
Expand All @@ -132,18 +137,35 @@ def inverse_cumulative_rows(

@dataclass(frozen=True)
class RowsWindowBounds:
preceding: Optional[int] = None
following: Optional[int] = None

start: Optional[int] = None
end: Optional[int] = None

# TODO: Expand to datetime offsets
OffsetType = Union[float, int]
def __post_init__(self):
if self.start is None:
return
if self.end is None:
return
if self.start > self.end:
raise ValueError(
f"Invalid window: start({self.start}) is greater than end({self.end})"
)


@dataclass(frozen=True)
class RangeWindowBounds:
preceding: Optional[OffsetType] = None
following: Optional[OffsetType] = None
# TODO(b/388916840) Support range rolling on timeseries with timedeltas.
start: Optional[int] = None
end: Optional[int] = None

def __post_init__(self):
if self.start is None:
return
if self.end is None:
return
if self.start > self.end:
raise ValueError(
f"Invalid window: start({self.start}) is greater than end({self.end})"
)


@dataclass(frozen=True)
Expand Down
6 changes: 3 additions & 3 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2428,12 +2428,12 @@ def replace(

@validations.requires_ordering()
def ffill(self, *, limit: typing.Optional[int] = None) -> DataFrame:
window = windows.rows(preceding=limit, following=0)
window = windows.rows(start=None if limit is None else -limit, end=0)
return self._apply_window_op(agg_ops.LastNonNullOp(), window)

@validations.requires_ordering()
def bfill(self, *, limit: typing.Optional[int] = None) -> DataFrame:
window = windows.rows(preceding=0, following=limit)
window = windows.rows(start=0, end=limit)
return self._apply_window_op(agg_ops.FirstNonNullOp(), window)

def isin(self, values) -> DataFrame:
Expand Down Expand Up @@ -3310,7 +3310,7 @@ def _perform_join_by_index(
def rolling(self, window: int, min_periods=None) -> bigframes.core.window.Window:
# To get n size window, need current row and n-1 preceding rows.
window_def = windows.rows(
preceding=window - 1, following=0, min_periods=min_periods or window
start=-(window - 1), end=0, min_periods=min_periods or window
)
return bigframes.core.window.Window(
self._block, window_def, self._block.value_columns
Expand Down
6 changes: 3 additions & 3 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,15 +544,15 @@ def cumsum(self) -> Series:

@validations.requires_ordering()
def ffill(self, *, limit: typing.Optional[int] = None) -> Series:
window = windows.rows(preceding=limit, following=0)
window = windows.rows(start=None if limit is None else -limit, end=0)
return self._apply_window_op(agg_ops.LastNonNullOp(), window)

pad = ffill
pad.__doc__ = inspect.getdoc(vendored_pandas_series.Series.ffill)

@validations.requires_ordering()
def bfill(self, *, limit: typing.Optional[int] = None) -> Series:
window = windows.rows(preceding=0, following=limit)
window = windows.rows(start=0, end=limit)
return self._apply_window_op(agg_ops.FirstNonNullOp(), window)

@validations.requires_ordering()
Expand Down Expand Up @@ -1441,7 +1441,7 @@ def sort_index(self, *, axis=0, ascending=True, na_position="last") -> Series:
def rolling(self, window: int, min_periods=None) -> bigframes.core.window.Window:
# To get n size window, need current row and n-1 preceding rows.
window_spec = windows.rows(
preceding=window - 1, following=0, min_periods=min_periods or window
start=-(window - 1), end=0, min_periods=min_periods or window
)
return bigframes.core.window.Window(
self._block, window_spec, self._block.value_columns, is_series=True
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/core/test_windowspec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from bigframes.core import window_spec


@pytest.mark.parametrize(("start", "end"), [(-1, -2), (1, -2), (2, 1)])
def test_invalid_rows_window_boundary_raise_error(start, end):
with pytest.raises(ValueError):
window_spec.RowsWindowBounds(start, end)


@pytest.mark.parametrize(("start", "end"), [(-1, -2), (1, -2), (2, 1)])
def test_invalid_range_window_boundary_raise_error(start, end):
with pytest.raises(ValueError):
window_spec.RangeWindowBounds(start, end)