Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions bigframes/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
import pandas as pd

import bigframes
import bigframes.constants
import bigframes.core.expression as ex
import bigframes.core.ordering as order
Expand All @@ -32,7 +33,7 @@


def cut(
x: bigframes.series.Series,
x,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to drop the type hint? Although pandas doesn't have one for this parameter, providing a hint helps users know what to pass in.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I'm keeping x untyped to remain consistent with Series.__init__(), which also doesn't have type hints.

bins: typing.Union[
int,
pd.IntervalIndex,
Expand Down Expand Up @@ -60,9 +61,12 @@ def cut(
f"but found {type(list(labels)[0])}. {constants.FEEDBACK_LINK}"
)

if x.size == 0:
if len(x) == 0:
raise ValueError("Cannot cut empty array.")

if not isinstance(x, bigframes.series.Series):
x = bigframes.series.Series(x)

if isinstance(bins, int):
if bins <= 0:
raise ValueError("`bins` should be a positive integer.")
Expand Down
12 changes: 12 additions & 0 deletions tests/system/small/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,18 @@ def _convert_pandas_category(pd_s: pd.Series):
)


def test_cut_for_array():
    """Check that ``bpd.cut`` on plain Python lists agrees with ``pd.cut``.

    Both the values and the bin edges are passed as ordinary lists rather
    than Series objects. The pandas result is passed through
    ``_convert_pandas_category`` before the two Series are compared.

    Avoid regressions for internal issue 329866195.
    """
    scores = [30, 80, 40, 90, 60, 45, 95, 75, 55, 100, 65, 85]
    bin_edges = [20, 40, 60, 80, 100]

    # Reference result computed by pandas, wrapped in a Series for comparison.
    expected_raw: pd.Series = pd.Series(pd.cut(scores, bin_edges))
    actual = bpd.cut(scores, bin_edges)

    expected = _convert_pandas_category(expected_raw)
    pd.testing.assert_series_equal(actual.to_pandas(), expected)


@pytest.mark.parametrize(
("right", "labels"),
[
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ def test_method_matches_session(method_name: str):
)
def test_cut_raises_with_invalid_labels(bins: int, labels, error_message: str):
mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True)
mock_series.__len__.return_value = 5
with pytest.raises(ValueError, match=error_message):
bigframes.pandas.cut(mock_series, bins, labels=labels)

Expand Down Expand Up @@ -160,6 +161,8 @@ def test_cut_raises_with_unsupported_labels():
)
def test_cut_raises_with_invalid_bins(bins: int, error_message: str):
mock_series = mock.create_autospec(bigframes.pandas.Series, instance=True)
mock_series.__len__.return_value = 5

with pytest.raises(ValueError, match=error_message):
bigframes.pandas.cut(mock_series, bins, labels=False)

Expand Down
6 changes: 3 additions & 3 deletions third_party/bigframes_vendored/pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@

import pandas as pd

from bigframes import constants, series
from bigframes import constants


def cut(
x: series.Series,
x,
bins: typing.Union[
int,
pd.IntervalIndex,
Expand Down Expand Up @@ -113,7 +113,7 @@ def cut(
dtype: struct<left_inclusive: int64, right_exclusive: int64>[pyarrow]

Args:
x (bigframes.pandas.Series):
x (array-like):
The input Series to be binned. Must be 1-dimensional.
bins (int, pd.IntervalIndex, Iterable):
The criteria to bin by.
Expand Down