Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions bigframes/core/block_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import pandas as pd

import bigframes.constants as constants
import bigframes.core as core
import bigframes.core.blocks as blocks
import bigframes.core.ordering as ordering
Expand Down Expand Up @@ -576,3 +577,53 @@ def align_columns(
left_final = left_block.select_columns(left_column_ids)
right_final = right_block.select_columns(right_column_ids)
return left_final, right_final


def idxmin(block: blocks.Block) -> blocks.Block:
    """Delegate to ``_idx_extrema`` in "min" mode for the given block."""
    return _idx_extrema(block, min_or_max="min")


def idxmax(block: blocks.Block) -> blocks.Block:
    """Delegate to ``_idx_extrema`` in "max" mode for the given block."""
    return _idx_extrema(block, min_or_max="max")


def _idx_extrema(
    block: blocks.Block, min_or_max: typing.Literal["min", "max"]
) -> blocks.Block:
    """Find, for each value column, the index value at that column's extreme.

    For every value column, a window op takes the first index value under an
    ordering of (value column in the chosen direction, then the index
    columns). The per-column results are relabelled with the original column
    labels and stacked into a single column.

    Args:
        block: Input block. Must have exactly one index column.
        min_or_max: ``"min"`` orders values ascending; any other value
            (``"max"``) orders them descending.

    Returns:
        The block produced by ``aggregate_all_and_stack``, with its column
        labelled by the original index name.

    Raises:
        NotImplementedError: If ``block`` has more than one index column.
    """
    if len(block.index_columns) != 1:
        # TODO: Need support for tuple dtype
        # Name the operation actually requested rather than always "idxmin".
        raise NotImplementedError(
            f"idx{min_or_max} not support for multi-index. {constants.FEEDBACK_LINK}"
        )

    original_block = block

    # These depend only on the arguments, so build them once, not per column.
    direction = (
        ordering.OrderingDirection.ASC
        if min_or_max == "min"
        else ordering.OrderingDirection.DESC
    )
    idx_col = original_block.index_columns[0]
    # Index columns follow the value column in the ordering (tie-break).
    index_refs = [
        ordering.OrderingColumnReference(index_col)
        for index_col in original_block.index_columns
    ]

    result_cols = []
    for value_col in original_block.value_columns:
        # Have to find the extreme for each value column separately.
        window_spec = core.WindowSpec(
            ordering=[
                ordering.OrderingColumnReference(value_col, direction),
                *index_refs,
            ]
        )
        block, result_col = block.apply_window_op(
            idx_col, agg_ops.first_op, window_spec
        )
        result_cols.append(result_col)

    block = block.select_columns(result_cols).with_column_labels(
        original_block.column_labels
    )
    # Stack the entire column axis to produce single-column result
    # Assumption: uniform dtype for stackability
    # NOTE(review): the result column is labelled with the index name;
    # confirm this is the intended name for the resulting series.
    return block.aggregate_all_and_stack(
        agg_ops.AnyValueOp(), dtype=block.dtypes[0]
    ).with_column_labels([original_block.index.name])
6 changes: 6 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1642,6 +1642,12 @@ def agg(

aggregate = agg

def idxmin(self) -> bigframes.series.Series:
    """Wrap the result of ``block_ops.idxmin`` on this frame's block in a Series."""
    result_block = block_ops.idxmin(self._block)
    return bigframes.series.Series(result_block)

def idxmax(self) -> bigframes.series.Series:
    """Wrap the result of ``block_ops.idxmax`` on this frame's block in a Series."""
    result_block = block_ops.idxmax(self._block)
    return bigframes.series.Series(result_block)

def describe(self) -> DataFrame:
df_numeric = self._drop_non_numeric(keep_bool=False)
if len(df_numeric.columns) == 0:
Expand Down
28 changes: 28 additions & 0 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,6 +887,34 @@ def argmin(self) -> int:
scalars.Scalar, Series(block.select_column(row_nums)).iloc[0]
)

def idxmax(self) -> blocks.Label:
    """Return the index label of the first row after sorting values descending.

    Rows are ordered by the value column (descending) and then by every
    index column; the first row is kept and its index entry is returned
    after materializing the index to pandas.
    """
    by_value_desc = OrderingColumnReference(
        self._value_column, direction=OrderingDirection.DESC
    )
    by_index = [
        OrderingColumnReference(index_col)
        for index_col in self._block.index_columns
    ]
    ordered = self._block.order_by([by_value_desc, *by_index])
    first_row = ordered.slice(0, 1)
    return indexes.Index._from_block(first_row).to_pandas()[0]

def idxmin(self) -> blocks.Label:
    """Return the index label of the first row after sorting by value.

    Rows are ordered by the value column (default direction) and then by
    every index column; the first row is kept and its index entry is
    returned after materializing the index to pandas.
    """
    by_value = OrderingColumnReference(self._value_column)
    by_index = [
        OrderingColumnReference(index_col)
        for index_col in self._block.index_columns
    ]
    ordered = self._block.order_by([by_value, *by_index])
    first_row = ordered.slice(0, 1)
    return indexes.Index._from_block(first_row).to_pandas()[0]

@property
def is_monotonic_increasing(self) -> bool:
return typing.cast(
Expand Down
28 changes: 28 additions & 0 deletions tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1292,6 +1292,34 @@ def test_df_update(overwrite, filter_func):
pd.testing.assert_frame_equal(bf_df1.to_pandas(), pd_df1)


def test_df_idxmin():
    """DataFrame.idxmin should match pandas on a small frame containing a null."""
    data = {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}
    pd_df = pd.DataFrame(data, index=["x", "y", "z"])
    bf_df = dataframe.DataFrame(pd_df)

    expected = pd_df.idxmin()
    actual = bf_df.idxmin().to_pandas()

    # Index type and dtype are not compared, only labels and values.
    pd.testing.assert_series_equal(
        actual, expected, check_index_type=False, check_dtype=False
    )


def test_df_idxmax():
    """DataFrame.idxmax should match pandas on a small frame containing a null."""
    data = {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}
    pd_df = pd.DataFrame(data, index=["x", "y", "z"])
    bf_df = dataframe.DataFrame(pd_df)

    expected = pd_df.idxmax()
    actual = bf_df.idxmax().to_pandas()

    # Index type and dtype are not compared, only labels and values.
    pd.testing.assert_series_equal(
        actual, expected, check_index_type=False, check_dtype=False
    )


@pytest.mark.parametrize(
("join", "axis"),
[
Expand Down
11 changes: 11 additions & 0 deletions tests/system/small/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ def test_reset_multi_index(scalars_df_index, scalars_pandas_df_index):
pandas.testing.assert_frame_equal(bf_result, pd_result)


def test_series_multi_index_idxmin(scalars_df_index, scalars_pandas_df_index):
    """Series.idxmin on a two-column index should agree with pandas."""
    index_cols = ("bool_col", "int64_too")
    bf_series = scalars_df_index.set_index(list(index_cols))["float64_col"]
    pd_series = scalars_pandas_df_index.set_index(list(index_cols))["float64_col"]

    actual = bf_series.idxmin()
    expected = pd_series.idxmin()

    assert actual == expected


def test_binop_series_series_matching_multi_indices(
scalars_df_index, scalars_pandas_df_index
):
Expand Down
12 changes: 12 additions & 0 deletions tests/system/small/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2468,6 +2468,18 @@ def test_argmax(scalars_df_index, scalars_pandas_df_index):
assert bf_result == pd_result


def test_series_idxmin(scalars_df_index, scalars_pandas_df_index):
    """Series.idxmin on ``string_col`` should agree with pandas."""
    actual = scalars_df_index.string_col.idxmin()
    expected = scalars_pandas_df_index.string_col.idxmin()
    assert actual == expected


def test_series_idxmax(scalars_df_index, scalars_pandas_df_index):
    """Series.idxmax on ``int64_too`` should agree with pandas."""
    actual = scalars_df_index.int64_too.idxmax()
    expected = scalars_pandas_df_index.int64_too.idxmax()
    assert actual == expected


def test_getattr_attribute_error_when_pandas_has(scalars_df_index):
# asof is implemented in pandas but not in bigframes
with pytest.raises(AttributeError):
Expand Down
22 changes: 22 additions & 0 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1805,6 +1805,28 @@ def nsmallest(self, n: int, columns, keep: str = "first"):
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def idxmin(self):
    """
    Return index of first occurrence of minimum in each column.

    NA/null values are excluded. This method takes no ``axis`` argument.

    Returns:
        Series: Indexes of minima, one entry per column.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def idxmax(self):
    """
    Return index of first occurrence of maximum in each column.

    NA/null values are excluded. This method takes no ``axis`` argument.

    Returns:
        Series: Indexes of maxima, one entry per column.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def nunique(self):
"""
Count number of distinct elements in specified axis.
Expand Down
40 changes: 24 additions & 16 deletions third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import numpy as np
from pandas._libs import lib
from pandas._typing import Axis, FilePath, NaPosition, WriteBuffer
import pandas.io.formats.format as fmt

from bigframes import constants
from third_party.bigframes_vendored.pandas.core.generic import NDFrame
Expand Down Expand Up @@ -151,21 +150,6 @@ def to_string(
str or None: String representation of Series if ``buf=None``,
otherwise None.
"""
formatter = fmt.SeriesFormatter(
self,
name=name,
length=length,
header=header,
index=index,
dtype=dtype,
na_rep=na_rep,
float_format=float_format,
min_rows=min_rows,
max_rows=max_rows,
)
result = formatter.to_string()

# catch contract violations
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def to_markdown(
Expand Down Expand Up @@ -475,6 +459,30 @@ def duplicated(self, keep="first") -> Series:
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def idxmin(self) -> Hashable:
    """
    Return the row label of the minimum value.

    If multiple values equal the minimum, the first row label with that
    value is returned.

    Returns:
        Hashable: Label of the minimum value.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def idxmax(self) -> Hashable:
    """
    Return the row label of the maximum value.

    If multiple values equal the maximum, the first row label with that
    value is returned.

    Returns:
        Hashable: Label of the maximum value.
    """
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def round(self, decimals: int = 0) -> Series:
"""
Round each value in a Series to the given number of decimals.
Expand Down