From f4a06075b4eed6f03f46a4734764e4ff8802fdef Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 30 Nov 2023 01:56:58 +0000 Subject: [PATCH 1/3] fix: fix value_counts column label for normalize=True --- bigframes/core/block_transforms.py | 4 +++- tests/system/small/test_dataframe.py | 6 ++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index ce0fdd219a..df84f70859 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -353,7 +353,9 @@ def value_counts( ) ] ) - return block.select_column(count_id).with_column_labels(["count"]) + return block.select_column(count_id).with_column_labels( + ["proportion" if normalize else "count"] + ) def pct_change(block: blocks.Block, periods: int = 1) -> blocks.Block: diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 9744d3f6e9..ac0b0efb2a 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -3385,6 +3385,8 @@ def test_df_to_orc(scalars_df_index, scalars_pandas_df_index): ], ) def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna): + if pd.__version__.startswith("1."): + pytest.skip("pandas 1.x produces different column labels.") scalars_df, scalars_pandas_df = scalars_dfs bf_result = ( @@ -3396,10 +3398,6 @@ def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna): subset, normalize=normalize, ascending=ascending, dropna=dropna ) - # Older pandas version may not have these values, bigframes tries to emulate 2.0+ - pd_result.name = "count" - pd_result.index.names = bf_result.index.names - pd.testing.assert_series_equal( bf_result, pd_result, check_dtype=False, check_index_type=False ) From 74f63bc8dd4c2e01e62cc1b1bcf736dba9a2194a Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 30 Nov 2023 17:30:05 +0000 Subject: [PATCH 2/3] amend series value_counts test --- tests/system/small/test_series.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index d9fc23fad0..66b3b54bc5 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1940,16 +1940,14 @@ def test_cummax_int(scalars_df_index, scalars_pandas_df_index): def test_value_counts(scalars_dfs): + if pd.__version__.startswith("1."): + pytest.skip("pandas 1.x produces different column labels.") scalars_df, scalars_pandas_df = scalars_dfs col_name = "int64_too" bf_result = scalars_df[col_name].value_counts().to_pandas() pd_result = scalars_pandas_df[col_name].value_counts() - # Older pandas version may not have these values, bigframes tries to emulate 2.0+ - pd_result.name = "count" - pd_result.index.name = col_name - pd.testing.assert_series_equal( bf_result, pd_result, From 5561febfdf563e38d945dd7ccb42527372422cdf Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 1 Dec 2023 03:40:28 +0000 Subject: [PATCH 3/3] fix another value_counts test --- tests/system/small/test_series.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 66b3b54bc5..92a7b6f099 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1955,6 +1955,8 @@ def test_value_counts(scalars_dfs): def test_value_counts_w_cut(scalars_dfs): + if pd.__version__.startswith("1."): + pytest.skip("value_counts results different in pandas 1.x.") scalars_df, scalars_pandas_df = scalars_dfs col_name = "int64_col" @@ -1963,9 +1965,6 @@ def test_value_counts_w_cut(scalars_dfs): bf_result = bf_cut.value_counts().to_pandas() pd_result = pd_cut.value_counts() - # Older pandas version may not have these values, bigframes tries to emulate 2.0+ - pd_result.name = "count" - pd_result.index.name = col_name pd_result.index = pd_result.index.astype(pd.Int64Dtype()) pd.testing.assert_series_equal(