From 559e3e5ab7d30e9745a6d5bfe872e4b4accf9aaf Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 23 Nov 2023 00:03:44 +0000 Subject: [PATCH 1/3] docs: add examples for dataframe.mean, dataframe.median, dataframe.var and dataframe.skew --- .../bigframes_vendored/pandas/core/frame.py | 105 +++++++++++++++++- 1 file changed, 103 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index b771be3041..5cd4da5be0 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2771,6 +2771,33 @@ def sum(self, axis=0, *, numeric_only: bool = False): def mean(self, axis=0, *, numeric_only: bool = False): """Return the mean of the values over the requested axis. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) + >>> df + A B + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + Calculating the mean of each column (the default behavior without an explicit axis parameter). + + >>> df.mean() + A 2.0 + B 3.0 + dtype: Float64 + + Calculating the mean of each row. + + >>> df.mean(axis=1) + 0 1.5 + 1 3.5 + dtype: Float64 + Args: axis ({index (0), columns (1)}): Axis for the function to be applied on. @@ -2784,7 +2811,27 @@ def mean(self, axis=0, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def median(self, *, numeric_only: bool = False, exact: bool = False): - """Return the median of the values over the requested axis. + """Return the median of the values over colunm. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) + >>> df + A B + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + Finding the median value of each column. 
+ + >>> df.median() + A 1.0 + B 2.0 + dtype: Float64 Args: numeric_only (bool. default False): @@ -2803,6 +2850,34 @@ def var(self, axis=0, *, numeric_only: bool = False): Normalized by N-1 by default. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({"A": [1, 3], "B": [2, 4]}) + >>> df + A B + 0 1 2 + 1 3 4 + + [2 rows x 2 columns] + + Calculating the variance of each column (the default behavior without an explicit axis parameter). + + >>> df.var() + A 2.0 + B 2.0 + dtype: float64 + + Calculating the variance of each row. + + >>> df.var(axis=1) + 0 0.5 + 1 0.5 + dtype: float64 + + Args: axis ({index (0), columns (1)}): Axis for the function to be applied on. @@ -2816,10 +2891,36 @@ def var(self, axis=0, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def skew(self, *, numeric_only: bool = False): - """Return unbiased skew over requested axis. + """Return unbiased skew over column. Normalized by N-1. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + + >>> df = bpd.DataFrame({'A': [1, 2, 3, 4, 5], + ... 'B': [5, 4, 3, 2, 1], + ... 'C': [2, 2, 3, 2, 2]}) + >>> df + A B C + 0 1 5 2 + 1 2 4 2 + 2 3 3 3 + 3 4 2 2 + 4 5 1 2 + + [5 rows x 3 columns] + + Calculating the skewness of each column. + + >>> df.skew() + A 0.0 + B 0.0 + C 2.236068 + dtype: Float64 + Args: numeric_only (bool, default False): Include only float, int, boolean columns. 
From 0639800e89d14d1563a00af303fcbae2819b69f3 Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 23 Nov 2023 00:19:11 +0000 Subject: [PATCH 2/3] column to columns --- third_party/bigframes_vendored/pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 5cd4da5be0..e3d4236ad9 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2811,7 +2811,7 @@ def mean(self, axis=0, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def median(self, *, numeric_only: bool = False, exact: bool = False): - """Return the median of the values over colunm. + """Return the median of the values over columns. **Examples:** @@ -2891,7 +2891,7 @@ def var(self, axis=0, *, numeric_only: bool = False): raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def skew(self, *, numeric_only: bool = False): - """Return unbiased skew over column. + """Return unbiased skew over columns. Normalized by N-1. From 9b2bdb5b5bdd92fb4754a89194f2e3716114ce0b Mon Sep 17 00:00:00 2001 From: Huan Chen Date: Thu, 23 Nov 2023 01:00:17 +0000 Subject: [PATCH 3/3] update var example --- third_party/bigframes_vendored/pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index e3d4236ad9..3d33a1723f 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -2868,14 +2868,14 @@ def var(self, axis=0, *, numeric_only: bool = False): >>> df.var() A 2.0 B 2.0 - dtype: float64 + dtype: Float64 Calculating the variance of each row. >>> df.var(axis=1) 0 0.5 1 0.5 - dtype: float64 + dtype: Float64 Args: