From 36fc6281686ea2dce3c184079163fd65d189038d Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Fri, 1 Dec 2023 21:40:31 +0000
Subject: [PATCH 01/10] docs: add example for dataframe.melt, dataframe.pivot,
 dataframe.stack, dataframe.unstack

---
 .../bigframes_vendored/pandas/core/frame.py   | 167 ++++++++++++++++--
 1 file changed, 155 insertions(+), 12 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 08fe8e2de0..ecab3d2ba4 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3414,18 +3414,75 @@ def melt(self, id_vars, value_vars, var_name, value_name):
         the row axis, leaving just two non-identifier columns, 'variable' and
         'value'.
 
-        Parameters
-        ----------
-        id_vars (tuple, list, or ndarray, optional):
-            Column(s) to use as identifier variables.
-        value_vars (tuple, list, or ndarray, optional):
-            Column(s) to unpivot. If not specified, uses all columns that
-            are not set as `id_vars`.
-        var_name (scalar):
-            Name to use for the 'variable' column. If None it uses
-            ``frame.columns.name`` or 'variable'.
-        value_name (scalar, default 'value'):
-            Name to use for the 'value' column.
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [1, None, 3, 4, 5],
+            ...                     "B": [1, 2, 3, 4, 5],
+            ...                     "C": [None, 3.5, None, 4.5, 5.0]})
+            >>> df
+                    A	    B	   C
+            0	  1.0	    1	<NA>
+            1	 <NA>	    2	 3.5
+            2     3.0	    3	<NA>
+            3	  4.0	    4	 4.5
+            4	  5.0	    5	 5.0
+            <BLANKLINE>
+            [5 rows x 3 columns]
+
+        Using `melt` without optional arguments:
+
+            >>> df.melt()
+                variable    value
+            0	       A      1.0
+            1	       A     <NA>
+            2	       A      3.0
+            3	       A      4.0
+            4	       A      5.0
+            5	       B      1.0
+            6	       B      2.0
+            7	       B      3.0
+            8	       B      4.0
+            9	       B      5.0
+            10	       C     <NA>
+            11	       C      3.5
+            12	       C     <NA>
+            13	       C      4.5
+            14	       C      5.0
+            <BLANKLINE>
+            [15 rows x 2 columns]
+
+        Using `melt` with `id_vars` and `value_vars`:
+
+            >>> df.melt(id_vars='A', value_vars=['B', 'C'])
+                   A	variable	value
+            0	 1.0	       B	    1
+            1	<NA>	       B	    2
+            2	 3.0	       B	    3
+            3	 4.0	       B	    4
+            4	 5.0	       B	    5
+            5	 1.0	       C	 <NA>
+            6	 <NA>	       C	    3
+            7	 3.0	       C	 <NA>
+            8	 4.0	       C	    4
+            9	 5.0	       C	    5
+            <BLANKLINE>
+            [10 rows x 3 columns]
+
+
+        Args:
+            id_vars (tuple, list, or ndarray, optional):
+                Column(s) to use as identifier variables.
+            value_vars (tuple, list, or ndarray, optional):
+                Column(s) to unpivot. If not specified, uses all columns that
+                are not set as `id_vars`.
+            var_name (scalar):
+                Name to use for the 'variable' column. If None it uses
+                ``frame.columns.name`` or 'variable'.
+            value_name (scalar, default 'value'):
+                Name to use for the 'value' column.
 
         Returns:
             DataFrame: Unpivoted DataFrame.
@@ -3647,6 +3704,52 @@ def pivot(self, *, columns, index=None, values=None):
             do not together uniquely identify input rows, the output will be
             silently non-deterministic.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({
+            ...     "foo": ["one", "one", "one", "two", "two"],
+            ...     "bar": ["A", "B", "C", "A", "B"],
+            ...     "baz": [1, 2, 3, 4, 5],
+            ...     "zoo": ['x', 'y', 'z', 'q', 'w']
+            ... })
+
+            >>> df
+                foo	bar	baz	zoo
+            0	one	  A	  1	  x
+            1	one	  B	  2	  y
+            2	one	  C	  3	  z
+            3	two	  A	  4	  q
+            4	two	  B	  5	  w
+            <BLANKLINE>
+            [5 rows x 4 columns]
+
+        Using `pivot` without optional arguments:
+
+            >>> df.pivot(columns='foo')
+                    bar	            baz	            zoo
+            foo	 one	 two	 one	 two	 one	 two
+            0	   A	<NA>	   1	<NA>	   x	<NA>
+            1	   B	<NA>	   2	<NA>	   y	<NA>
+            2	   C	<NA>	   3	<NA>	   z	<NA>
+            3	<NA>	   A	<NA>	   4	<NA>	   q
+            4	<NA>	   B	<NA>	   5	<NA>	   w
+            <BLANKLINE>
+            [5 rows x 6 columns]
+
+        Using `pivot` with `index` and `values`:
+
+            >>> df.pivot(columns='foo', index='bar', values='baz')
+            foo	    one     two
+            bar
+            A	    1         4
+            B	    2	      5
+            C	    3	   <NA>
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Args:
             columns (str or object or a list of str):
                 Column to use to make new frame's columns.
@@ -3682,6 +3785,26 @@ def stack(self):
             BigQuery DataFrames does not support stack operations that would
             combine columns of different dtypes.
 
+        **Example:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'A': [1, 3], 'B': [2, 4]}, index=['foo', 'bar'])
+            >>> df
+                    A	B
+            foo	    1	2
+            bar	    3	4
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+            >>> df.stack()
+            foo  A    1
+                 B    2
+            bar  A    3
+                 B    4
+            dtype: Int64
+
         Returns:
             DataFrame or Series: Stacked dataframe or series.
         """
@@ -3697,6 +3820,26 @@ def unstack(self):
         If the index is not a MultiIndex, the output will be a Series
         (the analogue of stack when the columns are not a MultiIndex).
 
+        **Example:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'A': [1, 3], 'B': [2, 4]}, index=['foo', 'bar'])
+            >>> df
+                    A	B
+            foo	    1	2
+            bar	    3	4
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+            >>> df.unstack()
+            A   foo    1
+                bar    3
+            B   foo    2
+                bar    4
+            dtype: Int64
+
         Returns:
             DataFrame or Series
         """

From 6b7aeec88c571ffd89f0fada462c184709b89fa3 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Fri, 1 Dec 2023 21:43:19 +0000
Subject: [PATCH 02/10] remove empty line

---
 third_party/bigframes_vendored/pandas/core/frame.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index ecab3d2ba4..9e10dab97a 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3471,7 +3471,6 @@ def melt(self, id_vars, value_vars, var_name, value_name):
             <BLANKLINE>
             [10 rows x 3 columns]
 
-
         Args:
             id_vars (tuple, list, or ndarray, optional):
                 Column(s) to use as identifier variables.

From 58ed6645a5131437971a6da4d870518519af123f Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Fri, 1 Dec 2023 22:05:58 +0000
Subject: [PATCH 03/10] docstring fix

---
 .../bigframes_vendored/pandas/core/frame.py        | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 9e10dab97a..5382e16df3 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3766,7 +3766,7 @@ def pivot(self, *, columns, index=None, values=None):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def stack(self):
+    def stack(self, level=-1):
         """
         Stack the prescribed level(s) from columns to index.
 
@@ -3804,12 +3804,16 @@ def stack(self):
                  B    4
             dtype: Int64
 
+        Args:
+            level (int, str, or list of these, default -1 (last level)):
+                Level(s) to stack from the column axis onto the index axis.
+
         Returns:
             DataFrame or Series: Stacked dataframe or series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def unstack(self):
+    def unstack(self, level=-1):
         """
         Pivot a level of the (necessarily hierarchical) index labels.
 
@@ -3839,8 +3843,12 @@ def unstack(self):
                 bar    4
             dtype: Int64
 
+        Args:
+            level (int, str, or list of these, default -1 (last level)):
+                Level(s) of index to unstack, can pass level name.
+
         Returns:
-            DataFrame or Series
+            DataFrame or Series: Unstacked dataframe or series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 

From b0464203ad4d2f70b51dcde213d4b613621ece43 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Fri, 1 Dec 2023 22:34:24 +0000
Subject: [PATCH 04/10] spacing update

---
 third_party/bigframes_vendored/pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 5382e16df3..b95fc24c15 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3464,7 +3464,7 @@ def melt(self, id_vars, value_vars, var_name, value_name):
             3	 4.0	       B	    4
             4	 5.0	       B	    5
             5	 1.0	       C	 <NA>
-            6	 <NA>	       C	    3
+            6	<NA>	       C	    3
             7	 3.0	       C	 <NA>
             8	 4.0	       C	    4
             9	 5.0	       C	    5

From 16653a6ccfb5337e10779caee12e2f6b5a92821c Mon Sep 17 00:00:00 2001
From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com>
Date: Mon, 4 Dec 2023 16:18:16 -0800
Subject: [PATCH 05/10] docs: correct the params rendering for `ml.remote` and
 `ml.ensemble` modules (#248)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)
  - `ensemble.RandomForestClassifier`: https://screenshot.googleplex.com/4Q88xgdm5hkaYXu
  - `ensemble.RandomForestRegressor`: https://screenshot.googleplex.com/3CU6pJBjYHQvnDo
  - `remote.VertexAIModel`: https://screenshot.googleplex.com/8SL2max6GfPMwFe

Fixes internal issue 314150462 🦕
---
 bigframes/ml/remote.py                        |  8 +--
 docs/templates/toc.yml                        | 12 ++--
 .../sklearn/ensemble/_forest.py               | 72 +++++++++----------
 3 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py
index d4c34bbd0d..8da073802d 100644
--- a/bigframes/ml/remote.py
+++ b/bigframes/ml/remote.py
@@ -47,10 +47,10 @@ class VertexAIModel(base.BaseEstimator):
     Args:
         endpoint (str):
             Vertex AI https endpoint.
-        input ({column_name: column_type}):
-            Input schema. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
-        output ({column_name: column_type}):
-            Output label schema. Supported the same types as the input.
+        input (Mapping):
+            Input schema: `{column_name: column_type}`. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
+        output (Mapping):
+            Output label schema: `{column_name: column_type}`. Supports the same types as the input.
         session (bigframes.Session or None):
             BQ session to create the model. If None, use the global default session.
         connection_name (str or None):
diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml
index 58ac1c0efe..b680a5fc1a 100644
--- a/docs/templates/toc.yml
+++ b/docs/templates/toc.yml
@@ -108,12 +108,6 @@
       - name: PaLM2TextEmbeddingGenerator
         uid: bigframes.ml.llm.PaLM2TextEmbeddingGenerator
       name: llm
-    - items:
-      - name: Overview
-        uid: bigframes.ml.remote
-      - name: VertexAIModel
-        uid: bigframes.ml.remote.VertexAIModel
-      name: remote
     - items:
       - name: metrics
         uid: bigframes.ml.metrics
@@ -144,6 +138,12 @@
       - name: OneHotEncoder
         uid: bigframes.ml.preprocessing.OneHotEncoder
       name: preprocessing
+    - items:
+      - name: Overview
+        uid: bigframes.ml.remote
+      - name: VertexAIModel
+        uid: bigframes.ml.remote.VertexAIModel
+      name: remote
     name: bigframes.ml
   name: BigQuery DataFrames
   status: beta
diff --git a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
index 6be41bf9aa..63c62274fd 100644
--- a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
+++ b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
@@ -47,16 +47,16 @@ def fit(self, X, y):
         """Build a forest of trees from the training set (X, y).
 
         Args:
-            X:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 Series or DataFrame of shape (n_samples, n_features). Training data.
 
-            y:
+            y (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 Series or DataFrame of shape (n_samples,) or (n_samples, n_targets).
                 Target values. Will be cast to X's dtype if necessary.
 
 
         Returns:
-            Fitted Estimator.
+            ForestModel: Fitted Estimator.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -73,12 +73,12 @@ def predict(self, X):
         mean predicted regression targets of the trees in the forest.
 
         Args:
-            X:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 Series or DataFrame of shape (n_samples, n_features). The data matrix for
                 which we want to get the predictions.
 
         Returns:
-            The predicted values.
+            bigframes.dataframe.DataFrame: The predicted values.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -91,38 +91,38 @@ class RandomForestRegressor(ForestRegressor):
     to improve the predictive accuracy and control over-fitting.
 
     Args:
-        num_parallel_tree: Optional[int]
+        num_parallel_tree (Optional[int]):
             Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2.
-        tree_method: Optional[str]
+        tree_method (Optional[str]):
             Specify which tree method to use. Default to "auto". If this parameter is set to
             default, XGBoost will choose the most conservative option available. Possible values: ""exact", "approx",
             "hist".
-        min_child_weight : Optional[float]
+        min_child_weight (Optional[float]):
             Minimum sum of instance weight(hessian) needed in a child. Default to 1.
-        colsample_bytree : Optional[float]
+        colsample_bytree (Optional[float]):
             Subsample ratio of columns when constructing each tree. Default to 1.0. The value should be between 0 and 1.
-        colsample_bylevel : Optional[float]
+        colsample_bylevel (Optional[float]):
             Subsample ratio of columns for each level. Default to 1.0. The value should be between 0 and 1.
-        colsample_bynode : Optional[float]
+        colsample_bynode (Optional[float]):
             Subsample ratio of columns for each split. Default to 0.8. The value should be between 0 and 1.
-        gamma : Optional[float]
+        gamma (Optional[float]):
             (min_split_loss) Minimum loss reduction required to make a further partition on a
             leaf node of the tree. Default to 0.0.
-        max_depth :  Optional[int]
+        max_depth (Optional[int]):
             Maximum tree depth for base learners. Default to 15. The value should be greater than 0 and less than 1.
-        subsample : Optional[float]
+        subsample (Optional[float]):
             Subsample ratio of the training instance. Default to 0.8. The value should be greater than 0 and less than 1.
-        reg_alpha : Optional[float]
+        reg_alpha (Optional[float]):
             L1 regularization term on weights (xgb's alpha). Default to 0.0.
-        reg_lambda : Optional[float]
+        reg_lambda (Optional[float]):
             L2 regularization term on weights (xgb's lambda). Default to 1.0.
-        early_stop: Optional[bool]
+        early_stop (Optional[bool]):
             Whether training should stop after the first iteration. Default to True.
-        min_rel_progress: Optional[float]
+        min_rel_progress (Optional[float]):
             Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01.
-        enable_global_explain: Optional[bool]
+        enable_global_explain (Optional[bool]):
             Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False.
-        xgboost_version: Optional[str]
+        xgboost_version (Optional[str]):
             Specifies the Xgboost version for model training.  Default to "0.9". Possible values: "0.9", "1.1".
     """
 
@@ -144,7 +144,7 @@ def predict(self, X):
                 which we want to get the predictions.
 
         Returns:
-            The predicted values.
+            bigframes.dataframe.DataFrame: The predicted values.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -158,37 +158,37 @@ class RandomForestClassifier(ForestClassifier):
     improve the predictive accuracy and control over-fitting.
 
     Args:
-        num_parallel_tree: Optional[int]
+        num_parallel_tree (Optional[int]):
             Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2.
-        tree_method: Optional[str]
+        tree_method (Optional[str]):
             Specify which tree method to use. Default to "auto". If this parameter is set to
             default, XGBoost will choose the most conservative option available. Possible values: ""exact", "approx",
             "hist".
-        min_child_weight : Optional[float]
+        min_child_weight (Optional[float]):
             Minimum sum of instance weight(hessian) needed in a child. Default to 1.
-        colsample_bytree : Optional[float]
+        colsample_bytree (Optional[float]):
             Subsample ratio of columns when constructing each tree. Default to 1.0. The value should be between 0 and 1.
-        colsample_bylevel : Optional[float]
+        colsample_bylevel (Optional[float]):
             Subsample ratio of columns for each level. Default to 1.0. The value should be between 0 and 1.
-        colsample_bynode : Optional[float]
+        colsample_bynode (Optional[float]):
             Subsample ratio of columns for each split. Default to 0.8. The value should be between 0 and 1.
-        gamma : Optional[float]
+        gamma (Optional[float]):
             (min_split_loss) Minimum loss reduction required to make a further partition on a
             leaf node of the tree. Default to 0.0.
-        max_depth :  Optional[int]
+        max_depth (Optional[int]):
             Maximum tree depth for base learners. Default to 15. The value should be greater than 0 and less than 1.
-        subsample : Optional[float]
+        subsample (Optional[float]):
             Subsample ratio of the training instance. Default to 0.8. The value should be greater than 0 and less than 1.
-        reg_alpha : Optional[float]
+        reg_alpha (Optional[float]):
             L1 regularization term on weights (xgb's alpha). Default to 0.0.
-        reg_lambda : Optional[float]
+        reg_lambda (Optional[float]):
             L2 regularization term on weights (xgb's lambda). Default to 1.0.
-        early_stop: Optional[bool]
+        early_stop (Optional[bool]):
             Whether training should stop after the first iteration. Default to True.
-        min_rel_progress: Optional[float]
+        min_rel_progress (Optional[float]):
             Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01.
-        enable_global_explain: Optional[bool]
+        enable_global_explain (Optional[bool]):
             Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False.
-        xgboost_version: Optional[str]
+        xgboost_version (Optional[str]):
             Specifies the Xgboost version for model training.  Default to "0.9". Possible values: "0.9", "1.1".ß
     """

From de5138632cc1378d051a95111424649a67a5e19a Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:15:14 -0800
Subject: [PATCH 06/10] =?UTF-8?q?docs:=20add=20examples=20for=20dataframe.?=
 =?UTF-8?q?nunique,=20dataframe.diff,=20dataframe.a=E2=80=A6=20(#251)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs: add examples for dataframe.nunique, dataframe.diff, dataframe.agg, dataframe.describe

* update spacing

* update ordering
---
 .../bigframes_vendored/pandas/core/frame.py   | 114 +++++++++++++++++-
 1 file changed, 112 insertions(+), 2 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index b95fc24c15..bd2f41abd1 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3490,7 +3490,26 @@ def melt(self, id_vars, value_vars, var_name, value_name):
 
     def nunique(self):
         """
-        Count number of distinct elements in specified axis.
+        Count number of distinct elements in each column.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 2]})
+            >>> df
+                A	B
+            0	3	1
+            1	1	2
+            2	2	2
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.nunique()
+            A    3.0
+            B    2.0
+            dtype: Float64
 
         Returns:
             bigframes.series.Series: Series with number of distinct elements.
@@ -3634,6 +3653,40 @@ def diff(
         Calculates the difference of a DataFrame element compared with another
         element in the DataFrame (default is element in previous row).
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+                A	B
+            0	3	1
+            1	1	2
+            2	2	3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+        Calculating difference with default periods=1:
+
+            >>> df.diff()
+                   A	   B
+            0	<NA>	<NA>
+            1	  -2	   1
+            2	   1	   1
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+        Calculating difference with periods=-1:
+
+            >>> df.diff(periods=-1)
+                   A	   B
+            0	   2	  -1
+            1	  -1	  -1
+            2	<NA>	<NA>
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Args:
             periods (int, default 1):
                 Periods to shift for calculating difference, accepts negative
@@ -3646,7 +3699,37 @@ def diff(
 
     def agg(self, func):
         """
-        Aggregate using one or more operations over the specified axis.
+        Aggregate using one or more operations over columns.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+                A	B
+            0	3	1
+            1	1	2
+            2	2	3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+        Using a single function:
+
+            >>> df.agg('sum')
+            A    6.0
+            B    6.0
+            dtype: Float64
+
+        Using a list of functions:
+
+            >>> df.agg(['sum', 'mean'])
+                      A	  B
+            sum	    6.0	6.0
+            mean	2.0	2.0
+            <BLANKLINE>
+            [2 rows x 2 columns]
 
         Args:
             func (function):
@@ -3679,6 +3762,33 @@ def describe(self):
             upper percentile is ``75``. The ``50`` percentile is the
             same as the median.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [0, 2, 8]})
+            >>> df
+                A	B
+            0	3	0
+            1	1	2
+            2	2	8
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.describe()
+                          A	          B
+            count       3.0	        3.0
+            mean        2.0	   3.333333
+            std	        1.0	   4.163332
+            min	        1.0	        0.0
+            25%	        1.0	        0.0
+            50%	        2.0	        2.0
+            75%	        3.0	        8.0
+            max	        3.0	        8.0
+            <BLANKLINE>
+            [8 rows x 2 columns]
+
         Returns:
             bigframes.dataframe.DataFrame: Summary statistics of the Series or Dataframe provided.
         """

From 2e1091086cf75e83a1753550c45d91799cd848f6 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Tue, 5 Dec 2023 02:14:14 +0000
Subject: [PATCH 07/10] docs: Fix return annotation in API docstrings (#253)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes internal issue 314367409 🦕
---
 .../bigframes_vendored/pandas/core/frame.py   | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index bd2f41abd1..3b622221b2 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -226,7 +226,7 @@ def from_dict(
                 if used with ``orient='columns'`` or ``orient='tight'``.
 
         Returns:
-            DataFrame
+            DataFrame: DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -267,7 +267,7 @@ def from_records(
                 Number of rows to read if data is an iterator.
 
         Returns:
-            DataFrame
+            DataFrame: DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -717,7 +717,7 @@ def to_markdown(
                 These parameters will be passed to `tabulate <https://pypi.org/project/tabulate>`_.
 
         Returns:
-            DataFrame in Markdown-friendly format.
+            DataFrame: DataFrame in Markdown-friendly format.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1272,7 +1272,7 @@ def sort_values(
              if `first`; `last` puts NaNs at the end.
 
         Returns:
-            DataFrame with sorted values.
+            DataFrame: DataFrame with sorted values.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1282,7 +1282,7 @@ def sort_index(
         """Sort object by labels (along an axis).
 
         Returns:
-            The original DataFrame sorted by the labels.
+            DataFrame: The original DataFrame sorted by the labels.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1330,7 +1330,7 @@ def eq(self, other, axis: str | int = "columns") -> DataFrame:
                 (1 or 'columns').
 
         Returns:
-            Result of the comparison.
+            DataFrame: Result of the comparison.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1861,7 +1861,7 @@ def rtruediv(self, other, axis: str | int = "columns") -> DataFrame:
                 (1 or 'columns'). For Series input, axis to match Series index on.
 
         Returns:
-            DataFrame result of the arithmetic operation.
+            DataFrame: DataFrame result of the arithmetic operation.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -2796,7 +2796,7 @@ def any(self, *, axis=0, bool_only: bool = False):
                 Include only boolean columns.
 
         Returns:
-            Series
+            bigframes.series.Series: Series indicating if any element is True per column.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -2843,7 +2843,7 @@ def all(self, axis=0, *, bool_only: bool = False):
                 Include only boolean columns.
 
         Returns:
-            bigframes.series.Series: Series if all elements are True.
+            bigframes.series.Series: Series indicating if all elements are True per column.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3184,7 +3184,7 @@ def skew(self, *, numeric_only: bool = False):
                 Include only float, int, boolean columns.
 
         Returns:
-            Series
+            Series: Series containing the skew of each column.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3225,7 +3225,7 @@ def kurt(self, *, numeric_only: bool = False):
                 Include only float, int, boolean columns.
 
         Returns:
-            Series
+            Series: Series containing the kurtosis of each column.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3872,7 +3872,7 @@ def pivot(self, *, columns, index=None, values=None):
                 have hierarchically indexed columns.
 
         Returns:
-            Returns reshaped DataFrame.
+            DataFrame: Reshaped DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3958,7 +3958,7 @@ def unstack(self, level=-1):
                 Level(s) of index to unstack, can pass level name.
 
         Returns:
-            DataFrame or Series: Unstacked dataframe or series.
+            DataFrame or Series: DataFrame or Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -4016,7 +4016,7 @@ def index(self):
                 dtype=object)
 
         Returns:
-            The index labels of the DataFrame.
+            Index: The index object of the DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -4265,7 +4265,7 @@ def dot(self, other):
                 The other object to compute the matrix product with.
 
         Returns:
-            Series or DataFrame
+            Series or DataFrame:
                 If `other` is a Series, return the matrix product between self and
                 other as a Series. If other is a DataFrame, return
                 the matrix product of self and other in a DataFrame.

From 00d30bf4acb62a6c88ebee9eb77b5285af0f7b8d Mon Sep 17 00:00:00 2001
From: TrevorBergeron <tbergeron@google.com>
Date: Tue, 5 Dec 2023 11:34:15 -0800
Subject: [PATCH 08/10] feat: add nunique method to Series/DataFrameGroupby
 (#256)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 bigframes/core/groupby/__init__.py             |  6 ++++++
 tests/system/small/test_groupby.py             |  2 ++
 .../pandas/core/groupby/__init__.py            | 18 ++++++++++++++++++
 3 files changed, 26 insertions(+)

diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py
index 18cb83fa18..a8b8afdae7 100644
--- a/bigframes/core/groupby/__init__.py
+++ b/bigframes/core/groupby/__init__.py
@@ -179,6 +179,9 @@ def any(self) -> df.DataFrame:
     def count(self) -> df.DataFrame:
         return self._aggregate_all(agg_ops.count_op)
 
+    def nunique(self) -> df.DataFrame:
+        return self._aggregate_all(agg_ops.nunique_op)
+
     def cumsum(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
         if not numeric_only:
             self._raise_on_non_numeric("cumsum")
@@ -442,6 +445,9 @@ def max(self, *args) -> series.Series:
     def count(self) -> series.Series:
         return self._aggregate(agg_ops.count_op)
 
+    def nunique(self) -> series.Series:
+        return self._aggregate(agg_ops.nunique_op)
+
     def sum(self, *args) -> series.Series:
         return self._aggregate(agg_ops.sum_op)
 
diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py
index a24713c2b3..5214905186 100644
--- a/tests/system/small/test_groupby.py
+++ b/tests/system/small/test_groupby.py
@@ -69,11 +69,13 @@ def test_dataframe_groupby_median(scalars_df_index, scalars_pandas_df_index):
     ("operator"),
     [
         (lambda x: x.count()),
+        (lambda x: x.nunique()),
         (lambda x: x.any()),
         (lambda x: x.all()),
     ],
     ids=[
         "count",
+        "nunique",
         "any",
         "all",
     ],
diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
index b05319b4f7..8730cf0007 100644
--- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
+++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
@@ -363,6 +363,15 @@ def agg(self, func):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def nunique(self):
+        """
+        Return number of unique elements in the group.
+
+        Returns:
+            Series: Number of unique values within each group.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
 
 class DataFrameGroupBy(GroupBy):
     def agg(self, func, **kwargs):
@@ -391,3 +400,12 @@ def agg(self, func, **kwargs):
             DataFrame
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def nunique(self):
+        """
+        Return DataFrame with counts of unique elements in each position.
+
+        Returns:
+            DataFrame: Counts of unique elements in each column within each group.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

From 1793427d070d3a301d5a2978bc68ecd904ee5555 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Fri, 1 Dec 2023 21:40:31 +0000
Subject: [PATCH 09/10] docs: add example for dataframe.melt, dataframe.pivot,
 dataframe.stack, dataframe.unstack

---
 third_party/bigframes_vendored/pandas/core/frame.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 3b622221b2..cc3c3546bd 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3464,13 +3464,14 @@ def melt(self, id_vars, value_vars, var_name, value_name):
             3	 4.0	       B	    4
             4	 5.0	       B	    5
             5	 1.0	       C	 <NA>
-            6	<NA>	       C	    3
+            6	 <NA>	       C	    3
             7	 3.0	       C	 <NA>
             8	 4.0	       C	    4
             9	 5.0	       C	    5
             <BLANKLINE>
             [10 rows x 3 columns]
 
+
         Args:
             id_vars (tuple, list, or ndarray, optional):
                 Column(s) to use as identifier variables.
@@ -3914,10 +3915,6 @@ def stack(self, level=-1):
                  B    4
             dtype: Int64
 
-        Args:
-            level (int, str, or list of these, default -1 (last level)):
-                Level(s) to stack from the column axis onto the index axis.
-
         Returns:
             DataFrame or Series: Stacked dataframe or series.
         """
@@ -3953,10 +3950,6 @@ def unstack(self, level=-1):
                 bar    4
             dtype: Int64
 
-        Args:
-            level (int, str, or list of these, default -1 (last level)):
-                Level(s) of index to unstack, can pass level name.
-
         Returns:
             DataFrame or Series: DataFrame or Series.
         """

From 39abefe171a6771872d88b208c361313788d7479 Mon Sep 17 00:00:00 2001
From: Huan Chen <huanc@google.com>
Date: Fri, 1 Dec 2023 22:05:58 +0000
Subject: [PATCH 10/10] docstring fix

---
 third_party/bigframes_vendored/pandas/core/frame.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index cc3c3546bd..5b00385eb8 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3915,6 +3915,10 @@ def stack(self, level=-1):
                  B    4
             dtype: Int64
 
+        Args:
+            level (int, str, or list of these, default -1 (last level)):
+                Level(s) to stack from the column axis onto the index axis.
+
         Returns:
             DataFrame or Series: Stacked dataframe or series.
         """
@@ -3950,6 +3954,10 @@ def unstack(self, level=-1):
                 bar    4
             dtype: Int64
 
+        Args:
+            level (int, str, or list of these, default -1 (last level)):
+                Level(s) of index to unstack, can pass level name.
+
         Returns:
             DataFrame or Series: DataFrame or Series.
         """