From a8576f9f24b7a7a7c9427858682dc69bdd7bd7e2 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Tue, 31 Oct 2023 17:15:38 +0000
Subject: [PATCH 1/8] test: add code snippets for using bigframes.ml

---
 .../snippets/clean_and_prep_ml_data_test.py   | 42 +++++++++++++++
 samples/snippets/clustering_model_test.py     | 34 ++++++++++++
 samples/snippets/gen_ai_model_test.py         | 39 ++++++++++++++
 samples/snippets/regression_model_test.py     | 53 +++++++++++++++++++
 4 files changed, 168 insertions(+)
 create mode 100644 samples/snippets/clean_and_prep_ml_data_test.py
 create mode 100644 samples/snippets/clustering_model_test.py
 create mode 100644 samples/snippets/gen_ai_model_test.py
 create mode 100644 samples/snippets/regression_model_test.py

diff --git a/samples/snippets/clean_and_prep_ml_data_test.py b/samples/snippets/clean_and_prep_ml_data_test.py
new file mode 100644
index 0000000000..8aefb437c8
--- /dev/null
+++ b/samples/snippets/clean_and_prep_ml_data_test.py
@@ -0,0 +1,42 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_clean_and_prep_ml_data():
+    import bigframes.pandas as bpd
+
+    query_or_table = "bigquery-public-data.ml_datasets.penguins"
+    bq_df = bpd.read_gbq(query_or_table)
+
+    # [START bigquery_dataframes_clean_and_prep_data]
+    # Filter down to the data to the Adelie Penguin species
+    adelie_data = bq_df[bq_df.species == "Adelie Penguin (Pygoscelis adeliae)"]
+
+    # Drop the species column
+    adelie_data = adelie_data.drop(columns=["species"])
+
+    # Drop rows with nulls to get training data
+    training_data = adelie_data.dropna()
+
+    # Specify your feature (or input) columns and the label (or output) column:
+    feature_columns = training_data[
+        ["island", "culmen_length_mm", "culmen_depth_mm", "flipper_length_mm", "sex"]
+    ]
+    label_columns = training_data[["body_mass_g"]]
+
+    test_data = adelie_data[adelie_data.body_mass_g.isnull()]
+    # [END bigquery_dataframes_clean_and_prep_data]
+    assert test_data is not None
+    assert feature_columns is not None
+    assert label_columns is not None
diff --git a/samples/snippets/clustering_model_test.py b/samples/snippets/clustering_model_test.py
new file mode 100644
index 0000000000..6d127c9005
--- /dev/null
+++ b/samples/snippets/clustering_model_test.py
@@ -0,0 +1,34 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_clustering_model():
+    from bigframes.ml.cluster import KMeans
+    import bigframes.pandas as bpd
+
+    # Load data from BigQuery
+    query_or_table = "bigquery-public-data.ml_datasets.penguins"
+    bq_df = bpd.read_gbq(query_or_table)
+    # [START bigquery_dataframes_clustering_model]
+    # Create the KMeans model
+    cluster_model = KMeans(n_clusters=10)
+    cluster_model.fit(bq_df["culmen_length_mm"], bq_df["sex"])
+
+    # Predict using the model
+    result = cluster_model.predict(bq_df)
+    # Score the model
+    score = cluster_model.score(bq_df)
+    # [END bigquery_dataframes_clustering_model]
+    assert result is not None
+    assert score is not None
diff --git a/samples/snippets/gen_ai_model_test.py b/samples/snippets/gen_ai_model_test.py
new file mode 100644
index 0000000000..7cbc90d4c0
--- /dev/null
+++ b/samples/snippets/gen_ai_model_test.py
@@ -0,0 +1,39 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_llm_model():
+    PROJECT_ID = "bigframes-dev"
+    REGION = "us"
+    CONN_NAME = "bigframes-ml"
+    # [START bigquery_dataframes_gen_ai_model]
+    from bigframes.ml.llm import PaLM2TextGenerator
+    import bigframes.pandas as bpd
+
+    # Create the LLM model
+    session = bpd.get_global_session()
+    connection = f"{PROJECT_ID}.{REGION}.{CONN_NAME}"
+    model = PaLM2TextGenerator(session=session, connection_name=connection)
+
+    df_api = bpd.read_csv("gs://cloud-samples-data/vertex-ai/bigframe/df.csv")
+
+    # Prepare the prompts and send them to the LLM model for prediction
+    df_prompt_prefix = "Generate Pandas sample code for DataFrame."
+    df_prompt = df_prompt_prefix + df_api["API"]
+
+    # Predict using the model
+    df_pred = model.predict(df_prompt.to_frame(), max_output_tokens=1024)
+    # [END bigquery_dataframes_gen_ai_model]
+    assert df_pred["ml_generate_text_llm_result"] is not None
+    assert df_pred["ml_generate_text_llm_result"].iloc[0] is not None
diff --git a/samples/snippets/regression_model_test.py b/samples/snippets/regression_model_test.py
new file mode 100644
index 0000000000..fe65680b44
--- /dev/null
+++ b/samples/snippets/regression_model_test.py
@@ -0,0 +1,53 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_regression_model():
+    import bigframes.pandas as bpd
+
+    query_or_table = "bigquery-public-data.ml_datasets.penguins"
+    bq_df = bpd.read_gbq(query_or_table)
+
+    # Filter the data down to the Adelie Penguin species
+    adelie_data = bq_df[bq_df.species == "Adelie Penguin (Pygoscelis adeliae)"]
+
+    # Drop the species column
+    adelie_data = adelie_data.drop(columns=["species"])
+
+    # Drop rows with nulls to get training data
+    training_data = adelie_data.dropna()
+
+    # Specify your feature (or input) columns and the label (or output) column:
+    feature_columns = training_data[
+        ["island", "culmen_length_mm", "culmen_depth_mm", "flipper_length_mm", "sex"]
+    ]
+    label_columns = training_data[["body_mass_g"]]
+
+    test_data = adelie_data[adelie_data.body_mass_g.isnull()]
+    # [START bigquery_dataframes_regression_model]
+    from bigframes.ml.linear_model import LinearRegression
+
+    model = LinearRegression()
+
+    model.fit(feature_columns, label_columns)
+
+    # Score the model
+    score = model.score(feature_columns, label_columns)
+
+    # Predict using the model
+    result = model.predict(test_data)
+    # [END bigquery_dataframes_regression_model]
+    assert model is not None
+    assert score is not None
+    assert result is not None

From aa442ce3c82ab2e577e93f3fc20873592143799f Mon Sep 17 00:00:00 2001
From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com>
Date: Tue, 31 Oct 2023 10:54:17 -0700
Subject: [PATCH 2/8] test: add code snippets for loading data from BigQuery
 Job (#154)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* test: add code snippets for loading data from BigQuery Job

* fix: address the comments

* fix: fix the broken test

* use BigQuery Client library to get the job_id

* feat: Implement operator `@` for `DataFrame.dot` (#139)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes b/297502513 🦕

* fix: fix the comments

---------

Co-authored-by: Shobhit Singh <shobs@google.com>
---
 .../load_data_from_biquery_job_test.py        | 51 +++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 samples/snippets/load_data_from_biquery_job_test.py

diff --git a/samples/snippets/load_data_from_biquery_job_test.py b/samples/snippets/load_data_from_biquery_job_test.py
new file mode 100644
index 0000000000..5271574a49
--- /dev/null
+++ b/samples/snippets/load_data_from_biquery_job_test.py
@@ -0,0 +1,51 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_bigquery_dataframes_load_data_from_bigquery_job():
+    from google.cloud import bigquery
+
+    # Construct a BigQuery client object.
+    client = bigquery.Client(project="bigframes-dev", location="us")
+
+    query = """
+        SELECT *
+        FROM `bigquery-public-data.ml_datasets.penguins`
+        LIMIT 20
+    """
+    query_job = client.query(query)
+    JOB_ID = query_job.job_id
+    your_project_id = "bigframes-dev"
+
+    # [START bigquery_dataframes_load_data_from_bigquery_job]
+    from google.cloud import bigquery
+
+    import bigframes.pandas as bpd
+
+    # Project ID inserted based on the query results selected to explore
+    project = your_project_id
+    # Location inserted based on the query results selected to explore
+    location = "us"
+    client = bigquery.Client(project=project, location=location)
+
+    # Job ID inserted based on the query results selected to explore
+    job_id = JOB_ID
+    job = client.get_job(job_id)
+    destination = str(job.destination)
+
+    # Load data from a BigQuery table using BigFrames DataFrames:
+    bq_df = bpd.read_gbq_table(destination)
+
+    # [END bigquery_dataframes_load_data_from_bigquery_job]
+    assert bq_df is not None

From 14bf68d0338612fd3a99706c4ca1d635937b1188 Mon Sep 17 00:00:00 2001
From: Bradford Orr <15842009+orrbradford@users.noreply.github.com>
Date: Tue, 31 Oct 2023 12:46:47 -0700
Subject: [PATCH 3/8] feat: add bigframes.options.compute.maximum_bytes_billed
 option that sets maximum bytes billed on query jobs (#133)

-implement context manager for global options
-maximum_bytes_billed only applies to query jobs. This limitation will be set per query. Operations that trigger multiple jobs may result in total usage beyond this setting
---
 bigframes/__init__.py                         |  3 +-
 bigframes/_config/__init__.py                 | 11 +++++
 bigframes/_config/compute_options.py          | 35 +++++++++++++++
 bigframes/_config/display_options.py          | 23 ++++------
 bigframes/pandas/__init__.py                  |  4 ++
 bigframes/session/__init__.py                 | 19 +++++---
 docs/reference/bigframes/options.rst          |  2 +
 docs/templates/toc.yml                        |  2 +
 tests/system/conftest.py                      |  7 ---
 tests/system/small/test_progress_bar.py       | 17 ++++---
 tests/unit/test_compute_options.py            | 30 +++++++++++++
 .../pandas/_config/config.py                  | 45 +++++++++++++++++++
 12 files changed, 162 insertions(+), 36 deletions(-)
 create mode 100644 bigframes/_config/compute_options.py
 create mode 100644 tests/unit/test_compute_options.py
 create mode 100644 third_party/bigframes_vendored/pandas/_config/config.py

diff --git a/bigframes/__init__.py b/bigframes/__init__.py
index 8f41790072..bd1476957b 100644
--- a/bigframes/__init__.py
+++ b/bigframes/__init__.py
@@ -14,7 +14,7 @@
 
 """BigQuery DataFrames provides a DataFrame API scaled by the BigQuery engine."""
 
-from bigframes._config import options
+from bigframes._config import option_context, options
 from bigframes._config.bigquery_options import BigQueryOptions
 from bigframes.core.global_session import close_session, get_global_session
 from bigframes.session import connect, Session
@@ -28,4 +28,5 @@
     "connect",
     "Session",
     "__version__",
+    "option_context",
 ]
diff --git a/bigframes/_config/__init__.py b/bigframes/_config/__init__.py
index e26eaf8800..8dcebfce6a 100644
--- a/bigframes/_config/__init__.py
+++ b/bigframes/_config/__init__.py
@@ -18,8 +18,10 @@
 """
 
 import bigframes._config.bigquery_options as bigquery_options
+import bigframes._config.compute_options as compute_options
 import bigframes._config.display_options as display_options
 import bigframes._config.sampling_options as sampling_options
+import third_party.bigframes_vendored.pandas._config.config as pandas_config
 
 
 class Options:
@@ -29,6 +31,7 @@ def __init__(self):
         self._bigquery_options = bigquery_options.BigQueryOptions()
         self._display_options = display_options.DisplayOptions()
         self._sampling_options = sampling_options.SamplingOptions()
+        self._compute_options = compute_options.ComputeOptions()
 
     @property
     def bigquery(self) -> bigquery_options.BigQueryOptions:
@@ -49,6 +52,11 @@ def sampling(self) -> sampling_options.SamplingOptions:
         parameters in specific functions."""
         return self._sampling_options
 
+    @property
+    def compute(self) -> compute_options.ComputeOptions:
+        """Options controlling object computation."""
+        return self._compute_options
+
 
 options = Options()
 """Global options for default session."""
@@ -58,3 +66,6 @@ def sampling(self) -> sampling_options.SamplingOptions:
     "Options",
     "options",
 )
+
+
+option_context = pandas_config.option_context
diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py
new file mode 100644
index 0000000000..20c31d3906
--- /dev/null
+++ b/bigframes/_config/compute_options.py
@@ -0,0 +1,35 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Options for displaying objects."""
+
+import dataclasses
+from typing import Optional
+
+
+@dataclasses.dataclass
+class ComputeOptions:
+    """
+    Encapsulates configuration for compute options.
+
+    Attributes:
+        maximum_bytes_billed (int, Optional):
+            Limits the bytes billed for query jobs. Queries that will have
+            bytes billed beyond this limit will fail (without incurring a
+            charge). If unspecified, this will be set to your project default.
+            See `maximum_bytes_billed <https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed>`_.
+
+    """
+
+    maximum_bytes_billed: Optional[int] = None
diff --git a/bigframes/_config/display_options.py b/bigframes/_config/display_options.py
index 8bd2743f17..ad3ea3f68c 100644
--- a/bigframes/_config/display_options.py
+++ b/bigframes/_config/display_options.py
@@ -40,17 +40,12 @@ def pandas_repr(display_options: DisplayOptions):
     This context manager makes sure we reset the pandas options when we're done
     so that we don't override pandas behavior.
     """
-    original_max_cols = pd.options.display.max_columns
-    original_max_rows = pd.options.display.max_rows
-    original_show_dimensions = pd.options.display.show_dimensions
-
-    pd.options.display.max_columns = display_options.max_columns
-    pd.options.display.max_rows = display_options.max_rows
-    pd.options.display.show_dimensions = True  # type: ignore
-
-    try:
-        yield
-    finally:
-        pd.options.display.max_columns = original_max_cols
-        pd.options.display.max_rows = original_max_rows
-        pd.options.display.show_dimensions = original_show_dimensions
+    with pd.option_context(
+        "display.max_columns",
+        display_options.max_columns,
+        "display.max_rows",
+        display_options.max_rows,
+        "display.show_dimensions",
+        True,
+    ) as pandas_context:
+        yield (pandas_context)
diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 8d9726312f..0fab1109dc 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -462,6 +462,9 @@ def read_gbq_function(function_name: str):
 options = config.options
 """Global :class:`~bigframes._config.Options` to configure BigQuery DataFrames."""
 
+option_context = config.option_context
+"""Global :class:`~bigframes._config.option_context` to configure BigQuery DataFrames."""
+
 # Session management APIs
 get_global_session = global_session.get_global_session
 close_session = global_session.close_session
@@ -494,6 +497,7 @@ def read_gbq_function(function_name: str):
     # Other public pandas attributes
     "NamedAgg",
     "options",
+    "option_context",
     # Session management APIs
     "get_global_session",
     "close_session",
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 932a41f283..4858c7726a 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1504,12 +1504,10 @@ def _start_query(
         max_results: Optional[int] = None,
     ) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]:
         """
-        Starts query job and waits for results
+        Starts query job and waits for results.
         """
-        if job_config is not None:
-            query_job = self.bqclient.query(sql, job_config=job_config)
-        else:
-            query_job = self.bqclient.query(sql)
+        job_config = self._prepare_job_config(job_config)
+        query_job = self.bqclient.query(sql, job_config=job_config)
 
         opts = bigframes.options.display
         if opts.progress_bar is not None and not query_job.configuration.dry_run:
@@ -1538,6 +1536,17 @@ def _start_generic_job(self, job: formatting_helpers.GenericJob):
         else:
             job.result()
 
+    def _prepare_job_config(
+        self, job_config: Optional[bigquery.QueryJobConfig] = None
+    ) -> bigquery.QueryJobConfig:
+        """Apply ``compute.maximum_bytes_billed`` (when set) to a query job config."""
+        # Never mutate the client's shared default config; the limit would leak.
+        job_config = bigquery.QueryJobConfig() if job_config is None else job_config
+        max_bytes = bigframes.options.compute.maximum_bytes_billed
+        if max_bytes is not None:
+            job_config.maximum_bytes_billed = max_bytes
+        return job_config
+
 
 def connect(context: Optional[bigquery_options.BigQueryOptions] = None) -> Session:
     return Session(context)
diff --git a/docs/reference/bigframes/options.rst b/docs/reference/bigframes/options.rst
index d831a519fe..991399eb88 100644
--- a/docs/reference/bigframes/options.rst
+++ b/docs/reference/bigframes/options.rst
@@ -12,3 +12,5 @@ Options and settings
 .. autoclass:: bigframes._config.display_options.DisplayOptions
 
 .. autoclass:: bigframes._config.sampling_options.SamplingOptions
+
+.. autoclass:: bigframes._config.compute_options.ComputeOptions
diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml
index 4fe2ec1a6a..9879721d28 100644
--- a/docs/templates/toc.yml
+++ b/docs/templates/toc.yml
@@ -13,6 +13,8 @@
         uid: bigframes._config.display_options.DisplayOptions
       - name: SamplingOptions
         uid: bigframes._config.sampling_options.SamplingOptions
+      - name: ComputeOptions
+        uid: bigframes._config.compute_options.ComputeOptions
       name: Options and settings
     - items:
       - name: Session
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 8885b03d34..f9f69c6c8e 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -898,13 +898,6 @@ def usa_names_grouped_table(
         return session.bqclient.get_table(table_id)
 
 
-@pytest.fixture()
-def deferred_repr():
-    bigframes.options.display.repr_mode = "deferred"
-    yield
-    bigframes.options.display.repr_mode = "head"
-
-
 @pytest.fixture()
 def restore_sampling_settings():
     enable_downsampling = bigframes.options.sampling.enable_downsampling
diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py
index 084b723fba..30ea63b483 100644
--- a/tests/system/small/test_progress_bar.py
+++ b/tests/system/small/test_progress_bar.py
@@ -135,12 +135,11 @@ def test_query_job_repr(penguins_df_default_index: bf.dataframe.DataFrame):
         assert string in query_job_repr
 
 
-def test_query_job_dry_run(
-    penguins_df_default_index: bf.dataframe.DataFrame, capsys, deferred_repr
-):
-    repr(penguins_df_default_index)
-    repr(penguins_df_default_index["body_mass_g"])
-    lines = capsys.readouterr().out.split("\n")
-    lines = filter(None, lines)
-    for line in lines:
-        assert "Computation deferred. Computation will process" in line
+def test_query_job_dry_run(penguins_df_default_index: bf.dataframe.DataFrame, capsys):
+    with bf.option_context("display.repr_mode", "deferred"):
+        repr(penguins_df_default_index)
+        repr(penguins_df_default_index["body_mass_g"])
+        lines = capsys.readouterr().out.split("\n")
+        lines = filter(None, lines)
+        for line in lines:
+            assert "Computation deferred. Computation will process" in line
diff --git a/tests/unit/test_compute_options.py b/tests/unit/test_compute_options.py
new file mode 100644
index 0000000000..499a0a5fef
--- /dev/null
+++ b/tests/unit/test_compute_options.py
@@ -0,0 +1,30 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import bigframes as bf
+
+from . import resources
+
+
+def test_maximum_bytes_option():
+    session = resources.create_bigquery_session()
+    num_query_calls = 0
+    with bf.option_context("compute.maximum_bytes_billed", 10000):
+        # clear initial method calls
+        session.bqclient.method_calls = []
+        session._start_query("query")
+        for call in session.bqclient.method_calls:
+            _, _, kwargs = call
+            num_query_calls += 1
+            assert kwargs["job_config"].maximum_bytes_billed == 10000
+    assert num_query_calls > 0
diff --git a/third_party/bigframes_vendored/pandas/_config/config.py b/third_party/bigframes_vendored/pandas/_config/config.py
new file mode 100644
index 0000000000..8abaca76c7
--- /dev/null
+++ b/third_party/bigframes_vendored/pandas/_config/config.py
@@ -0,0 +1,45 @@
+# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/_config/config.py
+import contextlib
+import operator
+
+import bigframes
+
+
+class option_context(contextlib.ContextDecorator):
+    """
+    Context manager to temporarily set options in the `with` statement context.
+
+    You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.
+
+    Examples
+    --------
+    >>> import bigframes
+    >>> with bigframes.option_context('display.max_rows', 10, 'display.max_columns', 5):
+    ...     pass
+    """
+
+    def __init__(self, *args) -> None:
+        if len(args) % 2 != 0 or len(args) < 2:
+            raise ValueError(
+                "Need to invoke as option_context(pat, val, [(pat, val), ...])."
+            )
+
+        self.ops = list(zip(args[::2], args[1::2]))
+
+    def __enter__(self) -> None:
+        self.undo = [
+            (pat, operator.attrgetter(pat)(bigframes.options)) for pat, val in self.ops
+        ]
+
+        for pat, val in self.ops:
+            self._set_option(pat, val)
+
+    def __exit__(self, *args) -> None:
+        if self.undo:
+            for pat, val in self.undo:
+                self._set_option(pat, val)
+
+    def _set_option(self, pat, val):
+        root, attr = pat.rsplit(".", 1)
+        parent = operator.attrgetter(root)(bigframes.options)
+        setattr(parent, attr, val)

From 9a42f039ccb2c207637d46e876bcfb8a5db00046 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Tue, 31 Oct 2023 18:00:20 -0500
Subject: [PATCH 4/8] docs: fix indentation on `read_gbq_function` code sample
 (#163)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 bigframes/session/__init__.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index 4858c7726a..5a61ed534f 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -1467,13 +1467,13 @@ def read_gbq_function(
 
         **Examples:**
 
-        >>> import bigframes.pandas as bpd
-        >>> bpd.options.display.progress_bar = None
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
 
-        >>> function_name = "bqutil.fn.cw_lower_case_ascii_only"
-        >>> func = bpd.read_gbq_function(function_name=function_name)
-        >>> func.bigframes_remote_function
-        'bqutil.fn.cw_lower_case_ascii_only'
+            >>> function_name = "bqutil.fn.cw_lower_case_ascii_only"
+            >>> func = bpd.read_gbq_function(function_name=function_name)
+            >>> func.bigframes_remote_function
+            'bqutil.fn.cw_lower_case_ascii_only'
 
         Args:
             function_name (str):

From fdeb9f293cd56796bb3aa4224a6b99b06dbc61c7 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Tue, 31 Oct 2023 18:36:09 +0000
Subject: [PATCH 5/8] move ml samples to e2e, make the tests standalone

---
 noxfile.py                                    |  2 +-
 .../snippets/clean_and_prep_ml_data_test.py   | 42 -------------------
 .../{ => ml_samples}/clustering_model_test.py |  3 +-
 .../{ => ml_samples}/gen_ai_model_test.py     |  0
 .../{ => ml_samples}/regression_model_test.py | 10 +++--
 5 files changed, 10 insertions(+), 47 deletions(-)
 delete mode 100644 samples/snippets/clean_and_prep_ml_data_test.py
 rename samples/snippets/{ => ml_samples}/clustering_model_test.py (99%)
 rename samples/snippets/{ => ml_samples}/gen_ai_model_test.py (100%)
 rename samples/snippets/{ => ml_samples}/regression_model_test.py (91%)

diff --git a/noxfile.py b/noxfile.py
index d0bbda80fd..602b17291d 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -377,7 +377,7 @@ def e2e(session: nox.sessions.Session):
     run_system(
         session=session,
         prefix_name="e2e",
-        test_folder=os.path.join("tests", "system", "large"),
+        test_folder=[os.path.join("tests", "system", "large")],
         print_duration=True,
     )
 
diff --git a/samples/snippets/clean_and_prep_ml_data_test.py b/samples/snippets/clean_and_prep_ml_data_test.py
deleted file mode 100644
index 8aefb437c8..0000000000
--- a/samples/snippets/clean_and_prep_ml_data_test.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2023 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-def test_clean_and_prep_ml_data():
-    import bigframes.pandas as bpd
-
-    query_or_table = "bigquery-public-data.ml_datasets.penguins"
-    bq_df = bpd.read_gbq(query_or_table)
-
-    # [START bigquery_dataframes_clean_and_prep_data]
-    # Filter down to the data to the Adelie Penguin species
-    adelie_data = bq_df[bq_df.species == "Adelie Penguin (Pygoscelis adeliae)"]
-
-    # Drop the species column
-    adelie_data = adelie_data.drop(columns=["species"])
-
-    # Drop rows with nulls to get training data
-    training_data = adelie_data.dropna()
-
-    # Specify your feature (or input) columns and the label (or output) column:
-    feature_columns = training_data[
-        ["island", "culmen_length_mm", "culmen_depth_mm", "flipper_length_mm", "sex"]
-    ]
-    label_columns = training_data[["body_mass_g"]]
-
-    test_data = adelie_data[adelie_data.body_mass_g.isnull()]
-    # [END bigquery_dataframes_clean_and_prep_data]
-    assert test_data is not None
-    assert feature_columns is not None
-    assert label_columns is not None
diff --git a/samples/snippets/clustering_model_test.py b/samples/snippets/ml_samples/clustering_model_test.py
similarity index 99%
rename from samples/snippets/clustering_model_test.py
rename to samples/snippets/ml_samples/clustering_model_test.py
index 6d127c9005..a407fc7805 100644
--- a/samples/snippets/clustering_model_test.py
+++ b/samples/snippets/ml_samples/clustering_model_test.py
@@ -14,13 +14,14 @@
 
 
 def test_clustering_model():
+    # [START bigquery_dataframes_clustering_model]
     from bigframes.ml.cluster import KMeans
     import bigframes.pandas as bpd
 
     # Load data from BigQuery
     query_or_table = "bigquery-public-data.ml_datasets.penguins"
     bq_df = bpd.read_gbq(query_or_table)
-    # [START bigquery_dataframes_clustering_model]
+
     # Create the KMeans model
     cluster_model = KMeans(n_clusters=10)
     cluster_model.fit(bq_df["culmen_length_mm"], bq_df["sex"])
diff --git a/samples/snippets/gen_ai_model_test.py b/samples/snippets/ml_samples/gen_ai_model_test.py
similarity index 100%
rename from samples/snippets/gen_ai_model_test.py
rename to samples/snippets/ml_samples/gen_ai_model_test.py
diff --git a/samples/snippets/regression_model_test.py b/samples/snippets/ml_samples/regression_model_test.py
similarity index 91%
rename from samples/snippets/regression_model_test.py
rename to samples/snippets/ml_samples/regression_model_test.py
index fe65680b44..7d1bde689c 100644
--- a/samples/snippets/regression_model_test.py
+++ b/samples/snippets/ml_samples/regression_model_test.py
@@ -14,8 +14,11 @@
 
 
 def test_regression_model():
+    # [START bigquery_dataframes_regression_model]
+    from bigframes.ml.linear_model import LinearRegression
     import bigframes.pandas as bpd
 
+    # Load data from BigQuery
     query_or_table = "bigquery-public-data.ml_datasets.penguins"
     bq_df = bpd.read_gbq(query_or_table)
 
@@ -35,11 +38,9 @@ def test_regression_model():
     label_columns = training_data[["body_mass_g"]]
 
     test_data = adelie_data[adelie_data.body_mass_g.isnull()]
-    # [START bigquery_dataframes_regression_model]
-    from bigframes.ml.linear_model import LinearRegression
 
+    # Create the linear model
     model = LinearRegression()
-
     model.fit(feature_columns, label_columns)
 
     # Score the model
@@ -48,6 +49,9 @@ def test_regression_model():
     # Predict using the model
     result = model.predict(test_data)
     # [END bigquery_dataframes_regression_model]
+    assert test_data is not None
+    assert feature_columns is not None
+    assert label_columns is not None
     assert model is not None
     assert score is not None
     assert result is not None

From 32e03ccb840fb6d7a7e4d69ac9c30250e9440863 Mon Sep 17 00:00:00 2001
From: Henry Solberg <henry.j.solberg@gmail.com>
Date: Wed, 1 Nov 2023 13:41:27 -0700
Subject: [PATCH 6/8] feat: add pd.get_dummies (#149)

* feat: add pd.get_dummies

* remove unneeded prefix case

* param/documentation fixes

* be stricter about types in test

* be stricter about types in series test

* remove unneeded comment

* adjust for type difference in pandas 1

* add example code (tested)

* fix None columns and add test cases

* variable names and _get_unique_values per-column

* account for pandas 1 behavior difference

* remove already_seen set

* avoid unnecessary join/projection

* fix column ordering edge case

* adjust for picky examples checker

* example tweak

* make part of the example comments

* use ellipsis in doctest comment

* add <BLANKLINE> to doctest string

* extract parameter standardization

* extract submethods

---------

Co-authored-by: Henry J Solberg <henryjsolberg@google.com>
---
 bigframes/pandas/__init__.py                  | 177 ++++++++++++++++++
 tests/system/small/test_pandas.py             | 112 +++++++++++
 .../pandas/core/reshape/concat.py             |   2 +-
 .../pandas/core/reshape/encoding.py           | 119 ++++++++++++
 .../pandas/core/reshape/merge.py              |   1 -
 .../pandas/core/reshape/tile.py               |   2 +-
 6 files changed, 410 insertions(+), 3 deletions(-)
 create mode 100644 third_party/bigframes_vendored/pandas/core/reshape/encoding.py

diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py
index 0fab1109dc..1c52b103fb 100644
--- a/bigframes/pandas/__init__.py
+++ b/bigframes/pandas/__init__.py
@@ -45,14 +45,18 @@
 )
 
 import bigframes._config as config
+import bigframes.constants as constants
+import bigframes.core.blocks
 import bigframes.core.global_session as global_session
 import bigframes.core.indexes
 import bigframes.core.reshape
 import bigframes.dataframe
+import bigframes.operations as ops
 import bigframes.series
 import bigframes.session
 import bigframes.session.clients
 import third_party.bigframes_vendored.pandas.core.reshape.concat as vendored_pandas_concat
+import third_party.bigframes_vendored.pandas.core.reshape.encoding as vendored_pandas_encoding
 import third_party.bigframes_vendored.pandas.core.reshape.merge as vendored_pandas_merge
 import third_party.bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile
 
@@ -134,6 +138,179 @@ def cut(
 cut.__doc__ = vendored_pandas_tile.cut.__doc__
 
 
+def get_dummies(
+    data: Union[DataFrame, Series],
+    prefix: Union[List, dict, str, None] = None,
+    prefix_sep: Union[List, dict, str, None] = "_",
+    dummy_na: bool = False,
+    columns: Optional[List] = None,
+    drop_first: bool = False,
+    dtype: Any = None,
+) -> DataFrame:
+    # simplify input parameters into per-input-label lists
+    # also raise errors for invalid parameters
+    column_labels, prefixes, prefix_seps = _standardize_get_dummies_params(
+        data, prefix, prefix_sep, columns, dtype
+    )
+
+    # combine prefixes into per-column-id list
+    full_columns_prefixes, columns_ids = _determine_get_dummies_columns_from_labels(
+        data, column_labels, prefix is not None, prefixes, prefix_seps
+    )
+
+    # run queries to compute unique values
+    block = data._block
+    max_unique_value = (
+        bigframes.core.blocks._BQ_MAX_COLUMNS
+        - len(block.value_columns)
+        - len(block.index_columns)
+        - 1
+    ) // len(column_labels)
+    columns_values = [
+        block._get_unique_values([col_id], max_unique_value) for col_id in columns_ids
+    ]
+
+    # for each dummified column, add the content of the output columns via block operations
+    intermediate_col_ids = []
+    for i in range(len(columns_values)):
+        level = columns_values[i].get_level_values(0).sort_values().dropna()
+        if drop_first:
+            level = level[1:]
+        column_label = full_columns_prefixes[i]
+        column_id = columns_ids[i]
+        block, new_intermediate_col_ids = _perform_get_dummies_block_operations(
+            block, level, column_label, column_id, dummy_na
+        )
+        intermediate_col_ids.extend(new_intermediate_col_ids)
+
+    # drop dummified columns (and the intermediate columns we added)
+    block = block.drop_columns(columns_ids + intermediate_col_ids)
+    return DataFrame(block)
+
+
+get_dummies.__doc__ = vendored_pandas_encoding.get_dummies.__doc__
+
+
+def _standardize_get_dummies_params(
+    data: Union[DataFrame, Series],
+    prefix: Union[List, dict, str, None],
+    prefix_sep: Union[List, dict, str, None],
+    columns: Optional[List],
+    dtype: Any,
+) -> Tuple[List, List[str], List[str]]:
+    block = data._block
+
+    if isinstance(data, Series):
+        columns = [block.column_labels[0]]
+    if columns is not None and not pandas.api.types.is_list_like(columns):
+        raise TypeError("Input must be a list-like for parameter `columns`")
+    if dtype is not None and dtype not in [
+        pandas.BooleanDtype,
+        bool,
+        "Boolean",
+        "boolean",
+        "bool",
+    ]:
+        raise NotImplementedError(
+            f"Only Boolean dtype is currently supported. {constants.FEEDBACK_LINK}"
+        )
+
+    if columns is None:
+        default_dummy_types = [pandas.StringDtype, "string[pyarrow]"]
+        columns = []
+        columns_set = set()
+        for col_id in block.value_columns:
+            label = block.col_id_to_label[col_id]
+            if (
+                label not in columns_set
+                and block.expr.get_column_type(col_id) in default_dummy_types
+            ):
+                columns.append(label)
+                columns_set.add(label)
+
+    column_labels: List = typing.cast(List, columns)
+
+    def parse_prefix_kwarg(kwarg, kwarg_name) -> Optional[List[str]]:
+        if kwarg is None:
+            return None
+        if isinstance(kwarg, str):
+            return [kwarg] * len(column_labels)
+        if isinstance(kwarg, dict):
+            return [kwarg[column] for column in column_labels]
+        kwarg = typing.cast(List, kwarg)
+        if pandas.api.types.is_list_like(kwarg) and len(kwarg) != len(column_labels):
+            raise ValueError(
+                f"Length of '{kwarg_name}' ({len(kwarg)}) did not match "
+                f"the length of the columns being encoded ({len(column_labels)})."
+            )
+        if pandas.api.types.is_list_like(kwarg):
+            return list(map(str, kwarg))
+        raise TypeError(f"{kwarg_name} kwarg must be a string, list, or dictionary")
+
+    prefix_seps = parse_prefix_kwarg(prefix_sep or "_", "prefix_sep")
+    prefix_seps = typing.cast(List, prefix_seps)
+    prefixes = parse_prefix_kwarg(prefix, "prefix")
+    if prefixes is None:
+        prefixes = column_labels
+    prefixes = typing.cast(List, prefixes)
+
+    return column_labels, prefixes, prefix_seps
+
+
+def _determine_get_dummies_columns_from_labels(
+    data: Union[DataFrame, Series],
+    column_labels: List,
+    prefix_given: bool,
+    prefixes: List[str],
+    prefix_seps: List[str],
+) -> Tuple[List[str], List[str]]:
+    block = data._block
+
+    columns_ids = []
+    columns_prefixes = []
+    for i in range(len(column_labels)):
+        label = column_labels[i]
+        empty_prefix = label is None or (isinstance(data, Series) and not prefix_given)
+        full_prefix = "" if empty_prefix else prefixes[i] + prefix_seps[i]
+
+        for col_id in block.label_to_col_id[label]:
+            columns_ids.append(col_id)
+            columns_prefixes.append(full_prefix)
+
+    return columns_prefixes, columns_ids
+
+
+def _perform_get_dummies_block_operations(
+    block: bigframes.core.blocks.Block,
+    level: pandas.Index,
+    column_label: str,
+    column_id: str,
+    dummy_na: bool,
+) -> Tuple[bigframes.core.blocks.Block, List[str]]:
+    intermediate_col_ids = []
+    for value in level:
+        new_column_label = f"{column_label}{value}"
+        if column_label == "":
+            new_column_label = value
+        new_block, new_id = block.apply_unary_op(
+            column_id, ops.BinopPartialLeft(ops.eq_op, value)
+        )
+        intermediate_col_ids.append(new_id)
+        block, _ = new_block.apply_unary_op(
+            new_id,
+            ops.BinopPartialRight(ops.fillna_op, False),
+            result_label=new_column_label,
+        )
+    if dummy_na:
+        # dummy column name for na depends on the dtype
+        na_string = str(pandas.Index([None], dtype=level.dtype)[0])
+        new_column_label = f"{column_label}{na_string}"
+        block, _ = block.apply_unary_op(
+            column_id, ops.isnull_op, result_label=new_column_label
+        )
+    return block, intermediate_col_ids
+
+
 def qcut(
     x: bigframes.series.Series,
     q: int,
diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py
index f8fa78587f..0292ebd206 100644
--- a/tests/system/small/test_pandas.py
+++ b/tests/system/small/test_pandas.py
@@ -45,6 +45,118 @@ def test_concat_series(scalars_dfs):
     pd.testing.assert_series_equal(bf_result, pd_result)
 
 
+@pytest.mark.parametrize(
+    ("kwargs"),
+    [
+        {
+            "prefix": ["prefix1", "prefix2"],
+            "prefix_sep": "_",
+            "dummy_na": None,
+            "columns": ["bool_col", "int64_col"],
+            "drop_first": False,
+        },
+        {
+            "prefix": "prefix",
+            "prefix_sep": ["_", ","],
+            "dummy_na": False,
+            "columns": ["int64_too", "string_col"],
+            "drop_first": False,
+        },
+        {
+            "prefix": None,
+            "prefix_sep": ".",
+            "dummy_na": True,
+            "columns": ["time_col", "float64_col"],
+            "drop_first": True,
+        },
+    ],
+)
+def test_get_dummies_dataframe(scalars_dfs, kwargs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = bpd.get_dummies(scalars_df, **kwargs, dtype=bool)
+    pd_result = pd.get_dummies(scalars_pandas_df, **kwargs, dtype=bool)
+    # dtype argument above is needed for pandas v1 only
+
+    # adjust for expected dtype differences
+    for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes):
+        if type_name == "bool":
+            pd_result[column_name] = pd_result[column_name].astype("boolean")
+
+    pd.testing.assert_frame_equal(bf_result.to_pandas(), pd_result)
+
+
+def test_get_dummies_dataframe_duplicate_labels(scalars_dfs):
+    if pd.__version__.startswith("1."):
+        pytest.skip("pandas has different behavior in 1.x")
+
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    scalars_renamed_df = scalars_df.rename(
+        columns={"int64_too": "int64_col", "float64_col": None, "string_col": None}
+    )
+    scalars_renamed_pandas_df = scalars_pandas_df.rename(
+        columns={"int64_too": "int64_col", "float64_col": None, "string_col": None}
+    )
+
+    bf_result = bpd.get_dummies(
+        scalars_renamed_df, columns=["int64_col", None], dtype=bool
+    )
+    pd_result = pd.get_dummies(
+        scalars_renamed_pandas_df, columns=["int64_col", None], dtype=bool
+    )
+    # dtype argument above is needed for pandas v1 only
+
+    # adjust for expected dtype differences
+    for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes):
+        if type_name == "bool":
+            pd_result[column_name] = pd_result[column_name].astype("boolean")
+
+    pd.testing.assert_frame_equal(bf_result.to_pandas(), pd_result)
+
+
+def test_get_dummies_series(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_series = scalars_df.date_col
+    pd_series = scalars_pandas_df.date_col
+
+    bf_result = bpd.get_dummies(bf_series, dtype=bool)
+    pd_result = pd.get_dummies(pd_series, dtype=bool)
+    # dtype argument above is needed for pandas v1 only
+
+    # adjust for expected dtype differences
+    for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes):
+        if type_name == "bool":
+            pd_result[column_name] = pd_result[column_name].astype("boolean")
+    pd_result.columns = pd_result.columns.astype(object)
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
+def test_get_dummies_series_nameless(scalars_dfs):
+    scalars_df, scalars_pandas_df = scalars_dfs
+    bf_series = scalars_df.date_col.rename(None)
+    pd_series = scalars_pandas_df.date_col.rename(None)
+
+    bf_result = bpd.get_dummies(bf_series, dtype=bool)
+    pd_result = pd.get_dummies(pd_series, dtype=bool)
+    # dtype argument above is needed for pandas v1 only
+
+    # adjust for expected dtype differences
+    for (column_name, type_name) in zip(pd_result.columns, pd_result.dtypes):
+        if type_name == "bool":
+            pd_result[column_name] = pd_result[column_name].astype("boolean")
+    pd_result.columns = pd_result.columns.astype(object)
+
+    pd.testing.assert_frame_equal(
+        bf_result.to_pandas(),
+        pd_result,
+    )
+
+
 @pytest.mark.parametrize(
     ("how"),
     [
diff --git a/third_party/bigframes_vendored/pandas/core/reshape/concat.py b/third_party/bigframes_vendored/pandas/core/reshape/concat.py
index 6e6d2d8b5c..b0472c524a 100644
--- a/third_party/bigframes_vendored/pandas/core/reshape/concat.py
+++ b/third_party/bigframes_vendored/pandas/core/reshape/concat.py
@@ -1,6 +1,6 @@
 # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/reshape/concat.py
 """
-Concat routines.
+Concat routines
 """
 from __future__ import annotations
 
diff --git a/third_party/bigframes_vendored/pandas/core/reshape/encoding.py b/third_party/bigframes_vendored/pandas/core/reshape/encoding.py
new file mode 100644
index 0000000000..da92b58f50
--- /dev/null
+++ b/third_party/bigframes_vendored/pandas/core/reshape/encoding.py
@@ -0,0 +1,119 @@
+# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/reshape/encoding.py
+"""
+Encoding routines
+"""
+from __future__ import annotations
+
+from bigframes import constants
+
+
+def get_dummies(
+    data,
+    prefix=None,
+    prefix_sep="_",
+    dummy_na=False,
+    columns=None,
+    drop_first=False,
+    dtype=None,
+):
+    """
+    Convert categorical variable into dummy/indicator variables.
+
+    Each variable is converted into as many 0/1 variables as there are
+    different values. Columns in the output are each named after a value;
+    if the input is a DataFrame, the name of the original variable is
+    prepended to the value.
+
+    **Examples:**
+        >>> import bigframes.pandas as pd
+        >>> pd.options.display.progress_bar = None
+        >>> s = pd.Series(list('abca'))
+        >>> pd.get_dummies(s)
+               a      b      c
+        0   True  False  False
+        1  False   True  False
+        2  False  False   True
+        3   True  False  False
+        <BLANKLINE>
+        [4 rows x 3 columns]
+
+        >>> s1 = pd.Series(['a', 'b', None])
+        >>> pd.get_dummies(s1)
+               a      b
+        0   True  False
+        1  False   True
+        2  False  False
+        <BLANKLINE>
+        [3 rows x 2 columns]
+
+        >>> pd.get_dummies(s1, dummy_na=True)
+               a      b   <NA>
+        0   True  False  False
+        1  False   True  False
+        2  False  False   True
+        <BLANKLINE>
+        [3 rows x 3 columns]
+
+        >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], 'C': [1, 2, 3]})
+        >>> pd.get_dummies(df, prefix=['col1', 'col2'])
+           C  col1_a  col1_b  col2_a  col2_b  col2_c
+        0  1    True   False   False    True   False
+        1  2   False    True    True   False   False
+        2  3    True   False   False   False    True
+        <BLANKLINE>
+        [3 rows x 6 columns]
+
+        >>> pd.get_dummies(pd.Series(list('abcaa')))
+               a      b      c
+        0   True  False  False
+        1  False   True  False
+        2  False  False   True
+        3   True  False  False
+        4   True  False  False
+        <BLANKLINE>
+        [5 rows x 3 columns]
+
+        >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True)
+               b      c
+        0  False  False
+        1   True  False
+        2  False   True
+        3  False  False
+        4  False  False
+        <BLANKLINE>
+        [5 rows x 2 columns]
+
+    Args:
+      data (Series or DataFrame):
+        Data of which to get dummy indicators.
+
+      prefix (str, list of str, or dict of str, default None):
+        String to prepend to the dummy column names. Pass a list with length
+        equal to the number of columns when calling get_dummies on a
+        DataFrame. Alternatively, prefix can be a dictionary mapping column
+        names to prefixes.
+
+      prefix_sep (str, list of str, or dict of str, default '_'):
+        Separator/delimiter to use, appended to prefix. Or pass a list or
+        dictionary as with prefix.
+
+      dummy_na (bool, default False):
+        Add a column to indicate NaNs; if False, NaNs are ignored.
+
+      columns (list-like, default None):
+        Column names in the DataFrame to be encoded. If columns is None
+        then only the columns with string dtype will be converted.
+
+      drop_first (bool, default False):
+        Whether to get k-1 dummies out of k categorical levels by removing the
+        first level.
+
+      dtype (dtype, default bool):
+        Data type for new columns. Only a single dtype is allowed.
+
+    Returns:
+      DataFrame: Dummy-coded data. If data contains other columns than the
+      dummy-coded one(s), these will be prepended, unaltered, to the
+      result.
+    """
+    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
diff --git a/third_party/bigframes_vendored/pandas/core/reshape/merge.py b/third_party/bigframes_vendored/pandas/core/reshape/merge.py
index cc81de405b..b03f366fca 100644
--- a/third_party/bigframes_vendored/pandas/core/reshape/merge.py
+++ b/third_party/bigframes_vendored/pandas/core/reshape/merge.py
@@ -16,7 +16,6 @@ def merge(
     sort=False,
     suffixes=("_x", "_y"),
 ):
-
     """
     Merge DataFrame objects with a database-style join.
 
diff --git a/third_party/bigframes_vendored/pandas/core/reshape/tile.py b/third_party/bigframes_vendored/pandas/core/reshape/tile.py
index 24ea655a5f..d4471ed68e 100644
--- a/third_party/bigframes_vendored/pandas/core/reshape/tile.py
+++ b/third_party/bigframes_vendored/pandas/core/reshape/tile.py
@@ -1,6 +1,6 @@
 # Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/reshape/tile.py
 """
-Quantilization functions and related stuff
+Quantilization functions and related routines
 """
 from __future__ import annotations
 

From a005bd1085adfb9c421fcbdfbf1cbbd269fc0e26 Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Wed, 1 Nov 2023 21:14:41 +0000
Subject: [PATCH 7/8] fix: fix the failed test

---
 noxfile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/noxfile.py b/noxfile.py
index 602b17291d..d0bbda80fd 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -377,7 +377,7 @@ def e2e(session: nox.sessions.Session):
     run_system(
         session=session,
         prefix_name="e2e",
-        test_folder=[os.path.join("tests", "system", "large")],
+        test_folder=os.path.join("tests", "system", "large"),
         print_duration=True,
     )
 

From 7be45b643df45ae4990cc02c3c0100e2b872fd1e Mon Sep 17 00:00:00 2001
From: Ashley Xu <ashleyxu@google.com>
Date: Wed, 1 Nov 2023 21:56:58 +0000
Subject: [PATCH 8/8] fix: reorganize the directory

---
 samples/snippets/{ml_samples => }/clustering_model_test.py | 0
 samples/snippets/{ml_samples => }/gen_ai_model_test.py     | 0
 samples/snippets/{ml_samples => }/regression_model_test.py | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename samples/snippets/{ml_samples => }/clustering_model_test.py (100%)
 rename samples/snippets/{ml_samples => }/gen_ai_model_test.py (100%)
 rename samples/snippets/{ml_samples => }/regression_model_test.py (100%)

diff --git a/samples/snippets/ml_samples/clustering_model_test.py b/samples/snippets/clustering_model_test.py
similarity index 100%
rename from samples/snippets/ml_samples/clustering_model_test.py
rename to samples/snippets/clustering_model_test.py
diff --git a/samples/snippets/ml_samples/gen_ai_model_test.py b/samples/snippets/gen_ai_model_test.py
similarity index 100%
rename from samples/snippets/ml_samples/gen_ai_model_test.py
rename to samples/snippets/gen_ai_model_test.py
diff --git a/samples/snippets/ml_samples/regression_model_test.py b/samples/snippets/regression_model_test.py
similarity index 100%
rename from samples/snippets/ml_samples/regression_model_test.py
rename to samples/snippets/regression_model_test.py