diff --git a/.kokoro/continuous/notebook.cfg b/.kokoro/continuous/notebook.cfg index c14297019a..cc73c3bea4 100644 --- a/.kokoro/continuous/notebook.cfg +++ b/.kokoro/continuous/notebook.cfg @@ -6,11 +6,6 @@ env_vars: { value: "notebook" } -env_vars: { - key: "BENCHMARK_AND_PUBLISH" - value: "true" -} - env_vars: { key: "GOOGLE_CLOUD_PROJECT" value: "bigframes-testing" diff --git a/.kokoro/load/notebook.cfg b/.kokoro/load/notebook.cfg new file mode 100644 index 0000000000..c14297019a --- /dev/null +++ b/.kokoro/load/notebook.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "notebook" +} + +env_vars: { + key: "BENCHMARK_AND_PUBLISH" + value: "true" +} + +env_vars: { + key: "GOOGLE_CLOUD_PROJECT" + value: "bigframes-testing" +} diff --git a/CHANGELOG.md b/CHANGELOG.md index c398f17d43..55e295f06a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,37 @@ [1]: https://pypi.org/project/bigframes/#history +## [1.22.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.21.0...v1.22.0) (2024-10-09) + + +### Features + +* Support regional endpoints for more bigquery locations ([#1061](https://github.com/googleapis/python-bigquery-dataframes/issues/1061)) ([45b672a](https://github.com/googleapis/python-bigquery-dataframes/commit/45b672a9a6359ec8c4755d94e63e5ae77a39754b)) +* Update LLM generators to warn user about model name instead of raising error. ([#1048](https://github.com/googleapis/python-bigquery-dataframes/issues/1048)) ([650d80d](https://github.com/googleapis/python-bigquery-dataframes/commit/650d80d1ad90927068cdb71efbfc548b416641a6)) + + +### Bug Fixes + +* Access MATERIALIZED_VIEW with read_gbq ([#1070](https://github.com/googleapis/python-bigquery-dataframes/issues/1070)) ([601e984](https://github.com/googleapis/python-bigquery-dataframes/commit/601e984aeb3ebf1dcf9cb3f1c34b7f0e4ec7cd16)) +* Correct zero row count in DataFrame from table view ([#1062](https://github.com/googleapis/python-bigquery-dataframes/issues/1062)) ([b536070](https://github.com/googleapis/python-bigquery-dataframes/commit/b53607015abb79be0aa5666681f1c53b5b1bc2b5)) +* Fix generic error message when entering an incorrect column name ([#1031](https://github.com/googleapis/python-bigquery-dataframes/issues/1031)) ([5ac217d](https://github.com/googleapis/python-bigquery-dataframes/commit/5ac217d650bc4f5576ba2b6595a3c0b1d88813ad)) +* Make `explode` respect the index labels ([#1064](https://github.com/googleapis/python-bigquery-dataframes/issues/1064)) ([99ca0df](https://github.com/googleapis/python-bigquery-dataframes/commit/99ca0df90acbbd81197c9b6718b7de7e4dfb86cc)) +* Make invalid location warning case-insensitive ([#1044](https://github.com/googleapis/python-bigquery-dataframes/issues/1044)) ([b6cd55a](https://github.com/googleapis/python-bigquery-dataframes/commit/b6cd55afc49b522904a13a7fd34d40201d176588)) +* Remove palm2 test case from llm load test ([#1063](https://github.com/googleapis/python-bigquery-dataframes/issues/1063)) ([575a10a](https://github.com/googleapis/python-bigquery-dataframes/commit/575a10a7ba0fbac76867f02da1dd65355f00d7aa)) +* Show warning for unknown location set through .ctor ([#1052](https://github.com/googleapis/python-bigquery-dataframes/issues/1052)) ([02c2da7](https://github.com/googleapis/python-bigquery-dataframes/commit/02c2da733b834b99d8044f3c5cac3ac9a85802a6)) + + +### Performance Improvements + +* Reduce schema tracking overhead 
([#1056](https://github.com/googleapis/python-bigquery-dataframes/issues/1056)) ([1c3879d](https://github.com/googleapis/python-bigquery-dataframes/commit/1c3879df2d6925e17e2cdca827db8ec919471f72)) +* Repr generates fewer queries ([#1046](https://github.com/googleapis/python-bigquery-dataframes/issues/1046)) ([d204603](https://github.com/googleapis/python-bigquery-dataframes/commit/d204603fdc024823421397dbe514f1f7ced1bc2c)) +* Speedup internal tree comparisons ([#1060](https://github.com/googleapis/python-bigquery-dataframes/issues/1060)) ([4379438](https://github.com/googleapis/python-bigquery-dataframes/commit/4379438fc4f44ea847fd2c00a82af544265a30d2)) + + +### Documentation + +* Add docstring return type section to BigQueryOptions class ([#964](https://github.com/googleapis/python-bigquery-dataframes/issues/964)) ([307385f](https://github.com/googleapis/python-bigquery-dataframes/commit/307385f5295ae6918e7d42dcca2c0e0c32e82446)) + ## [1.21.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v1.20.0...v1.21.0) (2024-10-02) diff --git a/bigframes/_config/__init__.py b/bigframes/_config/__init__.py index ac58c19fa5..75f91b28d3 100644 --- a/bigframes/_config/__init__.py +++ b/bigframes/_config/__init__.py @@ -29,6 +29,7 @@ import bigframes._config.bigquery_options as bigquery_options import bigframes._config.compute_options as compute_options import bigframes._config.display_options as display_options +import bigframes._config.experiment_options as experiment_options import bigframes._config.sampling_options as sampling_options @@ -46,6 +47,9 @@ class ThreadLocalConfig(threading.local): compute_options: compute_options.ComputeOptions = field( default_factory=compute_options.ComputeOptions ) + experiment_options: experiment_options.ExperimentOptions = field( + default_factory=experiment_options.ExperimentOptions + ) class Options: @@ -122,6 +126,16 @@ def compute(self) -> compute_options.ComputeOptions: """ return self._local.compute_options + @property + def experiments(self) -> experiment_options.ExperimentOptions: + """Options controlling experiments + + Returns: + bigframes._config.experiment_options.ExperimentOptions: + Thread-local options for controlling experiments + """ + return self._local.experiment_options + @property def is_bigquery_thread_local(self) -> bool: """Indicator that we're using a thread-local session. diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 502f103bb5..2fdd7d6feb 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -36,26 +36,36 @@ UNKNOWN_LOCATION_MESSAGE = "The location '{location}' is set to an unknown value. Did you mean '{possibility}'?" 
-def _validate_location(value: Optional[str]): - - if value is None: - return - - if value not in bigframes.constants.ALL_BIGQUERY_LOCATIONS: - location = str(value) - possibility = min( - bigframes.constants.ALL_BIGQUERY_LOCATIONS, - key=lambda item: jellyfish.levenshtein_distance(location, item), - ) - warnings.warn( - UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility), - # There are many layers before we get to (possibly) the user's code: - # -> bpd.options.bigquery.location = "us-central-1" - # -> location.setter - # -> _validate_location - stacklevel=3, - category=bigframes.exceptions.UnknownLocationWarning, - ) +def _get_validated_location(value: Optional[str]) -> Optional[str]: + + if value is None or value in bigframes.constants.ALL_BIGQUERY_LOCATIONS: + return value + + location = str(value) + + location_lowercase = location.lower() + if location_lowercase in bigframes.constants.BIGQUERY_REGIONS: + return location_lowercase + + location_uppercase = location.upper() + if location_uppercase in bigframes.constants.BIGQUERY_MULTIREGIONS: + return location_uppercase + + possibility = min( + bigframes.constants.ALL_BIGQUERY_LOCATIONS, + key=lambda item: jellyfish.levenshtein_distance(location, item), + ) + warnings.warn( + UNKNOWN_LOCATION_MESSAGE.format(location=location, possibility=possibility), + # There are many layers before we get to (possibly) the user's code: + # -> bpd.options.bigquery.location = "us-central-1" + # -> location.setter + # -> _get_validated_location + stacklevel=3, + category=bigframes.exceptions.UnknownLocationWarning, + ) + + return value def _validate_ordering_mode(value: str) -> bigframes.enums.OrderingMode: @@ -84,7 +94,7 @@ def __init__( ): self._credentials = credentials self._project = project - self._location = location + self._location = _get_validated_location(location) self._bq_connection = bq_connection self._use_regional_endpoints = use_regional_endpoints self._application_name = application_name @@ -101,6 +111,10 @@ def application_name(self) -> Optional[str]: The application name to amend to the user agent sent to Google APIs. The recommended format is ``"application-name/major.minor.patch_version"`` or ``"(gpn:PartnerName;)"`` for official Google partners. + + Returns: + None or str: + Application name as a string if exists; otherwise None. """ return self._application_name @@ -114,7 +128,12 @@ def application_name(self, value: Optional[str]): @property def credentials(self) -> Optional[google.auth.credentials.Credentials]: - """The OAuth2 credentials to use for this client.""" + """The OAuth2 credentials to use for this client. + + Returns: + None or google.auth.credentials.Credentials: + google.auth.credentials.Credentials if exists; otherwise None. + """ return self._credentials @credentials.setter @@ -128,6 +147,10 @@ def location(self) -> Optional[str]: """Default location for job, datasets, and tables. For more information, see https://cloud.google.com/bigquery/docs/locations BigQuery locations. + + Returns: + None or str: + Default location as a string; otherwise None. 
""" return self._location @@ -135,12 +158,16 @@ def location(self) -> Optional[str]: def location(self, value: Optional[str]): if self._session_started and self._location != value: raise ValueError(SESSION_STARTED_MESSAGE.format(attribute="location")) - _validate_location(value) - self._location = value + self._location = _get_validated_location(value) @property def project(self) -> Optional[str]: - """Google Cloud project ID to use for billing and as the default project.""" + """Google Cloud project ID to use for billing and as the default project. + + Returns: + None or str: + Google Cloud project ID as a string; otherwise None. + """ return self._project @project.setter @@ -163,6 +190,10 @@ def bq_connection(self) -> Optional[str]: If this option isn't provided, or project or location aren't provided, session will use its default project/location/connection_id as default connection. + + Returns: + None or str: + Name of the BigQuery connection as a string; otherwise None. """ return self._bq_connection @@ -181,6 +212,12 @@ def skip_bq_connection_check(self) -> bool: connection (default or user-provided) does not exist, or it does not have necessary permissions set up to support BigQuery DataFrames operations, then a runtime error will be reported. + + Returns: + bool: + A boolean value, where True indicates a BigQuery connection is + not created or the connection does not have necessary + permissions set up; otherwise False. """ return self._skip_bq_connection_check @@ -196,13 +233,29 @@ def skip_bq_connection_check(self, value: bool): def use_regional_endpoints(self) -> bool: """Flag to connect to regional API endpoints. - .. deprecated:: 0.13.0 - Use of regional endpoints is a feature in Preview and - available only in selected regions and projects. + .. note:: + Use of regional endpoints is a feature in Preview and available only + in regions "europe-west3", "europe-west9", "europe-west8", + "me-central2", "us-east4" and "us-west1". - Requires that ``location`` is set. For example, to connect to - asia-northeast1-bigquery.googleapis.com, specify - ``location='asia-northeast1'`` and ``use_regional_endpoints=True``. + .. deprecated:: 0.13.0 + Use of locational endpoints is available only in selected projects. + + Requires that ``location`` is set. For supported regions, for example + ``europe-west3``, you need to specify ``location='europe-west3'`` and + ``use_regional_endpoints=True``, and then BigQuery DataFrames would + connect to the BigQuery endpoint ``bigquery.europe-west3.rep.googleapis.com``. + For not supported regions, for example ``asia-northeast1``, when you + specify ``location='asia-northeast1'`` and ``use_regional_endpoints=True``, + a different endpoint (called locational endpoint, now deprecated, used + to provide weaker promise on the request remaining within the location + during transit) ``europe-west3-bigquery.googleapis.com`` would be used. + + Returns: + bool: + A boolean value, where True indicates that regional endpoints + would be used for BigQuery and BigQuery storage APIs; otherwise + global endpoints would be used. """ return self._use_regional_endpoints @@ -235,6 +288,10 @@ def kms_key_name(self) -> Optional[str]: Cloud KMS CryptoKey Encrypter/Decrypter IAM role in the key's project. For more information, see https://cloud.google.com/bigquery/docs/customer-managed-encryption#assign_role Assign the Encrypter/Decrypter. + + Returns: + None or str: + Name of the customer managed encryption key as a string; otherwise None. 
""" return self._kms_key_name @@ -247,7 +304,12 @@ def kms_key_name(self, value: str): @property def ordering_mode(self) -> Literal["strict", "partial"]: - """Controls whether total row order is always maintained for DataFrame/Series.""" + """Controls whether total row order is always maintained for DataFrame/Series. + + Returns: + Literal: + A literal string value of either strict or partial ordering mode. + """ return self._ordering_mode.value @ordering_mode.setter diff --git a/bigframes/_config/experiment_options.py b/bigframes/_config/experiment_options.py new file mode 100644 index 0000000000..c39502eade --- /dev/null +++ b/bigframes/_config/experiment_options.py @@ -0,0 +1,36 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings + + +class ExperimentOptions: + """ + Encapsulates the configration for experiments + """ + + def __init__(self): + self._semantic_operators = False + + @property + def semantic_operators(self) -> bool: + return self._semantic_operators + + @semantic_operators.setter + def semantic_operators(self, value: bool): + if value is True: + warnings.warn( + "Semantic operators are still under experiments, and are subject to change in the future." + ) + self._semantic_operators = value diff --git a/bigframes/_config/sampling_options.py b/bigframes/_config/sampling_options.py index f4fa0928e1..ddb2a49713 100644 --- a/bigframes/_config/sampling_options.py +++ b/bigframes/_config/sampling_options.py @@ -33,14 +33,44 @@ class SamplingOptions: random_state: Optional[int] = None def with_max_download_size(self, max_rows: Optional[int]) -> SamplingOptions: + """Configures the maximum download size for data sampling in MB + + Args: + max_rows (None or int): + An int value for the maximum row size. + + Returns: + bigframes._config.sampling_options.SamplingOptions: + The configuration for data sampling. + """ return SamplingOptions( max_rows, self.enable_downsampling, self.sampling_method, self.random_state ) def with_method(self, method: Literal["head", "uniform"]) -> SamplingOptions: + """Configures the downsampling algorithms to be chosen from + + Args: + method (None or Literal): + A literal string value of either head or uniform data sampling method. + + Returns: + bigframes._config.sampling_options.SamplingOptions: + The configuration for data sampling. + """ return SamplingOptions(self.max_download_size, True, method, self.random_state) def with_random_state(self, state: Optional[int]) -> SamplingOptions: + """Configures the seed for the uniform downsampling algorithm + + Args: + state (None or int): + An int value for the data sampling random state + + Returns: + bigframes._config.sampling_options.SamplingOptions: + The configuration for data sampling. 
+ """ return SamplingOptions( self.max_download_size, self.enable_downsampling, @@ -49,6 +79,12 @@ def with_random_state(self, state: Optional[int]) -> SamplingOptions: ) def with_disabled(self) -> SamplingOptions: + """Configures whether to disable downsampling + + Returns: + bigframes._config.sampling_options.SamplingOptions: + The configuration for data sampling. + """ return SamplingOptions( self.max_download_size, False, self.sampling_method, self.random_state ) diff --git a/bigframes/constants.py b/bigframes/constants.py index 4d5b6b8eb3..13636a4484 100644 --- a/bigframes/constants.py +++ b/bigframes/constants.py @@ -22,9 +22,8 @@ DEFAULT_EXPIRATION = datetime.timedelta(days=7) # https://cloud.google.com/bigquery/docs/locations -ALL_BIGQUERY_LOCATIONS = frozenset( +BIGQUERY_REGIONS = frozenset( { - # regions "us-east5", "us-south1", "us-central1", @@ -68,18 +67,23 @@ "me-central1", "me-west1", "africa-south1", - # multi-regions + } +) +BIGQUERY_MULTIREGIONS = frozenset( + { "US", "EU", } ) +ALL_BIGQUERY_LOCATIONS = frozenset(BIGQUERY_REGIONS.union(BIGQUERY_MULTIREGIONS)) # https://cloud.google.com/storage/docs/regional-endpoints REP_ENABLED_BIGQUERY_LOCATIONS = frozenset( { - "me-central2", - "europe-west9", "europe-west3", + "europe-west9", + "europe-west8", + "me-central2", "us-east4", "us-west1", } diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index acab99f249..485a9d79a7 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -158,10 +158,6 @@ def session(self) -> Session: def schema(self) -> schemata.ArraySchema: return self.node.schema - @functools.cached_property - def _compiled_schema(self) -> schemata.ArraySchema: - return bigframes.core.compile.test_only_ibis_inferred_schema(self.node) - @property def explicitly_ordered(self) -> bool: # see BigFrameNode.explicitly_ordered @@ -229,6 +225,23 @@ def order_by(self, by: Sequence[OrderingExpression]) -> ArrayValue: def reversed(self) -> ArrayValue: return ArrayValue(nodes.ReversedNode(child=self.node)) + def slice( + self, start: Optional[int], stop: Optional[int], step: Optional[int] + ) -> ArrayValue: + if self.node.order_ambiguous and not (self.session._strictly_ordered): + warnings.warn( + "Window ordering may be ambiguous, this can cause unstable results.", + bigframes.exceptions.AmbiguousWindowWarning, + ) + return ArrayValue( + nodes.SliceNode( + self.node, + start=start, + stop=stop, + step=step if (step is not None) else 1, + ) + ) + def promote_offsets(self) -> Tuple[ArrayValue, str]: """ Convenience function to promote copy of column offsets to a value column. Can be used to reset index. @@ -394,20 +407,6 @@ def project_window_op( output_name, ) - def _reproject_to_table(self) -> ArrayValue: - """ - Internal operators that projects the internal representation into a - new ibis table expression where each value column is a direct - reference to a column in that table expression. Needed after - some operations such as window operations that cannot be used - recursively in projections. 
- """ - return ArrayValue( - nodes.ReprojectOpNode( - child=self.node, - ) - ) - def relational_join( self, other: ArrayValue, diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index 2c4991b629..785691edd6 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -196,8 +196,7 @@ def interpolate(block: blocks.Block, method: str = "linear") -> blocks.Block: else: output_column_ids.append(column) - # Force reproject since used `skip_project_unsafe` perviously - block = block.select_columns(output_column_ids)._force_reproject() + block = block.select_columns(output_column_ids) return block.with_column_labels(original_labels) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 9e245399cd..b0a8903e19 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1390,7 +1390,7 @@ def explode( expr, column_labels=self.column_labels, index_columns=self.index_columns, - index_labels=self.column_labels.names, + index_labels=self._index_labels, ) def _standard_stats(self, column_id) -> typing.Sequence[agg_ops.UnaryAggregateOp]: @@ -1465,84 +1465,17 @@ def slice( self, start: typing.Optional[int] = None, stop: typing.Optional[int] = None, - step: typing.Optional[int] = None, - ) -> bigframes.core.blocks.Block: - if step is None: - step = 1 + step: int = 1, + ) -> Block: if step == 0: - raise ValueError("slice step cannot be zero") - if step < 0: - reverse_start = (-start - 1) if start else 0 - reverse_stop = (-stop - 1) if stop else None - reverse_step = -step - return self.reversed()._forward_slice( - reverse_start, reverse_stop, reverse_step - ) - return self._forward_slice(start or 0, stop, step) - - def _forward_slice(self, start: int = 0, stop=None, step: int = 1): - """Performs slice but only for positive step size.""" - if step <= 0: - raise ValueError("forward_slice only supports positive step size") - - use_postive_offsets = ( - (start > 0) - or ((stop is not None) and (stop >= 0)) - or ((step > 1) and (start >= 0)) - ) - use_negative_offsets = ( - (start < 0) or (stop and (stop < 0)) or ((step > 1) and (start < 0)) + raise ValueError("Slice step size must be non-zero") + return Block( + self.expr.slice(start, stop, step), + index_columns=self.index_columns, + column_labels=self.column_labels, + index_labels=self._index_labels, ) - block = self - - # only generate offsets that are used - positive_offsets = None - negative_offsets = None - - if use_postive_offsets: - block, positive_offsets = self.promote_offsets() - if use_negative_offsets: - block, negative_offsets = block.reversed().promote_offsets() - block = block.reversed() - - conditions = [] - if start != 0: - if start > 0: - assert positive_offsets - conditions.append(ops.ge_op.as_expr(positive_offsets, ex.const(start))) - else: - assert negative_offsets - conditions.append( - ops.le_op.as_expr(negative_offsets, ex.const(-start - 1)) - ) - if stop is not None: - if stop >= 0: - assert positive_offsets - conditions.append(ops.lt_op.as_expr(positive_offsets, ex.const(stop))) - else: - assert negative_offsets - conditions.append( - ops.gt_op.as_expr(negative_offsets, ex.const(-stop - 1)) - ) - if step > 1: - if start >= 0: - assert positive_offsets - start_diff = ops.sub_op.as_expr(positive_offsets, ex.const(start)) - else: - assert negative_offsets - start_diff = ops.sub_op.as_expr(negative_offsets, ex.const(-start + 1)) - step_cond = ops.eq_op.as_expr( - ops.mod_op.as_expr(start_diff, ex.const(step)), ex.const(0) - ) - 
conditions.append(step_cond) - - for cond in conditions: - block, cond_id = block.project_expr(cond) - block = block.filter_by_id(cond_id) - - return block.select_columns(self.value_columns) - # Using cache to optimize for Jupyter Notebook's behavior where both '__repr__' # and '__repr_html__' are called in a single display action, reducing redundant # queries. @@ -1557,10 +1490,11 @@ def retrieve_repr_request_results( Returns a tuple of the dataframe and the overall number of rows of the query. """ + # head caches full underlying expression, so row_count will be free after head_result = self.session._executor.head(self.expr, max_results) count = self.session._executor.get_row_count(self.expr) - arrow = self.session._executor.execute(self.expr).to_arrow_table() + arrow = head_result.to_arrow_table() df = io_pandas.arrow_to_pandas(arrow, schema=self.expr.schema) self._copy_index_to_pandas(df) return df, count, head_result.query_job @@ -2432,15 +2366,6 @@ def join( # Always sort mult-index join return join_multi_indexed(self, other, how=how, sort=sort) - def _force_reproject(self) -> Block: - """Forces a reprojection of the underlying tables expression. Used to force predicate/order application before subsequent operations.""" - return Block( - self._expr._reproject_to_table(), - index_columns=self.index_columns, - column_labels=self.column_labels, - index_labels=self.index.names, - ) - def is_monotonic_increasing( self, column_id: typing.Union[str, Sequence[str]] ) -> bool: diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 0917097c70..fd1514d7b7 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -335,10 +335,6 @@ def compile_window(self, node: nodes.WindowOpNode, ordered: bool = True): ) return result if ordered else result.to_unordered() - @_compile_node.register - def compile_reproject(self, node: nodes.ReprojectOpNode, ordered: bool = True): - return self.compile_node(node.child, ordered)._reproject_to_table() - @_compile_node.register def compile_explode(self, node: nodes.ExplodeNode, ordered: bool = True): return self.compile_node(node.child, ordered).explode(node.column_ids) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 5cb0e65729..2d351cf82d 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -88,6 +88,12 @@ def __getitem__( keys = list(key) else: keys = [key] + + bad_keys = [key for key in keys if key not in self._block.column_labels] + + if len(bad_keys) > 0: + raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}") + columns = [ col_id for col_id, label in self._col_id_labels.items() if label in keys ] diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index e65040686e..1d01936509 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -20,7 +20,7 @@ import functools import itertools import typing -from typing import Callable, Iterable, Sequence, Tuple +from typing import Callable, Iterable, Optional, Sequence, Tuple import google.cloud.bigquery as bq @@ -51,8 +51,8 @@ class Field: dtype: bigframes.dtypes.Dtype -@dataclass(frozen=True) -class BigFrameNode: +@dataclass(eq=False, frozen=True) +class BigFrameNode(abc.ABC): """ Immutable node for representing 2D typed array as a tree of operators. 
@@ -95,12 +95,30 @@ def session(self): return sessions[0] return None + def _as_tuple(self) -> Tuple: + """Get all fields as tuple.""" + return tuple(getattr(self, field.name) for field in fields(self)) + + def __hash__(self) -> int: + # Custom hash that uses cache to avoid costly recomputation + return self._cached_hash + + def __eq__(self, other) -> bool: + # Custom eq that tries to short-circuit full structural comparison + if not isinstance(other, self.__class__): + return False + if self is other: + return True + if hash(self) != hash(other): + return False + return self._as_tuple() == other._as_tuple() + # BigFrameNode trees can be very deep so its important avoid recalculating the hash from scratch # Each subclass of BigFrameNode should use this property to implement __hash__ # The default dataclass-generated __hash__ method is not cached @functools.cached_property - def _node_hash(self): - return hash(tuple(hash(getattr(self, field.name)) for field in fields(self))) + def _cached_hash(self): + return hash(self._as_tuple()) @property def roots(self) -> typing.Set[BigFrameNode]: @@ -109,10 +127,10 @@ def roots(self) -> typing.Set[BigFrameNode]: ) return set(roots) - # TODO: For deep trees, this can create a lot of overhead, maybe use zero-copy persistent datastructure? + # TODO: Store some local data lazily for select, aggregate nodes. @property @abc.abstractmethod - def fields(self) -> Tuple[Field, ...]: + def fields(self) -> Iterable[Field]: ... @property @@ -226,7 +244,7 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return self.transform_children(lambda x: x.prune(used_cols)) -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class UnaryNode(BigFrameNode): child: BigFrameNode @@ -234,8 +252,8 @@ class UnaryNode(BigFrameNode): def child_nodes(self) -> typing.Sequence[BigFrameNode]: return (self.child,) - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: + @property + def fields(self) -> Iterable[Field]: return self.child.fields @property @@ -252,7 +270,38 @@ def order_ambiguous(self) -> bool: return self.child.order_ambiguous -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) +class SliceNode(UnaryNode): + """Logical slice node conditionally becomes limit or filter over row numbers.""" + + start: Optional[int] + stop: Optional[int] + step: int = 1 + + @property + def row_preserving(self) -> bool: + """Whether this node preserves input rows.""" + return False + + @property + def non_local(self) -> bool: + """ + Whether this node combines information across multiple rows instead of processing rows independently. + Used as an approximation for whether the expression may require shuffling to execute (and therefore be expensive). + """ + return True + + # these are overestimates, more accurate numbers available by converting to concrete limit or analytic+filter ops + @property + def variables_introduced(self) -> int: + return 2 + + @property + def relation_ops_created(self) -> int: + return 2 + + +@dataclass(frozen=True, eq=False) class JoinNode(BigFrameNode): left_child: BigFrameNode right_child: BigFrameNode @@ -285,12 +334,9 @@ def explicitly_ordered(self) -> bool: # Do not consider user pre-join ordering intent - they need to re-order post-join in unordered mode. 
return False - def __hash__(self): - return self._node_hash - - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: - return tuple(itertools.chain(self.left_child.fields, self.right_child.fields)) + @property + def fields(self) -> Iterable[Field]: + return itertools.chain(self.left_child.fields, self.right_child.fields) @functools.cached_property def variables_introduced(self) -> int: @@ -320,7 +366,7 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return self.transform_children(lambda x: x.prune(new_used)) -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class ConcatNode(BigFrameNode): # TODO: Explcitly map column ids from each child children: Tuple[BigFrameNode, ...] @@ -345,13 +391,10 @@ def explicitly_ordered(self) -> bool: # Consider concat as an ordered operations (even though input frames may not be ordered) return True - def __hash__(self): - return self._node_hash - - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: + @property + def fields(self) -> Iterable[Field]: # TODO: Output names should probably be aligned beforehand or be part of concat definition - return tuple( + return ( Field(bfet_ids.ColumnId(f"column_{i}"), field.dtype) for i, field in enumerate(self.children[0].fields) ) @@ -371,16 +414,13 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return self -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class FromRangeNode(BigFrameNode): # TODO: Enforce single-row, single column constraint start: BigFrameNode end: BigFrameNode step: int - def __hash__(self): - return self._node_hash - @property def roots(self) -> typing.Set[BigFrameNode]: return {self} @@ -398,8 +438,10 @@ def explicitly_ordered(self) -> bool: return True @functools.cached_property - def fields(self) -> Tuple[Field, ...]: - return (Field(bfet_ids.ColumnId("labels"), self.start.fields[0].dtype),) + def fields(self) -> Iterable[Field]: + return ( + Field(bfet_ids.ColumnId("labels"), next(iter(self.start.fields)).dtype), + ) @functools.cached_property def variables_introduced(self) -> int: @@ -419,7 +461,7 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: # Input Nodex # TODO: Most leaf nodes produce fixed column names based on the datasource # They should support renaming -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class LeafNode(BigFrameNode): @property def roots(self) -> typing.Set[BigFrameNode]: @@ -451,7 +493,7 @@ class ScanList: items: typing.Tuple[ScanItem, ...] -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class ReadLocalNode(LeafNode): feather_bytes: bytes data_schema: schemata.ArraySchema @@ -460,14 +502,11 @@ class ReadLocalNode(LeafNode): scan_list: ScanList session: typing.Optional[bigframes.session.Session] = None - def __hash__(self): - return self._node_hash - - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: - return tuple(Field(col_id, dtype) for col_id, dtype, _ in self.scan_list.items) + @property + def fields(self) -> Iterable[Field]: + return (Field(col_id, dtype) for col_id, dtype, _ in self.scan_list.items) - @functools.cached_property + @property def variables_introduced(self) -> int: """Defines the number of variables generated by the current node. Used to estimate query planning complexity.""" return len(self.scan_list.items) + 1 @@ -508,6 +547,7 @@ class GbqTable: table_id: str = field() physical_schema: Tuple[bq.SchemaField, ...] 
= field() n_rows: int = field() + is_physically_stored: bool = field() cluster_cols: typing.Optional[Tuple[str, ...]] @staticmethod @@ -523,6 +563,7 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable: table_id=table.table_id, physical_schema=schema, n_rows=table.num_rows, + is_physically_stored=(table.table_type in ["TABLE", "MATERIALIZED_VIEW"]), cluster_cols=None if table.clustering_fields is None else tuple(table.clustering_fields), @@ -545,7 +586,7 @@ class BigqueryDataSource: ## Put ordering in here or just add order_by node above? -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class ReadTableNode(LeafNode): source: BigqueryDataSource # Subset of physical schema column @@ -568,12 +609,9 @@ def __post_init__(self): def session(self): return self.table_session - def __hash__(self): - return self._node_hash - - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: - return tuple(Field(col_id, dtype) for col_id, dtype, _ in self.scan_list.items) + @property + def fields(self) -> Iterable[Field]: + return (Field(col_id, dtype) for col_id, dtype, _ in self.scan_list.items) @property def relation_ops_created(self) -> int: @@ -603,7 +641,7 @@ def variables_introduced(self) -> int: @property def row_count(self) -> typing.Optional[int]: - if self.source.sql_predicate is None: + if self.source.sql_predicate is None and self.source.table.is_physically_stored: return self.source.table.n_rows return None @@ -614,15 +652,12 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return ReadTableNode(self.source, new_scan_list, self.table_session) -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class CachedTableNode(ReadTableNode): # The original BFET subtree that was cached # note: this isn't a "child" node. original_node: BigFrameNode = field() - def __hash__(self): - return self._node_hash - def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: new_scan_list = ScanList( tuple(item for item in self.scan_list.items if item.id in used_cols) @@ -633,20 +668,19 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: # Unary nodes -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class PromoteOffsetsNode(UnaryNode): col_id: bigframes.core.identifiers.ColumnId - def __hash__(self): - return self._node_hash - @property def non_local(self) -> bool: return True @property - def fields(self) -> Tuple[Field, ...]: - return (*self.child.fields, Field(self.col_id, bigframes.dtypes.INT_DTYPE)) + def fields(self) -> Iterable[Field]: + return itertools.chain( + self.child.fields, [Field(self.col_id, bigframes.dtypes.INT_DTYPE)] + ) @property def relation_ops_created(self) -> int: @@ -664,7 +698,7 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return self.transform_children(lambda x: x.prune(new_used)) -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class FilterNode(UnaryNode): predicate: ex.Expression @@ -672,9 +706,6 @@ class FilterNode(UnaryNode): def row_preserving(self) -> bool: return False - def __hash__(self): - return self._node_hash - @property def variables_introduced(self) -> int: return 1 @@ -685,13 +716,10 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return FilterNode(pruned_child, self.predicate) -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class OrderByNode(UnaryNode): by: Tuple[OrderingExpression, ...] 
- def __hash__(self): - return self._node_hash - @property def variables_introduced(self) -> int: return 0 @@ -714,14 +742,11 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return OrderByNode(pruned_child, self.by) -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class ReversedNode(UnaryNode): # useless field to make sure has distinct hash reversed: bool = True - def __hash__(self): - return self._node_hash - @property def variables_introduced(self) -> int: return 0 @@ -732,17 +757,14 @@ def relation_ops_created(self) -> int: return 0 -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class SelectionNode(UnaryNode): input_output_pairs: typing.Tuple[ typing.Tuple[ex.DerefOp, bigframes.core.identifiers.ColumnId], ... ] - def __hash__(self): - return self._node_hash - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: + def fields(self) -> Iterable[Field]: return tuple( Field(output, self.child.get_type(input.id)) for input, output in self.input_output_pairs @@ -770,7 +792,7 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return SelectionNode(pruned_child, pruned_selections) -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class ProjectionNode(UnaryNode): """Assigns new variables (without modifying existing ones)""" @@ -786,17 +808,17 @@ def __post_init__(self): # Cannot assign to existing variables - append only! assert all(name not in self.child.schema.names for _, name in self.assignments) - def __hash__(self): - return self._node_hash - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: + def added_fields(self) -> Tuple[Field, ...]: input_types = self.child._dtype_lookup - new_fields = ( + return tuple( Field(id, bigframes.dtypes.dtype_for_etype(ex.output_type(input_types))) for ex, id in self.assignments ) - return (*self.child.fields, *new_fields) + + @property + def fields(self) -> Iterable[Field]: + return itertools.chain(self.child.fields, self.added_fields) @property def variables_introduced(self) -> int: @@ -817,7 +839,7 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: # TODO: Merge RowCount into Aggregate Node? # Row count can be compute from table metadata sometimes, so it is a bit special. -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class RowCountNode(UnaryNode): @property def row_preserving(self) -> bool: @@ -827,8 +849,8 @@ def row_preserving(self) -> bool: def non_local(self) -> bool: return True - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: + @property + def fields(self) -> Iterable[Field]: return (Field(bfet_ids.ColumnId("count"), bigframes.dtypes.INT_DTYPE),) @property @@ -840,7 +862,7 @@ def defines_namespace(self) -> bool: return True -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class AggregateNode(UnaryNode): aggregations: typing.Tuple[ typing.Tuple[ex.Aggregation, bigframes.core.identifiers.ColumnId], ... 
@@ -852,15 +874,12 @@ class AggregateNode(UnaryNode): def row_preserving(self) -> bool: return False - def __hash__(self): - return self._node_hash - @property def non_local(self) -> bool: return True @functools.cached_property - def fields(self) -> Tuple[Field, ...]: + def fields(self) -> Iterable[Field]: by_items = ( Field(ref.id, self.child.get_type(ref.id)) for ref in self.by_column_ids ) @@ -873,7 +892,7 @@ def fields(self) -> Tuple[Field, ...]: ) for agg, id in self.aggregations ) - return (*by_items, *agg_items) + return tuple(itertools.chain(by_items, agg_items)) @property def variables_introduced(self) -> int: @@ -902,7 +921,7 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return AggregateNode(pruned_child, pruned_aggs, self.by_column_ids, self.dropna) -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class WindowOpNode(UnaryNode): column_name: ex.DerefOp op: agg_ops.UnaryWindowOp @@ -911,18 +930,13 @@ class WindowOpNode(UnaryNode): never_skip_nulls: bool = False skip_reproject_unsafe: bool = False - def __hash__(self): - return self._node_hash - @property def non_local(self) -> bool: return True - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: - input_type = self.child.get_type(self.column_name.id) - new_item_dtype = self.op.output_type(input_type) - return (*self.child.fields, Field(self.output_name, new_item_dtype)) + @property + def fields(self) -> Iterable[Field]: + return itertools.chain(self.child.fields, [self.added_field]) @property def variables_introduced(self) -> int: @@ -933,6 +947,12 @@ def relation_ops_created(self) -> int: # Assume that if not reprojecting, that there is a sequence of window operations sharing the same window return 0 if self.skip_reproject_unsafe else 4 + @functools.cached_property + def added_field(self) -> Field: + input_type = self.child.get_type(self.column_name.id) + new_item_dtype = self.op.output_type(input_type) + return Field(self.output_name, new_item_dtype) + def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: if self.output_name not in used_cols: return self.child @@ -942,23 +962,7 @@ def prune(self, used_cols: COLUMN_SET) -> BigFrameNode: return self.transform_children(lambda x: x.prune(consumed_ids)) -# TODO: Remove this op -@dataclass(frozen=True) -class ReprojectOpNode(UnaryNode): - def __hash__(self): - return self._node_hash - - @property - def variables_introduced(self) -> int: - return 0 - - @property - def relation_ops_created(self) -> int: - # This op is not a real transformation, just a hint to the sql generator - return 0 - - -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class RandomSampleNode(UnaryNode): fraction: float @@ -970,16 +974,13 @@ def deterministic(self) -> bool: def row_preserving(self) -> bool: return False - def __hash__(self): - return self._node_hash - @property def variables_introduced(self) -> int: return 1 # TODO: Explode should create a new column instead of overriding the existing one -@dataclass(frozen=True) +@dataclass(frozen=True, eq=False) class ExplodeNode(UnaryNode): column_ids: typing.Tuple[ex.DerefOp, ...] 
@@ -987,12 +988,9 @@ class ExplodeNode(UnaryNode): def row_preserving(self) -> bool: return False - def __hash__(self): - return self._node_hash - - @functools.cached_property - def fields(self) -> Tuple[Field, ...]: - return tuple( + @property + def fields(self) -> Iterable[Field]: + return ( Field( field.id, bigframes.dtypes.arrow_dtype_to_bigframes_dtype( diff --git a/bigframes/core/rewrite.py b/bigframes/core/rewrite.py index 095f537c21..d4e530fff3 100644 --- a/bigframes/core/rewrite.py +++ b/bigframes/core/rewrite.py @@ -16,13 +16,15 @@ import dataclasses import functools import itertools -from typing import Mapping, Optional, Sequence, Tuple +from typing import cast, Mapping, Optional, Sequence, Tuple import bigframes.core.expression as scalar_exprs +import bigframes.core.guid as guids import bigframes.core.identifiers as ids import bigframes.core.join_def as join_defs import bigframes.core.nodes as nodes import bigframes.core.ordering as order +import bigframes.core.tree_properties as traversals import bigframes.operations as ops Selection = Tuple[Tuple[scalar_exprs.Expression, ids.ColumnId], ...] @@ -381,3 +383,172 @@ def common_selection_root( if r_node in l_nodes: return r_node return None + + +def replace_slice_ops(root: nodes.BigFrameNode) -> nodes.BigFrameNode: + # TODO: we want to pull up some slices into limit op if near root. + if isinstance(root, nodes.SliceNode): + root = root.transform_children(replace_slice_ops) + return convert_slice_to_filter(cast(nodes.SliceNode, root)) + else: + return root.transform_children(replace_slice_ops) + + +def get_simplified_slice(node: nodes.SliceNode): + """Attempts to simplify the slice.""" + row_count = traversals.row_count(node) + start, stop, step = node.start, node.stop, node.step + + if start is None: + start = 0 if step > 0 else -1 + if row_count and step > 0: + if start and start < 0: + start = row_count + start + if stop and stop < 0: + stop = row_count + stop + return start, stop, step + + +def convert_slice_to_filter(node: nodes.SliceNode): + start, stop, step = get_simplified_slice(node) + + # no-op (eg. df[::1]) + if ( + ((start == 0) or (start is None)) + and ((stop is None) or (stop == -1)) + and (step == 1) + ): + return node.child + # No filtering, just reverse (eg. 
df[::-1]) + if ((start is None) or (start == -1)) and (not stop) and (step == -1): + return nodes.ReversedNode(node.child) + # if start/stop/step are all non-negative, and do a simple predicate on forward offsets + if ((start is None) or (start >= 0)) and ((stop is None) or (stop >= 0)): + node_w_offset = add_offsets(node.child) + predicate = convert_simple_slice( + scalar_exprs.DerefOp(node_w_offset.col_id), start or 0, stop, step + ) + filtered = nodes.FilterNode(node_w_offset, predicate) + return drop_cols(filtered, (node_w_offset.col_id,)) + + # fallback cases, generate both forward and backward offsets + if step < 0: + forward_offsets = add_offsets(node.child) + reversed_offsets = add_offsets(nodes.ReversedNode(forward_offsets)) + dual_indexed = reversed_offsets + else: + reversed_offsets = add_offsets(nodes.ReversedNode(node.child)) + forward_offsets = add_offsets(nodes.ReversedNode(reversed_offsets)) + dual_indexed = forward_offsets + predicate = convert_complex_slice( + scalar_exprs.DerefOp(forward_offsets.col_id), + scalar_exprs.DerefOp(reversed_offsets.col_id), + start, + stop, + step, + ) + filtered = nodes.FilterNode(dual_indexed, predicate) + return drop_cols(filtered, (forward_offsets.col_id, reversed_offsets.col_id)) + + +def add_offsets(node: nodes.BigFrameNode) -> nodes.PromoteOffsetsNode: + # Allow providing custom id generator? + offsets_id = ids.ColumnId(guids.generate_guid()) + return nodes.PromoteOffsetsNode(node, offsets_id) + + +def drop_cols( + node: nodes.BigFrameNode, drop_cols: Tuple[ids.ColumnId, ...] +) -> nodes.SelectionNode: + # adding a whole node that redefines the schema is a lot of overhead, should do something more efficient + selections = tuple( + (scalar_exprs.DerefOp(id), id) for id in node.ids if id not in drop_cols + ) + return nodes.SelectionNode(node, selections) + + +def convert_simple_slice( + offsets: scalar_exprs.Expression, + start: int = 0, + stop: Optional[int] = None, + step: int = 1, +) -> scalar_exprs.Expression: + """Performs slice but only for positive step size.""" + assert start >= 0 + assert (stop is None) or (stop >= 0) + + conditions = [] + if start > 0: + conditions.append(ops.ge_op.as_expr(offsets, scalar_exprs.const(start))) + if (stop is not None) and (stop >= 0): + conditions.append(ops.lt_op.as_expr(offsets, scalar_exprs.const(stop))) + if step > 1: + start_diff = ops.sub_op.as_expr(offsets, scalar_exprs.const(start)) + step_cond = ops.eq_op.as_expr( + ops.mod_op.as_expr(start_diff, scalar_exprs.const(step)), + scalar_exprs.const(0), + ) + conditions.append(step_cond) + + return merge_predicates(conditions) or scalar_exprs.const(True) + + +def convert_complex_slice( + forward_offsets: scalar_exprs.Expression, + reverse_offsets: scalar_exprs.Expression, + start: int, + stop: Optional[int], + step: int = 1, +) -> scalar_exprs.Expression: + conditions = [] + assert step != 0 + if start or ((start is not None) and step < 0): + if start > 0 and step > 0: + start_cond = ops.ge_op.as_expr(forward_offsets, scalar_exprs.const(start)) + elif start > 0 and step < 0: + start_cond = ops.le_op.as_expr(forward_offsets, scalar_exprs.const(start)) + elif start < 0 and step > 0: + start_cond = ops.le_op.as_expr( + reverse_offsets, scalar_exprs.const(-start - 1) + ) + else: + assert start < 0 and step < 0 + start_cond = ops.ge_op.as_expr( + reverse_offsets, scalar_exprs.const(-start - 1) + ) + conditions.append(start_cond) + if stop is not None: + if stop >= 0 and step > 0: + stop_cond = ops.lt_op.as_expr(forward_offsets, 
scalar_exprs.const(stop)) + elif stop >= 0 and step < 0: + stop_cond = ops.gt_op.as_expr(forward_offsets, scalar_exprs.const(stop)) + elif stop < 0 and step > 0: + stop_cond = ops.gt_op.as_expr( + reverse_offsets, scalar_exprs.const(-stop - 1) + ) + else: + assert (stop < 0) and (step < 0) + stop_cond = ops.lt_op.as_expr( + reverse_offsets, scalar_exprs.const(-stop - 1) + ) + conditions.append(stop_cond) + if step != 1: + if step > 1 and start >= 0: + start_diff = ops.sub_op.as_expr(forward_offsets, scalar_exprs.const(start)) + elif step > 1 and start < 0: + start_diff = ops.sub_op.as_expr( + reverse_offsets, scalar_exprs.const(-start + 1) + ) + elif step < 0 and start >= 0: + start_diff = ops.add_op.as_expr(forward_offsets, scalar_exprs.const(start)) + else: + assert step < 0 and start < 0 + start_diff = ops.add_op.as_expr( + reverse_offsets, scalar_exprs.const(-start + 1) + ) + step_cond = ops.eq_op.as_expr( + ops.mod_op.as_expr(start_diff, scalar_exprs.const(step)), + scalar_exprs.const(0), + ) + conditions.append(step_cond) + return merge_predicates(conditions) or scalar_exprs.const(True) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index d9f7cb9f42..0cfa5a2154 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -74,6 +74,7 @@ import bigframes.operations.aggregations import bigframes.operations.aggregations as agg_ops import bigframes.operations.plotting as plotting +import bigframes.operations.semantics import bigframes.operations.structs import bigframes.series import bigframes.series as bf_series @@ -689,7 +690,6 @@ def _repr_html_(self) -> str: if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) - self._cached() # TODO(swast): pass max_columns and get the true column count back. Maybe # get 1 more column than we have requested so that pandas can add the # ... for us? @@ -3722,7 +3722,9 @@ def _slice( stop: typing.Optional[int] = None, step: typing.Optional[int] = None, ) -> DataFrame: - block = self._block.slice(start=start, stop=stop, step=step) + block = self._block.slice( + start=start, stop=stop, step=step if (step is not None) else 1 + ) return DataFrame(block) def __array_ufunc__( @@ -3875,3 +3877,7 @@ def _throw_if_null_index(self, opname: str): raise bigframes.exceptions.NullIndexError( f"DataFrame cannot perform {opname} as it has no index. Set an index using set_index." ) + + @property + def semantics(self): + return bigframes.operations.semantics.Semantics(self) diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index c12da01b54..3920da6c71 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -83,6 +83,13 @@ _ML_EMBED_TEXT_STATUS = "ml_embed_text_status" _ML_GENERATE_EMBEDDING_STATUS = "ml_generate_embedding_status" +_MODEL_NOT_SUPPORTED_WARNING = ( + "Model name '{model_name}' is not supported. " + "We are currently aware of the following models: {known_models}. " + "However, model names can change, and the supported models may be outdated. " + "You should use this model name only if you are sure that it is supported in BigQuery." +) + @typing_extensions.deprecated( "PaLM2TextGenerator is going to be deprecated. Use GeminiTextGenerator(https://cloud.google.com/python/docs/reference/bigframes/latest/bigframes.ml.llm.GeminiTextGenerator) instead. ", @@ -154,8 +161,11 @@ def _create_bqml_model(self): ) if self.model_name not in _TEXT_GENERATOR_ENDPOINTS: - raise ValueError( - f"Model name {self.model_name} is not supported. We only support {', '.join(_TEXT_GENERATOR_ENDPOINTS)}." 
+ warnings.warn( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_TEXT_GENERATOR_ENDPOINTS), + ) ) options = { @@ -484,8 +494,11 @@ def _create_bqml_model(self): ) if self.model_name not in _PALM2_EMBEDDING_GENERATOR_ENDPOINTS: - raise ValueError( - f"Model name {self.model_name} is not supported. We only support {', '.join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS)}." + warnings.warn( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_PALM2_EMBEDDING_GENERATOR_ENDPOINTS), + ) ) endpoint = ( @@ -644,8 +657,11 @@ def _create_bqml_model(self): ) if self.model_name not in _TEXT_EMBEDDING_ENDPOINTS: - raise ValueError( - f"Model name {self.model_name} is not supported. We only support {', '.join(_TEXT_EMBEDDING_ENDPOINTS)}." + warnings.warn( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_TEXT_EMBEDDING_ENDPOINTS), + ) ) options = { @@ -801,8 +817,11 @@ def _create_bqml_model(self): ) if self.model_name not in _GEMINI_ENDPOINTS: - raise ValueError( - f"Model name {self.model_name} is not supported. We only support {', '.join(_GEMINI_ENDPOINTS)}." + warnings.warn( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_GEMINI_ENDPOINTS), + ) ) options = {"endpoint": self.model_name} @@ -1118,8 +1137,11 @@ def _create_bqml_model(self): ) if self.model_name not in _CLAUDE_3_ENDPOINTS: - raise ValueError( - f"Model name {self.model_name} is not supported. We only support {', '.join(_CLAUDE_3_ENDPOINTS)}." + warnings.warn( + _MODEL_NOT_SUPPORTED_WARNING.format( + model_name=self.model_name, + known_models=", ".join(_CLAUDE_3_ENDPOINTS), + ) ) options = { diff --git a/bigframes/operations/semantics.py b/bigframes/operations/semantics.py new file mode 100644 index 0000000000..9ff7ea38b2 --- /dev/null +++ b/bigframes/operations/semantics.py @@ -0,0 +1,762 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import re +import typing +from typing import List, Optional + +import bigframes +import bigframes.core.guid +import bigframes.dtypes as dtypes + + +class Semantics: + def __init__(self, df) -> None: + if not bigframes.options.experiments.semantic_operators: + raise NotImplementedError() + + self._df = df + + def agg( + self, + instruction: str, + model, + cluster_column: typing.Optional[str] = None, + max_agg_rows: int = 10, + ): + """ + Performs an aggregation over all rows of the table. + + This method recursively aggregates the input data to produce partial answers + in parallel, until a single answer remains. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> bpd.options.experiments.semantic_operators = True + + >>> import bigframes.ml.llm as llm + >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001") + + >>> df = bpd.DataFrame( + ... { + ... "Movies": [ + ... "Titanic", + ... "The Wolf of Wall Street", + ... 
"Inception", + ... ], + ... "Year": [1997, 2013, 2010], + ... }) + >>> df.semantics.agg( + ... "Find the first name shared by all actors in {Movies}. One word answer.", + ... model=model, + ... ) + 0 Leonardo + + Name: Movies, dtype: string + + Args: + instruction (str): + An instruction on how to map the data. This value must contain + column references by name enclosed in braces. + For example, to reference a column named "movies", use "{movies}" in the + instruction, like: "Find actor names shared by all {movies}." + + model (bigframes.ml.llm.GeminiTextGenerator): + A GeminiTextGenerator provided by the Bigframes ML package. + + cluster_column (Optional[str], default None): + If set, aggregates each cluster before performing aggregations across + clusters. Clustering based on semantic similarity can improve accuracy + of the sementic aggregations. + + max_agg_rows (int, default 10): + The maxinum number of rows to be aggregated at a time. + + Returns: + bigframes.dataframe.DataFrame: A new DataFrame with the aggregated answers. + + Raises: + NotImplementedError: when the semantic operator experiment is off. + ValueError: when the instruction refers to a non-existing column, or when + more than one columns are referred to. + """ + self._validate_model(model) + + columns = self._parse_columns(instruction) + for column in columns: + if column not in self._df.columns: + raise ValueError(f"Column {column} not found.") + if len(columns) > 1: + raise NotImplementedError( + "Semantic aggregations are limited to a single column." + ) + column = columns[0] + + if max_agg_rows <= 1: + raise ValueError( + f"Invalid value for `max_agg_rows`: {max_agg_rows}." + "It must be greater than 1." + ) + + import bigframes.bigquery as bbq + import bigframes.dataframe + import bigframes.series + + df: bigframes.dataframe.DataFrame = self._df.copy() + user_instruction = self._format_instruction(instruction, columns) + + num_cluster = 1 + if cluster_column is not None: + if cluster_column not in df.columns: + raise ValueError(f"Cluster column `{cluster_column}` not found.") + + if df[cluster_column].dtype != dtypes.INT_DTYPE: + raise TypeError( + "Cluster column must be an integer type, not " + f"{type(df[cluster_column])}" + ) + + num_cluster = len(df[cluster_column].unique()) + df = df.sort_values(cluster_column) + else: + cluster_column = bigframes.core.guid.generate_guid("pid") + df[cluster_column] = 0 + + aggregation_group_id = bigframes.core.guid.generate_guid("agg") + group_row_index = bigframes.core.guid.generate_guid("gid") + llm_prompt = bigframes.core.guid.generate_guid("prompt") + df = ( + df.reset_index(drop=True) + .reset_index() + .rename(columns={"index": aggregation_group_id}) + ) + + output_instruction = ( + "Answer user instructions using the provided context from various sources. " + "Combine all relevant information into a single, concise, well-structured response. 
" + f"Instruction: {user_instruction}.\n\n" + ) + + while len(df) > 1: + df[group_row_index] = (df[aggregation_group_id] % max_agg_rows + 1).astype( + dtypes.STRING_DTYPE + ) + df[aggregation_group_id] = (df[aggregation_group_id] / max_agg_rows).astype( + dtypes.INT_DTYPE + ) + df[llm_prompt] = "\t\nSource #" + df[group_row_index] + ": " + df[column] + + if len(df) > num_cluster: + # Aggregate within each partition + agg_df = bbq.array_agg( + df.groupby(by=[cluster_column, aggregation_group_id]) + ) + else: + # Aggregate cross partitions + agg_df = bbq.array_agg(df.groupby(by=[aggregation_group_id])) + agg_df[cluster_column] = agg_df[cluster_column].list[0] + + # Skip if the aggregated group only has a single item + single_row_df: bigframes.series.Series = bbq.array_to_string( + agg_df[agg_df[group_row_index].list.len() <= 1][column], + delimiter="", + ) + prompt_s: bigframes.series.Series = bbq.array_to_string( + agg_df[agg_df[group_row_index].list.len() > 1][llm_prompt], + delimiter="", + ) + prompt_s = output_instruction + prompt_s # type:ignore + + # Run model + predict_df = typing.cast( + bigframes.dataframe.DataFrame, model.predict(prompt_s) + ) + agg_df[column] = predict_df["ml_generate_text_llm_result"].combine_first( + single_row_df + ) + + agg_df = agg_df.reset_index() + df = agg_df[[aggregation_group_id, cluster_column, column]] + + return df[column] + + def cluster_by( + self, + column: str, + output_column: str, + model, + n_clusters: int = 5, + ): + """ + Clusters data based on the semantic similarity of text within a specified column. + + This method leverages a language model to generate text embeddings for each value in + the given column. These embeddings capture the semantic meaning of the text. + The data is then grouped into `n` clusters using the k-means clustering algorithm, + which groups data points based on the similarity of their embeddings. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> bpd.options.experiments.semantic_operators = True + + >>> import bigframes.ml.llm as llm + >>> model = llm.TextEmbeddingGenerator() + + >>> df = bpd.DataFrame({ + ... "Product": ["Smartphone", "Laptop", "T-shirt", "Jeans"], + ... }) + >>> df.semantics.cluster_by("Product", "Cluster ID", model, n_clusters=2) + Product Cluster ID + 0 Smartphone 2 + 1 Laptop 2 + 2 T-shirt 1 + 3 Jeans 1 + + [4 rows x 2 columns] + + Args: + column (str): + An column name to perform the similarity clustering. + + output_column (str): + An output column to store the clustering ID. + + model (bigframes.ml.llm.TextEmbeddingGenerator): + A TextEmbeddingGenerator provided by Bigframes ML package. + + n_clusters (int, default 5): + Default 5. Number of clusters to be detected. + + Returns: + bigframes.dataframe.DataFrame: A new DataFrame with the clustering output column. + + Raises: + NotImplementedError: when the semantic operator experiment is off. + ValueError: when the column refers to a non-existing column. + """ + + import bigframes.dataframe + import bigframes.ml.cluster as cluster + import bigframes.ml.llm as llm + + if not isinstance(model, llm.TextEmbeddingGenerator): + raise TypeError(f"Expect a text embedding model, but got: {type(model)}") + + if column not in self._df.columns: + raise ValueError(f"Column {column} not found.") + + if n_clusters <= 1: + raise ValueError( + f"Invalid value for `n_clusters`: {n_clusters}." + "It must be greater than 1." 
+            )
+
+        df: bigframes.dataframe.DataFrame = self._df.copy()
+        embeddings_df = model.predict(df[column])
+
+        cluster_model = cluster.KMeans(n_clusters=n_clusters)
+        cluster_model.fit(embeddings_df[["ml_generate_embedding_result"]])
+        clustered_result = cluster_model.predict(embeddings_df)
+        df[output_column] = clustered_result["CENTROID_ID"]
+        return df
+
+    def filter(self, instruction: str, model):
+        """
+        Filters the DataFrame with the semantics of the user instruction.
+
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+        >>> bpd.options.experiments.semantic_operators = True
+
+        >>> import bigframes.ml.llm as llm
+        >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")
+
+        >>> df = bpd.DataFrame({"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]})
+        >>> df.semantics.filter("{city} is the capital of {country}", model)
+           country    city
+        1  Germany  Berlin
+
+        [1 rows x 2 columns]
+
+        Args:
+            instruction:
+                An instruction on how to filter the data. This value must contain
+                column references by name, which should be wrapped in a pair of braces.
+                For example, if you have a column "food", you can refer to this column
+                in the instructions like:
+                "The {food} is healthy."
+
+            model:
+                A GeminiTextGenerator provided by the Bigframes ML package.
+
+        Returns:
+            DataFrame filtered by the instruction.
+
+        Raises:
+            NotImplementedError: when the semantic operator experiment is off.
+            ValueError: when the instruction refers to a non-existing column, or when no
+                columns are referred to.
+        """
+        self._validate_model(model)
+        columns = self._parse_columns(instruction)
+        for column in columns:
+            if column not in self._df.columns:
+                raise ValueError(f"Column {column} not found.")
+
+        user_instruction = self._format_instruction(instruction, columns)
+        output_instruction = "Based on the provided context, reply to the following claim by only True or False:"
+
+        from bigframes.dataframe import DataFrame
+
+        results = typing.cast(
+            DataFrame,
+            model.predict(
+                self._make_prompt(columns, user_instruction, output_instruction)
+            ),
+        )
+
+        return self._df[
+            results["ml_generate_text_llm_result"].str.lower().str.contains("true")
+        ]
+
+    def map(self, instruction: str, output_column: str, model):
+        """
+        Maps the DataFrame with the semantics of the user instruction.
+
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+        >>> bpd.options.experiments.semantic_operators = True
+
+        >>> import bigframes.ml.llm as llm
+        >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")
+
+        >>> df = bpd.DataFrame({"ingredient_1": ["Burger Bun", "Soy Bean"], "ingredient_2": ["Beef Patty", "Bittern"]})
+        >>> df.semantics.map("What is the food made from {ingredient_1} and {ingredient_2}? One word only.", output_column="food", model=model)
+          ingredient_1 ingredient_2    food
+        0   Burger Bun   Beef Patty  Burger
+
+        1     Soy Bean      Bittern    Tofu
+
+
+        [2 rows x 3 columns]
+
+        Args:
+            instruction:
+                An instruction on how to map the data. This value must contain
+                column references by name, which should be wrapped in a pair of braces.
+                For example, if you have a column "food", you can refer to this column
+                in the instructions like:
+                "Get the ingredients of {food}."
+
+            output_column:
+                The column name of the mapping result.
+
+            model:
+                A GeminiTextGenerator provided by the Bigframes ML package.
+
+        Returns:
+            DataFrame with attached mapping results.
+
+        Raises:
+            NotImplementedError: when the semantic operator experiment is off.
+            ValueError: when the instruction refers to a non-existing column, or when no
+                columns are referred to.
+        """
+        self._validate_model(model)
+        columns = self._parse_columns(instruction)
+        for column in columns:
+            if column not in self._df.columns:
+                raise ValueError(f"Column {column} not found.")
+
+        user_instruction = self._format_instruction(instruction, columns)
+        output_instruction = (
+            "Based on the provided context, answer the following instruction:"
+        )
+
+        from bigframes.series import Series
+
+        results = typing.cast(
+            Series,
+            model.predict(
+                self._make_prompt(columns, user_instruction, output_instruction)
+            )["ml_generate_text_llm_result"],
+        )
+
+        from bigframes.core.reshape import concat
+
+        return concat([self._df, results.rename(output_column)], axis=1)
+
+    def join(self, other, instruction: str, model, max_rows: int = 1000):
+        """
+        Joins two dataframes by applying the instruction over each pair of rows from
+        the left and right table.
+
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+        >>> bpd.options.experiments.semantic_operators = True
+
+        >>> import bigframes.ml.llm as llm
+        >>> model = llm.GeminiTextGenerator(model_name="gemini-1.5-flash-001")
+
+        >>> cities = bpd.DataFrame({'city': ['Seattle', 'Ottawa', 'Berlin', 'Shanghai', 'New Delhi']})
+        >>> continents = bpd.DataFrame({'continent': ['North America', 'Africa', 'Asia']})
+
+        >>> cities.semantics.join(continents, "{city} is in {continent}", model)
+                city      continent
+        0    Seattle  North America
+        1     Ottawa  North America
+        2   Shanghai           Asia
+        3  New Delhi           Asia
+
+        [4 rows x 2 columns]
+
+        Args:
+            other:
+                The other dataframe.
+
+            instruction:
+                An instruction on how left and right rows can be joined. This value must contain
+                column references by name, which should be wrapped in a pair of braces.
+                For example: "The {city} belongs to the {country}".
+                For column names that are shared between the two dataframes, you need to add "_left"
+                and "_right" suffixes for differentiation. This is especially important when you do
+                self joins. For example: "The {employee_name_left} reports to {employee_name_right}"
+                You must not add a "_left" or "_right" suffix to non-overlapping columns.
+
+            model:
+                A GeminiTextGenerator provided by the Bigframes ML package.
+
+            max_rows:
+                The maximum number of rows allowed to be sent to the model per call. If the result is too large, the method
+                call will end early with an error.
+
+        Returns:
+            The joined dataframe.
+
+        Raises:
+            ValueError: when the amount of data that will be sent for LLM processing is larger than max_rows.
+        """
+        self._validate_model(model)
+        columns = self._parse_columns(instruction)
+
+        joined_table_rows = len(self._df) * len(other)
+
+        if joined_table_rows > max_rows:
+            raise ValueError(
+                f"Number of rows that need processing is {joined_table_rows}, which exceeds row limit {max_rows}."
+            )
+
+        left_columns = []
+        right_columns = []
+
+        for col in columns:
+            if col in self._df.columns and col in other.columns:
+                raise ValueError(f"Ambiguous column reference: {col}")
+
+            elif col in self._df.columns:
+                left_columns.append(col)
+
+            elif col in other.columns:
+                right_columns.append(col)
+
+            elif col.endswith("_left"):
+                original_col_name = col[: -len("_left")]
+                if (
+                    original_col_name in self._df.columns
+                    and original_col_name in other.columns
+                ):
+                    left_columns.append(col)
+                elif original_col_name in self._df.columns:
+                    raise ValueError(f"Unnecessary suffix for {col}")
+                else:
+                    raise ValueError(f"Column {col} not found")
+
+            elif col.endswith("_right"):
+                original_col_name = col[: -len("_right")]
+                if (
+                    original_col_name in self._df.columns
+                    and original_col_name in other.columns
+                ):
+                    right_columns.append(col)
+                elif original_col_name in other.columns:
+                    raise ValueError(f"Unnecessary suffix for {col}")
+                else:
+                    raise ValueError(f"Column {col} not found")
+
+            else:
+                raise ValueError(f"Column {col} not found")
+
+        if not left_columns or not right_columns:
+            raise ValueError()
+
+        joined_df = self._df.merge(other, how="cross", suffixes=("_left", "_right"))
+
+        return joined_df.semantics.filter(instruction, model).reset_index(drop=True)
+
+    def search(
+        self,
+        search_column: str,
+        query: str,
+        top_k: int,
+        model,
+        score_column: Optional[str] = None,
+    ):
+        """
+        Performs semantic search on the DataFrame.
+
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+
+        >>> import bigframes
+        >>> bigframes.options.experiments.semantic_operators = True
+
+        >>> import bigframes.ml.llm as llm
+        >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-004")
+
+        >>> df = bpd.DataFrame({"creatures": ["salmon", "sea urchin", "frog", "chimpanzee"]})
+        >>> df.semantics.search("creatures", "monkey", top_k=1, model=model, score_column='distance')
+            creatures  distance
+        3  chimpanzee  0.781101
+
+        [1 rows x 2 columns]
+
+        Args:
+            search_column:
+                The name of the column to search from.
+            query (str):
+                The search query.
+            top_k (int):
+                The number of nearest neighbors to return.
+            model (TextEmbeddingGenerator):
+                A TextEmbeddingGenerator provided by the Bigframes ML package.
+            score_column (Optional[str], default None):
+                The name of the additional column containing the similarity scores. If None,
+                this column won't be attached to the result.
+
+        Returns:
+            DataFrame: the DataFrame with the search result.
+
+        Raises:
+            ValueError: when the search_column is not found in the data frame.
+            TypeError: when the provided model is not TextEmbeddingGenerator.
+        """
+
+        if search_column not in self._df.columns:
+            raise ValueError(f"Column {search_column} not found")
+
+        import bigframes.ml.llm as llm
+
+        if not isinstance(model, llm.TextEmbeddingGenerator):
+            raise TypeError(f"Expect a text embedding model, but got: {type(model)}")
+
+        embedded_df = model.predict(self._df[search_column])
+        embedded_table = embedded_df.reset_index().to_gbq()
+
+        import bigframes.pandas as bpd
+
+        embedding_result_column = "ml_generate_embedding_result"
+        query_df = model.predict(bpd.DataFrame({"query_id": [query]})).rename(
+            columns={"content": "query_id", embedding_result_column: "embedding"}
+        )
+
+        import bigframes.bigquery as bbq
+
+        search_result = (
+            bbq.vector_search(
+                base_table=embedded_table,
+                column_to_search=embedding_result_column,
+                query=query_df,
+                top_k=top_k,
+            )
+            .rename(columns={"content": search_column})
+            .set_index("index")
+        )
+
+        search_result.index.name = self._df.index.name
+
+        if score_column is not None:
+            search_result = search_result.rename(columns={"distance": score_column})[
+                [search_column, score_column]
+            ]
+        else:
+            search_result = search_result[[search_column]]
+
+        import bigframes.dataframe
+
+        return typing.cast(bigframes.dataframe.DataFrame, search_result)
+
+    def sim_join(
+        self,
+        other,
+        left_on: str,
+        right_on: str,
+        model,
+        top_k: int = 3,
+        score_column: Optional[str] = None,
+        max_rows: int = 1000,
+    ):
+        """
+        Joins two dataframes based on the similarity of the specified columns.
+
+        This method uses BigQuery's VECTOR_SEARCH function to match rows on the left side with the rows that have
+        nearest embedding vectors on the right. In the worst case scenario, the complexity is around O(M * N * log K).
+        Therefore, this is a potentially expensive operation.
+
+        **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> bpd.options.display.progress_bar = None
+
+        >>> import bigframes
+        >>> bigframes.options.experiments.semantic_operators = True
+
+        >>> import bigframes.ml.llm as llm
+        >>> model = llm.TextEmbeddingGenerator(model_name="text-embedding-004")
+
+        >>> df1 = bpd.DataFrame({'animal': ['monkey', 'spider']})
+        >>> df2 = bpd.DataFrame({'animal': ['scorpion', 'baboon']})
+
+        >>> df1.semantics.sim_join(df2, left_on='animal', right_on='animal', model=model, top_k=1)
+           animal  animal_1
+        0  monkey    baboon
+        1  spider  scorpion
+
+        [2 rows x 2 columns]
+
+        Args:
+            other (DataFrame):
+                The other data frame to join with.
+            left_on (str):
+                The name of the column on the left side for the join.
+            right_on (str):
+                The name of the column on the right side for the join.
+            top_k (int, default 3):
+                The number of nearest neighbors to return.
+            model (TextEmbeddingGenerator):
+                A TextEmbeddingGenerator provided by the Bigframes ML package.
+            score_column (Optional[str], default None):
+                The name of the additional column containing the similarity scores. If None,
+                this column won't be attached to the result.
+            max_rows:
+                The maximum number of rows allowed to be processed per call. If the result is too large, the method
+                call will end early with an error.
+
+        Returns:
+            DataFrame: the data frame with the join result.
+
+        Raises:
+            ValueError: when the amount of data to be processed exceeds the specified max_rows.
+ """ + + if left_on not in self._df.columns: + raise ValueError(f"Left column {left_on} not found") + if right_on not in self._df.columns: + raise ValueError(f"Right column {right_on} not found") + + import bigframes.ml.llm as llm + + if not isinstance(model, llm.TextEmbeddingGenerator): + raise TypeError(f"Expect a text embedding model, but got: {type(model)}") + + joined_table_rows = len(self._df) * len(other) + if joined_table_rows > max_rows: + raise ValueError( + f"Number of rows that need processing is {joined_table_rows}, which exceeds row limit {max_rows}." + ) + + base_table_embedding_column = bigframes.core.guid.generate_guid() + base_table = self._attach_embedding( + other, right_on, base_table_embedding_column, model + ).to_gbq() + query_table = self._attach_embedding(self._df, left_on, "embedding", model) + + import bigframes.bigquery as bbq + + join_result = bbq.vector_search( + base_table=base_table, + column_to_search=base_table_embedding_column, + query=query_table, + top_k=top_k, + ) + + join_result = join_result.drop( + ["embedding", base_table_embedding_column], axis=1 + ) + + if score_column is not None: + join_result = join_result.rename(columns={"distance": score_column}) + else: + del join_result["distance"] + + return join_result + + @staticmethod + def _attach_embedding(dataframe, source_column: str, embedding_column: str, model): + result_df = dataframe.copy() + embeddings = model.predict(dataframe[source_column])[ + "ml_generate_embedding_result" + ] + result_df[embedding_column] = embeddings + return result_df + + def _make_prompt( + self, columns: List[str], user_instruction: str, output_instruction: str + ): + prompt_df = self._df[columns].copy() + prompt_df["prompt"] = f"{output_instruction}\n{user_instruction}\nContext: " + + # Combine context from multiple columns. + for col in columns: + prompt_df["prompt"] += f"{col} is `" + prompt_df[col] + "`\n" + + return prompt_df["prompt"] + + def _parse_columns(self, instruction: str) -> List[str]: + """Extracts column names enclosed in curly braces from the user instruction. + For example, _parse_columns("{city} is in {continent}") == ["city", "continent"] + """ + columns = re.findall(r"(? str: + """Extracts column names enclosed in curly braces from the user instruction. 
+ For example, `_format_instruction(["city", "continent"], "{city} is in {continent}") + == "city is in continent"` + """ + return instruction.format(**{col: col for col in columns}) + + @staticmethod + def _validate_model(model): + from bigframes.ml.llm import GeminiTextGenerator + + if not isinstance(model, GeminiTextGenerator): + raise ValueError("Model is not GeminiText Generator") diff --git a/bigframes/series.py b/bigframes/series.py index 16e2eef6f1..1a913f18d7 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -1923,9 +1923,9 @@ def _slice( step: typing.Optional[int] = None, ) -> bigframes.series.Series: return bigframes.series.Series( - self._block.slice(start=start, stop=stop, step=step).select_column( - self._value_column - ), + self._block.slice( + start=start, stop=stop, step=step if (step is not None) else 1 + ).select_column(self._value_column), ) def cache(self): diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py index 7585dd3f45..01ff1a3f15 100644 --- a/bigframes/session/_io/bigquery/read_gbq_table.py +++ b/bigframes/session/_io/bigquery/read_gbq_table.py @@ -102,6 +102,7 @@ def validate_table( table_ref: bigquery.table.TableReference, columns: Optional[Sequence[str]], snapshot_time: datetime.datetime, + table_type: str, filter_str: Optional[str] = None, ) -> bool: """Validates that the table can be read, returns True iff snapshot is supported.""" @@ -124,6 +125,17 @@ def validate_table( if table_ref.dataset_id.startswith("_"): return False + # Materialized views,does not support snapshot + if table_type == "MATERIALIZED_VIEW": + warnings.warn( + "Materialized views do not support FOR SYSTEM_TIME AS OF queries. " + "Attempting query without time travel. Be aware that as materialized views " + "are updated periodically, modifications to the underlying data in the view may " + "result in errors or unexpected behavior.", + category=bigframes.exceptions.TimeTravelDisabledWarning, + ) + return False + # Second, try with snapshot to verify table supports this feature snapshot_sql = bigframes.session._io.bigquery.to_query( query_or_table=f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}", diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py index 7b53d40f74..04cd1a2ff0 100644 --- a/bigframes/session/clients.py +++ b/bigframes/session/clients.py @@ -31,21 +31,13 @@ import ibis import pydata_google_auth +import bigframes.constants import bigframes.version _ENV_DEFAULT_PROJECT = "GOOGLE_CLOUD_PROJECT" _APPLICATION_NAME = f"bigframes/{bigframes.version.__version__} ibis/{ibis.__version__}" _SCOPES = ["https://www.googleapis.com/auth/cloud-platform"] -# Regions for which Regional Endpoints (REPs) are supported -_REP_SUPPORTED_REGIONS = { - "me-central2", - "europe-west9", - "europe-west3", - "us-east4", - "us-west1", -} - # BigQuery is a REST API, which requires the protocol as part of the URL. 
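The clients.py hunks around this point replace the module-local `_REP_SUPPORTED_REGIONS` set with the shared `bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS`, and each client picks its API endpoint by formatting either a locational or a regional template with the session location. A minimal sketch of that selection logic, assuming a placeholder helper name and an assumed shape for the regional endpoint template (only the locational template appears verbatim in this diff):

    import bigframes.constants

    _BIGQUERY_LOCATIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com"
    # Assumed shape of the regional (REP) template; the real constant lives in clients.py.
    _BIGQUERY_REGIONAL_ENDPOINT = "https://bigquery.{location}.rep.googleapis.com"

    def pick_bigquery_api_endpoint(location):
        # Regional endpoints are used only when the location is known and REP-enabled.
        use_rep = (
            location is not None
            and location.lower() in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS
        )
        template = _BIGQUERY_REGIONAL_ENDPOINT if use_rep else _BIGQUERY_LOCATIONAL_ENDPOINT
        return template.format(location=location)

For example, a REP-enabled region such as "me-central2" resolves to the regional endpoint, while other locations keep the locational form shown in the constant below.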
_BIGQUERY_LOCATIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com" @@ -129,7 +121,8 @@ def _create_bigquery_client(self): api_endpoint=( _BIGQUERY_REGIONAL_ENDPOINT if self._location is not None - and self._location.lower() in _REP_SUPPORTED_REGIONS + and self._location.lower() + in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS else _BIGQUERY_LOCATIONAL_ENDPOINT ).format(location=self._location), ) @@ -201,7 +194,8 @@ def bqstoragereadclient(self): api_endpoint=( _BIGQUERYSTORAGE_REGIONAL_ENDPOINT if self._location is not None - and self._location.lower() in _REP_SUPPORTED_REGIONS + and self._location.lower() + in bigframes.constants.REP_ENABLED_BIGQUERY_LOCATIONS else _BIGQUERYSTORAGE_LOCATIONAL_ENDPOINT ).format(location=self._location), ) diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py index 8508c714fd..ab2ebed0d4 100644 --- a/bigframes/session/executor.py +++ b/bigframes/session/executor.py @@ -45,6 +45,7 @@ import bigframes.core.identifiers import bigframes.core.nodes as nodes import bigframes.core.ordering as order +import bigframes.core.rewrite as rewrites import bigframes.core.schema import bigframes.core.tree_properties as tree_properties import bigframes.features @@ -186,7 +187,7 @@ def iterator_supplier(): # Runs strict validations to ensure internal type predictions and ibis are completely in sync # Do not execute these validations outside of testing suite. if "PYTEST_CURRENT_TEST" in os.environ and len(col_id_overrides) == 0: - validate_result_schema(array_value, iterator.schema) + self._validate_result_schema(array_value, iterator.schema) return ExecuteResult( arrow_batches=iterator_supplier, @@ -436,6 +437,7 @@ def _get_optimized_plan(self, node: nodes.BigFrameNode) -> nodes.BigFrameNode: if ENABLE_PRUNING: used_fields = frozenset(field.id for field in optimized_plan.fields) optimized_plan = optimized_plan.prune(used_fields) + optimized_plan = rewrites.replace_slice_ops(optimized_plan) return optimized_plan def _is_trivially_executable(self, array_value: bigframes.core.ArrayValue): @@ -558,6 +560,27 @@ def _sql_as_cached_temp_table( query_job.result() return query_job.destination + def _validate_result_schema( + self, + array_value: bigframes.core.ArrayValue, + bq_schema: list[bigquery.schema.SchemaField], + ): + actual_schema = tuple(bq_schema) + ibis_schema = bigframes.core.compile.test_only_ibis_inferred_schema( + self._get_optimized_plan(array_value.node) + ) + internal_schema = array_value.schema + if not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable: + return + if internal_schema.to_bigquery() != actual_schema: + raise ValueError( + f"This error should only occur while testing. BigFrames internal schema: {internal_schema.to_bigquery()} does not match actual schema: {actual_schema}" + ) + if ibis_schema.to_bigquery() != actual_schema: + raise ValueError( + f"This error should only occur while testing. 
Ibis schema: {ibis_schema.to_bigquery()} does not match actual schema: {actual_schema}" + ) + def generate_head_plan(node: nodes.BigFrameNode, n: int): offsets_id = bigframes.core.guid.generate_guid("offsets_") @@ -578,21 +601,3 @@ def generate_head_plan(node: nodes.BigFrameNode, n: int): def generate_row_count_plan(node: nodes.BigFrameNode): return nodes.RowCountNode(node) - - -def validate_result_schema( - array_value: bigframes.core.ArrayValue, bq_schema: list[bigquery.schema.SchemaField] -): - actual_schema = tuple(bq_schema) - ibis_schema = array_value._compiled_schema - internal_schema = array_value.schema - if not bigframes.features.PANDAS_VERSIONS.is_arrow_list_dtype_usable: - return - if internal_schema.to_bigquery() != actual_schema: - raise ValueError( - f"This error should only occur while testing. BigFrames internal schema: {internal_schema.to_bigquery()} does not match actual schema: {actual_schema}" - ) - if ibis_schema.to_bigquery() != actual_schema: - raise ValueError( - f"This error should only occur while testing. Ibis schema: {ibis_schema.to_bigquery()} does not match actual schema: {actual_schema}" - ) diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py index 22de367804..923605627d 100644 --- a/bigframes/session/loader.py +++ b/bigframes/session/loader.py @@ -339,7 +339,12 @@ def read_gbq_table( ) enable_snapshot = enable_snapshot and bf_read_gbq_table.validate_table( - self._bqclient, table_ref, all_columns, time_travel_timestamp, filter_str + self._bqclient, + table_ref, + all_columns, + time_travel_timestamp, + table.table_type, + filter_str, ) # ---------------------------- diff --git a/bigframes/version.py b/bigframes/version.py index c07f26bc6f..75f66191ca 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.21.0" +__version__ = "1.22.0" diff --git a/notebooks/experimental/semantic_operators.ipynb b/notebooks/experimental/semantic_operators.ipynb new file mode 100644 index 0000000000..bfaad69ce2 --- /dev/null +++ b/notebooks/experimental/semantic_operators.ipynb @@ -0,0 +1,1749 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preparation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import bigframes\n", + "import bigframes.pandas as bpd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Enable the semantic operator experiment" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/_config/experiment_options.py:33: UserWarning: Semantic operators are still under experiments, and are subject to change in the future.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "bigframes.options.experiments.semantic_operators = True" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prepare the LLM model. Here we are going to use Gemini 1.5 Flash." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/pandas/__init__.py:559: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", + " return global_session.get_global_session()\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 1494d834-8b38-4928-9911-ba3bb9b1228b is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 6caa309b-492d-4ad3-94e3-cb2b9522ef1e is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import bigframes.ml.llm as llm\n", + "gemini_model = llm.GeminiTextGenerator(model_name=llm._GEMINI_1P5_FLASH_001_ENDPOINT)\n", + "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-004\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Filtering" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job d56e32bd-f06a-4086-aac2-560ed03dceca is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 0b96351f-5a48-4059-b830-1aebd330599f is DONE. 4 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 34b2ce70-b9be-49bb-a06d-f228b0e5937c is DONE. 33 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job a4f799eb-24d6-4fcf-8661-371226788b53 is DONE. 33 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countrycity
1GermanyBerlin
\n", + "

1 rows × 2 columns

\n", + "
[1 rows x 2 columns in total]" + ], + "text/plain": [ + " country city\n", + "1 Germany Berlin\n", + "\n", + "[1 rows x 2 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = bpd.DataFrame({'country': ['USA', 'Germany'], 'city': ['Seattle', 'Berlin']})\n", + "df.semantics.filter(\"{city} is the capital of {country}\", gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Mapping" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "df = bpd.DataFrame(\n", + " data={\"ingredient_1\": [\"Burger Bun\", \"Soy Bean\"], \"ingredient_2\": [\"Beef Patty\", \"Bittern\"]}\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 04a27084-a71e-4c2d-9a73-46b768615c94 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 1a4c0d7f-0bb4-4f16-b2c0-ebb930fa6cd1 is DONE. 4 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 87bf5653-d3d8-4c0a-8017-af43907465de is DONE. 34 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 12822e33-0ca3-4968-a685-7fcb2bdb0790 is DONE. 93 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ingredient_1ingredient_2food
0Burger BunBeef PattyBurger
1Soy BeanBitternTofu
\n", + "

2 rows × 3 columns

\n", + "
[2 rows x 3 columns in total]" + ], + "text/plain": [ + " ingredient_1 ingredient_2 food\n", + "0 Burger Bun Beef Patty Burger \n", + "\n", + "1 Soy Bean Bittern Tofu \n", + "\n", + "\n", + "[2 rows x 3 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.semantics.map(\"What is the food made from {ingredient_1} and {ingredient_2}? One word only.\", output_column=\"food\", model=gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Joining" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "cities = bpd.DataFrame({'city': ['Seattle', 'Ottawa', 'Berlin', 'Shanghai', 'New Delhi']})\n", + "continents = bpd.DataFrame({'continent': ['North America', 'Africa', 'Asia']})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job fcda7d35-d969-47a8-b611-0c516e2e39e8 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job c532592c-c4ce-4f08-9397-21b1b8b1f347 is DONE. 30 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job a11bd20f-7a75-462c-b6a5-64d954645e1b is DONE. 251 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 4703c2a9-ab08-46f1-a612-3354c5df391f is DONE. 144 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
citycontinent
0SeattleNorth America
1OttawaNorth America
2ShanghaiAsia
3New DelhiAsia
\n", + "

4 rows × 2 columns

\n", + "
[4 rows x 2 columns in total]" + ], + "text/plain": [ + " city continent\n", + "0 Seattle North America\n", + "1 Ottawa North America\n", + "2 Shanghai Asia\n", + "3 New Delhi Asia\n", + "\n", + "[4 rows x 2 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cities.semantics.join(continents, \"{city} is in {continent}\", gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Self Joins" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "animals = bpd.DataFrame({'animal': ['cow', 'cat', 'spider', 'elephant']})" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 8c1f1313-3eee-47dc-ad2d-27a49dc831dc is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 08dda435-13bd-49d0-a941-1cf91a9a1c96 is DONE. 32 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job abf33f67-0056-499b-b7fe-583391c6bc02 is DONE. 266 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 74249b99-8975-4fc4-b599-1b682edf8aeb is DONE. 180 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animal_leftanimal_right
0cowcat
1cowspider
2catspider
3elephantcow
4elephantcat
5elephantspider
\n", + "

6 rows × 2 columns

\n", + "
[6 rows x 2 columns in total]" + ], + "text/plain": [ + " animal_left animal_right\n", + "0 cow cat\n", + "1 cow spider\n", + "2 cat spider\n", + "3 elephant cow\n", + "4 elephant cat\n", + "5 elephant spider\n", + "\n", + "[6 rows x 2 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "animals.semantics.join(animals, \"{animal_left} generally weighs heavier than {animal_right}\", gemini_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Search" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 8be41631-537e-4b73-b3c8-1cad09dffb95 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
creatures
0salmon
1sea urchin
2baboons
3frog
4chimpanzee
\n", + "

5 rows × 1 columns

\n", + "
[5 rows x 1 columns in total]" + ], + "text/plain": [ + " creatures\n", + "0 salmon\n", + "1 sea urchin\n", + "2 baboons\n", + "3 frog\n", + "4 chimpanzee\n", + "\n", + "[5 rows x 1 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = bpd.DataFrame({\"creatures\": [\"salmon\", \"sea urchin\", \"baboons\", \"frog\", \"chimpanzee\"]})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 56d5f17f-f64a-46ca-8d30-74f8e2ad5dec is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job fe75b64a-41a3-4675-ae1e-d2db6b2270d3 is DONE. 10 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 9f06c24e-d931-4e59-a444-1a6013c43290 is DONE. 30.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 627b8206-b3f9-4c25-a5d9-dde7c0042a4d is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job d01597bb-30ef-495f-be5d-c9fb16d4c112 is DONE. 2 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job bbc67bc3-830d-4ede-829d-16d4829dec33 is RUNNING. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 0c844655-b7d9-494b-8073-925b4e0743ce is DONE. 37.2 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 1993f0be-bfc2-4dad-ba85-92f5bba44945 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
creaturessimilarity score
2baboons0.773411
4chimpanzee0.781101
\n", + "

2 rows × 2 columns

\n", + "
[2 rows x 2 columns in total]" + ], + "text/plain": [ + " creatures similarity score\n", + "2 baboons 0.773411\n", + "4 chimpanzee 0.781101\n", + "\n", + "[2 rows x 2 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.semantics.search(\"creatures\", \"monkey\", top_k = 2, model = text_embedding_model, score_column='similarity score')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Similarity Join" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = bpd.DataFrame({'animal': ['monkey', 'spider', 'salmon', 'giraffe', 'sparrow']})\n", + "df2 = bpd.DataFrame({'animal': ['scorpion', 'baboon', 'owl', 'elephant', 'tuna']})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 222a9dcb-2389-4ad3-a1e6-c2b197f3a409 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 24afcd9d-6be5-44d9-aa89-6fbe71f5e9a7 is DONE. 10 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 6bc36226-7bbb-4954-b042-044e9fd98a47 is DONE. 30.8 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job f247f63d-1d8a-4f81-a833-628143fda463 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 9bec5633-8ba1-4453-b9c7-6cb555d3c60e is DONE. 10 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job d7df7004-b499-436b-898c-15abee330d9e is RUNNING. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 9012c011-b4e7-4fba-85a6-e439fe3c32d3 is DONE. 61.5 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job bb9987eb-aa37-42ca-bcf1-1ea575a147a8 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalanimal_1distance
0monkeybaboon0.747665
1spiderscorpion0.890909
2salmontuna0.925461
3giraffeelephant0.887858
4sparrowowl0.932959
\n", + "

5 rows × 3 columns

\n", + "
[5 rows x 3 columns in total]" + ], + "text/plain": [ + " animal animal_1 distance\n", + "0 monkey baboon 0.747665\n", + "1 spider scorpion 0.890909\n", + "2 salmon tuna 0.925461\n", + "3 giraffe elephant 0.887858\n", + "4 sparrow owl 0.932959\n", + "\n", + "[5 rows x 3 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.semantics.sim_join(df2, left_on='animal', right_on='animal', top_k=1, model= text_embedding_model, score_column='distance')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 46e1cbb4-2b4a-4578-b3fd-7caba80d5dcc is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 356840f4-840c-41fc-9c9e-8bbaf9ffa02c is DONE. 4 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 428d070e-fd5c-4b2f-b651-b3de9836c02a is DONE. 12.3 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job bf566989-7bd4-4560-952e-34d007ee1e7e is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 92818df7-d4e9-4cea-884e-304126e78b71 is DONE. 4 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job e8619330-7b91-4ae2-99b3-f4386de4c512 is RUNNING. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job be89eca7-462a-4b1c-95ed-0b0c031aaaac is DONE. 24.6 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 40dcd8ed-1262-459a-b6b3-7471722da078 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
animalanimal_1
0monkeybaboon
1spiderscorpion
\n", + "

2 rows × 2 columns

\n", + "
[2 rows x 2 columns in total]" + ], + "text/plain": [ + " animal animal_1\n", + "0 monkey baboon\n", + "1 spider scorpion\n", + "\n", + "[2 rows x 2 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 = bpd.DataFrame({'animal': ['monkey', 'spider']})\n", + "df2 = bpd.DataFrame({'animal': ['scorpion', 'baboon']})\n", + "\n", + "df1.semantics.sim_join(df2, left_on='animal', right_on='animal', top_k=1, model= text_embedding_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Aggregation" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job ea1e5180-a13a-4ec7-a6b4-8eca042ac9a6 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MoviesYear
0Titanic1997
1The Wolf of Wall Street2013
2Killers of the Flower Moon2023
3The Revenant2015
4Inception2010
5Shuttle Island2010
6The Great Gatsby2013
\n", + "

7 rows × 2 columns

\n", + "
[7 rows x 2 columns in total]" + ], + "text/plain": [ + " Movies Year\n", + "0 Titanic 1997\n", + "1 The Wolf of Wall Street 2013\n", + "2 Killers of the Flower Moon 2023\n", + "3 The Revenant 2015\n", + "4 Inception 2010\n", + "5 Shuttle Island 2010\n", + "6 The Great Gatsby 2013\n", + "\n", + "[7 rows x 2 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = bpd.DataFrame({\n", + " \"Movies\": [\n", + " \"Titanic\",\n", + " \"The Wolf of Wall Street\",\n", + " \"Killers of the Flower Moon\",\n", + " \"The Revenant\",\n", + " \"Inception\",\n", + " \"Shuttle Island\",\n", + " \"The Great Gatsby\",\n", + " ],\n", + " \"Year\": [1997, 2013, 2023, 2015, 2010, 2010, 2013],\n", + "})\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job 274df4fb-06ee-49d8-8e7f-2c7eaee3440f is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 878b41c8-6428-4f05-aa0b-dcba14761ac0 is DONE. 2 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 5a909cb7-fcbf-43d5-aac2-79b7ba466dd3 is DONE. 16 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 10f97d30-101c-447c-876c-d329d3a6d89b is DONE. 28 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job b1b94183-6ad4-4014-94da-7d585d45bc6d is DONE. 28 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "0 Leonardo \n", + "\n", + "Name: Movies, dtype: string" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agg_df = df.semantics.agg(\"Find the shared first name of actors in {Movies}. One word answer.\", model=gemini_model)\n", + "agg_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Cluster" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "Query job e52f886a-1f87-45fc-990d-e66c23417a66 is DONE. 0 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/sycai/src/python-bigquery-dataframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/html": [ + "Query job 82ac6302-78a1-41f7-8665-769887a47d42 is DONE. 10 Bytes processed. 
Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job cd42b04e-e9ea-4b56-a891-78608dbef215 is DONE. 30.8 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job dced08f2-12ee-4b52-b5b2-b7dd177dae12 is DONE. 30.7 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 5cbdac9b-f5dd-488c-8262-7a96f8501faa is DONE. 138.9 kB processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job e30ff06e-b561-4ea2-b150-8cd91d4f827c is DONE. 80 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "Query job 278d8a51-711a-42fe-86aa-408b2b44d4c7 is DONE. 170 Bytes processed. Open Job" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductCluster ID
0Smartphone3
1Laptop3
2Coffee Maker1
3T-shirt2
4Jeans2
\n", + "

5 rows × 2 columns

\n", + "
[5 rows x 2 columns in total]" + ], + "text/plain": [ + " Product Cluster ID\n", + "0 Smartphone 3\n", + "1 Laptop 3\n", + "2 Coffee Maker 1\n", + "3 T-shirt 2\n", + "4 Jeans 2\n", + "\n", + "[5 rows x 2 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = bpd.DataFrame({'Product': ['Smartphone', 'Laptop', 'Coffee Maker', 'T-shirt', 'Jeans']})\n", + "\n", + "df.semantics.cluster_by(column='Product', output_column='Cluster ID', model=text_embedding_model, n_clusters=3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/ml/bq_dataframes_ml_cross_validation.ipynb b/notebooks/ml/bq_dataframes_ml_cross_validation.ipynb index 824d911aff..4bfdcc24aa 100644 --- a/notebooks/ml/bq_dataframes_ml_cross_validation.ipynb +++ b/notebooks/ml/bq_dataframes_ml_cross_validation.ipynb @@ -272,7 +272,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 2.1 Define KFold class and Train/Test for Each Fold (Mauanl Approach)" + "## 2.1 Define KFold class and Train/Test for Each Fold (Manual Approach)" ] }, { diff --git a/noxfile.py b/noxfile.py index 714c8333bd..92f8acad7f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -23,7 +23,6 @@ import re import shutil import time -import traceback from typing import Dict, List import warnings @@ -794,10 +793,6 @@ def notebook(session: nox.Session): *notebooks, ) - # Shared flag using multiprocessing.Manager() to indicate if - # any process encounters an error. This flag may be updated - # across different processes. - error_flag = multiprocessing.Manager().Value("i", False) processes = [] for notebook in notebooks: args = ( @@ -808,8 +803,8 @@ def notebook(session: nox.Session): ) if multi_process_mode: process = multiprocessing.Process( - target=_run_process, - args=(session, args, error_flag), + target=session.run, + args=args, ) process.start() processes.append(process) @@ -819,10 +814,6 @@ def notebook(session: nox.Session): else: session.run(*args) - for process in processes: - process.join() - - processes = [] for notebook, regions in notebooks_reg.items(): for region in regions: region_args = ( @@ -834,8 +825,8 @@ def notebook(session: nox.Session): ) if multi_process_mode: process = multiprocessing.Process( - target=_run_process, - args=(session, region_args, error_flag), + target=session.run, + args=region_args, ) process.start() processes.append(process) @@ -847,11 +838,6 @@ def notebook(session: nox.Session): for process in processes: process.join() - - # Check the shared error flag and raise an exception if any process - # reported an error - if error_flag.value: - raise Exception("Errors occurred in one or more subprocesses.") finally: # Prevent our notebook changes from getting checked in to git # accidentally. 
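With the shared multiprocessing error flag removed from the notebook session above, failure and timing reporting now lives in the run_and_publish_benchmark.py changes that follow: the script scrapes each notebook's wall time from pytest's duration output and records failures as `.error` marker files. A small sketch of the duration scraping, using the same regex as the script; the sample line is invented for illustration:

    import re

    # Matches the seconds figure in pytest duration lines such as "12.34s call ...".
    duration_pattern = re.compile(r"(\d+\.\d+)s call")

    sample_line = "12.34s call     notebooks/getting_started.ipynb::test_notebook"  # illustrative only
    match = duration_pattern.search(sample_line)
    if match:
        # The script writes this figure to a <file_path>.local_exec_time_seconds file.
        print(match.group(1))  # prints "12.34"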
@@ -868,15 +854,6 @@ def notebook(session: nox.Session): ) -def _run_process(session: nox.Session, args, error_flag): - try: - session.run(*args) - except Exception: - traceback_str = traceback.format_exc() - print(traceback_str) - error_flag.value = True - - @nox.session(python=DEFAULT_PYTHON_VERSION) def benchmark(session: nox.Session): session.install("-e", ".[all]") diff --git a/scripts/run_and_publish_benchmark.py b/scripts/run_and_publish_benchmark.py index a42301cb13..8b55493770 100644 --- a/scripts/run_and_publish_benchmark.py +++ b/scripts/run_and_publish_benchmark.py @@ -17,10 +17,11 @@ import json import os import pathlib +import re import subprocess import sys import tempfile -from typing import Dict, List, Union +from typing import Dict, List, Tuple, Union import numpy as np import pandas as pd @@ -30,7 +31,7 @@ CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() -def run_benchmark_subprocess(args, log_env_name_var, filename=None, region=None): +def run_benchmark_subprocess(args, log_env_name_var, file_path=None, region=None): """ Runs a benchmark subprocess with configured environment variables. Adjusts PYTHONPATH, sets region-specific BigQuery location, and logs environment variables. @@ -48,10 +49,37 @@ def run_benchmark_subprocess(args, log_env_name_var, filename=None, region=None) if region: env["BIGQUERY_LOCATION"] = region env[LOGGING_NAME_ENV_VAR] = log_env_name_var - subprocess.run(args, env=env, check=True) - - -def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFrame: + try: + if file_path: # Notebooks + duration_pattern = re.compile(r"(\d+\.\d+)s call") + process = subprocess.Popen(args, env=env, stdout=subprocess.PIPE, text=True) + assert process.stdout is not None + for line in process.stdout: + print(line, end="") + match = duration_pattern.search(line) + if match: + duration = match.group(1) + with open(f"{file_path}.local_exec_time_seconds", "w") as f: + f.write(f"{duration}\n") + process.wait() + if process.returncode != 0: + raise subprocess.CalledProcessError(process.returncode, args) + else: # Benchmarks + file_path = log_env_name_var + subprocess.run(args, env=env, check=True) + except Exception: + directory = pathlib.Path(file_path).parent + for file in directory.glob(f"{pathlib.Path(file_path).name}.*"): + if file.suffix != ".backup": + print(f"Benchmark failed, deleting: {file}") + file.unlink() + error_file = directory / f"{pathlib.Path(file_path).name}.error" + error_file.touch() + + +def collect_benchmark_result( + benchmark_path: str, iterations: int +) -> Tuple[pd.DataFrame, Union[str, None]]: """Generate a DataFrame report on HTTP queries, bytes processed, slot time and execution time from log files.""" path = pathlib.Path(benchmark_path) try: @@ -59,25 +87,18 @@ def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFra bytes_files = sorted(path.rglob("*.bytesprocessed")) millis_files = sorted(path.rglob("*.slotmillis")) bq_seconds_files = sorted(path.rglob("*.bq_exec_time_seconds")) - local_seconds_files = sorted(path.rglob("*.local_exec_time_seconds")) - has_local_seconds = len(local_seconds_files) > 0 - - if has_local_seconds: - if not ( - len(bytes_files) - == len(millis_files) - == len(local_seconds_files) - == len(bq_seconds_files) - ): - raise ValueError( - "Mismatch in the number of report files for bytes, millis, and seconds." 
- ) - else: - if not (len(bytes_files) == len(millis_files) == len(bq_seconds_files)): - raise ValueError( - "Mismatch in the number of report files for bytes, millis, and seconds." - ) + error_files = sorted(path.rglob("*.error")) + + if not ( + len(bytes_files) + == len(millis_files) + == len(local_seconds_files) + == len(bq_seconds_files) + ): + raise ValueError( + "Mismatch in the number of report files for bytes, millis, and seconds." + ) for idx in range(len(bytes_files)): bytes_file = bytes_files[idx] @@ -92,12 +113,11 @@ def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFra "File name mismatch among bytes, millis, and seconds reports." ) - if has_local_seconds: - local_seconds_file = local_seconds_files[idx] - if filename != local_seconds_file.relative_to(path).with_suffix(""): - raise ValueError( - "File name mismatch among bytes, millis, and seconds reports." - ) + local_seconds_file = local_seconds_files[idx] + if filename != local_seconds_file.relative_to(path).with_suffix(""): + raise ValueError( + "File name mismatch among bytes, millis, and seconds reports." + ) with open(bytes_file, "r") as file: lines = file.read().splitlines() @@ -108,12 +128,9 @@ def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFra lines = file.read().splitlines() total_slot_millis = sum(int(line) for line in lines) / iterations - if has_local_seconds: - with open(local_seconds_file, "r") as file: - lines = file.read().splitlines() - local_seconds = sum(float(line) for line in lines) / iterations - else: - local_seconds = None + with open(local_seconds_file, "r") as file: + lines = file.read().splitlines() + local_seconds = sum(float(line) for line in lines) / iterations with open(bq_seconds_file, "r") as file: lines = file.read().splitlines() @@ -132,6 +149,7 @@ def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFra path.rglob("*.slotmillis"), path.rglob("*.local_exec_time_seconds"), path.rglob("*.bq_exec_time_seconds"), + path.rglob("*.error"), ): for log_file in files_to_remove: log_file.unlink() @@ -170,13 +188,19 @@ def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFra f" bigquery execution time: {round(row['BigQuery_Execution_Time_Sec'], 1)} seconds" ) - geometric_mean_queries = geometric_mean(benchmark_metrics["Query_Count"]) - geometric_mean_bytes = geometric_mean(benchmark_metrics["Bytes_Processed"]) - geometric_mean_slot_millis = geometric_mean(benchmark_metrics["Slot_Millis"]) - geometric_mean_local_seconds = geometric_mean( + geometric_mean_queries = geometric_mean_excluding_zeros( + benchmark_metrics["Query_Count"] + ) + geometric_mean_bytes = geometric_mean_excluding_zeros( + benchmark_metrics["Bytes_Processed"] + ) + geometric_mean_slot_millis = geometric_mean_excluding_zeros( + benchmark_metrics["Slot_Millis"] + ) + geometric_mean_local_seconds = geometric_mean_excluding_zeros( benchmark_metrics["Local_Execution_Time_Sec"] ) - geometric_mean_bq_seconds = geometric_mean( + geometric_mean_bq_seconds = geometric_mean_excluding_zeros( benchmark_metrics["BigQuery_Execution_Time_Sec"] ) @@ -188,15 +212,33 @@ def collect_benchmark_result(benchmark_path: str, iterations: int) -> pd.DataFra f"Geometric mean of BigQuery execution time: {geometric_mean_bq_seconds} seconds---" ) - return benchmark_metrics.reset_index().rename(columns={"index": "Benchmark_Name"}) + error_message = ( + "\n" + + "\n".join( + [ + f"Failed: {error_file.relative_to(path).with_suffix('')}" + for error_file in 
error_files + ] + ) + if error_files + else None + ) + return ( + benchmark_metrics.reset_index().rename(columns={"index": "Benchmark_Name"}), + error_message, + ) -def geometric_mean(data): +def geometric_mean_excluding_zeros(data): """ - Calculate the geometric mean of a dataset, rounding the result to one decimal place. - Returns NaN if the dataset is empty or contains only NaN values. + Calculate the geometric mean of a dataset, excluding any zero values. + Returns NaN if the dataset is empty, contains only NaN values, or if + all non-NaN values are zeros. + + The result is rounded to one decimal place. """ data = data.dropna() + data = data[data != 0] if len(data) == 0: return np.nan log_data = np.log(data) @@ -321,13 +363,15 @@ def run_notebook_benchmark(benchmark_file: str, region: str): "py.test", "--nbmake", "--nbmake-timeout=900", # 15 minutes + "--durations=0", + "--color=yes", ] benchmark_args = (*pytest_command, benchmark_file) run_benchmark_subprocess( args=benchmark_args, log_env_name_var=log_env_name_var, - filename=export_file, + file_path=export_file, region=region, ) @@ -383,7 +427,7 @@ def main(): args = parse_arguments() if args.publish_benchmarks: - benchmark_metrics = collect_benchmark_result( + benchmark_metrics, error_message = collect_benchmark_result( args.publish_benchmarks, args.iterations ) # Output results to CSV without specifying a location @@ -412,6 +456,9 @@ def main(): # intended for local testing where the default behavior is not to publish results. elif project := os.getenv("GCLOUD_BENCH_PUBLISH_PROJECT", ""): publish_to_bigquery(benchmark_metrics, args.notebook, project) + + if error_message: + raise Exception(error_message) elif args.notebook: run_notebook_benchmark(args.benchmark_path, args.region) else: diff --git a/tests/benchmark/README.md b/tests/benchmark/README.md index a30c36065b..e5b7585514 100644 --- a/tests/benchmark/README.md +++ b/tests/benchmark/README.md @@ -11,6 +11,28 @@ This section lists the benchmarks currently available, with descriptions and lin - **TPC-H Benchmark**: Based on the TPC-H standards, this benchmark evaluates transaction processing capabilities. It is adapted from code found in the Polars repository, specifically tailored to test and compare these capabilities. Details are available on the [Polars Benchmark GitHub repository](https://github.com/pola-rs/polars-benchmark). - **Notebooks**: These Jupyter notebooks showcase BigFrames' key features and patterns, and also enable performance benchmarking. Explore them at the [BigFrames Notebooks repository](https://github.com/googleapis/python-bigquery-dataframes/tree/main/notebooks). +## Benchmark Configuration Using `config.jsonl` Files + +For each benchmark, a corresponding `config.jsonl` file exists in the same folder or its parent folder. These configuration files allow users to control various benchmark parameters without modifying the code directly. By updating the relevant `config.jsonl` file in the specific benchmark's folder, you can easily configure settings such as: +- **benchmark_suffix**: A suffix appended to the benchmark name for identification purposes. +- **ordered**: Controls the mode for BigFrames, specifying whether to use ordered (`true`) or unordered mode (`false`). +- **project_id**: The Google Cloud project ID where the benchmark dataset or table is located. +- **dataset_id**: The dataset ID for querying during the benchmark. 
+- **table_id**: This is **required** for benchmarks like `dbbenchmark` that target a specific table, but is **not configurable** for benchmarks like `TPC-H`, which use multiple tables with fixed names. + +### Example `config.jsonl` Files + +#### `dbbenchmark` Example +```jsonl +{"benchmark_suffix": "50g_ordered", "project_id": "your-google-cloud-project", "dataset_id": "dbbenchmark", "table_id": "G1_1e9_1e2_5_0", "ordered": true} +{"benchmark_suffix": "50g_unordered", "project_id": "your-google-cloud-project", "dataset_id": "dbbenchmark", "table_id": "G1_1e9_1e2_5_0", "ordered": false} +``` + +#### `TPC-H` Example +```jsonl +{"benchmark_suffix": "10t_unordered", "project_id": "your-google-cloud-project", "dataset_id": "tpch_0010t", "ordered": false} +``` + ## Usage Examples Our benchmarking process runs internally on a daily basis to continuously monitor the performance of BigFrames. However, there are occasions when you might need to conduct benchmarking locally to test specific changes or new features. diff --git a/tests/benchmark/db_benchmark/groupby/config.jsonl b/tests/benchmark/db_benchmark/groupby/config.jsonl index dd881e76ac..b6f23ebbf7 100644 --- a/tests/benchmark/db_benchmark/groupby/config.jsonl +++ b/tests/benchmark/db_benchmark/groupby/config.jsonl @@ -1,2 +1,2 @@ -{"benchmark_suffix": "50g_ordered", "table_id": "G1_1e9_1e2_5_0", "ordered": true} -{"benchmark_suffix": "50g_unordered", "table_id": "G1_1e9_1e2_5_0", "ordered": false} +{"benchmark_suffix": "50g_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "dbbenchmark", "table_id": "G1_1e9_1e2_5_0", "ordered": true} +{"benchmark_suffix": "50g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "dbbenchmark", "table_id": "G1_1e9_1e2_5_0", "ordered": false} diff --git a/tests/benchmark/db_benchmark/groupby/q1.py b/tests/benchmark/db_benchmark/groupby/q1.py index 02a709def9..dc86817908 100644 --- a/tests/benchmark/db_benchmark/groupby/q1.py +++ b/tests/benchmark/db_benchmark/groupby/q1.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_groupby_queries.q1, current_path, suffix, table_id, session + vendored_dbbenchmark_groupby_queries.q1, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/groupby/q10.py b/tests/benchmark/db_benchmark/groupby/q10.py index 0cd195b04a..99d28e2f9a 100644 --- a/tests/benchmark/db_benchmark/groupby/q10.py +++ b/tests/benchmark/db_benchmark/groupby/q10.py @@ -18,13 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( vendored_dbbenchmark_groupby_queries.q10, current_path, suffix, + project_id, + dataset_id, table_id, session, ) diff --git a/tests/benchmark/db_benchmark/groupby/q2.py b/tests/benchmark/db_benchmark/groupby/q2.py index 398c63e09f..b06a4189fe 100644 --- a/tests/benchmark/db_benchmark/groupby/q2.py 
+++ b/tests/benchmark/db_benchmark/groupby/q2.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_groupby_queries.q2, current_path, suffix, table_id, session + vendored_dbbenchmark_groupby_queries.q2, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/groupby/q3.py b/tests/benchmark/db_benchmark/groupby/q3.py index 9863b969d8..d66dd7b39d 100644 --- a/tests/benchmark/db_benchmark/groupby/q3.py +++ b/tests/benchmark/db_benchmark/groupby/q3.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_groupby_queries.q3, current_path, suffix, table_id, session + vendored_dbbenchmark_groupby_queries.q3, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/groupby/q4.py b/tests/benchmark/db_benchmark/groupby/q4.py index ce29e3ceaf..6c72069a53 100644 --- a/tests/benchmark/db_benchmark/groupby/q4.py +++ b/tests/benchmark/db_benchmark/groupby/q4.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_groupby_queries.q4, current_path, suffix, table_id, session + vendored_dbbenchmark_groupby_queries.q4, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/groupby/q5.py b/tests/benchmark/db_benchmark/groupby/q5.py index 27a4a52a8f..3e6db9783e 100644 --- a/tests/benchmark/db_benchmark/groupby/q5.py +++ b/tests/benchmark/db_benchmark/groupby/q5.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_groupby_queries.q5, current_path, suffix, table_id, session + vendored_dbbenchmark_groupby_queries.q5, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/groupby/q6.py b/tests/benchmark/db_benchmark/groupby/q6.py index f1befc6840..f763280b5b 100644 --- a/tests/benchmark/db_benchmark/groupby/q6.py +++ b/tests/benchmark/db_benchmark/groupby/q6.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == 
"__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_groupby_queries.q6, current_path, suffix, table_id, session + vendored_dbbenchmark_groupby_queries.q6, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/groupby/q7.py b/tests/benchmark/db_benchmark/groupby/q7.py index c0791612e8..4e7f2d58b6 100644 --- a/tests/benchmark/db_benchmark/groupby/q7.py +++ b/tests/benchmark/db_benchmark/groupby/q7.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_groupby_queries.q7, current_path, suffix, table_id, session + vendored_dbbenchmark_groupby_queries.q7, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/groupby/q8.py b/tests/benchmark/db_benchmark/groupby/q8.py index 46dd8c45eb..75d5dcaa0c 100644 --- a/tests/benchmark/db_benchmark/groupby/q8.py +++ b/tests/benchmark/db_benchmark/groupby/q8.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.groupby_queries as vendored_dbbenchmark_groupby_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_groupby_queries.q8, current_path, suffix, table_id, session + vendored_dbbenchmark_groupby_queries.q8, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/join/config.jsonl b/tests/benchmark/db_benchmark/join/config.jsonl index 72884d6c5a..e709281137 100644 --- a/tests/benchmark/db_benchmark/join/config.jsonl +++ b/tests/benchmark/db_benchmark/join/config.jsonl @@ -1,2 +1,2 @@ -{"benchmark_suffix": "50g_ordered", "table_id": "J1_1e9_NA_0_0", "ordered": true} -{"benchmark_suffix": "50g_unordered", "table_id": "J1_1e9_NA_0_0", "ordered": false} +{"benchmark_suffix": "50g_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "dbbenchmark", "table_id": "J1_1e9_NA_0_0", "ordered": true} +{"benchmark_suffix": "50g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "dbbenchmark", "table_id": "J1_1e9_NA_0_0", "ordered": false} diff --git a/tests/benchmark/db_benchmark/join/q1.py b/tests/benchmark/db_benchmark/join/q1.py index ce05359789..4ca0ee3389 100644 --- a/tests/benchmark/db_benchmark/join/q1.py +++ b/tests/benchmark/db_benchmark/join/q1.py @@ -18,10 +18,22 @@ import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - 
vendored_dbbenchmark_join_queries.q1, current_path, suffix, table_id, session + vendored_dbbenchmark_join_queries.q1, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/join/q2.py b/tests/benchmark/db_benchmark/join/q2.py index 6c9141b316..19efd6fbf2 100644 --- a/tests/benchmark/db_benchmark/join/q2.py +++ b/tests/benchmark/db_benchmark/join/q2.py @@ -18,10 +18,22 @@ import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_join_queries.q2, current_path, suffix, table_id, session + vendored_dbbenchmark_join_queries.q2, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/join/q3.py b/tests/benchmark/db_benchmark/join/q3.py index 284ab6a2b3..d0a931bfb2 100644 --- a/tests/benchmark/db_benchmark/join/q3.py +++ b/tests/benchmark/db_benchmark/join/q3.py @@ -18,10 +18,22 @@ import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_join_queries.q3, current_path, suffix, table_id, session + vendored_dbbenchmark_join_queries.q3, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/join/q4.py b/tests/benchmark/db_benchmark/join/q4.py index 1504e0a663..ebd7c461d0 100644 --- a/tests/benchmark/db_benchmark/join/q4.py +++ b/tests/benchmark/db_benchmark/join/q4.py @@ -18,10 +18,22 @@ import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_join_queries.q4, current_path, suffix, table_id, session + vendored_dbbenchmark_join_queries.q4, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/join/q5.py b/tests/benchmark/db_benchmark/join/q5.py index 575b3711e6..7114acd408 100644 --- a/tests/benchmark/db_benchmark/join/q5.py +++ b/tests/benchmark/db_benchmark/join/q5.py @@ -18,10 +18,22 @@ import bigframes_vendored.db_benchmark.join_queries as vendored_dbbenchmark_join_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_join_queries.q5, current_path, suffix, table_id, session + vendored_dbbenchmark_join_queries.q5, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/db_benchmark/sort/config.jsonl 
b/tests/benchmark/db_benchmark/sort/config.jsonl index 72884d6c5a..e709281137 100644 --- a/tests/benchmark/db_benchmark/sort/config.jsonl +++ b/tests/benchmark/db_benchmark/sort/config.jsonl @@ -1,2 +1,2 @@ -{"benchmark_suffix": "50g_ordered", "table_id": "J1_1e9_NA_0_0", "ordered": true} -{"benchmark_suffix": "50g_unordered", "table_id": "J1_1e9_NA_0_0", "ordered": false} +{"benchmark_suffix": "50g_ordered", "project_id": "bigframes-dev-perf", "dataset_id": "dbbenchmark", "table_id": "J1_1e9_NA_0_0", "ordered": true} +{"benchmark_suffix": "50g_unordered", "project_id": "bigframes-dev-perf", "dataset_id": "dbbenchmark", "table_id": "J1_1e9_NA_0_0", "ordered": false} diff --git a/tests/benchmark/db_benchmark/sort/q1.py b/tests/benchmark/db_benchmark/sort/q1.py index f17a843192..5f6c404443 100644 --- a/tests/benchmark/db_benchmark/sort/q1.py +++ b/tests/benchmark/db_benchmark/sort/q1.py @@ -18,9 +18,21 @@ import bigframes_vendored.db_benchmark.sort_queries as vendored_dbbenchmark_sort_queries if __name__ == "__main__": - table_id, session, suffix = utils.get_dbbenchmark_configuration() + ( + project_id, + dataset_id, + table_id, + session, + suffix, + ) = utils.get_configuration(include_table_id=True) current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( - vendored_dbbenchmark_sort_queries.q1, current_path, suffix, table_id, session + vendored_dbbenchmark_sort_queries.q1, + current_path, + suffix, + project_id, + dataset_id, + table_id, + session, ) diff --git a/tests/benchmark/tpch/q1.py b/tests/benchmark/tpch/q1.py index 3f1c63967e..a672103931 100644 --- a/tests/benchmark/tpch/q1.py +++ b/tests/benchmark/tpch/q1.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q1 as vendored_tpch_q1 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q10.py b/tests/benchmark/tpch/q10.py index bea18975ca..d468a90156 100644 --- a/tests/benchmark/tpch/q10.py +++ b/tests/benchmark/tpch/q10.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q10 as vendored_tpch_q10 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q11.py b/tests/benchmark/tpch/q11.py index 538d8d3e5f..dbf3fd94de 100644 --- a/tests/benchmark/tpch/q11.py +++ b/tests/benchmark/tpch/q11.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q11 as vendored_tpch_q11 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q12.py b/tests/benchmark/tpch/q12.py index 6503b543f4..57774457ae 100644 --- a/tests/benchmark/tpch/q12.py +++ b/tests/benchmark/tpch/q12.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q12 as vendored_tpch_q12 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q13.py 
b/tests/benchmark/tpch/q13.py index 60c2101f6f..a7f2780e4b 100644 --- a/tests/benchmark/tpch/q13.py +++ b/tests/benchmark/tpch/q13.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q13 as vendored_tpch_q13 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q14.py b/tests/benchmark/tpch/q14.py index 1698a01628..e9599f3bd8 100644 --- a/tests/benchmark/tpch/q14.py +++ b/tests/benchmark/tpch/q14.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q14 as vendored_tpch_q14 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q15.py b/tests/benchmark/tpch/q15.py index 49e2ce4e92..ff200384a8 100644 --- a/tests/benchmark/tpch/q15.py +++ b/tests/benchmark/tpch/q15.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q15 as vendored_tpch_q15 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q16.py b/tests/benchmark/tpch/q16.py index ef6edf6b12..69fc1b9523 100644 --- a/tests/benchmark/tpch/q16.py +++ b/tests/benchmark/tpch/q16.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q16 as vendored_tpch_q16 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q17.py b/tests/benchmark/tpch/q17.py index 2f680d206e..14707f4a93 100644 --- a/tests/benchmark/tpch/q17.py +++ b/tests/benchmark/tpch/q17.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q17 as vendored_tpch_q17 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q18.py b/tests/benchmark/tpch/q18.py index 7336246f1b..54cf0d0432 100644 --- a/tests/benchmark/tpch/q18.py +++ b/tests/benchmark/tpch/q18.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q18 as vendored_tpch_q18 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q19.py b/tests/benchmark/tpch/q19.py index 3bf34794bf..1ec44391ff 100644 --- a/tests/benchmark/tpch/q19.py +++ b/tests/benchmark/tpch/q19.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q19 as vendored_tpch_q19 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q2.py 
b/tests/benchmark/tpch/q2.py index c738aae124..da8064b400 100644 --- a/tests/benchmark/tpch/q2.py +++ b/tests/benchmark/tpch/q2.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q2 as vendored_tpch_q2 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q20.py b/tests/benchmark/tpch/q20.py index 1b254c6a78..33e4f72ef6 100644 --- a/tests/benchmark/tpch/q20.py +++ b/tests/benchmark/tpch/q20.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q20 as vendored_tpch_q20 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q21.py b/tests/benchmark/tpch/q21.py index 18e8df87fe..f73f87725f 100644 --- a/tests/benchmark/tpch/q21.py +++ b/tests/benchmark/tpch/q21.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q21 as vendored_tpch_q21 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q22.py b/tests/benchmark/tpch/q22.py index 6c10021c2b..0a6f6d923c 100644 --- a/tests/benchmark/tpch/q22.py +++ b/tests/benchmark/tpch/q22.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q22 as vendored_tpch_q22 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q3.py b/tests/benchmark/tpch/q3.py index 5bcaaa0d5d..92322eea21 100644 --- a/tests/benchmark/tpch/q3.py +++ b/tests/benchmark/tpch/q3.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q3 as vendored_tpch_q3 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q4.py b/tests/benchmark/tpch/q4.py index 462c6336d1..2d6931d6b1 100644 --- a/tests/benchmark/tpch/q4.py +++ b/tests/benchmark/tpch/q4.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q4 as vendored_tpch_q4 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q5.py b/tests/benchmark/tpch/q5.py index 108cde58cc..e8fd83e193 100644 --- a/tests/benchmark/tpch/q5.py +++ b/tests/benchmark/tpch/q5.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q5 as vendored_tpch_q5 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q6.py b/tests/benchmark/tpch/q6.py index 
ccefc1b0bf..152d6c663e 100644 --- a/tests/benchmark/tpch/q6.py +++ b/tests/benchmark/tpch/q6.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q6 as vendored_tpch_q6 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q7.py b/tests/benchmark/tpch/q7.py index 0cad599a60..1c3e455e1c 100644 --- a/tests/benchmark/tpch/q7.py +++ b/tests/benchmark/tpch/q7.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q7 as vendored_tpch_q7 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q8.py b/tests/benchmark/tpch/q8.py index 6c6ac23b9b..8d23194834 100644 --- a/tests/benchmark/tpch/q8.py +++ b/tests/benchmark/tpch/q8.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q8 as vendored_tpch_q8 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/tpch/q9.py b/tests/benchmark/tpch/q9.py index 05c82fb66e..329e315c2c 100644 --- a/tests/benchmark/tpch/q9.py +++ b/tests/benchmark/tpch/q9.py @@ -17,7 +17,7 @@ import bigframes_vendored.tpch.queries.q9 as vendored_tpch_q9 if __name__ == "__main__": - project_id, dataset_id, session, suffix = utils.get_tpch_configuration() + project_id, dataset_id, session, suffix = utils.get_configuration() current_path = pathlib.Path(__file__).absolute() utils.get_execution_time( diff --git a/tests/benchmark/utils.py b/tests/benchmark/utils.py index 32be33fc74..887d54dba2 100644 --- a/tests/benchmark/utils.py +++ b/tests/benchmark/utils.py @@ -18,43 +18,29 @@ import bigframes -def get_dbbenchmark_configuration(): +def get_configuration(include_table_id=False): parser = argparse.ArgumentParser() - parser.add_argument( - "--table_id", - type=str, - required=True, - help="The BigQuery table ID to query.", - ) - parser.add_argument( - "--ordered", - type=str, - help="Set to True (default) to have an ordered session, or False for an unordered session.", - ) - parser.add_argument( - "--benchmark_suffix", - type=str, - help="Suffix to append to benchmark names for identification purposes.", - ) - args = parser.parse_args() - session = _initialize_session(_str_to_bool(args.ordered)) - return args.table_id, session, args.benchmark_suffix - - -def get_tpch_configuration(): - parser = argparse.ArgumentParser(description="Process TPC-H Query using BigFrames.") parser.add_argument( "--project_id", type=str, required=True, - help="The BigQuery dataset ID to query.", + help="The BigQuery project ID.", ) parser.add_argument( "--dataset_id", type=str, required=True, - help="The BigQuery dataset ID to query.", + help="The BigQuery dataset ID.", ) + + if include_table_id: + parser.add_argument( + "--table_id", + type=str, + required=True, + help="The BigQuery table ID to query.", + ) + parser.add_argument( "--ordered", type=str, @@ -68,7 +54,22 @@ def get_tpch_configuration(): args = parser.parse_args() session = _initialize_session(_str_to_bool(args.ordered)) - return args.project_id, args.dataset_id, 
session, args.benchmark_suffix + + if include_table_id: + return ( + args.project_id, + args.dataset_id, + args.table_id, + session, + args.benchmark_suffix, + ) + else: + return ( + args.project_id, + args.dataset_id, + session, + args.benchmark_suffix, + ) def get_execution_time(func, current_path, suffix, *args, **kwargs): diff --git a/tests/system/large/operations/__init__.py b/tests/system/large/operations/__init__.py new file mode 100644 index 0000000000..6d5e14bcf4 --- /dev/null +++ b/tests/system/large/operations/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/system/large/operations/conftest.py b/tests/system/large/operations/conftest.py new file mode 100644 index 0000000000..7ab3811f10 --- /dev/null +++ b/tests/system/large/operations/conftest.py @@ -0,0 +1,33 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import bigframes.ml.llm as llm + + +@pytest.fixture(scope="session") +def gemini_flash_model(session, bq_connection) -> llm.GeminiTextGenerator: + return llm.GeminiTextGenerator( + session=session, + connection_name=bq_connection, + model_name="gemini-1.5-flash-001", + ) + + +@pytest.fixture(scope="session") +def text_embedding_generator(session, bq_connection) -> llm.TextEmbeddingGenerator: + return llm.TextEmbeddingGenerator( + session=session, connection_name=bq_connection, model_name="text-embedding-004" + ) diff --git a/tests/system/large/operations/test_semantics.py b/tests/system/large/operations/test_semantics.py new file mode 100644 index 0000000000..2d7f4756af --- /dev/null +++ b/tests/system/large/operations/test_semantics.py @@ -0,0 +1,635 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pandas as pd +import pandas.testing +import pytest + +import bigframes +import bigframes.dataframe as dataframe +import bigframes.dtypes as dtypes + + +def test_semantics_experiment_off_raise_error(): + bigframes.options.experiments.semantic_operators = False + df = dataframe.DataFrame( + {"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]} + ) + + with pytest.raises(NotImplementedError): + df.semantics + + +@pytest.mark.parametrize( + ("max_agg_rows", "cluster_column"), + [ + pytest.param(1, None, id="one", marks=pytest.mark.xfail(raises=ValueError)), + pytest.param(2, None, id="two"), + pytest.param(3, None, id="three"), + pytest.param(4, None, id="four"), + pytest.param(5, "Year", id="two_w_cluster_column"), + pytest.param(6, "Year", id="three_w_cluster_column"), + pytest.param(7, "Year", id="four_w_cluster_column"), + ], +) +def test_agg_w_max_agg_rows(session, gemini_flash_model, max_agg_rows, cluster_column): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={ + "Movies": [ + "Titanic", + "The Wolf of Wall Street", + "Killers of the Flower Moon", + "The Revenant", + "Inception", + "Shuttle Island", + "The Great Gatsby", + ], + "Year": [1997, 2013, 2023, 2015, 2010, 2010, 2013], + }, + session=session, + ) + instruction = "Find the shared first name of actors in {Movies}. One word answer." + actual_s = df.semantics.agg( + instruction, + model=gemini_flash_model, + max_agg_rows=max_agg_rows, + cluster_column=cluster_column, + ).to_pandas() + + expected_s = pd.Series(["Leonardo \n"], dtype=dtypes.STRING_DTYPE) + expected_s.name = "Movies" + pandas.testing.assert_series_equal(actual_s, expected_s, check_index_type=False) + + +@pytest.mark.parametrize( + "instruction", + [ + pytest.param( + "No column reference", + id="zero_column", + marks=pytest.mark.xfail(raises=ValueError), + ), + pytest.param( + "{city} is in the {non_existing_column}", + id="non_existing_column", + marks=pytest.mark.xfail(raises=ValueError), + ), + pytest.param( + "{city} is in the {country}", + id="two_columns", + marks=pytest.mark.xfail(raises=NotImplementedError), + ), + ], +) +def test_agg_invalid_instruction_raise_error(instruction, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + {"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]} + ) + df.semantics.agg(instruction, gemini_flash_model) + + +@pytest.mark.parametrize( + "cluster_column", + [ + pytest.param( + "non_existing_column", + id="non_existing_column", + marks=pytest.mark.xfail(raises=ValueError), + ), + pytest.param( + "Movies", id="non_int_column", marks=pytest.mark.xfail(raises=TypeError) + ), + ], +) +def test_agg_invalid_cluster_column_raise_error(gemini_flash_model, cluster_column): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={ + "Movies": [ + "Titanic", + "The Wolf of Wall Street", + "Killers of the Flower Moon", + "The Revenant", + ], + }, + ) + instruction = "Find the shared first name of actors in {Movies}. One word answer." 
+ df.semantics.agg(instruction, gemini_flash_model, cluster_column=cluster_column) + + +@pytest.mark.parametrize( + ("n_clusters"), + [ + pytest.param(1, id="one", marks=pytest.mark.xfail(raises=ValueError)), + pytest.param(2, id="two"), + pytest.param(4, id="four"), + ], +) +def test_cluster_by(session, text_embedding_generator, n_clusters): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + ({"Product": ["Smartphone", "Laptop", "Coffee Maker", "T-shirt", "Jeans"]}), + session=session, + ) + output_column = "cluster id" + result = df.semantics.cluster_by( + "Product", + output_column, + text_embedding_generator, + n_clusters=n_clusters, + ) + + assert output_column in result + assert len(result[output_column].unique()) == n_clusters + + +def test_cluster_by_invalid_column(session, text_embedding_generator): + bigframes.options.experiments.semantic_operators = True + + df = dataframe.DataFrame( + ({"Product": ["Smartphone", "Laptop", "Coffee Maker", "T-shirt", "Jeans"]}), + session=session, + ) + + output_column = "cluster id" + with pytest.raises(ValueError): + df.semantics.cluster_by( + "unknown_column", + output_column, + text_embedding_generator, + n_clusters=3, + ) + + +def test_cluster_by_invalid_model(session, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + + df = dataframe.DataFrame( + ({"Product": ["Smartphone", "Laptop", "Coffee Maker", "T-shirt", "Jeans"]}), + session=session, + ) + + output_column = "cluster id" + with pytest.raises(TypeError): + df.semantics.cluster_by( + "Product", + output_column, + gemini_flash_model, + n_clusters=3, + ) + + +def test_filter(session, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]}, + session=session, + ) + + actual_df = df.semantics.filter( + "{city} is the capital of {country}", gemini_flash_model + ).to_pandas() + + expected_df = pd.DataFrame({"country": ["Germany"], "city": ["Berlin"]}, index=[1]) + pandas.testing.assert_frame_equal( + actual_df, expected_df, check_dtype=False, check_index_type=False + ) + + +def test_filter_single_column_reference(session, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]}, + session=session, + ) + + actual_df = df.semantics.filter( + "{country} is in Europe", gemini_flash_model + ).to_pandas() + + expected_df = pd.DataFrame({"country": ["Germany"], "city": ["Berlin"]}, index=[1]) + pandas.testing.assert_frame_equal( + actual_df, expected_df, check_dtype=False, check_index_type=False + ) + + +@pytest.mark.parametrize( + "instruction", + [ + "No column reference", + "{city} is in the {non_existing_column}", + ], +) +def test_filter_invalid_instruction_raise_error(instruction, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + {"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]} + ) + + with pytest.raises(ValueError): + df.semantics.filter(instruction, gemini_flash_model) + + +def test_filter_invalid_model_raise_error(): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + {"country": ["USA", "Germany"], "city": ["Seattle", "Berlin"]} + ) + + with pytest.raises(ValueError): + df.semantics.filter("{city} is the capital of {country}", None) + + +def test_map(session, gemini_flash_model): 
+ bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={ + "ingredient_1": ["Burger Bun", "Soy Bean"], + "ingredient_2": ["Beef Patty", "Bittern"], + }, + session=session, + ) + + actual_df = df.semantics.map( + "What is the food made from {ingredient_1} and {ingredient_2}? One word only.", + "food", + gemini_flash_model, + ).to_pandas() + # Result sanitation + actual_df["food"] = actual_df["food"].str.strip().str.lower() + + expected_df = pd.DataFrame( + { + "ingredient_1": ["Burger Bun", "Soy Bean"], + "ingredient_2": ["Beef Patty", "Bittern"], + "food": ["burger", "tofu"], + } + ) + pandas.testing.assert_frame_equal( + actual_df, + expected_df, + check_dtype=False, + check_index_type=False, + check_column_type=False, + ) + + +@pytest.mark.parametrize( + "instruction", + [ + "No column reference", + "What is the food made from {ingredient_1} and {non_existing_column}?}", + ], +) +def test_map_invalid_instruction_raise_error(instruction, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={ + "ingredient_1": ["Burger Bun", "Soy Bean"], + "ingredient_2": ["Beef Patty", "Bittern"], + } + ) + + with pytest.raises(ValueError): + df.semantics.map(instruction, "food", gemini_flash_model) + + +def test_map_invalid_model_raise_error(): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={ + "ingredient_1": ["Burger Bun", "Soy Bean"], + "ingredient_2": ["Beef Patty", "Bittern"], + }, + ) + + with pytest.raises(ValueError): + df.semantics.map( + "What is the food made from {ingredient_1} and {ingredient_2}? One word only.", + "food", + None, + ) + + +def test_join(session, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + cities = dataframe.DataFrame( + data={ + "city": ["Seattle", "Berlin"], + }, + session=session, + ) + countries = dataframe.DataFrame( + data={"country": ["USA", "UK", "Germany"]}, + session=session, + ) + + actual_df = cities.semantics.join( + countries, + "{city} belongs to {country}", + gemini_flash_model, + ).to_pandas() + + expected_df = pd.DataFrame( + { + "city": ["Seattle", "Berlin"], + "country": ["USA", "Germany"], + } + ) + pandas.testing.assert_frame_equal( + actual_df, + expected_df, + check_dtype=False, + check_index_type=False, + check_column_type=False, + ) + + +def test_self_join(session, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + animals = dataframe.DataFrame( + data={ + "animal": ["spider", "capybara"], + }, + session=session, + ) + + actual_df = animals.semantics.join( + animals, + "{animal_left} is heavier than {animal_right}", + gemini_flash_model, + ).to_pandas() + + expected_df = pd.DataFrame( + { + "animal_left": ["capybara"], + "animal_right": ["spider"], + } + ) + pandas.testing.assert_frame_equal( + actual_df, + expected_df, + check_dtype=False, + check_index_type=False, + check_column_type=False, + ) + + +def test_join_data_too_large_raise_error(session, gemini_flash_model): + bigframes.options.experiments.semantic_operators = True + cities = dataframe.DataFrame( + data={ + "city": ["Seattle", "Berlin"], + }, + session=session, + ) + countries = dataframe.DataFrame( + data={"country": ["USA", "UK", "Germany"]}, + session=session, + ) + + with pytest.raises(ValueError): + cities.semantics.join( + countries, "{city} belongs to {country}", gemini_flash_model, max_rows=1 + ) + + +@pytest.mark.parametrize( + ("instruction", "error_pattern"), + [ + 
("No column reference", "No column references"), + pytest.param( + "{city} is in {continent}", r"Column .+ not found", id="non_existing_column" + ), + pytest.param( + "{city} is in {country}", + r"Ambiguous column reference: .+", + id="ambiguous_column", + ), + pytest.param( + "{city_left} is in {country}", + r"Unnecessary suffix for .+", + id="suffix_on_left_unique_column", + ), + pytest.param( + "{city} is in {region_right}", + r"Unnecessary suffix for .+", + id="suffix_on_right_unique_column", + ), + pytest.param( + "{city_right} is in {country}", r"Column .+ not found", id="wrong_suffix" + ), + pytest.param( + "{city} is in {continent_right}", + r"Column .+ not found", + id="suffix_on_non_existing_column", + ), + ], +) +def test_join_invalid_instruction_raise_error( + instruction, error_pattern, gemini_flash_model +): + bigframes.options.experiments.semantic_operators = True + df1 = dataframe.DataFrame( + {"city": ["Seattle", "Berlin"], "country": ["USA", "Germany"]} + ) + df2 = dataframe.DataFrame( + { + "country": ["USA", "UK", "Germany"], + "region": ["North America", "Europe", "Europe"], + } + ) + + with pytest.raises(ValueError, match=error_pattern): + df1.semantics.join(df2, instruction, gemini_flash_model) + + +def test_join_invalid_model_raise_error(): + bigframes.options.experiments.semantic_operators = True + cities = dataframe.DataFrame({"city": ["Seattle", "Berlin"]}) + countries = dataframe.DataFrame({"country": ["USA", "UK", "Germany"]}) + + with pytest.raises(ValueError): + cities.semantics.join(countries, "{city} is in {country}", None) + + +@pytest.mark.parametrize( + "score_column", + [ + pytest.param(None, id="no_score_column"), + pytest.param("distance", id="has_score_column"), + ], +) +def test_search(session, text_embedding_generator, score_column): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={"creatures": ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]}, + session=session, + ) + + actual_result = df.semantics.search( + "creatures", + "monkey", + top_k=2, + model=text_embedding_generator, + score_column=score_column, + ).to_pandas() + + expected_result = pd.Series( + ["baboons", "chimpanzee"], index=[2, 4], name="creatures" + ) + pandas.testing.assert_series_equal( + actual_result["creatures"], + expected_result, + check_dtype=False, + check_index_type=False, + ) + + if score_column is None: + assert len(actual_result.columns) == 1 + else: + assert score_column in actual_result.columns + + +def test_search_invalid_column_raises_error(session, text_embedding_generator): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={"creatures": ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]}, + session=session, + ) + + with pytest.raises(ValueError): + df.semantics.search( + "whatever", "monkey", top_k=2, model=text_embedding_generator + ) + + +def test_search_invalid_model_raises_error(session): + bigframes.options.experiments.semantic_operators = True + df = dataframe.DataFrame( + data={"creatures": ["salmon", "sea urchin", "baboons", "frog", "chimpanzee"]}, + session=session, + ) + + with pytest.raises(TypeError): + df.semantics.search("creatures", "monkey", top_k=2, model=None) + + +@pytest.mark.parametrize( + "score_column", + [ + pytest.param(None, id="no_score_column"), + pytest.param("distance", id="has_score_column"), + ], +) +def test_sim_join(session, text_embedding_generator, score_column): + bigframes.options.experiments.semantic_operators = True + df1 
= dataframe.DataFrame( + data={"creatures": ["salmon", "cat"]}, + session=session, + ) + df2 = dataframe.DataFrame( + data={"creatures": ["dog", "tuna"]}, + session=session, + ) + + actual_result = df1.semantics.sim_join( + df2, + left_on="creatures", + right_on="creatures", + model=text_embedding_generator, + top_k=1, + score_column=score_column, + ).to_pandas() + + expected_result = pd.DataFrame( + {"creatures": ["salmon", "cat"], "creatures_1": ["tuna", "dog"]} + ) + pandas.testing.assert_frame_equal( + actual_result[["creatures", "creatures_1"]], + expected_result, + check_dtype=False, + check_index_type=False, + ) + + if score_column is None: + assert len(actual_result.columns) == 2 + else: + assert score_column in actual_result.columns + + +@pytest.mark.parametrize( + ("left_on", "right_on"), + [ + pytest.param("whatever", "creatures", id="incorrect_left_column"), + pytest.param("creatures", "whatever", id="incorrect_right_column"), + ], +) +def test_sim_join_invalid_column_raises_error( + session, text_embedding_generator, left_on, right_on +): + bigframes.options.experiments.semantic_operators = True + df1 = dataframe.DataFrame( + data={"creatures": ["salmon", "cat"]}, + session=session, + ) + df2 = dataframe.DataFrame( + data={"creatures": ["dog", "tuna"]}, + session=session, + ) + + with pytest.raises(ValueError): + df1.semantics.sim_join( + df2, left_on=left_on, right_on=right_on, model=text_embedding_generator + ) + + +def test_sim_join_invalid_model_raises_error(session): + bigframes.options.experiments.semantic_operators = True + df1 = dataframe.DataFrame( + data={"creatures": ["salmon", "cat"]}, + session=session, + ) + df2 = dataframe.DataFrame( + data={"creatures": ["dog", "tuna"]}, + session=session, + ) + + with pytest.raises(TypeError): + df1.semantics.sim_join( + df2, left_on="creatures", right_on="creatures", model=None + ) + + +def test_sim_join_data_too_large_raises_error(session, text_embedding_generator): + bigframes.options.experiments.semantic_operators = True + df1 = dataframe.DataFrame( + data={"creatures": ["salmon", "cat"]}, + session=session, + ) + df2 = dataframe.DataFrame( + data={"creatures": ["dog", "tuna"]}, + session=session, + ) + + with pytest.raises(ValueError): + df1.semantics.sim_join( + df2, + left_on="creatures", + right_on="creatures", + model=text_embedding_generator, + max_rows=1, + ) diff --git a/tests/system/large/test_location.py b/tests/system/large/test_location.py index 204c6b7463..3521e4cd20 100644 --- a/tests/system/large/test_location.py +++ b/tests/system/large/test_location.py @@ -22,7 +22,9 @@ import bigframes.session.clients -def _assert_bq_execution_location(session: bigframes.Session): +def _assert_bq_execution_location( + session: bigframes.Session, expected_location: typing.Optional[str] = None +): df = session.read_gbq( """ SELECT "aaa" as name, 111 as number @@ -33,10 +35,10 @@ def _assert_bq_execution_location(session: bigframes.Session): """ ) - assert ( - typing.cast(bigquery.QueryJob, df.query_job).location - == session.bqclient.location - ) + if expected_location is None: + expected_location = session._location + + assert typing.cast(bigquery.QueryJob, df.query_job).location == expected_location result = ( df[["name", "number"]] @@ -47,8 +49,7 @@ def _assert_bq_execution_location(session: bigframes.Session): ) assert ( - typing.cast(bigquery.QueryJob, result.query_job).location - == session.bqclient.location + typing.cast(bigquery.QueryJob, result.query_job).location == expected_location ) @@ -87,6 +88,30 @@ def 
test_bq_location(bigquery_location): _assert_bq_execution_location(session) +@pytest.mark.parametrize( + ("set_location", "resolved_location"), + # Sort the set to avoid nondeterminism. + [ + (loc.capitalize(), loc) + for loc in sorted(bigframes.constants.ALL_BIGQUERY_LOCATIONS) + ], +) +def test_bq_location_non_canonical(set_location, resolved_location): + session = bigframes.Session( + context=bigframes.BigQueryOptions(location=set_location) + ) + + assert session.bqclient.location == resolved_location + + # by default global endpoint is used + assert ( + session.bqclient._connection.API_BASE_URL == "https://bigquery.googleapis.com" + ) + + # assert that bigframes session honors the location + _assert_bq_execution_location(session, resolved_location) + + @pytest.mark.parametrize( "bigquery_location", # Sort the set to avoid nondeterminism. diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index 18d2609347..2365002857 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -1670,7 +1670,11 @@ def analyze(row): (3, 4): ["pq", "rs", "tu"], (5.0, "six", 7): [8, 9, 10], 'raise Exception("hacked!")': [11, 12, 13], - } + }, + # Default pandas index has non-numpy type, whereas bigframes is + # always numpy-based type, so let's use the index compatible + # with bigframes. See more details in b/369689696. + index=pandas.Index([0, 1, 2], dtype=pandas.Int64Dtype()), ), id="all-kinds-of-column-names", ), @@ -1681,17 +1685,22 @@ def analyze(row): "y": [1.5, 3.75, 5], "z": ["pq", "rs", "tu"], }, - index=pandas.MultiIndex.from_tuples( - [ - ("a", 100), - ("a", 200), - ("b", 300), - ] + index=pandas.MultiIndex.from_frame( + pandas.DataFrame( + { + "idx0": pandas.Series( + ["a", "a", "b"], dtype=pandas.StringDtype() + ), + "idx1": pandas.Series( + [100, 200, 300], dtype=pandas.Int64Dtype() + ), + } + ) ), ), id="multiindex", marks=pytest.mark.skip( - reason="TODO(b/368639580) revert this skip after fix" + reason="TODO: revert this skip after this pandas bug is fixed: https://github.com/pandas-dev/pandas/issues/59908" ), ), pytest.param( @@ -1701,6 +1710,10 @@ def analyze(row): [20, 3.75, "rs"], [30, 8.0, "tu"], ], + # Default pandas index has non-numpy type, whereas bigframes is + # always numpy-based type, so let's use the index compatible + # with bigframes. See more details in b/369689696. + index=pandas.Index([0, 1, 2], dtype=pandas.Int64Dtype()), columns=pandas.MultiIndex.from_arrays( [ ["first", "last_two", "last_two"], @@ -1729,10 +1742,8 @@ def test_df_apply_axis_1_complex(session, pd_df): def serialize_row(row): custom = { - "name": row.name.item() if hasattr(row.name, "item") else row.name, - "index": [ - idx.item() if hasattr(idx, "item") else idx for idx in row.index - ], + "name": row.name, + "index": [idx for idx in row.index], "values": [ val.item() if hasattr(val, "item") else val for val in row.values ], @@ -1756,12 +1767,7 @@ def serialize_row(row): bf_result = bf_df.apply(serialize_row_remote, axis=1).to_pandas() pd_result = pd_df.apply(serialize_row, axis=1) - # bf_result.dtype is 'string[pyarrow]' while pd_result.dtype is 'object' - # , ignore this mismatch by using check_dtype=False. - # - # bf_result.index[0].dtype is 'string[pyarrow]' while - # pd_result.index[0].dtype is 'object', ignore this mismatch by using - # check_index_type=False. 
+ # ignore known dtype difference between pandas and bigframes pandas.testing.assert_series_equal( pd_result, bf_result, check_dtype=False, check_index_type=False ) diff --git a/tests/system/load/test_llm.py b/tests/system/load/test_llm.py index 51b45485ad..4b0f50973b 100644 --- a/tests/system/load/test_llm.py +++ b/tests/system/load/test_llm.py @@ -38,30 +38,6 @@ def llm_remote_text_df(session, llm_remote_text_pandas_df): return session.read_pandas(llm_remote_text_pandas_df) -@pytest.mark.flaky(retries=2) -def test_llm_palm_configure_fit(llm_fine_tune_df_default_index, llm_remote_text_df): - model = llm.PaLM2TextGenerator(model_name="text-bison", max_iterations=1) - - X_train = llm_fine_tune_df_default_index[["prompt"]] - y_train = llm_fine_tune_df_default_index[["label"]] - model.fit(X_train, y_train) - - assert model is not None - - df = model.predict(llm_remote_text_df["prompt"]).to_pandas() - utils.check_pandas_df_schema_and_index( - df, - columns=[ - "ml_generate_text_llm_result", - "ml_generate_text_rai_result", - "ml_generate_text_status", - "prompt", - ], - index=3, - ) - # TODO(ashleyxu b/335492787): After bqml rolled out version control: save, load, check parameters to ensure configuration was kept - - @pytest.mark.flaky(retries=2) def test_llm_gemini_configure_fit(llm_fine_tune_df_default_index, llm_remote_text_df): model = llm.GeminiTextGenerator(model_name="gemini-pro", max_iterations=1) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 8c2912edd4..1fb12d3f82 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -591,15 +591,19 @@ def test_join_repr(scalars_dfs_maybe_ordered): assert actual == expected -def test_repr_html_w_all_rows(scalars_dfs): +def test_repr_html_w_all_rows(scalars_dfs, session): + metrics = session._metrics scalars_df, _ = scalars_dfs # get a pandas df of the expected format df, _ = scalars_df._block.to_pandas() pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1) pandas_df.index.name = scalars_df.index.name + executions_pre = metrics.execution_count # When there are 10 or fewer rows, the outputs should be identical except for the extra note. 
actual = scalars_df.head(10)._repr_html_() + executions_post = metrics.execution_count + with display_options.pandas_repr(bigframes.options.display): pandas_repr = pandas_df.head(10)._repr_html_() @@ -608,6 +612,7 @@ def test_repr_html_w_all_rows(scalars_dfs): + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]" ) assert actual == expected + assert (executions_post - executions_pre) <= 2 def test_df_column_name_with_space(scalars_dfs): @@ -1516,6 +1521,30 @@ def test_shape(scalars_dfs): assert bf_result == pd_result +@pytest.mark.parametrize( + "reference_table, test_table", + [ + ( + "bigframes-dev.bigframes_tests_sys.base_table", + "bigframes-dev.bigframes_tests_sys.base_table_mat_view", + ), + ( + "bigframes-dev.bigframes_tests_sys.base_table", + "bigframes-dev.bigframes_tests_sys.base_table_view", + ), + ( + "bigframes-dev.bigframes_tests_sys.csv_native_table", + "bigframes-dev.bigframes_tests_sys.csv_external_table", + ), + ], +) +def test_view_and_external_table_shape(session, reference_table, test_table): + reference_df = session.read_gbq(reference_table) + test_df = session.read_gbq(test_table) + + assert test_df.shape == reference_df.shape + + def test_len(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = len(scalars_df) diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 8574860daa..2d5ae21bb4 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -421,6 +421,34 @@ def test_dataframe_groupby_getitem( pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) +def test_dataframe_groupby_getitem_error( + scalars_df_index, + scalars_pandas_df_index, +): + col_names = ["float64_col", "int64_col", "bool_col", "string_col"] + with pytest.raises(KeyError, match="\"Columns not found: 'not_in_group'\""): + ( + scalars_df_index[col_names] + .groupby("string_col")["not_in_group"] + .min() + .to_pandas() + ) + + +def test_dataframe_groupby_getitem_multiple_columns_error( + scalars_df_index, + scalars_pandas_df_index, +): + col_names = ["float64_col", "int64_col", "bool_col", "string_col"] + with pytest.raises(KeyError, match="\"Columns not found: 'col1', 'col2'\""): + ( + scalars_df_index[col_names] + .groupby("string_col")["col1", "col2"] + .min() + .to_pandas() + ) + + def test_dataframe_groupby_getitem_list( scalars_df_index, scalars_pandas_df_index, diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index ab2a9c19b8..cab74f617d 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ -1178,7 +1178,7 @@ def test_column_multi_index_dot_not_supported(): bf1 @ bf2 -def test_explode_w_multi_index(): +def test_explode_w_column_multi_index(): data = [[[1, 1], np.nan, [3, 3]], [[2], [5], []]] multi_level_columns = pandas.MultiIndex.from_arrays( [["col0", "col0", "col1"], ["col00", "col01", "col11"]] @@ -1197,6 +1197,24 @@ def test_explode_w_multi_index(): ) +def test_explode_w_multi_index(): + data = [[[1, 1], np.nan, [3, 3]], [[2], [5], []]] + columns = ["col00", "col01", "col11"] + multi_index = pandas.MultiIndex.from_frame( + pandas.DataFrame({"idx0": [5, 1], "idx1": ["z", "x"]}) + ) + + df = bpd.DataFrame(data, index=multi_index, columns=columns) + pd_df = df.to_pandas() + + pandas.testing.assert_frame_equal( + df.explode("col00").to_pandas(), + pd_df.explode("col00"), + check_dtype=False, + check_index_type=False, + ) + + def test_column_multi_index_w_na_stack(scalars_df_index, 
scalars_pandas_df_index): columns = ["int64_too", "int64_col", "rowindex_2"] level1 = pandas.Index(["b", "c", "d"]) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 624e287f8d..f1c60664a1 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -3852,6 +3852,28 @@ def test_series_explode(data): pytest.param([5, 1, 3, 2], False, id="ignore_unordered_index"), pytest.param(["z", "x", "a", "b"], True, id="str_index"), pytest.param(["z", "x", "a", "b"], False, id="ignore_str_index"), + pytest.param( + pd.Index(["z", "x", "a", "b"], name="idx"), True, id="str_named_index" + ), + pytest.param( + pd.Index(["z", "x", "a", "b"], name="idx"), + False, + id="ignore_str_named_index", + ), + pytest.param( + pd.MultiIndex.from_frame( + pd.DataFrame({"idx0": [5, 1, 3, 2], "idx1": ["z", "x", "a", "b"]}) + ), + True, + id="multi_index", + ), + pytest.param( + pd.MultiIndex.from_frame( + pd.DataFrame({"idx0": [5, 1, 3, 2], "idx1": ["z", "x", "a", "b"]}) + ), + False, + id="ignore_multi_index", + ), ], ) def test_series_explode_w_index(index, ignore_index): diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 17e8b99704..4b48915d2d 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -390,9 +390,16 @@ def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id): assert df3 is not None -def test_read_gbq_on_linked_dataset_warns(session): +@pytest.mark.parametrize( + "source_table", + [ + "bigframes-dev.thelook_ecommerce.orders", + "bigframes-dev.bigframes_tests_sys.base_table_mat_view", + ], +) +def test_read_gbq_on_linked_dataset_warns(session, source_table): with warnings.catch_warnings(record=True) as warned: - session.read_gbq("bigframes-dev.thelook_ecommerce.orders") + session.read_gbq(source_table) assert len(warned) == 1 assert warned[0].category == bigframes.exceptions.TimeTravelDisabledWarning diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index b827b0723d..d04b5bd575 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -90,20 +90,33 @@ def test_setter_if_session_started_but_setting_the_same_value(attribute): [ (None,), ("us-central1",), + ("us-Central1",), + ("US-CENTRAL1",), + ("US",), + ("us",), ], ) def test_location_set_to_valid_no_warning(valid_location): - options = bigquery_options.BigQueryOptions() - # Ensure that no warnings are emitted. - # https://docs.pytest.org/en/7.0.x/how-to/capture-warnings.html#additional-use-cases-of-warnings-in-tests - with warnings.catch_warnings(): - # Turn matching UnknownLocationWarning into exceptions. - # https://docs.python.org/3/library/warnings.html#warning-filter - warnings.simplefilter( - "error", category=bigframes.exceptions.UnknownLocationWarning - ) + # test setting location through constructor + def set_location_in_ctor(): + bigquery_options.BigQueryOptions(location=valid_location) + + # test setting location property + def set_location_property(): + options = bigquery_options.BigQueryOptions() options.location = valid_location + for op in [set_location_in_ctor, set_location_property]: + # Ensure that no warnings are emitted. + # https://docs.pytest.org/en/7.0.x/how-to/capture-warnings.html#additional-use-cases-of-warnings-in-tests + with warnings.catch_warnings(): + # Turn matching UnknownLocationWarning into exceptions. 
+ # https://docs.python.org/3/library/warnings.html#warning-filter + warnings.simplefilter( + "error", category=bigframes.exceptions.UnknownLocationWarning + ) + op() + @pytest.mark.parametrize( [ @@ -122,11 +135,20 @@ def test_location_set_to_valid_no_warning(valid_location): ], ) def test_location_set_to_invalid_warning(invalid_location, possibility): - options = bigquery_options.BigQueryOptions() - with pytest.warns( - bigframes.exceptions.UnknownLocationWarning, - match=re.escape( - f"The location '{invalid_location}' is set to an unknown value. Did you mean '{possibility}'?" - ), - ): + # test setting location through constructor + def set_location_in_ctor(): + bigquery_options.BigQueryOptions(location=invalid_location) + + # test setting location property + def set_location_property(): + options = bigquery_options.BigQueryOptions() options.location = invalid_location + + for op in [set_location_in_ctor, set_location_property]: + with pytest.warns( + bigframes.exceptions.UnknownLocationWarning, + match=re.escape( + f"The location '{invalid_location}' is set to an unknown value. Did you mean '{possibility}'?" + ), + ): + op() diff --git a/tests/unit/_config/test_experiment_options.py b/tests/unit/_config/test_experiment_options.py new file mode 100644 index 0000000000..49c3d9e53c --- /dev/null +++ b/tests/unit/_config/test_experiment_options.py @@ -0,0 +1,32 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import bigframes._config.experiment_options as experiment_options + + +def test_semantic_operators_default_false(): + options = experiment_options.ExperimentOptions() + + assert options.semantic_operators is False + + +def test_semantic_operators_set_true_shows_warning(): + options = experiment_options.ExperimentOptions() + + with pytest.warns(UserWarning): + options.semantic_operators = True + + assert options.semantic_operators is True diff --git a/third_party/bigframes_vendored/db_benchmark/groupby_queries.py b/third_party/bigframes_vendored/db_benchmark/groupby_queries.py index 672a9b5d5f..7758496db5 100644 --- a/third_party/bigframes_vendored/db_benchmark/groupby_queries.py +++ b/third_party/bigframes_vendored/db_benchmark/groupby_queries.py @@ -4,10 +4,10 @@ import bigframes.session -def q1(table_id: str, session: bigframes.Session): +def q1(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 1: sum v1 by id1") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = x.groupby("id1", as_index=False, dropna=False).agg({"v1": "sum"}) print(ans.shape) @@ -15,10 +15,10 @@ def q1(table_id: str, session: bigframes.Session): print(chk) -def q2(table_id: str, session: bigframes.Session): +def q2(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 2: sum v1 by id1:id2") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = x.groupby(["id1", "id2"], as_index=False, dropna=False).agg({"v1": "sum"}) print(ans.shape) @@ -26,10 +26,10 @@ def q2(table_id: str, session: bigframes.Session): print(chk) -def q3(table_id: str, session: bigframes.Session): +def q3(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 3: sum v1 mean v3 by id3") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = x.groupby("id3", as_index=False, dropna=False).agg( {"v1": "sum", "v3": "mean"} @@ -39,10 +39,10 @@ def q3(table_id: str, session: bigframes.Session): print(chk) -def q4(table_id: str, session: bigframes.Session): +def q4(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 4: mean v1:v3 by id4") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = x.groupby("id4", as_index=False, dropna=False).agg( {"v1": "mean", "v2": "mean", "v3": "mean"} @@ -52,10 +52,10 @@ def q4(table_id: str, session: bigframes.Session): print(chk) -def q5(table_id: str, session: bigframes.Session): +def q5(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 5: sum v1:v3 by id6") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = x.groupby("id6", as_index=False, dropna=False).agg( {"v1": "sum", "v2": "sum", "v3": "sum"} @@ -65,10 +65,10 @@ def q5(table_id: str, session: bigframes.Session): print(chk) -def q6(table_id: str, session: bigframes.Session): +def q6(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 6: median v3 sd v3 by id4 id5") - x = 
session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = x.groupby(["id4", "id5"], as_index=False, dropna=False).agg( {"v3": ["median", "std"]} @@ -78,10 +78,10 @@ def q6(table_id: str, session: bigframes.Session): print(chk) -def q7(table_id: str, session: bigframes.Session): +def q7(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 7: max v1 - min v2 by id3") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = ( x.groupby("id3", as_index=False, dropna=False) @@ -93,10 +93,10 @@ def q7(table_id: str, session: bigframes.Session): print(chk) -def q8(table_id: str, session: bigframes.Session): +def q8(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 8: largest two v3 by id6") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = ( x[~x["v3"].isna()][["id6", "v3"]] @@ -110,10 +110,10 @@ def q8(table_id: str, session: bigframes.Session): print(chk) -def q10(table_id: str, session: bigframes.Session): +def q10(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Groupby benchmark 10: sum v3 count by id1:id6") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = x.groupby( ["id1", "id2", "id3", "id4", "id5", "id6"], as_index=False, dropna=False diff --git a/third_party/bigframes_vendored/db_benchmark/join_queries.py b/third_party/bigframes_vendored/db_benchmark/join_queries.py index 0c01e427a6..f0073436c0 100644 --- a/third_party/bigframes_vendored/db_benchmark/join_queries.py +++ b/third_party/bigframes_vendored/db_benchmark/join_queries.py @@ -4,12 +4,12 @@ import bigframes -def q1(table_id: str, session: bigframes.Session): +def q1(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Join benchmark 1: small inner on int") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") small = session.read_gbq( - f"bigframes-dev-perf.dbbenchmark.{_get_join_table_id(table_id, 'small')}" + f"{project_id}.{dataset_id}.{_get_join_table_id(table_id, 'small')}" ) ans = x.merge(small, on="id1") @@ -19,12 +19,12 @@ def q1(table_id: str, session: bigframes.Session): print(chk) -def q2(table_id: str, session: bigframes.Session): +def q2(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Join benchmark 2: medium inner on int") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") medium = session.read_gbq( - f"bigframes-dev-perf.dbbenchmark.{_get_join_table_id(table_id, 'medium')}" + f"{project_id}.{dataset_id}.{_get_join_table_id(table_id, 'medium')}" ) ans = x.merge(medium, on="id2") @@ -34,12 +34,12 @@ def q2(table_id: str, session: bigframes.Session): print(chk) -def q3(table_id: str, session: bigframes.Session): +def q3(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Join benchmark 3: medium outer on int") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") medium = session.read_gbq( - 
f"bigframes-dev-perf.dbbenchmark.{_get_join_table_id(table_id, 'medium')}" + f"{project_id}.{dataset_id}.{_get_join_table_id(table_id, 'medium')}" ) ans = x.merge(medium, how="left", on="id2") @@ -49,12 +49,12 @@ def q3(table_id: str, session: bigframes.Session): print(chk) -def q4(table_id: str, session: bigframes.Session): +def q4(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Join benchmark 4: medium inner on factor") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") medium = session.read_gbq( - f"bigframes-dev-perf.dbbenchmark.{_get_join_table_id(table_id, 'medium')}" + f"{project_id}.{dataset_id}.{_get_join_table_id(table_id, 'medium')}" ) ans = x.merge(medium, on="id5") @@ -64,12 +64,12 @@ def q4(table_id: str, session: bigframes.Session): print(chk) -def q5(table_id: str, session: bigframes.Session): +def q5(project_id: str, dataset_id: str, table_id: str, session: bigframes.Session): print("Join benchmark 5: big inner on int") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") big = session.read_gbq( - f"bigframes-dev-perf.dbbenchmark.{_get_join_table_id(table_id, 'big')}" + f"{project_id}.{dataset_id}.{_get_join_table_id(table_id, 'big')}" ) ans = x.merge(big, on="id3") diff --git a/third_party/bigframes_vendored/db_benchmark/sort_queries.py b/third_party/bigframes_vendored/db_benchmark/sort_queries.py index 600df103cf..bbaf46cf27 100644 --- a/third_party/bigframes_vendored/db_benchmark/sort_queries.py +++ b/third_party/bigframes_vendored/db_benchmark/sort_queries.py @@ -4,10 +4,12 @@ import bigframes.session -def q1(table_id: str, session: bigframes.Session) -> None: +def q1( + project_id: str, dataset_id: str, table_id: str, session: bigframes.Session +) -> None: print("Sort benchmark 1: sort by int id2") - x = session.read_gbq(f"bigframes-dev-perf.dbbenchmark.{table_id}") + x = session.read_gbq(f"{project_id}.{dataset_id}.{table_id}") ans = x.sort_values("id2") print(ans.shape) diff --git a/third_party/bigframes_vendored/version.py b/third_party/bigframes_vendored/version.py index c07f26bc6f..75f66191ca 100644 --- a/third_party/bigframes_vendored/version.py +++ b/third_party/bigframes_vendored/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.21.0" +__version__ = "1.22.0"