From 8b7b26b9054bfc9c0c47d7886d88640837272c88 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 3 Oct 2023 00:44:51 +0000 Subject: [PATCH 1/6] refactor: simplify ArrayValue public interface --- bigframes/core/__init__.py | 218 ++++++++++++-------------- bigframes/core/blocks.py | 28 +--- bigframes/core/groupby/__init__.py | 4 - bigframes/core/indexes/index.py | 4 +- bigframes/core/joins/row_identity.py | 16 +- bigframes/core/joins/single_column.py | 20 +-- bigframes/dataframe.py | 6 +- bigframes/operations/base.py | 6 - tests/unit/test_core.py | 32 +--- 9 files changed, 130 insertions(+), 204 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 5e0675fd13..d94b60c866 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -16,8 +16,9 @@ from dataclasses import dataclass import functools import math +import textwrap import typing -from typing import Collection, Dict, Iterable, Literal, Optional, Sequence, Tuple +from typing import Collection, Iterable, Literal, Optional, Sequence, Tuple from google.cloud import bigquery import ibis @@ -201,31 +202,27 @@ def mem_expr_from_pandas( hidden_ordering_columns=(keys_memtable[ORDER_ID_COLUMN],), ) - @property - def table(self) -> ibis_types.Table: - return self._table - - @property - def reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]: - """Returns the frame's predicates as an equivalent boolean value, useful where a single predicate value is preferred.""" - return ( - _reduce_predicate_list(self._predicates).name(PREDICATE_COLUMN) - if self._predicates - else None - ) - @property def columns(self) -> typing.Tuple[ibis_types.Value, ...]: return self._columns @property - def column_names(self) -> Dict[str, ibis_types.Value]: - return self._column_names + def column_ids(self) -> typing.Sequence[str]: + return tuple(self._column_names.keys()) @property def hidden_ordering_columns(self) -> typing.Tuple[ibis_types.Value, ...]: return self._hidden_ordering_columns + @property + def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]: + """Returns the frame's predicates as an equivalent boolean value, useful where a single predicate value is preferred.""" + return ( + _reduce_predicate_list(self._predicates).name(PREDICATE_COLUMN) + if self._predicates + else None + ) + @property def _ibis_order(self) -> Sequence[ibis_types.Value]: """Returns a sequence of ibis values which can be directly used to order a table expression. 
Has direction modifiers applied."""
@@ -265,24 +262,22 @@ def drop_columns(self, columns: Iterable[str]) -> ArrayValue:
 
     def get_column_type(self, key: str) -> bigframes.dtypes.Dtype:
         ibis_type = typing.cast(
-            bigframes.dtypes.IbisDtype, self.get_any_column(key).type()
+            bigframes.dtypes.IbisDtype, self._get_any_column(key).type()
         )
         return typing.cast(
             bigframes.dtypes.Dtype,
             bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_type),
         )
 
-    def get_column(self, key: str) -> ibis_types.Value:
+    def _get_ibis_column(self, key: str) -> ibis_types.Value:
         """Gets the Ibis expression for a given column."""
-        if key not in self._column_names.keys():
+        if key not in self.column_ids:
             raise ValueError(
-                "Column name {} not in set of values: {}".format(
-                    key, self._column_names.keys()
-                )
+                "Column name {} not in set of values: {}".format(key, self.column_ids)
             )
         return typing.cast(ibis_types.Value, self._column_names[key])
 
-    def get_any_column(self, key: str) -> ibis_types.Value:
+    def _get_any_column(self, key: str) -> ibis_types.Value:
         """Gets the Ibis expression for a given column. Will also get hidden columns."""
         all_columns = {**self._column_names, **self._hidden_ordering_column_names}
         if key not in all_columns.keys():
@@ -303,26 +298,11 @@ def _get_hidden_ordering_column(self, key: str) -> ibis_types.Column:
         )
         return typing.cast(ibis_types.Column, self._hidden_ordering_column_names[key])
 
-    def apply_limit(self, max_results: int) -> ArrayValue:
-        table = self._to_ibis_expr(
-            ordering_mode="order_by",
-            expose_hidden_cols=True,
-        ).limit(max_results)
-        columns = [table[column_name] for column_name in self._column_names]
-        hidden_ordering_columns = [
-            table[column_name] for column_name in self._hidden_ordering_column_names
-        ]
-        return ArrayValue(
-            self._session,
-            table,
-            columns=columns,
-            hidden_ordering_columns=hidden_ordering_columns,
-            ordering=self._ordering,
-        )
-
     def filter(self, predicate_id: str, keep_null: bool = False) -> ArrayValue:
         """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression."""
-        condition = typing.cast(ibis_types.BooleanValue, self.get_column(predicate_id))
+        condition = typing.cast(
+            ibis_types.BooleanValue, self._get_ibis_column(predicate_id)
+        )
         if keep_null:
             condition = typing.cast(
                 ibis_types.BooleanValue,
@@ -357,9 +337,7 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue:
 
         .. warning::
             The row numbers of the result are non-deterministic; avoid relying on them.
         """
-        table = self._to_ibis_expr(
-            ordering_mode="order_by", expose_hidden_cols=True, fraction=fraction
-        )
+        table = self._to_ibis_expr(expose_hidden_cols=True, fraction=fraction)
         columns = [table[column_name] for column_name in self._column_names]
         hidden_ordering_columns = [
             table[column_name] for column_name in self._hidden_ordering_column_names
@@ -373,7 +351,7 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue:
         )
 
     @property
-    def offsets(self):
+    def _offsets(self) -> ibis_types.Value:
         if not self._ordering.is_sequential:
             raise ValueError(
                 "Expression does not have offsets. Generate them first using project_offsets."
             )
@@ -382,9 +360,9 @@
             raise ValueError(
                 "Ordering is invalid. Marked as sequential but no total order columns."
             )
-        return self.get_any_column(self._ordering.total_order_col.column_id)
+        return self._get_any_column(self._ordering.total_order_col.column_id)
 
-    def project_offsets(self) -> ArrayValue:
+    def _project_offsets(self) -> ArrayValue:
         """Create a new expression that contains offsets.
         Should only be executed when offsets are needed for an operation. Has no effect on expression semantics."""
         if self._ordering.is_sequential:
             return self
@@ -414,7 +392,7 @@ def _hide_column(self, column_id) -> ArrayValue:
         new_name = bigframes.core.guid.generate_guid(prefix="bigframes_hidden_")
         expr_builder.hidden_ordering_columns = [
             *self._hidden_ordering_columns,
-            self.get_column(column_id).name(new_name),
+            self._get_ibis_column(column_id).name(new_name),
         ]
         expr_builder.ordering = self._ordering.with_column_remap({column_id: new_name})
         return expr_builder.build()
@@ -427,26 +405,28 @@ def promote_offsets(self) -> typing.Tuple[ArrayValue, str]:
         ordering = self._ordering
 
         if (not ordering.is_sequential) or (not ordering.total_order_col):
-            return self.project_offsets().promote_offsets()
+            return self._project_offsets().promote_offsets()
         col_id = bigframes.core.guid.generate_guid()
         expr_builder = self.builder()
         expr_builder.columns = [
-            self.get_any_column(ordering.total_order_col.column_id).name(col_id),
+            self._get_any_column(ordering.total_order_col.column_id).name(col_id),
             *self.columns,
         ]
         return expr_builder.build(), col_id
 
     def select_columns(self, column_ids: typing.Sequence[str]):
-        return self.projection([self.get_column(col_id) for col_id in column_ids])
+        return self._projection(
+            [self._get_ibis_column(col_id) for col_id in column_ids]
+        )
 
-    def projection(self, columns: Iterable[ibis_types.Value]) -> ArrayValue:
+    def _projection(self, columns: Iterable[ibis_types.Value]) -> ArrayValue:
         """Creates a new expression based on this expression with new columns."""
         # TODO(swast): We might want to do validation here that columns derive
         # from the same table expression instead of (in addition to?) at
         # construction time.
         expr = self
-        for ordering_column in set(self.column_names.keys()).intersection(
+        for ordering_column in set(self.column_ids).intersection(
             [col_ref.column_id for col_ref in self._ordering.ordering_value_columns]
         ):
             # Need to hide ordering columns that are being dropped. Alternatively, could project offsets
@@ -459,7 +439,7 @@ def shape(self) -> typing.Tuple[int, int]:
         """Returns dimensions as (length, width) tuple."""
         width = len(self.columns)
-        count_expr = self._to_ibis_expr(ordering_mode="unordered").count()
+        count_expr = self._to_ibis_expr().count()
         sql = self._session.ibis_client.compile(count_expr)
 
         # Support in-memory engines for hermetic unit tests.
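
Reviewer note (illustration, not part of the patch): the offset predicates that the slice logic further down in this commit builds on top of `_offsets` and `_project_offsets` reduce to plain integer arithmetic. A minimal runnable sketch of that predicate in standalone Python, assuming `start`/`stop` are already normalized to non-negative positions and `step >= 1` (names are illustrative, not from the codebase):

    from typing import Optional

    # Sketch only. Mirrors the slice conditions (offsets >= start),
    # (offsets < stop), and ((offsets - start) % step == 0).
    def keep_row(offset: int, start: int, stop: Optional[int], step: int) -> bool:
        if offset < start:
            return False
        if stop is not None and offset >= stop:
            return False
        # Keep rows whose distance from start is a multiple of step.
        return (offset - start) % step == 0

    assert [r for r in range(10) if keep_row(r, 2, 9, 3)] == [2, 5, 8]
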
@@ -527,7 +507,7 @@ def project_unary_op( self, column_name: str, op: ops.UnaryOp, output_name=None ) -> ArrayValue: """Creates a new expression based on this expression with unary operation applied to one column.""" - value = op._as_ibis(self.get_column(column_name)).name( + value = op._as_ibis(self._get_ibis_column(column_name)).name( output_name or column_name ) return self._set_or_replace_by_id(output_name or column_name, value) @@ -541,7 +521,8 @@ def project_binary_op( ) -> ArrayValue: """Creates a new expression based on this expression with binary operation applied to two columns.""" value = op( - self.get_column(left_column_id), self.get_column(right_column_id) + self._get_ibis_column(left_column_id), + self._get_ibis_column(right_column_id), ).name(output_column_id) return self._set_or_replace_by_id(output_column_id, value) @@ -555,9 +536,9 @@ def project_ternary_op( ) -> ArrayValue: """Creates a new expression based on this expression with ternary operation applied to three columns.""" value = op( - self.get_column(col_id_1), - self.get_column(col_id_2), - self.get_column(col_id_3), + self._get_ibis_column(col_id_1), + self._get_ibis_column(col_id_2), + self._get_ibis_column(col_id_3), ).name(output_column_id) return self._set_or_replace_by_id(output_column_id, value) @@ -574,7 +555,7 @@ def aggregate( by_column_id: column id of the aggregation key, this is preserved through the transform dropna: whether null keys should be dropped """ - table = self._to_ibis_expr(ordering_mode="unordered") + table = self._to_ibis_expr() stats = { col_out: agg_op._as_ibis(table[col_in]) for col_in, agg_op, col_out in aggregations @@ -594,10 +575,10 @@ def aggregate( if dropna: for column_id in by_column_ids: expr = expr._filter( - ops.notnull_op._as_ibis(expr.get_column(column_id)) + ops.notnull_op._as_ibis(expr._get_ibis_column(column_id)) ) # Can maybe remove this as Ordering id is redundant as by_column is unique after aggregation - return expr.project_offsets() + return expr._project_offsets() else: aggregates = {**stats, ORDER_ID_COLUMN: ibis_types.literal(0)} result = table.aggregate(**aggregates) @@ -624,7 +605,7 @@ def corr_aggregate( Arguments: corr_aggregations: left_column_id, right_column_id, output_column_id tuples """ - table = self._to_ibis_expr(ordering_mode="unordered") + table = self._to_ibis_expr() stats = { col_out: table[col_left].corr(table[col_right], how="pop") for col_left, col_right, col_out in corr_aggregations @@ -664,7 +645,7 @@ def project_window_op( never_skip_nulls: will disable null skipping for operators that would otherwise do so skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection """ - column = typing.cast(ibis_types.Column, self.get_column(column_name)) + column = typing.cast(ibis_types.Column, self._get_ibis_column(column_name)) window = self._ibis_window_from_spec(window_spec, allow_ties=op.handles_ties) window_op = op._as_ibis(column, window) @@ -700,26 +681,36 @@ def project_window_op( def to_sql( self, - ordering_mode: Literal[ - "order_by", "string_encoded", "offset_col", "unordered" - ] = "order_by", - order_col_name: Optional[str] = ORDER_ID_COLUMN, + offset_column: typing.Optional[str] = None, col_id_overrides: typing.Mapping[str, str] = {}, + sorted: bool = False, ) -> str: + offsets_id = offset_column or ORDER_ID_COLUMN + sql = 
self._session.ibis_client.compile(
             self._to_ibis_expr(
-                ordering_mode=ordering_mode,
-                order_col_name=order_col_name,
+                ordering_mode="offset_col"
+                if (offset_column or sorted)
+                else "unordered",
+                order_col_name=offsets_id,
                 col_id_overrides=col_id_overrides,
             )
         )
+        if sorted:
+            sql = textwrap.dedent(
+                f"""
+                SELECT * EXCEPT ({offsets_id})
+                FROM ({sql})
+                ORDER BY {offsets_id}
+                """
+            )
         return typing.cast(str, sql)
 
     def _to_ibis_expr(
         self,
         ordering_mode: Literal[
-            "order_by", "string_encoded", "offset_col", "unordered"
-        ] = "order_by",
+            "string_encoded", "offset_col", "unordered"
+        ] = "unordered",
         order_col_name: Optional[str] = ORDER_ID_COLUMN,
         expose_hidden_cols: bool = False,
         fraction: Optional[float] = None,
@@ -731,8 +722,6 @@
         ArrayValue objects are sorted, so the following options are available
         to reflect this in the ibis expression.
 
-        * "order_by" (Default): The output table will not have an ordering
-          column, however there will be an order_by clause applied to the ouput.
         * "offset_col": Zero-based offsets are generated as a column, this will
           not sort the rows however.
         * "string_encoded": An ordered string column is provided in output table.
@@ -760,7 +749,6 @@
             An ibis expression representing the data held by the ArrayValue
             object.
         """
         assert ordering_mode in (
-            "order_by",
             "string_encoded",
             "offset_col",
             "unordered",
         )
@@ -775,18 +763,16 @@
             str
         ] = []  # Ordering/Filtering columns that will be dropped at end
-        if self.reduced_predicate is not None:
-            columns.append(self.reduced_predicate)
+        if self._reduced_predicate is not None:
+            columns.append(self._reduced_predicate)
             # Usually drop predicate as it will be all TRUE after filtering
             if not expose_hidden_cols:
-                columns_to_drop.append(self.reduced_predicate.get_name())
+                columns_to_drop.append(self._reduced_predicate.get_name())
         order_columns = self._create_order_columns(
             ordering_mode, order_col_name, expose_hidden_cols
         )
         columns.extend(order_columns)
-        if (ordering_mode == "order_by") and not expose_hidden_cols:
-            columns_to_drop.extend(col.get_name() for col in order_columns)
 
         # Special case for empty tables, since we can't create an empty
         # projection.
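
Reviewer note (illustration, not part of the patch): the new `sorted` path in `to_sql` above composes two pieces: pick an ordering mode for compilation, then wrap the compiled SQL so callers get ordered rows without seeing the offsets column. A runnable sketch of just that logic; the offsets column name is illustrative (the real code uses ORDER_ID_COLUMN), and the backtick quoting follows the form this series settles on in patch 5:

    import textwrap
    from typing import Optional

    # Sketch only. Mirrors the mode selection in to_sql(): materialize an
    # offsets column only when the caller asks for one or wants sorted output.
    def choose_ordering_mode(offset_column: Optional[str], sorted: bool) -> str:
        return "offset_col" if (offset_column or sorted) else "unordered"

    # Mirrors the sorted=True wrapper: order by the offsets column, then
    # drop it from the visible result.
    def wrap_sorted(inner_sql: str, offsets_id: str = "bigframes_ordering_id") -> str:
        return textwrap.dedent(
            f"""
            SELECT * EXCEPT (`{offsets_id}`)
            FROM ({inner_sql})
            ORDER BY `{offsets_id}`
            """
        )

    assert choose_ordering_mode(None, sorted=False) == "unordered"
    assert choose_ordering_mode(None, sorted=True) == "offset_col"
    print(wrap_sorted("SELECT 1 AS x, 0 AS bigframes_ordering_id"))
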
@@ -799,15 +785,8 @@ def _to_ibis_expr( bigframes.dtypes.ibis_value_to_canonical_type(column) for column in columns ) base_table = table - if self.reduced_predicate is not None: + if self._reduced_predicate is not None: table = table.filter(base_table[PREDICATE_COLUMN]) - if ordering_mode == "order_by": - table = table.order_by( - _convert_ordering_to_table_values( - {col: base_table[col] for col in table.columns}, - self._ordering.all_ordering_columns, - ) # type: ignore - ) table = table.drop(*columns_to_drop) if col_id_overrides: table = table.relabel(col_id_overrides) @@ -826,24 +805,24 @@ def _create_order_columns( return (self._create_offset_column().name(order_col_name),) elif ordering_mode == "string_encoded": return (self._create_string_ordering_column().name(order_col_name),) - elif ordering_mode == "order_by" or expose_hidden_cols: + elif expose_hidden_cols: return self.hidden_ordering_columns return () def _create_offset_column(self) -> ibis_types.IntegerColumn: if self._ordering.total_order_col and self._ordering.is_sequential: - offsets = self.get_any_column(self._ordering.total_order_col.column_id) + offsets = self._get_any_column(self._ordering.total_order_col.column_id) return typing.cast(ibis_types.IntegerColumn, offsets) else: window = ibis.window(order_by=self._ibis_order) if self._predicates: - window = window.group_by(self.reduced_predicate) + window = window.group_by(self._reduced_predicate) offsets = ibis.row_number().over(window) return typing.cast(ibis_types.IntegerColumn, offsets) def _create_string_ordering_column(self) -> ibis_types.StringColumn: if self._ordering.total_order_col and self._ordering.is_string_encoded: - string_order_ids = self.get_any_column( + string_order_ids = self._get_any_column( self._ordering.total_order_col.column_id ) return typing.cast(ibis_types.StringColumn, string_order_ids) @@ -852,7 +831,7 @@ def _create_string_ordering_column(self) -> ibis_types.StringColumn: and self._ordering.integer_encoding.is_encoded ): # Special case: non-negative integer ordering id can be converted directly to string without regenerating row numbers - int_values = self.get_any_column(self._ordering.total_order_col.column_id) + int_values = self._get_any_column(self._ordering.total_order_col.column_id) return encode_order_string( typing.cast(ibis_types.IntegerColumn, int_values), ) @@ -860,7 +839,7 @@ def _create_string_ordering_column(self) -> ibis_types.StringColumn: # Have to build string from scratch window = ibis.window(order_by=self._ibis_order) if self._predicates: - window = window.group_by(self.reduced_predicate) + window = window.group_by(self._reduced_predicate) row_nums = typing.cast( ibis_types.IntegerColumn, ibis.row_number().over(window) ) @@ -870,7 +849,8 @@ def start_query( self, job_config: Optional[bigquery.job.QueryJobConfig] = None, max_results: Optional[int] = None, - expose_extra_columns: bool = False, + *, + sorted: bool = True, ) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]: """Execute a query and return metadata about the results.""" # TODO(swast): Cache the job ID so we can look it up again if they ask @@ -883,8 +863,9 @@ def start_query( # a LocalSession for unit testing. # TODO(swast): Add a timeout here? If the query is taking a long time, # maybe we just print the job metadata that we have so far? 
- table = self._to_ibis_expr(expose_hidden_cols=expose_extra_columns) - sql = self._session.ibis_client.compile(table) # type:ignore + + # DO NOT COMMIT: Make this ordered + sql = self.to_sql(sorted=True) # type:ignore return self._session._start_query( sql=sql, job_config=job_config, @@ -903,7 +884,6 @@ def _reproject_to_table(self) -> ArrayValue: recursively in projections. """ table = self._to_ibis_expr( - ordering_mode="unordered", expose_hidden_cols=True, ) columns = [table[column_name] for column_name in self._column_names] @@ -926,14 +906,16 @@ def _reproject_to_table(self) -> ArrayValue: def _ibis_window_from_spec(self, window_spec: WindowSpec, allow_ties: bool = False): group_by: typing.List[ibis_types.Value] = ( [ - typing.cast(ibis_types.Column, _as_identity(self.get_column(column))) + typing.cast( + ibis_types.Column, _as_identity(self._get_ibis_column(column)) + ) for column in window_spec.grouping_keys ] if window_spec.grouping_keys else [] ) - if self.reduced_predicate is not None: - group_by.append(self.reduced_predicate) + if self._reduced_predicate is not None: + group_by.append(self._reduced_predicate) if window_spec.ordering: order_by = _convert_ordering_to_table_values( {**self._column_names, **self._hidden_ordering_column_names}, @@ -984,7 +966,7 @@ def unpivot( """ if how not in ("left", "right"): raise ValueError("'how' must be 'left' or 'right'") - table = self._to_ibis_expr(ordering_mode="unordered", expose_hidden_cols=True) + table = self._to_ibis_expr(expose_hidden_cols=True) row_n = len(row_labels) hidden_col_ids = self._hidden_ordering_column_names.keys() if not all( @@ -1107,7 +1089,9 @@ def unpivot( ) def assign(self, source_id: str, destination_id: str) -> ArrayValue: - return self._set_or_replace_by_id(destination_id, self.get_column(source_id)) + return self._set_or_replace_by_id( + destination_id, self._get_ibis_column(source_id) + ) def assign_constant( self, @@ -1134,10 +1118,10 @@ def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> ArrayVa return self._hide_column(id)._set_or_replace_by_id(id, new_value) builder = self.builder() - if id in self.column_names: + if id in self.column_ids: builder.columns = [ val if (col_id != id) else new_value.name(id) - for col_id, val in self.column_names.items() + for col_id, val in zip(self.column_ids, self._columns) ] else: builder.columns = [*self.columns, new_value.name(id)] @@ -1155,12 +1139,12 @@ def slice( if not step: step = 1 - expr_with_offsets = self.project_offsets() + expr_with_offsets = self._project_offsets() # start with True and reduce with start, stop, and step conditions - cond_list = [expr_with_offsets.offsets == expr_with_offsets.offsets] + cond_list = [expr_with_offsets._offsets == expr_with_offsets._offsets] - last_offset = expr_with_offsets.offsets.max() + last_offset = expr_with_offsets._offsets.max() # Convert negative indexes to positive indexes if start and start < 0: @@ -1170,20 +1154,20 @@ def slice( if start is not None: if step >= 1: - cond_list.append(expr_with_offsets.offsets >= start) + cond_list.append(expr_with_offsets._offsets >= start) else: - cond_list.append(expr_with_offsets.offsets <= start) + cond_list.append(expr_with_offsets._offsets <= start) if stop is not None: if step >= 1: - cond_list.append(expr_with_offsets.offsets < stop) + cond_list.append(expr_with_offsets._offsets < stop) else: - cond_list.append(expr_with_offsets.offsets > stop) + cond_list.append(expr_with_offsets._offsets > stop) if step > 1: start = start if (start is not None) else 0 - 
cond_list.append((expr_with_offsets.offsets - start) % step == 0) + cond_list.append((expr_with_offsets._offsets - start) % step == 0) if step < 0: start = start if (start is not None) else last_offset - cond_list.append((start - expr_with_offsets.offsets) % (-step) == 0) + cond_list.append((start - expr_with_offsets._offsets) % (-step) == 0) sliced_expr = expr_with_offsets._filter( functools.reduce(lambda x, y: x & y, cond_list) @@ -1192,16 +1176,14 @@ def slice( def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue: """Write the ArrayValue to a session table and create a new block object that references it.""" - ibis_expr = self._to_ibis_expr( - ordering_mode="unordered", expose_hidden_cols=True - ) + ibis_expr = self._to_ibis_expr(expose_hidden_cols=True) destination = self._session._ibis_to_session_table( ibis_expr, cluster_cols=cluster_cols, api_name="cache" ) table_expression = self._session.ibis_client.sql( f"SELECT * FROM `_SESSION`.`{destination.table_id}`" ) - new_columns = [table_expression[column] for column in self.column_names] + new_columns = [table_expression[column] for column in self.column_ids] new_hidden_columns = [ table_expression[column] for column in self._hidden_ordering_column_names ] diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 863852c684..510ba32e26 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -151,7 +151,7 @@ def value_columns(self) -> Sequence[str]: """All value columns, mutually exclusive with index columns.""" return [ column - for column in self._expr.column_names + for column in self._expr.column_ids if column not in self.index_columns ] @@ -443,9 +443,7 @@ def _compute_and_count( # TODO(swast): Allow for dry run and timeout. expr = self._apply_value_keys_to_expr(value_keys=value_keys) - results_iterator, query_job = expr.start_query( - max_results=max_results, expose_extra_columns=True - ) + results_iterator, query_job = expr.start_query(max_results=max_results) table_size = expr._get_table_size(query_job.destination) / _BYTES_TO_MEGABYTES fraction = ( @@ -482,12 +480,6 @@ def _compute_and_count( if self.index_columns: df.set_index(list(self.index_columns), inplace=True) df.index.names = self.index.names # type: ignore - - df.drop( - [col for col in df.columns if col not in self.value_columns], - axis=1, - inplace=True, - ) elif (sampling_method == _UNIFORM) and (random_state is None): filtered_expr = self.expr._uniform_sampling(fraction) block = Block( @@ -519,12 +511,6 @@ def _compute_and_count( df.set_index(list(self.index_columns), inplace=True) df.index.names = self.index.names # type: ignore - df.drop( - [col for col in df.columns if col not in self.value_columns], - axis=1, - inplace=True, - ) - return df, total_rows, query_job def _split( @@ -1086,7 +1072,7 @@ def _normalize_expression( ): """Normalizes expression by moving index columns to left.""" value_columns = [ - col_id for col_id in expr.column_names.keys() if col_id not in index_columns + col_id for col_id in expr.column_ids if col_id not in index_columns ] if (assert_value_size is not None) and ( len(value_columns) != assert_value_size @@ -1095,7 +1081,7 @@ def _normalize_expression( return expr.select_columns([*index_columns, *value_columns]) def slice( - self: bigframes.core.blocks.Block, + self, start: typing.Optional[int] = None, stop: typing.Optional[int] = None, step: typing.Optional[int] = None, @@ -1395,7 +1381,7 @@ def concat( ) result_block = Block( result_expr, - 
index_columns=list(result_expr.column_names.keys())[:index_nlevels], + index_columns=list(result_expr.column_ids)[:index_nlevels], column_labels=aligned_blocks[0].column_labels, index_labels=result_labels, ) @@ -1457,9 +1443,7 @@ def to_sql_query( # the BigQuery unicode column name feature? substitutions[old_id] = new_id - sql = array_value.to_sql( - ordering_mode="unordered", col_id_overrides=substitutions - ) + sql = array_value.to_sql(col_id_overrides=substitutions) return ( sql, new_ids[: len(idx_labels)], diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 9be7f22a71..db0843fcbc 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -426,10 +426,6 @@ def __init__( self._value_name = value_name self._dropna = dropna # Applies to aggregations but not windowing - @property - def _value(self): - return self._block.expr.get_column(self._value_column) - def all(self) -> series.Series: return self._aggregate(agg_ops.all_op) diff --git a/bigframes/core/indexes/index.py b/bigframes/core/indexes/index.py index f211afe4d5..7d15e67649 100644 --- a/bigframes/core/indexes/index.py +++ b/bigframes/core/indexes/index.py @@ -398,9 +398,7 @@ def to_pandas(self) -> pandas.Index: """Executes deferred operations and downloads the results.""" # Project down to only the index column. So the query can be cached to visualize other data. index_columns = list(self._block.index_columns) - expr = self._expr.projection( - [self._expr.get_any_column(col) for col in index_columns] - ) + expr = self._expr.select_columns(index_columns) results, _ = expr.start_query() df = expr._session._rows_to_dataframe(results) df = df.set_index(index_columns) diff --git a/bigframes/core/joins/row_identity.py b/bigframes/core/joins/row_identity.py index 66eb223990..156e7aef40 100644 --- a/bigframes/core/joins/row_identity.py +++ b/bigframes/core/joins/row_identity.py @@ -38,11 +38,11 @@ def join_by_row_identity( f"Only how='outer','left','inner' currently supported. {constants.FEEDBACK_LINK}" ) - if not left.table.equals(right.table): + if not left._table.equals(right._table): raise ValueError( "Cannot combine objects without an explicit join/merge key. 
" - f"Left based on: {left.table.compile()}, but " - f"right based on: {right.table.compile()}" + f"Left based on: {left._table.compile()}, but " + f"right based on: {right._table.compile()}" ) left_predicates = left._predicates @@ -63,11 +63,11 @@ def join_by_row_identity( left_mask = left_relative_predicates if how in ["right", "outer"] else None right_mask = right_relative_predicates if how in ["left", "outer"] else None joined_columns = [ - _mask_value(left.get_column(key), left_mask).name(map_left_id(key)) - for key in left.column_names.keys() + _mask_value(left._get_ibis_column(key), left_mask).name(map_left_id(key)) + for key in left.column_ids ] + [ - _mask_value(right.get_column(key), right_mask).name(map_right_id(key)) - for key in right.column_names.keys() + _mask_value(right._get_ibis_column(key), right_mask).name(map_right_id(key)) + for key in right.column_ids ] # If left isn't being masked, can just use left ordering @@ -108,7 +108,7 @@ def join_by_row_identity( joined_expr = core.ArrayValue( left._session, - left.table, + left._table, columns=joined_columns, hidden_ordering_columns=hidden_ordering_columns, ordering=new_ordering, diff --git a/bigframes/core/joins/single_column.py b/bigframes/core/joins/single_column.py index 8a9825cf0b..ea138aa885 100644 --- a/bigframes/core/joins/single_column.py +++ b/bigframes/core/joins/single_column.py @@ -79,14 +79,14 @@ def join_by_column( if ( allow_row_identity_join and how in bigframes.core.joins.row_identity.SUPPORTED_ROW_IDENTITY_HOW - and left.table.equals(right.table) + and left._table.equals(right._table) # Make sure we're joining on exactly the same column(s), at least with # regards to value its possible that they both have the same names but # were modified in different ways. Ignore differences in the names. 
and all( - left.get_any_column(lcol) + left._get_any_column(lcol) .name("index") - .equals(right.get_any_column(rcol).name("index")) + .equals(right._get_any_column(rcol).name("index")) for lcol, rcol in zip(left_column_ids, right_column_ids) ) ): @@ -95,16 +95,18 @@ def join_by_column( get_column_right, ) = bigframes.core.joins.row_identity.join_by_row_identity(left, right, how=how) left_join_keys = [ - combined_expr.get_column(get_column_left(col)) for col in left_column_ids + combined_expr._get_ibis_column(get_column_left(col)) + for col in left_column_ids ] right_join_keys = [ - combined_expr.get_column(get_column_right(col)) for col in right_column_ids + combined_expr._get_ibis_column(get_column_right(col)) + for col in right_column_ids ] join_key_cols = get_join_cols( left_join_keys, right_join_keys, how, coalesce_join_keys ) join_key_ids = [col.get_name() for col in join_key_cols] - combined_expr = combined_expr.projection( + combined_expr = combined_expr._projection( [*join_key_cols, *combined_expr.columns] ) if sort: @@ -126,13 +128,13 @@ def join_by_column( lmapping = { col_id: guid.generate_guid() for col_id in itertools.chain( - left.column_names, left._hidden_ordering_column_names + left.column_ids, left._hidden_ordering_column_names ) } rmapping = { col_id: guid.generate_guid() for col_id in itertools.chain( - right.column_names, right._hidden_ordering_column_names + right.column_ids, right._hidden_ordering_column_names ) } @@ -143,12 +145,10 @@ def get_column_right(col_id): return rmapping[col_id] left_table = left._to_ibis_expr( - ordering_mode="unordered", expose_hidden_cols=True, col_id_overrides=lmapping, ) right_table = right._to_ibis_expr( - ordering_mode="unordered", expose_hidden_cols=True, col_id_overrides=rmapping, ) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index ea06e28cdf..5d960d6113 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -547,7 +547,7 @@ def _apply_series_binop( other._block.index, how=how ) - series_column_id = other._value.get_name() + series_column_id = other._value_column series_col = get_column_right(series_column_id) block = joined_index._block for column_id, label in zip( @@ -2404,13 +2404,11 @@ def _create_io_query(self, index: bool, ordering_id: Optional[str]) -> str: if ordering_id is not None: return array_value.to_sql( - ordering_mode="offset_col", + offset_column=ordering_id, col_id_overrides=id_overrides, - order_col_name=ordering_id, ) else: return array_value.to_sql( - ordering_mode="unordered", col_id_overrides=id_overrides, ) diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index add6af57f4..f6b47f975e 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -16,7 +16,6 @@ import typing -import ibis.expr.types as ibis_types import pandas as pd import bigframes.constants as constants @@ -98,11 +97,6 @@ def __init__( if pd_series.name is None: self._block = self._block.with_column_labels([None]) - @property - def _value(self) -> ibis_types.Value: - """Private property to get Ibis expression for the value column.""" - return self._block.expr.get_column(self._value_column) - @property def _value_column(self) -> str: return self._block.value_columns[0] diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index ee0cefb3d2..ba789cab31 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import ibis import ibis.expr.types as ibis_types import pandas @@ -45,7 +44,7 @@ def test_arrayvalue_constructor_from_ibis_table_adds_all_columns(): actual = core.ArrayValue( session=session, table=ibis_table, columns=columns, ordering=ordering ) - assert actual.table is ibis_table + assert actual._table is ibis_table assert len(actual.columns) == 3 @@ -79,37 +78,12 @@ def test_arrayvalue_with_get_column(): ), total_ordering_columns=["col1"], ) - col1 = value.get_column("col1") + col1 = value._get_ibis_column("col1") assert isinstance(col1, ibis_types.Value) assert col1.get_name() == "col1" assert col1.type().is_int64() -def test_arrayvalue_to_ibis_expr_with_projection(): - value = resources.create_arrayvalue( - pandas.DataFrame( - { - "col1": [1, 2, 3], - "col2": ["a", "b", "c"], - "col3": [0.1, 0.2, 0.3], - } - ), - total_ordering_columns=["col1"], - ) - expr = value.projection( - [ - (value.table["col1"] + ibis.literal(-1)).name("int64_col"), - ibis.literal(123456789).name("literals"), - value.table["col2"].name("string_col"), - ] - ) - actual = expr._to_ibis_expr() - assert len(actual.columns) == 3 - assert actual.columns[0] == "int64_col" - assert actual.columns[1] == "literals" - assert actual.columns[2] == "string_col" - - def test_arrayvalues_to_ibis_expr_with_get_column(): value = resources.create_arrayvalue( pandas.DataFrame( @@ -121,7 +95,7 @@ def test_arrayvalues_to_ibis_expr_with_get_column(): ), total_ordering_columns=["col1"], ) - expr = value.get_column("col1") + expr = value._get_ibis_column("col1") assert expr.get_name() == "col1" assert expr.type().is_int64() From 7d7a5077246e77a3a6b1240544197130267dbcd9 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 3 Oct 2023 07:36:03 +0000 Subject: [PATCH 2/6] move slice completely to block --- bigframes/core/__init__.py | 52 ++-------------------- bigframes/core/blocks.py | 91 ++++++++++++++++++++++++++++++++++---- 2 files changed, 86 insertions(+), 57 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index d94b60c866..c4ebc6f07c 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -351,7 +351,7 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue: ) @property - def _offsets(self) -> ibis_types.Value: + def _offsets(self) -> ibis_types.IntegerColumn: if not self._ordering.is_sequential: raise ValueError( "Expression does not have offsets. Generate them first using project_offsets." @@ -360,7 +360,8 @@ def _offsets(self) -> ibis_types.Value: raise ValueError( "Ordering is invalid. Marked as sequential but no total order columns." ) - return self._get_any_column(self._ordering.total_order_col.column_id) + column = self._get_any_column(self._ordering.total_order_col.column_id) + return typing.cast(ibis_types.IntegerColumn, column) def _project_offsets(self) -> ArrayValue: """Create a new expression that contains offsets. Should only be executed when offsets are needed for an operations. 
Has no effect on expression semantics.""" @@ -1127,53 +1128,6 @@ def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> ArrayVa builder.columns = [*self.columns, new_value.name(id)] return builder.build() - def slice( - self, - start: typing.Optional[int] = None, - stop: typing.Optional[int] = None, - step: typing.Optional[int] = None, - ) -> ArrayValue: - if step == 0: - raise ValueError("slice step cannot be zero") - - if not step: - step = 1 - - expr_with_offsets = self._project_offsets() - - # start with True and reduce with start, stop, and step conditions - cond_list = [expr_with_offsets._offsets == expr_with_offsets._offsets] - - last_offset = expr_with_offsets._offsets.max() - - # Convert negative indexes to positive indexes - if start and start < 0: - start = last_offset + start + 1 - if stop and stop < 0: - stop = last_offset + stop + 1 - - if start is not None: - if step >= 1: - cond_list.append(expr_with_offsets._offsets >= start) - else: - cond_list.append(expr_with_offsets._offsets <= start) - if stop is not None: - if step >= 1: - cond_list.append(expr_with_offsets._offsets < stop) - else: - cond_list.append(expr_with_offsets._offsets > stop) - if step > 1: - start = start if (start is not None) else 0 - cond_list.append((expr_with_offsets._offsets - start) % step == 0) - if step < 0: - start = start if (start is not None) else last_offset - cond_list.append((start - expr_with_offsets._offsets) % (-step) == 0) - - sliced_expr = expr_with_offsets._filter( - functools.reduce(lambda x, y: x & y, cond_list) - ) - return sliced_expr if step > 0 else sliced_expr.reversed() - def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue: """Write the ArrayValue to a session table and create a new block object that references it.""" ibis_expr = self._to_ibis_expr(expose_hidden_cols=True) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 510ba32e26..c793faa00e 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1086,15 +1086,90 @@ def slice( stop: typing.Optional[int] = None, step: typing.Optional[int] = None, ) -> bigframes.core.blocks.Block: - sliced_expr = self.expr.slice(start=start, stop=stop, step=step) - # since this is slice, return a copy even if unchanged - block = Block( - sliced_expr, - index_columns=self.index_columns, - column_labels=self.column_labels, - index_labels=self._index_labels, + start = start or 0 + if step is None: + step = 1 + if step == 0: + raise ValueError("slice step cannot be zero") + if step < 0: + adj_start = -start + 1 if start > 0 else -start - 1 + if stop: + adj_stop = -stop + 1 if stop > 0 else -stop - 1 + else: + adj_stop = None + adj_step = -step + return ( + self.reversed()._forward_slice(adj_start, adj_stop, adj_step).reversed() + ) + return self._forward_slice(start or 0, stop, step) + + def _forward_slice(self, start: int = 0, stop=None, step: int = 1): + """Performs slice but only for positive step size.""" + if step <= 0: + raise ValueError("forward_slice only supports positive step size") + + use_postive_offsets = ( + (start > 0) + or ((stop is not None) and (stop >= 0)) + or ((step > 1) and (start >= 0)) ) - return block + use_negative_offsets = ( + (start < 0) or (stop and (stop < 0)) or ((step > 1) and (start < 0)) + ) + + block = self + + # only generate offsets that are used + positive_offsets = None + negative_offsets = None + if use_postive_offsets: + block, positive_offsets = self.promote_offsets() + if use_negative_offsets: + block, negative_offsets = 
block.reversed().promote_offsets() + block = block.reversed() + + conditions = [] + if start != 0: + if start > 0: + op = ops.partial_right(ops.ge_op, start) + assert positive_offsets + block, start_cond = block.apply_unary_op(positive_offsets, op) + else: + op = ops.partial_right(ops.le_op, -start - 1) + assert negative_offsets + block, start_cond = block.apply_unary_op(negative_offsets, op) + conditions.append(start_cond) + if stop is not None: + if stop >= 0: + op = ops.partial_right(ops.lt_op, stop) + assert positive_offsets + block, stop_cond = block.apply_unary_op(positive_offsets, op) + else: + op = ops.partial_right(ops.gt_op, -stop - 1) + assert negative_offsets + block, stop_cond = block.apply_unary_op(negative_offsets, op) + conditions.append(stop_cond) + + if step > 1: + op = ops.partial_right(ops.mod_op, step) + if start >= 0: + op = ops.partial_right(ops.sub_op, start) + assert positive_offsets + block, start_diff = block.apply_unary_op(positive_offsets, op) + else: + op = ops.partial_right(ops.sub_op, -start + 1) + assert negative_offsets + block, start_diff = block.apply_unary_op(negative_offsets, op) + modulo_op = ops.partial_right(ops.mod_op, step) + block, mod = block.apply_unary_op(start_diff, modulo_op) + is_zero_op = ops.partial_right(ops.eq_op, 0) + block, step_cond = block.apply_unary_op(mod, is_zero_op) + conditions.append(step_cond) + + for cond in conditions: + block = block.filter(cond) + + return block.select_columns(self.value_columns) # Using cache to optimize for Jupyter Notebook's behavior where both '__repr__' # and '__repr_html__' are called in a single display action, reducing redundant From 589b8f92311d92b9c798b2d22ae102dd0203c457 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 3 Oct 2023 17:36:46 +0000 Subject: [PATCH 3/6] fix new iloc impl --- bigframes/core/blocks.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index c793faa00e..0760e7e9a0 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -1086,20 +1086,16 @@ def slice( stop: typing.Optional[int] = None, step: typing.Optional[int] = None, ) -> bigframes.core.blocks.Block: - start = start or 0 if step is None: step = 1 if step == 0: raise ValueError("slice step cannot be zero") if step < 0: - adj_start = -start + 1 if start > 0 else -start - 1 - if stop: - adj_stop = -stop + 1 if stop > 0 else -stop - 1 - else: - adj_stop = None - adj_step = -step - return ( - self.reversed()._forward_slice(adj_start, adj_stop, adj_step).reversed() + reverse_start = (-start - 1) if start else 0 + reverse_stop = (-stop - 1) if stop else None + reverse_step = -step + return self.reversed()._forward_slice( + reverse_start, reverse_stop, reverse_step ) return self._forward_slice(start or 0, stop, step) @@ -1122,6 +1118,7 @@ def _forward_slice(self, start: int = 0, stop=None, step: int = 1): # only generate offsets that are used positive_offsets = None negative_offsets = None + if use_postive_offsets: block, positive_offsets = self.promote_offsets() if use_negative_offsets: From ec3d2f0a72e0916ee020fad987ca9ced589a8343 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 5 Oct 2023 18:06:50 +0000 Subject: [PATCH 4/6] Add reproject to prevent delayed filter application in series.apply --- bigframes/series.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bigframes/series.py b/bigframes/series.py index 2a0b1ff1fa..717a85a93e 100644 --- a/bigframes/series.py +++ 
b/bigframes/series.py @@ -1150,7 +1150,11 @@ def _groupby_values( def apply(self, func) -> Series: # TODO(shobs, b/274645634): Support convert_dtype, args, **kwargs # is actually a ternary op - return self._apply_unary_op(ops.RemoteFunctionOp(func)) + # Reproject as workaround to applying filter too late. This forces the filter + # to be applied before passing data to remote function, protecting from bad + # inputs causing errors. + reprojected_series = Series(self._block._force_reproject()) + return reprojected_series._apply_unary_op(ops.RemoteFunctionOp(func)) def add_prefix(self, prefix: str, axis: int | str | None = None) -> Series: return Series(self._get_block().add_prefix(prefix)) From 45bfc43f3231e513064ab8d3aff25b4fb41c49ef Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 5 Oct 2023 19:11:35 +0000 Subject: [PATCH 5/6] pr comments --- bigframes/core/__init__.py | 25 ++++++++++++------------- bigframes/core/joins/single_column.py | 2 ++ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 724c9904f3..ccfd682215 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -337,7 +337,9 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue: .. warning:: The row numbers of result is non-deterministic, avoid to use. """ - table = self._to_ibis_expr(expose_hidden_cols=True, fraction=fraction) + table = self._to_ibis_expr( + "unordered", expose_hidden_cols=True, fraction=fraction + ) columns = [table[column_name] for column_name in self._column_names] hidden_ordering_columns = [ table[column_name] for column_name in self._hidden_ordering_column_names @@ -440,7 +442,7 @@ def _projection(self, columns: Iterable[ibis_types.Value]) -> ArrayValue: def shape(self) -> typing.Tuple[int, int]: """Returns dimensions as (length, width) tuple.""" width = len(self.columns) - count_expr = self._to_ibis_expr().count() + count_expr = self._to_ibis_expr("unordered").count() sql = self._session.ibis_client.compile(count_expr) # Support in-memory engines for hermetic unit tests. @@ -556,7 +558,7 @@ def aggregate( by_column_id: column id of the aggregation key, this is preserved through the transform dropna: whether null keys should be dropped """ - table = self._to_ibis_expr() + table = self._to_ibis_expr("unordered") stats = { col_out: agg_op._as_ibis(table[col_in]) for col_in, agg_op, col_out in aggregations @@ -606,7 +608,7 @@ def corr_aggregate( Arguments: corr_aggregations: left_column_id, right_column_id, output_column_id tuples """ - table = self._to_ibis_expr() + table = self._to_ibis_expr("unordered") stats = { col_out: table[col_left].corr(table[col_right], how="pop") for col_left, col_right, col_out in corr_aggregations @@ -700,18 +702,16 @@ def to_sql( if sorted: sql = textwrap.dedent( f""" - SELECT * EXCEPT ({offsets_id}) + SELECT * EXCEPT (`{offsets_id}`) FROM ({sql}) - ORDER BY {offsets_id} + ORDER BY `{offsets_id}` """ ) return typing.cast(str, sql) def _to_ibis_expr( self, - ordering_mode: Literal[ - "string_encoded", "offset_col", "unordered" - ] = "unordered", + ordering_mode: Literal["string_encoded", "offset_col", "unordered"], order_col_name: Optional[str] = ORDER_ID_COLUMN, expose_hidden_cols: bool = False, fraction: Optional[float] = None, @@ -864,8 +864,6 @@ def start_query( # a LocalSession for unit testing. # TODO(swast): Add a timeout here? If the query is taking a long time, # maybe we just print the job metadata that we have so far? 
- - # DO NOT COMMIT: Make this ordered sql = self.to_sql(sorted=True) # type:ignore return self._session._start_query( sql=sql, @@ -885,6 +883,7 @@ def _reproject_to_table(self) -> ArrayValue: recursively in projections. """ table = self._to_ibis_expr( + "unordered", expose_hidden_cols=True, ) columns = [table[column_name] for column_name in self._column_names] @@ -967,7 +966,7 @@ def unpivot( """ if how not in ("left", "right"): raise ValueError("'how' must be 'left' or 'right'") - table = self._to_ibis_expr(expose_hidden_cols=True) + table = self._to_ibis_expr("unordered", expose_hidden_cols=True) row_n = len(row_labels) hidden_col_ids = self._hidden_ordering_column_names.keys() if not all( @@ -1130,7 +1129,7 @@ def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> ArrayVa def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue: """Write the ArrayValue to a session table and create a new block object that references it.""" - ibis_expr = self._to_ibis_expr(expose_hidden_cols=True) + ibis_expr = self._to_ibis_expr("unordered", expose_hidden_cols=True) destination = self._session._ibis_to_session_table( ibis_expr, cluster_cols=cluster_cols, api_name="cache" ) diff --git a/bigframes/core/joins/single_column.py b/bigframes/core/joins/single_column.py index 09b2594f2f..f194b8f8c4 100644 --- a/bigframes/core/joins/single_column.py +++ b/bigframes/core/joins/single_column.py @@ -138,10 +138,12 @@ def get_column_right(col_id): return rmapping[col_id] left_table = left._to_ibis_expr( + "unordered", expose_hidden_cols=True, col_id_overrides=lmapping, ) right_table = right._to_ibis_expr( + "unordered", expose_hidden_cols=True, col_id_overrides=rmapping, ) From 163a84b549656f49f472cc4d3ef357287e369045 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 5 Oct 2023 19:18:32 +0000 Subject: [PATCH 6/6] fix unit tests --- tests/unit/test_core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index ba789cab31..69b9e79807 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -112,7 +112,7 @@ def test_arrayvalues_to_ibis_expr_with_concat(): total_ordering_columns=["col1"], ) expr = value.concat([value]) - actual = expr._to_ibis_expr() + actual = expr._to_ibis_expr("unordered") assert len(actual.columns) == 3 # TODO(ashleyxu, b/299631930): test out the union expression assert actual.columns[0] == "column_0" @@ -149,7 +149,7 @@ def test_arrayvalues_to_ibis_expr_with_project_binary_op(): ) expr = value.project_binary_op("col2", "col3", ops.add_op, "col4") assert expr.columns[3].type().is_float64() - actual = expr._to_ibis_expr() + actual = expr._to_ibis_expr("unordered") assert len(expr.columns) == 4 assert actual.columns[3] == "col4" @@ -168,7 +168,7 @@ def test_arrayvalues_to_ibis_expr_with_project_ternary_op(): ) expr = value.project_ternary_op("col2", "col3", "col4", ops.where_op, "col5") assert expr.columns[4].type().is_float64() - actual = expr._to_ibis_expr() + actual = expr._to_ibis_expr("unordered") assert len(expr.columns) == 5 assert actual.columns[4] == "col5" @@ -189,7 +189,7 @@ def test_arrayvalue_to_ibis_expr_with_aggregate(): by_column_ids=["col1"], dropna=False, ) - actual = expr._to_ibis_expr() + actual = expr._to_ibis_expr("unordered") assert len(expr.columns) == 2 assert actual.columns[0] == "col1" assert actual.columns[1] == "col4" @@ -208,7 +208,7 @@ def test_arrayvalue_to_ibis_expr_with_corr_aggregate(): total_ordering_columns=["col1"], ) expr = 
value.corr_aggregate(corr_aggregations=[("col1", "col3", "col4")])
-    actual = expr._to_ibis_expr()
+    actual = expr._to_ibis_expr("unordered")
     assert len(expr.columns) == 1
     assert actual.columns[0] == "col4"
     assert expr.columns[0].type().is_float64()
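
Closing reviewer note (illustration, not part of the patch): the negative-step case of the reworked `Block.slice` (patches 2 and 3) is handled by mapping to a forward slice over the reversed block. That index mapping can be sanity-checked against Python's built-in slicing; a small self-contained sketch (plain lists stand in for blocks, and the start=0/stop=0 boundary cases are not exercised here):

    # Sketch only. Uses the same reverse_start/reverse_stop normalization as
    # Block.slice for step < 0, then compares against native list slicing.
    def mapped_negative_slice(seq, start, stop, step):
        assert step < 0
        reverse_start = (-start - 1) if start else 0
        reverse_stop = (-stop - 1) if stop else None
        return list(reversed(seq))[reverse_start:reverse_stop:-step]

    data = list(range(10))
    for start, stop, step in [(8, 2, -2), (None, None, -1), (9, None, -3)]:
        assert mapped_negative_slice(data, start, stop, step) == data[start:stop:step]
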