From 498b5cd16d3159affb2e40c17bb1fb5e26ab696d Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 27 Oct 2023 23:10:00 +0000 Subject: [PATCH 01/11] feat: add unordered sql compilation --- bigframes/core/__init__.py | 43 +- bigframes/core/blocks.py | 9 +- bigframes/core/compile/__init__.py | 4 +- bigframes/core/compile/compiled.py | 1906 ++++++++++++------- bigframes/core/compile/compiler.py | 164 +- bigframes/core/compile/concat.py | 100 + bigframes/core/compile/row_identity.py | 70 +- bigframes/core/compile/single_column.py | 103 +- bigframes/dataframe.py | 6 + bigframes/session/__init__.py | 6 +- tests/system/large/ml/test_cluster.py | 4 +- tests/system/large/ml/test_pipeline.py | 4 +- tests/system/large/test_remote_function.py | 28 +- tests/system/small/ml/test_cluster.py | 4 +- tests/system/small/ml/test_core.py | 2 +- tests/system/small/ml/test_decomposition.py | 4 +- tests/system/small/test_dataframe.py | 50 +- tests/system/small/test_dataframe_io.py | 13 +- tests/system/small/test_multiindex.py | 6 +- tests/system/small/test_pandas.py | 8 +- tests/system/small/test_remote_function.py | 26 +- tests/system/small/test_series.py | 10 +- tests/system/utils.py | 25 +- tests/unit/test_core.py | 20 +- 24 files changed, 1706 insertions(+), 909 deletions(-) create mode 100644 bigframes/core/compile/concat.py diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 4653f0ab6a..7f2e231edb 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -23,7 +23,8 @@ import ibis.expr.types as ibis_types import pandas -import bigframes.core.compile as compiled +import bigframes.core.compile.compiled as compiled +import bigframes.core.compile.compiler as compiler import bigframes.core.guid import bigframes.core.nodes as nodes from bigframes.core.ordering import OrderingColumnReference @@ -77,7 +78,7 @@ def from_pandas(cls, pd_df: pandas.DataFrame): @property def column_ids(self) -> typing.Sequence[str]: - return self.compile().column_ids + return self._compile().column_ids @property def session(self) -> Session: @@ -87,15 +88,18 @@ def session(self) -> Session: return self.node.session[0] if required_session else get_global_session() def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: - return self.compile().get_column_type(key) + return self._compile().get_column_type(key) - def compile(self) -> compiled.CompiledArrayValue: - return compiled.compile_node(self.node) + def _compile(self) -> compiled.OrderedIR: + return compiler.compile_ordered(self.node) + + def _compile_unordered(self) -> compiled.UnorderedIR: + return compiler.compile_unordered(self.node) def shape(self) -> typing.Tuple[int, int]: """Returns dimensions as (length, width) tuple.""" - width = len(self.compile().columns) - count_expr = self.compile()._to_ibis_expr("unordered").count() + width = len(self._compile().columns) + count_expr = self._compile()._to_ibis_expr(ordering_mode="unordered").count() # Support in-memory engines for hermetic unit tests. 
if not self.node.session: @@ -120,11 +124,14 @@ def to_sql( col_id_overrides: typing.Mapping[str, str] = {}, sorted: bool = False, ) -> str: - return self.compile().to_sql( - offset_column=offset_column, - col_id_overrides=col_id_overrides, - sorted=sorted, - ) + if sorted or offset_column: + return self._compile().to_sql( + offset_column=offset_column, + col_id_overrides=col_id_overrides, + sorted=sorted, + ) + else: + return self._compile_unordered().to_sql(col_id_overrides=col_id_overrides) def start_query( self, @@ -153,8 +160,10 @@ def start_query( def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue: """Write the ArrayValue to a session table and create a new block object that references it.""" - compiled = self.compile() - ibis_expr = compiled._to_ibis_expr("unordered", expose_hidden_cols=True) + compiled = self._compile() + ibis_expr = compiled._to_ibis_expr( + ordering_mode="unordered", expose_hidden_cols=True + ) destination = self.session._ibis_to_session_table( ibis_expr, cluster_cols=cluster_cols, api_name="cache" ) @@ -210,12 +219,6 @@ def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue: nodes.SelectNode(child=self.node, column_ids=tuple(column_ids)) ) - def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue: - """Append together multiple ArrayValue objects.""" - return ArrayValue( - nodes.ConcatNode(children=tuple([self.node, *[val.node for val in other]])) - ) - def project_unary_op( self, column_name: str, op: ops.UnaryOp, output_name=None ) -> ArrayValue: diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index cc13edeaf9..589fed4199 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -386,6 +386,8 @@ def to_pandas( max_download_size: Optional[int] = None, sampling_method: Optional[str] = None, random_state: Optional[int] = None, + *, + ordered: bool = True, ) -> Tuple[pd.DataFrame, bigquery.QueryJob]: """Run query and download results as a pandas DataFrame.""" if max_download_size is None: @@ -412,6 +414,7 @@ def to_pandas( max_download_size=max_download_size, sampling_method=sampling_method, random_state=random_state, + ordered=ordered, ) return df, query_job @@ -446,12 +449,16 @@ def _compute_and_count( max_download_size: Optional[int] = None, sampling_method: Optional[str] = None, random_state: Optional[int] = None, + *, + ordered: bool = True, ) -> Tuple[pd.DataFrame, int, bigquery.QueryJob]: """Run query and download results as a pandas DataFrame. Return the total number of results as well.""" # TODO(swast): Allow for dry run and timeout. expr = self._apply_value_keys_to_expr(value_keys=value_keys) - results_iterator, query_job = expr.start_query(max_results=max_results) + results_iterator, query_job = expr.start_query( + max_results=max_results, sorted=ordered + ) table_size = ( expr.session._get_table_size(query_job.destination) / _BYTES_TO_MEGABYTES diff --git a/bigframes/core/compile/__init__.py b/bigframes/core/compile/__init__.py index c86f4463dc..af3f32aefb 100644 --- a/bigframes/core/compile/__init__.py +++ b/bigframes/core/compile/__init__.py @@ -13,9 +13,9 @@ # limitations under the License. 
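For illustration, the dispatch in the new `ArrayValue.to_sql` above plays out as follows. This is a minimal sketch, not part of the patch, and it assumes an in-memory expression built via the `from_pandas` classmethod shown above; `ArrayValue` is internal API and subject to change:

    import pandas
    from bigframes.core import ArrayValue

    value = ArrayValue.from_pandas(pandas.DataFrame({"a": [1, 2, 3]}))
    sql_unordered = value.to_sql()           # no offsets/sort requested: _compile_unordered()
    sql_ordered = value.to_sql(sorted=True)  # ordering required: _compile() -> OrderedIR

The unordered path is the new default, so plain `to_sql()` no longer has to carry hidden ordering columns through the generated SQL.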
 from bigframes.core.compile.compiled import CompiledArrayValue
-from bigframes.core.compile.compiler import compile_node
+from bigframes.core.compile.compiler import compile_ordered
 
 __all__ = [
-    "compile_node",
+    "compile_ordered",
     "CompiledArrayValue",
 ]
diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py
index 1134f1aab0..0feecae5b6 100644
--- a/bigframes/core/compile/compiled.py
+++ b/bigframes/core/compile/compiled.py
@@ -14,7 +14,6 @@
 from __future__ import annotations
 
 import functools
-import math
 import textwrap
 import typing
 from typing import Collection, Iterable, Literal, Optional, Sequence
@@ -32,8 +31,6 @@
     ExpressionOrdering,
     IntegerEncoding,
     OrderingColumnReference,
-    reencode_order_string,
-    StringEncoding,
 )
 import bigframes.core.utils as utils
 from bigframes.core.window_spec import WindowSpec
@@ -45,76 +42,250 @@
 PREDICATE_COLUMN = "bigframes_predicate"
 
 
-class CompiledArrayValue:
-    """Immutable BigQuery DataFrames expression tree.
+class CompiledArrayValue(typing.Protocol):
+    @property
+    def column_ids(self) -> typing.Sequence[str]:
+        ...
 
-    Note: Usage of this class is considered to be private and subject to change
-    at any time.
+    def to_sql(self) -> str:
+        ...
 
-    This class is a wrapper around Ibis expressions. Its purpose is to defer
-    Ibis projection operations to keep generated SQL small and correct when
-    mixing and matching columns from different versions of a DataFrame.
+    def _to_ibis_expr(self, *args, **kwargs) -> ibis_types.Table:
+        """Exposed for testing purposes only."""
+        ...
 
-    Args:
-        table: An Ibis table expression.
-        columns: Ibis value expressions that can be projected as columns.
-        hidden_ordering_columns: Ibis value expressions to store ordering.
-        ordering: An ordering property of the data frame.
-        predicates: A list of filters on the data frame.
-    """
+    def select_columns(self, column_ids: typing.Sequence[str]) -> CompiledArrayValue:
+        ...
+
+    def drop_columns(self, columns: Iterable[str]) -> CompiledArrayValue:
+        return self.select_columns(
+            [col for col in self.column_ids if col not in columns]
+        )
+
+    def get_column_type(self, key: str) -> bigframes.dtypes.Dtype:
+        ...
+
+    def filter(self, predicate_id: str, keep_null: bool = False) -> CompiledArrayValue:
+        """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression."""
+        ...
+
+    def order_by(
+        self, by: Sequence[OrderingColumnReference], stable: bool = False
+    ) -> CompiledArrayValue:
+        ...
+
+    def reversed(self) -> CompiledArrayValue:
+        ...
+
+    def project_unary_op(
+        self, column_name: str, op: ops.UnaryOp, output_name=None
+    ) -> CompiledArrayValue:
+        """Creates a new expression based on this expression with unary operation applied to one column."""
+        ...
+
+    def project_binary_op(
+        self,
+        left_column_id: str,
+        right_column_id: str,
+        op: ops.BinaryOp,
+        output_column_id: str,
+    ) -> CompiledArrayValue:
+        """Creates a new expression based on this expression with binary operation applied to two columns."""
+        ...
+
+    def project_ternary_op(
+        self,
+        col_id_1: str,
+        col_id_2: str,
+        col_id_3: str,
+        op: ops.TernaryOp,
+        output_column_id: str,
+    ) -> CompiledArrayValue:
+        """Creates a new expression based on this expression with ternary operation applied to three columns."""
+        ...
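Because `CompiledArrayValue` is now a `typing.Protocol`, downstream code can be typed structurally against it rather than against a concrete IR class. A rough sketch of the idea; `render_sql` is a hypothetical helper, not part of this patch:

    def render_sql(ir: CompiledArrayValue) -> str:
        # Both OrderedIR and UnorderedIR satisfy the protocol structurally;
        # ordering-only operations simply raise on the unordered implementation.
        return ir.to_sql()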
+
+    def aggregate(
+        self,
+        aggregations: typing.Sequence[typing.Tuple[str, agg_ops.AggregateOp, str]],
+        by_column_ids: typing.Sequence[str] = (),
+        dropna: bool = True,
+    ) -> CompiledArrayValue:
+        """
+        Apply aggregations to the expression.
+        Arguments:
+            aggregations: input_column_id, operation, output_column_id tuples
+            by_column_ids: column ids of the aggregation key; these are preserved through the transform
+            dropna: whether null keys should be dropped
+        """
+        ...
+
+    def corr_aggregate(
+        self, corr_aggregations: typing.Sequence[typing.Tuple[str, str, str]]
+    ) -> CompiledArrayValue:
+        """
+        Get correlations between each left_column_id and right_column_id, stored in the respective output_column_id.
+        This uses BigQuery's CORR under the hood, and thus only Pearson's method is used.
+        Arguments:
+            corr_aggregations: left_column_id, right_column_id, output_column_id tuples
+        """
+        ...
+
+    def assign(self, source_id: str, destination_id: str) -> CompiledArrayValue:
+        ...
+
+    def assign_constant(
+        self,
+        destination_id: str,
+        value: typing.Any,
+        dtype: typing.Optional[bigframes.dtypes.Dtype],
+    ) -> CompiledArrayValue:
+        ...
+
+    def unpivot(
+        self,
+        row_labels: typing.Sequence[typing.Hashable],
+        unpivot_columns: typing.Sequence[
+            typing.Tuple[str, typing.Sequence[typing.Optional[str]]]
+        ],
+        *,
+        passthrough_columns: typing.Sequence[str] = (),
+        index_col_ids: typing.Sequence[str] = ["index"],
+        dtype: typing.Union[
+            bigframes.dtypes.Dtype, typing.Sequence[bigframes.dtypes.Dtype]
+        ] = pandas.Float64Dtype(),
+        how="left",
+    ) -> CompiledArrayValue:
+        """
+        Unpivot ArrayValue columns.
+
+        Args:
+            row_labels: Identifies the source of the row. Must be equal in length to the source column lists in the unpivot_columns argument.
+            unpivot_columns: Mapping of column id to list of input column ids. Lists of input columns may use None.
+            passthrough_columns: Columns that will not be unpivoted. Column id will be preserved.
+            index_col_ids (list of str): The column ids to be used for the row labels.
+            dtype (dtype or list of dtype): Dtype to use for the unpivot columns. If list, must be equal in number to unpivot_columns.
+
+        Returns:
+            ArrayValue: The unpivoted ArrayValue
+        """
+        ...
+
+    def _reproject_to_table(self) -> CompiledArrayValue:
+        """
+        Internal operator that projects the internal representation into a
+        new ibis table expression where each value column is a direct
+        reference to a column in that table expression. Needed after
+        some operations such as window operations that cannot be used
+        recursively in projections.
+        """
+        ...
+
+    def _uniform_sampling(self, fraction: float) -> CompiledArrayValue:
+        """Sample the table at the given fraction.
+
+        .. warning::
+            The row numbers of the result are non-deterministic; avoid relying on them.
+        """
+        ...
+
+    # Always ordered operations
+    def project_window_op(
+        self,
+        column_name: str,
+        op: agg_ops.WindowOp,
+        window_spec: WindowSpec,
+        output_name=None,
+        *,
+        never_skip_nulls=False,
+        skip_reproject_unsafe: bool = False,
+    ) -> OrderedIR:
+        """
+        Creates a new expression based on this expression with a window operation applied to one column.
+ column_name: the id of the input column present in the expression + op: the windowable operator to apply to the input column + window_spec: a specification of the window over which to apply the operator + output_name: the id to assign to the output of the operator, by default will replace input col if distinct output id not provided + never_skip_nulls: will disable null skipping for operators that would otherwise do so + skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection + """ + ... + + def promote_offsets(self, col_id: str): + """ + Convenience function to promote copy of column offsets to a value column. Can be used to reset index. + """ + ... + + +class BaseIbisIR: + """Implementation detail, contains common logic between ordered and unordered IR""" def __init__( self, table: ibis_types.Table, columns: Sequence[ibis_types.Value], - hidden_ordering_columns: Optional[Sequence[ibis_types.Value]] = None, - ordering: ExpressionOrdering = ExpressionOrdering(), predicates: Optional[Collection[ibis_types.BooleanValue]] = None, ): self._table = table self._predicates = tuple(predicates) if predicates is not None else () - # TODO: Validate ordering - if not ordering.total_ordering_columns: - raise ValueError("Must have total ordering defined by one or more columns") - self._ordering = ordering # Allow creating a DataFrame directly from an Ibis table expression. # TODO(swast): Validate that each column references the same table (or # no table for literal values). self._columns = tuple(columns) - - # Meta columns store ordering, or other data that doesn't correspond to dataframe columns - self._hidden_ordering_columns = ( - tuple(hidden_ordering_columns) - if hidden_ordering_columns is not None - else () - ) - # To allow for more efficient lookup by column name, create a # dictionary mapping names to column values. 
self._column_names = {column.get_name(): column for column in self._columns} - self._hidden_ordering_column_names = { - column.get_name(): column for column in self._hidden_ordering_columns - } - ### Validation - value_col_ids = self._column_names.keys() - hidden_col_ids = self._hidden_ordering_column_names.keys() - all_columns = value_col_ids | hidden_col_ids - ordering_valid = all( - col.column_id in all_columns for col in ordering.all_ordering_columns + @property + def columns(self) -> typing.Tuple[ibis_types.Value, ...]: + return self._columns + + @property + def column_ids(self) -> typing.Sequence[str]: + return tuple(self._column_names.keys()) + + @property + def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]: + """Returns the frame's predicates as an equivalent boolean value, useful where a single predicate value is preferred.""" + return ( + _reduce_predicate_list(self._predicates).name(PREDICATE_COLUMN) + if self._predicates + else None ) - if value_col_ids & hidden_col_ids: + + def _get_ibis_column(self, key: str) -> ibis_types.Value: + """Gets the Ibis expression for a given column.""" + if key not in self.column_ids: raise ValueError( - f"Keys in both hidden and exposed list: {value_col_ids & hidden_col_ids}" + "Column name {} not in set of values: {}".format(key, self.column_ids) ) - if not ordering_valid: - raise ValueError(f"Illegal ordering keys: {ordering.all_ordering_columns}") + return typing.cast(ibis_types.Value, self._column_names[key]) + + def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: + ibis_type = typing.cast( + bigframes.dtypes.IbisDtype, self._get_ibis_column(key).type() + ) + return typing.cast( + bigframes.dtypes.Dtype, + bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_type), + ) + + +# Ibis Implementations +class UnorderedIR(BaseIbisIR, CompiledArrayValue): + def __init__( + self, + table: ibis_types.Table, + columns: Sequence[ibis_types.Value], + predicates: Optional[Collection[ibis_types.BooleanValue]] = None, + ): + super().__init__(table, columns, predicates) @classmethod - def mem_expr_from_pandas( + def from_pandas( cls, pd_df: pandas.DataFrame, - ) -> CompiledArrayValue: + ) -> UnorderedIR: """ Builds an in-memory only (SQL only) expr from a pandas dataframe. """ @@ -123,7 +294,6 @@ def mem_expr_from_pandas( column_names = [str(column) for column in pd_df.columns] # Make sure column names are all strings. pd_df = pd_df.set_axis(column_names, axis="columns") - pd_df = pd_df.assign(**{ORDER_ID_COLUMN: range(len(pd_df))}) # ibis memtable cannot handle NA, must convert to None pd_df = pd_df.astype("object") # type: ignore @@ -134,21 +304,18 @@ def mem_expr_from_pandas( schema = keys_memtable.schema() new_schema = [] for column_index, column in enumerate(schema): - if column == ORDER_ID_COLUMN: - new_type: ibis_dtypes.DataType = ibis_dtypes.int64 - else: - column_type = schema[column] - # The autodetected type might not be one we can support, such - # as NULL type for empty rows, so convert to a type we do - # support. - new_type = bigframes.dtypes.bigframes_dtype_to_ibis_dtype( - bigframes.dtypes.ibis_dtype_to_bigframes_dtype(column_type) - ) - # TODO(swast): Ibis memtable doesn't use backticks in struct - # field names, so spaces and other characters aren't allowed in - # the memtable context. 
Blocked by - # https://github.com/ibis-project/ibis/issues/7187 - column = f"col_{column_index}" + column_type = schema[column] + # The autodetected type might not be one we can support, such + # as NULL type for empty rows, so convert to a type we do + # support. + new_type = bigframes.dtypes.bigframes_dtype_to_ibis_dtype( + bigframes.dtypes.ibis_dtype_to_bigframes_dtype(column_type) + ) + # TODO(swast): Ibis memtable doesn't use backticks in struct + # field names, so spaces and other characters aren't allowed in + # the memtable context. Blocked by + # https://github.com/ibis-project/ibis/issues/7187 + column = f"col_{column_index}" new_schema.append((column, new_type)) # must set non-null column labels. these are not the user-facing labels @@ -164,112 +331,108 @@ def mem_expr_from_pandas( keys_memtable[f"col_{column_index}"].name(column) for column_index, column in enumerate(column_names) ], - ordering=ExpressionOrdering( - ordering_value_columns=tuple( - [OrderingColumnReference(ORDER_ID_COLUMN)] - ), - total_ordering_columns=frozenset([ORDER_ID_COLUMN]), - ), - hidden_ordering_columns=(keys_memtable[ORDER_ID_COLUMN],), ) - @property - def columns(self) -> typing.Tuple[ibis_types.Value, ...]: - return self._columns + def builder(self): + """Creates a mutable builder for expressions.""" + # Since ArrayValue is intended to be immutable (immutability offers + # potential opportunities for caching, though we might need to introduce + # more node types for that to be useful), we create a builder class. + return UnorderedIR.Builder( + self._table, + columns=self._columns, + predicates=self._predicates, + ) - @property - def column_ids(self) -> typing.Sequence[str]: - return tuple(self._column_names.keys()) - - @property - def _hidden_column_ids(self) -> typing.Sequence[str]: - return tuple(self._hidden_ordering_column_names.keys()) - - @property - def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]: - """Returns the frame's predicates as an equivalent boolean value, useful where a single predicate value is preferred.""" - return ( - _reduce_predicate_list(self._predicates).name(PREDICATE_COLUMN) - if self._predicates - else None + def to_sql( + self, + offset_column: typing.Optional[str] = None, + col_id_overrides: typing.Mapping[str, str] = {}, + sorted: bool = False, + ) -> str: + if offset_column or sorted: + raise ValueError("Cannot produce sorted sql in unordered mode") + sql = ibis_bigquery.Backend().compile( + self._to_ibis_expr( + col_id_overrides=col_id_overrides, + ) ) + return typing.cast(str, sql) - @property - def _ibis_order(self) -> Sequence[ibis_types.Value]: - """Returns a sequence of ibis values which can be directly used to order a table expression. Has direction modifiers applied.""" - return _convert_ordering_to_table_values( - {**self._column_names, **self._hidden_ordering_column_names}, - self._ordering.all_ordering_columns, - ) + def _to_ibis_expr( + self, + *, + expose_hidden_cols: bool = False, + fraction: Optional[float] = None, + col_id_overrides: typing.Mapping[str, str] = {}, + **kwargs, + ): + """ + Creates an Ibis table expression representing the DataFrame. - def builder(self) -> ArrayValueBuilder: - """Creates a mutable builder for expressions.""" - # Since ArrayValue is intended to be immutable (immutability offers - # potential opportunities for caching, though we might need to introduce - # more node types for that to be useful), we create a builder class. 
-        return ArrayValueBuilder(
-            self._table,
-            columns=self._columns,
-            hidden_ordering_columns=self._hidden_ordering_columns,
-            ordering=self._ordering,
-            predicates=self._predicates,
-        )
+        This IR is unordered, so the output contains no ordering information;
+        only value columns are projected.

-    def drop_columns(self, columns: Iterable[str]) -> CompiledArrayValue:
-        # Must generate offsets if we are dropping a column that ordering depends on
-        expr = self
-        for ordering_column in set(columns).intersection(
-            [col.column_id for col in self._ordering.ordering_value_columns]
-        ):
-            expr = self._hide_column(ordering_column)
-
-        expr_builder = expr.builder()
-        remain_cols = [
-            column for column in expr.columns if column.get_name() not in columns
-        ]
-        expr_builder.columns = remain_cols
-        return expr_builder.build()
-
-    def get_column_type(self, key: str) -> bigframes.dtypes.Dtype:
-        ibis_type = typing.cast(
-            bigframes.dtypes.IbisDtype, self._get_any_column(key).type()
-        )
-        return typing.cast(
-            bigframes.dtypes.Dtype,
-            bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_type),
-        )
+        Args:
+            expose_hidden_cols:
+                If True, include hidden columns (such as the reduced
+                predicate) in the results instead of dropping them.
+            col_id_overrides:
+                overrides the column ids for the result
+        Returns:
+            An ibis expression representing the data held by the ArrayValue object.
+        """
+        columns = list(self._columns)
+        columns_to_drop: list[
+            str
+        ] = []  # Ordering/Filtering columns that will be dropped at end

-    def _get_ibis_column(self, key: str) -> ibis_types.Value:
-        """Gets the Ibis expression for a given column."""
-        if key not in self.column_ids:
-            raise ValueError(
-                "Column name {} not in set of values: {}".format(key, self.column_ids)
-            )
-        return typing.cast(ibis_types.Value, self._column_names[key])
+        if self._reduced_predicate is not None:
+            columns.append(self._reduced_predicate)
+            # Usually drop the predicate, as it will be all TRUE after filtering
+            if not expose_hidden_cols:
+                columns_to_drop.append(self._reduced_predicate.get_name())

-    def _get_any_column(self, key: str) -> ibis_types.Value:
-        """Gets the Ibis expression for a given column. Will also get hidden columns."""
-        all_columns = {**self._column_names, **self._hidden_ordering_column_names}
-        if key not in all_columns.keys():
-            raise ValueError(
-                "Column name {} not in set of values: {}".format(
-                    key, all_columns.keys()
-                )
-            )
-        return typing.cast(ibis_types.Value, all_columns[key])
+        # Special case for empty tables, since we can't create an empty
+        # projection.
+ if not columns: + return ibis.memtable([]) - def _get_hidden_ordering_column(self, key: str) -> ibis_types.Column: - """Gets the Ibis expression for a given hidden column.""" - if key not in self._hidden_ordering_column_names.keys(): - raise ValueError( - "Column name {} not in set of values: {}".format( - key, self._hidden_ordering_column_names.keys() - ) - ) - return typing.cast(ibis_types.Column, self._hidden_ordering_column_names[key]) + # Make sure all dtypes are the "canonical" ones for BigFrames. This is + # important for operations like UNION where the schema must match. + table = self._table.select( + bigframes.dtypes.ibis_value_to_canonical_type(column) for column in columns + ) + base_table = table + if self._reduced_predicate is not None: + table = table.filter(base_table[PREDICATE_COLUMN]) + table = table.drop(*columns_to_drop) + if col_id_overrides: + table = table.relabel(col_id_overrides) + if fraction is not None: + table = table.filter(ibis.random() < ibis.literal(fraction)) + return table + + def select_columns(self, column_ids: typing.Sequence[str]) -> UnorderedIR: + """Creates a new expression based on this expression with new columns.""" + columns = [self._get_ibis_column(col_id) for col_id in column_ids] + builder = self.builder() + builder.columns = list(columns) + new_expr = builder.build() + return new_expr def filter(self, predicate_id: str, keep_null: bool = False) -> CompiledArrayValue: - """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression.""" condition = typing.cast( ibis_types.BooleanValue, self._get_ibis_column(predicate_id) ) @@ -285,172 +448,20 @@ def filter(self, predicate_id: str, keep_null: bool = False) -> CompiledArrayVal def _filter(self, predicate_value: ibis_types.BooleanValue) -> CompiledArrayValue: """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression.""" expr = self.builder() - expr.ordering = expr.ordering.with_non_sequential() expr.predicates = [*self._predicates, predicate_value] return expr.build() def order_by( self, by: Sequence[OrderingColumnReference], stable: bool = False - ) -> CompiledArrayValue: - expr_builder = self.builder() - expr_builder.ordering = self._ordering.with_ordering_columns(by, stable=stable) - return expr_builder.build() - - def reversed(self) -> CompiledArrayValue: - expr_builder = self.builder() - expr_builder.ordering = self._ordering.with_reverse() - return expr_builder.build() - - def _uniform_sampling(self, fraction: float) -> CompiledArrayValue: - """Sampling the table on given fraction. - - .. warning:: - The row numbers of result is non-deterministic, avoid to use. - """ - table = self._to_ibis_expr( - "unordered", expose_hidden_cols=True, fraction=fraction - ) - columns = [table[column_name] for column_name in self._column_names] - hidden_ordering_columns = [ - table[column_name] for column_name in self._hidden_ordering_column_names - ] - return CompiledArrayValue( - table, - columns=columns, - hidden_ordering_columns=hidden_ordering_columns, - ordering=self._ordering, - ) - - @property - def _offsets(self) -> ibis_types.IntegerColumn: - if not self._ordering.is_sequential: - raise ValueError( - "Expression does not have offsets. Generate them first using project_offsets." - ) - if not self._ordering.total_order_col: - raise ValueError( - "Ordering is invalid. Marked as sequential but no total order columns." 
- ) - column = self._get_any_column(self._ordering.total_order_col.column_id) - return typing.cast(ibis_types.IntegerColumn, column) - - def _project_offsets(self) -> CompiledArrayValue: - """Create a new expression that contains offsets. Should only be executed when offsets are needed for an operations. Has no effect on expression semantics.""" - if self._ordering.is_sequential: - return self - # TODO(tbergeron): Enforce total ordering - table = self._to_ibis_expr( - ordering_mode="offset_col", order_col_name=ORDER_ID_COLUMN - ) - columns = [table[column_name] for column_name in self._column_names] - ordering = ExpressionOrdering( - ordering_value_columns=tuple([OrderingColumnReference(ORDER_ID_COLUMN)]), - total_ordering_columns=frozenset([ORDER_ID_COLUMN]), - integer_encoding=IntegerEncoding(True, is_sequential=True), - ) - return CompiledArrayValue( - table, - columns=columns, - hidden_ordering_columns=[table[ORDER_ID_COLUMN]], - ordering=ordering, - ) - - def _hide_column(self, column_id) -> CompiledArrayValue: - """Pushes columns to hidden columns list. Used to hide ordering columns that have been dropped or destructively mutated.""" - expr_builder = self.builder() - # Need to rename column as caller might be creating a new row with the same name but different values. - # Can avoid this if don't allow callers to determine ids and instead generate unique ones in this class. - new_name = bigframes.core.guid.generate_guid(prefix="bigframes_hidden_") - expr_builder.hidden_ordering_columns = [ - *self._hidden_ordering_columns, - self._get_ibis_column(column_id).name(new_name), - ] - expr_builder.ordering = self._ordering.with_column_remap({column_id: new_name}) - return expr_builder.build() - - def promote_offsets(self, col_id: str) -> CompiledArrayValue: - """ - Convenience function to promote copy of column offsets to a value column. Can be used to reset index. - """ - # Special case: offsets already exist - ordering = self._ordering - - if (not ordering.is_sequential) or (not ordering.total_order_col): - return self._project_offsets().promote_offsets(col_id) - expr_builder = self.builder() - expr_builder.columns = [ - self._get_any_column(ordering.total_order_col.column_id).name(col_id), - *self.columns, - ] - return expr_builder.build() - - def select_columns(self, column_ids: typing.Sequence[str]) -> CompiledArrayValue: - """Creates a new expression based on this expression with new columns.""" - columns = [self._get_ibis_column(col_id) for col_id in column_ids] - expr = self - for ordering_column in set(self.column_ids).intersection( - [col_ref.column_id for col_ref in self._ordering.ordering_value_columns] - ): - # Need to hide ordering columns that are being dropped. 
Alternatively, could project offsets - expr = expr._hide_column(ordering_column) - builder = expr.builder() - builder.columns = list(columns) - new_expr = builder.build() - return new_expr + ) -> UnorderedIR: + return self - def concat(self, other: typing.Sequence[CompiledArrayValue]) -> CompiledArrayValue: - """Append together multiple ArrayValue objects.""" - if len(other) == 0: - return self - tables = [] - prefix_base = 10 - prefix_size = math.ceil(math.log(len(other) + 1, prefix_base)) - # Must normalize all ids to the same encoding size - max_encoding_size = max( - self._ordering.string_encoding.length, - *[expression._ordering.string_encoding.length for expression in other], - ) - for i, expr in enumerate([self, *other]): - ordering_prefix = str(i).zfill(prefix_size) - table = expr._to_ibis_expr( - ordering_mode="string_encoded", order_col_name=ORDER_ID_COLUMN - ) - # Rename the value columns based on horizontal offset before applying union. - table = table.select( - [ - table[col].name(f"column_{i}") - if col != ORDER_ID_COLUMN - else ( - ordering_prefix - + reencode_order_string( - table[ORDER_ID_COLUMN], max_encoding_size - ) - ).name(ORDER_ID_COLUMN) - for i, col in enumerate(table.columns) - ] - ) - tables.append(table) - combined_table = ibis.union(*tables) - ordering = ExpressionOrdering( - ordering_value_columns=tuple([OrderingColumnReference(ORDER_ID_COLUMN)]), - total_ordering_columns=frozenset([ORDER_ID_COLUMN]), - string_encoding=StringEncoding(True, prefix_size + max_encoding_size), - ) - return CompiledArrayValue( - combined_table, - columns=[ - combined_table[col] - for col in combined_table.columns - if col != ORDER_ID_COLUMN - ], - hidden_ordering_columns=[combined_table[ORDER_ID_COLUMN]], - ordering=ordering, - ) + def reversed(self) -> UnorderedIR: + return self def project_unary_op( self, column_name: str, op: ops.UnaryOp, output_name=None - ) -> CompiledArrayValue: - """Creates a new expression based on this expression with unary operation applied to one column.""" + ) -> UnorderedIR: value = op._as_ibis(self._get_ibis_column(column_name)).name( output_name or column_name ) @@ -462,8 +473,7 @@ def project_binary_op( right_column_id: str, op: ops.BinaryOp, output_column_id: str, - ) -> CompiledArrayValue: - """Creates a new expression based on this expression with binary operation applied to two columns.""" + ) -> UnorderedIR: value = op( self._get_ibis_column(left_column_id), self._get_ibis_column(right_column_id), @@ -477,8 +487,7 @@ def project_ternary_op( col_id_3: str, op: ops.TernaryOp, output_column_id: str, - ) -> CompiledArrayValue: - """Creates a new expression based on this expression with ternary operation applied to three columns.""" + ) -> UnorderedIR: value = op( self._get_ibis_column(col_id_1), self._get_ibis_column(col_id_2), @@ -486,20 +495,140 @@ def project_ternary_op( ).name(output_column_id) return self._set_or_replace_by_id(output_column_id, value) + def assign(self, source_id: str, destination_id: str) -> UnorderedIR: + return self._set_or_replace_by_id( + destination_id, self._get_ibis_column(source_id) + ) + + def assign_constant( + self, + destination_id: str, + value: typing.Any, + dtype: typing.Optional[bigframes.dtypes.Dtype], + ) -> UnorderedIR: + # TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis. + ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype) + if ibis_value is None: + raise NotImplementedError( + f"Type not supported as scalar value {type(value)}. 
{constants.FEEDBACK_LINK}" + ) + expr = self._set_or_replace_by_id(destination_id, ibis_value) + return expr._reproject_to_table() + + def unpivot( + self, + row_labels: typing.Sequence[typing.Hashable], + unpivot_columns: typing.Sequence[ + typing.Tuple[str, typing.Sequence[typing.Optional[str]]] + ], + *, + passthrough_columns: typing.Sequence[str] = (), + index_col_ids: typing.Sequence[str] = ["index"], + dtype: typing.Union[ + bigframes.dtypes.Dtype, typing.Sequence[bigframes.dtypes.Dtype] + ] = pandas.Float64Dtype(), + how="left", + ) -> UnorderedIR: + if how not in ("left", "right"): + raise ValueError("'how' must be 'left' or 'right'") + table = self._to_ibis_expr() + row_n = len(row_labels) + if not all( + len(source_columns) == row_n for _, source_columns in unpivot_columns + ): + raise ValueError("Columns and row labels must all be same length.") + + unpivot_offset_id = bigframes.core.guid.generate_guid("unpivot_offsets_") + unpivot_table = table.cross_join( + ibis.memtable({unpivot_offset_id: range(row_n)}) + ) + # Use ibis memtable to infer type of rowlabels (if possible) + # TODO: Allow caller to specify dtype + if isinstance(row_labels[0], tuple): + labels_table = ibis.memtable(row_labels) + labels_ibis_types = [ + labels_table[col].type() for col in labels_table.columns + ] + else: + labels_ibis_types = [ibis.memtable({"col": row_labels})["col"].type()] + labels_dtypes = [ + bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_type) + for ibis_type in labels_ibis_types + ] + + label_columns = [] + for label_part, (col_id, label_dtype) in enumerate( + zip(index_col_ids, labels_dtypes) + ): + # interpret as tuples even if it wasn't originally so can apply same logic for multi-column labels + labels_as_tuples = [ + label if isinstance(label, tuple) else (label,) for label in row_labels + ] + cases = [ + ( + i, + bigframes.dtypes.literal_to_ibis_scalar( + label_tuple[label_part], # type:ignore + force_dtype=label_dtype, # type:ignore + ), + ) + for i, label_tuple in enumerate(labels_as_tuples) + ] + labels_value = ( + typing.cast(ibis_types.IntegerColumn, unpivot_table[unpivot_offset_id]) + .cases(cases, default=None) # type:ignore + .name(col_id) + ) + label_columns.append(labels_value) + + unpivot_values = [] + for j in range(len(unpivot_columns)): + col_dtype = dtype[j] if utils.is_list_like(dtype) else dtype + result_col, source_cols = unpivot_columns[j] + null_value = bigframes.dtypes.literal_to_ibis_scalar( + None, force_dtype=col_dtype + ) + ibis_values = [ + ops.AsTypeOp(col_dtype)._as_ibis(unpivot_table[col]) + if col is not None + else null_value + for col in source_cols + ] + cases = [(i, ibis_values[i]) for i in range(len(ibis_values))] + unpivot_value = typing.cast( + ibis_types.IntegerColumn, unpivot_table[unpivot_offset_id] + ).cases( + cases, default=null_value # type:ignore + ) + unpivot_values.append(unpivot_value.name(result_col)) + + unpivot_table = unpivot_table.select( + passthrough_columns, + *label_columns, + *unpivot_values, + unpivot_offset_id, + ) + + value_columns = [ + unpivot_table[value_col_id] for value_col_id, _ in unpivot_columns + ] + passthrough_values = [unpivot_table[col] for col in passthrough_columns] + return UnorderedIR( + table=unpivot_table, + columns=[ + *[unpivot_table[col_id] for col_id in index_col_ids], + *value_columns, + *passthrough_values, + ], + ) + def aggregate( self, aggregations: typing.Sequence[typing.Tuple[str, agg_ops.AggregateOp, str]], by_column_ids: typing.Sequence[str] = (), dropna: bool = True, - ) -> 
CompiledArrayValue:
-        """
-        Apply aggregations to the expression.
-        Arguments:
-            aggregations: input_column_id, operation, output_column_id tuples
-            by_column_id: column id of the aggregation key, this is preserved through the transform
-            dropna: whether null keys should be dropped
-        """
-        table = self._to_ibis_expr("unordered")
+    ) -> OrderedIR:
+        table = self._to_ibis_expr()
         stats = {
             col_out: agg_op._as_ibis(table[col_in])
             for col_in, agg_op, col_out in aggregations
@@ -514,117 +643,621 @@ def aggregate(
                     for column_id in by_column_ids
                 ]
             ),
-            total_ordering_columns=frozenset(by_column_ids),
+                total_ordering_columns=frozenset(by_column_ids),
+            )
+            columns = tuple(result[key] for key in result.columns)
+            expr = OrderedIR(result, columns=columns, ordering=ordering)
+            if dropna:
+                for column_id in by_column_ids:
+                    expr = expr._filter(
+                        ops.notnull_op._as_ibis(expr._get_ibis_column(column_id))
+                    )
+            # Can likely be removed, as the ordering id is redundant: by_column is unique after aggregation
+            return expr._project_offsets()
+        else:
+            aggregates = {**stats, ORDER_ID_COLUMN: ibis_types.literal(0)}
+            result = table.aggregate(**aggregates)
+            # Ordering is irrelevant for single-row output, but set an ordering id regardless, as other ops (join etc.) expect it.
+            ordering = ExpressionOrdering(
+                ordering_value_columns=tuple(
+                    [OrderingColumnReference(ORDER_ID_COLUMN)]
+                ),
+                total_ordering_columns=frozenset([ORDER_ID_COLUMN]),
+                integer_encoding=IntegerEncoding(is_encoded=True, is_sequential=True),
+            )
+            return OrderedIR(
+                result,
+                columns=[result[col_id] for col_id in [*stats.keys()]],
+                hidden_ordering_columns=[result[ORDER_ID_COLUMN]],
+                ordering=ordering,
+            )
+
+    def corr_aggregate(
+        self, corr_aggregations: typing.Sequence[typing.Tuple[str, str, str]]
+    ) -> OrderedIR:
+        table = self._to_ibis_expr()
+        stats = {
+            col_out: table[col_left].corr(table[col_right], how="pop")
+            for col_left, col_right, col_out in corr_aggregations
+        }
+        aggregates = {**stats, ORDER_ID_COLUMN: ibis_types.literal(0)}
+        result = table.aggregate(**aggregates)
+        # Ordering is irrelevant for single-row output, but set an ordering id regardless, as other ops (join etc.) expect it.
+        ordering = ExpressionOrdering(
+            ordering_value_columns=tuple([OrderingColumnReference(ORDER_ID_COLUMN)]),
+            total_ordering_columns=frozenset([ORDER_ID_COLUMN]),
+            integer_encoding=IntegerEncoding(is_encoded=True, is_sequential=True),
+        )
+        return OrderedIR(
+            result,
+            columns=[result[col_id] for col_id in [*stats.keys()]],
+            hidden_ordering_columns=[result[ORDER_ID_COLUMN]],
+            ordering=ordering,
+        )
+
+    def _uniform_sampling(self, fraction: float) -> UnorderedIR:
+        """Sample the table at the given fraction.
+
+        .. warning::
+            The row numbers of the result are non-deterministic; avoid relying on them.
+        """
+        table = self._to_ibis_expr(fraction=fraction)
+        columns = [table[column_name] for column_name in self._column_names]
+        return UnorderedIR(
+            table,
+            columns=columns,
+        )
+
+    # Unsupported operations, need ordering
+    def project_window_op(
+        self,
+        column_name: str,
+        op: agg_ops.WindowOp,
+        window_spec: WindowSpec,
+        output_name=None,
+        *,
+        never_skip_nulls=False,
+        skip_reproject_unsafe: bool = False,
+    ) -> OrderedIR:
+        raise ValueError("Window ops must be compiled in ordered mode")
+
+    def promote_offsets(self, col_id: str):
+        raise ValueError("Offsets must be compiled in ordered mode")
+
+    ## Helpers
+    def _set_or_replace_by_id(
+        self, id: str, new_value: ibis_types.Value
+    ) -> UnorderedIR:
+        """Safely assign by id, preserving the existing column order."""
+        builder = self.builder()
+        if id in self.column_ids:
+            builder.columns = [
+                val if (col_id != id) else new_value.name(id)
+                for col_id, val in zip(self.column_ids, self._columns)
+            ]
+        else:
+            builder.columns = [*self.columns, new_value.name(id)]
+        return builder.build()
+
+    def _reproject_to_table(self) -> UnorderedIR:
+        """
+        Internal operator that projects the internal representation into a
+        new ibis table expression where each value column is a direct
+        reference to a column in that table expression. Needed after
+        some operations such as window operations that cannot be used
+        recursively in projections.
+        """
+        table = self._to_ibis_expr()
+        columns = [table[column_name] for column_name in self._column_names]
+        return UnorderedIR(
+            table,
+            columns=columns,
+        )
+
+    class Builder:
+        def __init__(
+            self,
+            table: ibis_types.Table,
+            columns: Collection[ibis_types.Value] = (),
+            predicates: Optional[Collection[ibis_types.BooleanValue]] = None,
+        ):
+            self.table = table
+            self.columns = list(columns)
+            self.predicates = list(predicates) if predicates is not None else None
+
+        def build(self) -> UnorderedIR:
+            return UnorderedIR(
+                table=self.table,
+                columns=self.columns,
+                predicates=self.predicates,
+            )
+
+
+class OrderedIR(BaseIbisIR, CompiledArrayValue):
+    """Immutable BigQuery DataFrames expression tree.
+
+    Note: Usage of this class is considered to be private and subject to change
+    at any time.
+
+    This class is a wrapper around Ibis expressions. Its purpose is to defer
+    Ibis projection operations to keep generated SQL small and correct when
+    mixing and matching columns from different versions of a DataFrame.
+
+    Args:
+        table: An Ibis table expression.
+        columns: Ibis value expressions that can be projected as columns.
+        hidden_ordering_columns: Ibis value expressions to store ordering.
+        ordering: An ordering property of the data frame.
+        predicates: A list of filters on the data frame.
+ """ + + def __init__( + self, + table: ibis_types.Table, + columns: Sequence[ibis_types.Value], + hidden_ordering_columns: Optional[Sequence[ibis_types.Value]] = None, + ordering: ExpressionOrdering = ExpressionOrdering(), + predicates: Optional[Collection[ibis_types.BooleanValue]] = None, + ): + super().__init__(table, columns, predicates) + # TODO: Validate ordering + if not ordering.total_ordering_columns: + raise ValueError("Must have total ordering defined by one or more columns") + self._ordering = ordering + # Meta columns store ordering, or other data that doesn't correspond to dataframe columns + self._hidden_ordering_columns = ( + tuple(hidden_ordering_columns) + if hidden_ordering_columns is not None + else () + ) + + # To allow for more efficient lookup by column name, create a + # dictionary mapping names to column values. + self._column_names = {column.get_name(): column for column in self._columns} + self._hidden_ordering_column_names = { + column.get_name(): column for column in self._hidden_ordering_columns + } + ### Validation + value_col_ids = self._column_names.keys() + hidden_col_ids = self._hidden_ordering_column_names.keys() + + all_columns = value_col_ids | hidden_col_ids + ordering_valid = all( + col.column_id in all_columns for col in ordering.all_ordering_columns + ) + if value_col_ids & hidden_col_ids: + raise ValueError( + f"Keys in both hidden and exposed list: {value_col_ids & hidden_col_ids}" + ) + if not ordering_valid: + raise ValueError(f"Illegal ordering keys: {ordering.all_ordering_columns}") + + @classmethod + def from_pandas( + cls, + pd_df: pandas.DataFrame, + ) -> OrderedIR: + """ + Builds an in-memory only (SQL only) expr from a pandas dataframe. + """ + # We can't include any hidden columns in the ArrayValue constructor, so + # grab the column names before we add the hidden ordering column. + column_names = [str(column) for column in pd_df.columns] + # Make sure column names are all strings. + pd_df = pd_df.set_axis(column_names, axis="columns") + pd_df = pd_df.assign(**{ORDER_ID_COLUMN: range(len(pd_df))}) + + # ibis memtable cannot handle NA, must convert to None + pd_df = pd_df.astype("object") # type: ignore + pd_df = pd_df.where(pandas.notnull(pd_df), None) + + # NULL type isn't valid in BigQuery, so retry with an explicit schema in these cases. + keys_memtable = ibis.memtable(pd_df) + schema = keys_memtable.schema() + new_schema = [] + for column_index, column in enumerate(schema): + if column == ORDER_ID_COLUMN: + new_type: ibis_dtypes.DataType = ibis_dtypes.int64 + else: + column_type = schema[column] + # The autodetected type might not be one we can support, such + # as NULL type for empty rows, so convert to a type we do + # support. + new_type = bigframes.dtypes.bigframes_dtype_to_ibis_dtype( + bigframes.dtypes.ibis_dtype_to_bigframes_dtype(column_type) + ) + # TODO(swast): Ibis memtable doesn't use backticks in struct + # field names, so spaces and other characters aren't allowed in + # the memtable context. Blocked by + # https://github.com/ibis-project/ibis/issues/7187 + column = f"col_{column_index}" + new_schema.append((column, new_type)) + + # must set non-null column labels. 
these are not the user-facing labels + pd_df = pd_df.set_axis( + [column for column, _ in new_schema], + axis="columns", + ) + keys_memtable = ibis.memtable(pd_df, schema=ibis.schema(new_schema)) + + return cls( + keys_memtable, + columns=[ + keys_memtable[f"col_{column_index}"].name(column) + for column_index, column in enumerate(column_names) + ], + ordering=ExpressionOrdering( + ordering_value_columns=tuple( + [OrderingColumnReference(ORDER_ID_COLUMN)] + ), + total_ordering_columns=frozenset([ORDER_ID_COLUMN]), + ), + hidden_ordering_columns=(keys_memtable[ORDER_ID_COLUMN],), + ) + + @property + def _hidden_column_ids(self) -> typing.Sequence[str]: + return tuple(self._hidden_ordering_column_names.keys()) + + @property + def _ibis_order(self) -> Sequence[ibis_types.Value]: + """Returns a sequence of ibis values which can be directly used to order a table expression. Has direction modifiers applied.""" + return _convert_ordering_to_table_values( + {**self._column_names, **self._hidden_ordering_column_names}, + self._ordering.all_ordering_columns, + ) + + def to_unordered(self) -> UnorderedIR: + return UnorderedIR(self._table, self._columns, self._predicates) + + def builder(self) -> OrderedIR.Builder: + """Creates a mutable builder for expressions.""" + # Since ArrayValue is intended to be immutable (immutability offers + # potential opportunities for caching, though we might need to introduce + # more node types for that to be useful), we create a builder class. + return OrderedIR.Builder( + self._table, + columns=self._columns, + hidden_ordering_columns=self._hidden_ordering_columns, + ordering=self._ordering, + predicates=self._predicates, + ) + + def order_by( + self, by: Sequence[OrderingColumnReference], stable: bool = False + ) -> OrderedIR: + expr_builder = self.builder() + expr_builder.ordering = self._ordering.with_ordering_columns(by, stable=stable) + return expr_builder.build() + + def reversed(self) -> OrderedIR: + expr_builder = self.builder() + expr_builder.ordering = self._ordering.with_reverse() + return expr_builder.build() + + def project_unary_op( + self, column_name: str, op: ops.UnaryOp, output_name=None + ) -> OrderedIR: + value = op._as_ibis(self._get_ibis_column(column_name)).name( + output_name or column_name + ) + return self._set_or_replace_by_id(output_name or column_name, value) + + def project_binary_op( + self, + left_column_id: str, + right_column_id: str, + op: ops.BinaryOp, + output_column_id: str, + ) -> OrderedIR: + value = op( + self._get_ibis_column(left_column_id), + self._get_ibis_column(right_column_id), + ).name(output_column_id) + return self._set_or_replace_by_id(output_column_id, value) + + def project_ternary_op( + self, + col_id_1: str, + col_id_2: str, + col_id_3: str, + op: ops.TernaryOp, + output_column_id: str, + ) -> OrderedIR: + value = op( + self._get_ibis_column(col_id_1), + self._get_ibis_column(col_id_2), + self._get_ibis_column(col_id_3), + ).name(output_column_id) + return self._set_or_replace_by_id(output_column_id, value) + + def assign(self, source_id: str, destination_id: str) -> OrderedIR: + return self._set_or_replace_by_id( + destination_id, self._get_ibis_column(source_id) + ) + + def assign_constant( + self, + destination_id: str, + value: typing.Any, + dtype: typing.Optional[bigframes.dtypes.Dtype], + ) -> OrderedIR: + # TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis. 
+
+        ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype)
+        if ibis_value is None:
+            raise NotImplementedError(
+                f"Type not supported as scalar value {type(value)}. {constants.FEEDBACK_LINK}"
+            )
+        expr = self._set_or_replace_by_id(destination_id, ibis_value)
+        return expr._reproject_to_table()
+
+    def _uniform_sampling(self, fraction: float) -> OrderedIR:
+        """Sample the table at the given fraction.
+
+        .. warning::
+            The row numbers of the result are non-deterministic; avoid relying on them.
+        """
+        table = self._to_ibis_expr(
+            ordering_mode="unordered", expose_hidden_cols=True, fraction=fraction
+        )
+        columns = [table[column_name] for column_name in self._column_names]
+        hidden_ordering_columns = [
+            table[column_name] for column_name in self._hidden_ordering_column_names
+        ]
+        return OrderedIR(
+            table,
+            columns=columns,
+            hidden_ordering_columns=hidden_ordering_columns,
+            ordering=self._ordering,
+        )
+
+    def promote_offsets(self, col_id: str) -> OrderedIR:
+        # Special case: offsets already exist
+        ordering = self._ordering
+
+        if (not ordering.is_sequential) or (not ordering.total_order_col):
+            return self._project_offsets().promote_offsets(col_id)
+        expr_builder = self.builder()
+        expr_builder.columns = [
+            self._get_any_column(ordering.total_order_col.column_id).name(col_id),
+            *self.columns,
+        ]
+        return expr_builder.build()
+
+    def select_columns(self, column_ids: typing.Sequence[str]) -> OrderedIR:
+        """Creates a new expression based on this expression with new columns."""
+        columns = [self._get_ibis_column(col_id) for col_id in column_ids]
+        expr = self
+        for ordering_column in set(self.column_ids).intersection(
+            [col_ref.column_id for col_ref in self._ordering.ordering_value_columns]
+        ):
+            # Need to hide ordering columns that are being dropped.
Alternatively, could project offsets + expr = expr._hide_column(ordering_column) + builder = expr.builder() + builder.columns = list(columns) + new_expr = builder.build() + return new_expr + + def aggregate( + self, + aggregations: typing.Sequence[typing.Tuple[str, agg_ops.AggregateOp, str]], + by_column_ids: typing.Sequence[str] = (), + dropna: bool = True, + ) -> OrderedIR: + return self.to_unordered().aggregate(aggregations, by_column_ids, dropna) + + def corr_aggregate( + self, corr_aggregations: typing.Sequence[typing.Tuple[str, str, str]] + ) -> OrderedIR: + return self.to_unordered().corr_aggregate(corr_aggregations) + + ## Methods that only work with ordering + def project_window_op( + self, + column_name: str, + op: agg_ops.WindowOp, + window_spec: WindowSpec, + output_name=None, + *, + never_skip_nulls=False, + skip_reproject_unsafe: bool = False, + ) -> OrderedIR: + column = typing.cast(ibis_types.Column, self._get_ibis_column(column_name)) + window = self._ibis_window_from_spec(window_spec, allow_ties=op.handles_ties) + + window_op = op._as_ibis(column, window) + + clauses = [] + if op.skips_nulls and not never_skip_nulls: + clauses.append((column.isnull(), ibis.NA)) + if window_spec.min_periods: + if op.skips_nulls: + # Most operations do not count NULL values towards min_periods + observation_count = agg_ops.count_op._as_ibis(column, window) + else: + # Operations like count treat even NULLs as valid observations for the sake of min_periods + # notnull is just used to convert null values to non-null (FALSE) values to be counted + denulled_value = typing.cast(ibis_types.BooleanColumn, column.notnull()) + observation_count = agg_ops.count_op._as_ibis(denulled_value, window) + clauses.append( + ( + observation_count < ibis_types.literal(window_spec.min_periods), + ibis.NA, + ) + ) + if clauses: + case_statement = ibis.case() + for clause in clauses: + case_statement = case_statement.when(clause[0], clause[1]) + case_statement = case_statement.else_(window_op).end() + window_op = case_statement + + result = self._set_or_replace_by_id(output_name or column_name, window_op) + # TODO(tbergeron): Automatically track analytic expression usage and defer reprojection until required for valid query generation. 
+ return result._reproject_to_table() if not skip_reproject_unsafe else result + + def unpivot( + self, + row_labels: typing.Sequence[typing.Hashable], + unpivot_columns: typing.Sequence[ + typing.Tuple[str, typing.Sequence[typing.Optional[str]]] + ], + *, + passthrough_columns: typing.Sequence[str] = (), + index_col_ids: typing.Sequence[str] = ["index"], + dtype: typing.Union[ + bigframes.dtypes.Dtype, typing.Sequence[bigframes.dtypes.Dtype] + ] = pandas.Float64Dtype(), + how="left", + ) -> OrderedIR: + if how not in ("left", "right"): + raise ValueError("'how' must be 'left' or 'right'") + table = self._to_ibis_expr(ordering_mode="unordered", expose_hidden_cols=True) + row_n = len(row_labels) + hidden_col_ids = self._hidden_ordering_column_names.keys() + if not all( + len(source_columns) == row_n for _, source_columns in unpivot_columns + ): + raise ValueError("Columns and row labels must all be same length.") + + unpivot_offset_id = bigframes.core.guid.generate_guid("unpivot_offsets_") + unpivot_table = table.cross_join( + ibis.memtable({unpivot_offset_id: range(row_n)}) + ) + # Use ibis memtable to infer type of rowlabels (if possible) + # TODO: Allow caller to specify dtype + if isinstance(row_labels[0], tuple): + labels_table = ibis.memtable(row_labels) + labels_ibis_types = [ + labels_table[col].type() for col in labels_table.columns + ] + else: + labels_ibis_types = [ibis.memtable({"col": row_labels})["col"].type()] + labels_dtypes = [ + bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_type) + for ibis_type in labels_ibis_types + ] + + label_columns = [] + for label_part, (col_id, label_dtype) in enumerate( + zip(index_col_ids, labels_dtypes) + ): + # interpret as tuples even if it wasn't originally so can apply same logic for multi-column labels + labels_as_tuples = [ + label if isinstance(label, tuple) else (label,) for label in row_labels + ] + cases = [ + ( + i, + bigframes.dtypes.literal_to_ibis_scalar( + label_tuple[label_part], # type:ignore + force_dtype=label_dtype, # type:ignore + ), + ) + for i, label_tuple in enumerate(labels_as_tuples) + ] + labels_value = ( + typing.cast(ibis_types.IntegerColumn, unpivot_table[unpivot_offset_id]) + .cases(cases, default=None) # type:ignore + .name(col_id) + ) + label_columns.append(labels_value) + + unpivot_values = [] + for j in range(len(unpivot_columns)): + col_dtype = dtype[j] if utils.is_list_like(dtype) else dtype + result_col, source_cols = unpivot_columns[j] + null_value = bigframes.dtypes.literal_to_ibis_scalar( + None, force_dtype=col_dtype + ) + ibis_values = [ + ops.AsTypeOp(col_dtype)._as_ibis(unpivot_table[col]) + if col is not None + else null_value + for col in source_cols + ] + cases = [(i, ibis_values[i]) for i in range(len(ibis_values))] + unpivot_value = typing.cast( + ibis_types.IntegerColumn, unpivot_table[unpivot_offset_id] + ).cases( + cases, default=null_value # type:ignore + ) + unpivot_values.append(unpivot_value.name(result_col)) + + unpivot_table = unpivot_table.select( + passthrough_columns, + *label_columns, + *unpivot_values, + *hidden_col_ids, + unpivot_offset_id, + ) + + # Extend the original ordering using unpivot_offset_id + old_ordering = self._ordering + if how == "left": + new_ordering = ExpressionOrdering( + ordering_value_columns=tuple( + [ + *old_ordering.ordering_value_columns, + OrderingColumnReference(unpivot_offset_id), + ] + ), + total_ordering_columns=frozenset( + [*old_ordering.total_ordering_columns, unpivot_offset_id] + ), ) - columns = tuple(result[key] for key in result.columns) 
- expr = CompiledArrayValue(result, columns=columns, ordering=ordering) - if dropna: - for column_id in by_column_ids: - expr = expr._filter( - ops.notnull_op._as_ibis(expr._get_ibis_column(column_id)) - ) - # Can maybe remove this as Ordering id is redundant as by_column is unique after aggregation - return expr._project_offsets() - else: - aggregates = {**stats, ORDER_ID_COLUMN: ibis_types.literal(0)} - result = table.aggregate(**aggregates) - # Ordering is irrelevant for single-row output, but set ordering id regardless as other ops(join etc.) expect it. - ordering = ExpressionOrdering( + else: # how=="right" + new_ordering = ExpressionOrdering( ordering_value_columns=tuple( - [OrderingColumnReference(ORDER_ID_COLUMN)] + [ + OrderingColumnReference(unpivot_offset_id), + *old_ordering.ordering_value_columns, + ] + ), + total_ordering_columns=frozenset( + [*old_ordering.total_ordering_columns, unpivot_offset_id] ), - total_ordering_columns=frozenset([ORDER_ID_COLUMN]), - integer_encoding=IntegerEncoding(is_encoded=True, is_sequential=True), - ) - return CompiledArrayValue( - result, - columns=[result[col_id] for col_id in [*stats.keys()]], - hidden_ordering_columns=[result[ORDER_ID_COLUMN]], - ordering=ordering, ) + value_columns = [ + unpivot_table[value_col_id] for value_col_id, _ in unpivot_columns + ] + passthrough_values = [unpivot_table[col] for col in passthrough_columns] + hidden_ordering_columns = [ + unpivot_table[unpivot_offset_id], + *[unpivot_table[hidden_col] for hidden_col in hidden_col_ids], + ] + return OrderedIR( + table=unpivot_table, + columns=[ + *[unpivot_table[col_id] for col_id in index_col_ids], + *value_columns, + *passthrough_values, + ], + hidden_ordering_columns=hidden_ordering_columns, + ordering=new_ordering, + ) - def corr_aggregate( - self, corr_aggregations: typing.Sequence[typing.Tuple[str, str, str]] - ) -> CompiledArrayValue: - """ - Get correlations between each lef_column_id and right_column_id, stored in the respective output_column_id. - This uses BigQuery's CORR under the hood, and thus only Pearson's method is used. - Arguments: - corr_aggregations: left_column_id, right_column_id, output_column_id tuples - """ - table = self._to_ibis_expr("unordered") - stats = { - col_out: table[col_left].corr(table[col_right], how="pop") - for col_left, col_right, col_out in corr_aggregations - } - aggregates = {**stats, ORDER_ID_COLUMN: ibis_types.literal(0)} - result = table.aggregate(**aggregates) - # Ordering is irrelevant for single-row output, but set ordering id regardless as other ops(join etc.) expect it. 
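
That comment captures a small but load-bearing trick: a full-table aggregate produces exactly one row, so any constant is a valid total ordering key. A hedged sketch of the pattern (names assumed, not from this patch), where `stats` maps output ids to ibis reduction expressions:

import ibis.expr.types as ibis_types

def aggregate_single_row(table, stats):
    # One output row makes ordering trivial, but downstream ops (joins,
    # concat) still expect a total ordering column, so emit a literal 0.
    aggregates = {**stats, "bigframes_ordering_id": ibis_types.literal(0)}
    return table.aggregate(**aggregates)
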
- ordering = ExpressionOrdering( - ordering_value_columns=tuple([OrderingColumnReference(ORDER_ID_COLUMN)]), - total_ordering_columns=frozenset([ORDER_ID_COLUMN]), - integer_encoding=IntegerEncoding(is_encoded=True, is_sequential=True), + def _reproject_to_table(self) -> OrderedIR: + table = self._to_ibis_expr( + ordering_mode="unordered", + expose_hidden_cols=True, ) - return CompiledArrayValue( - result, - columns=[result[col_id] for col_id in [*stats.keys()]], - hidden_ordering_columns=[result[ORDER_ID_COLUMN]], - ordering=ordering, + columns = [table[column_name] for column_name in self._column_names] + ordering_col_ids = [ + ref.column_id for ref in self._ordering.all_ordering_columns + ] + hidden_ordering_columns = [ + table[column_name] + for column_name in self._hidden_ordering_column_names + if column_name in ordering_col_ids + ] + return OrderedIR( + table, + columns=columns, + hidden_ordering_columns=hidden_ordering_columns, + ordering=self._ordering, ) - def project_window_op( - self, - column_name: str, - op: agg_ops.WindowOp, - window_spec: WindowSpec, - output_name=None, - *, - never_skip_nulls=False, - skip_reproject_unsafe: bool = False, - ) -> CompiledArrayValue: - """ - Creates a new expression based on this expression with unary operation applied to one column. - column_name: the id of the input column present in the expression - op: the windowable operator to apply to the input column - window_spec: a specification of the window over which to apply the operator - output_name: the id to assign to the output of the operator, by default will replace input col if distinct output id not provided - never_skip_nulls: will disable null skipping for operators that would otherwise do so - skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection - """ - column = typing.cast(ibis_types.Column, self._get_ibis_column(column_name)) - window = self._ibis_window_from_spec(window_spec, allow_ties=op.handles_ties) - - window_op = op._as_ibis(column, window) - - clauses = [] - if op.skips_nulls and not never_skip_nulls: - clauses.append((column.isnull(), ibis.NA)) - if window_spec.min_periods: - if op.skips_nulls: - # Most operations do not count NULL values towards min_periods - observation_count = agg_ops.count_op._as_ibis(column, window) - else: - # Operations like count treat even NULLs as valid observations for the sake of min_periods - # notnull is just used to convert null values to non-null (FALSE) values to be counted - denulled_value = typing.cast(ibis_types.BooleanColumn, column.notnull()) - observation_count = agg_ops.count_op._as_ibis(denulled_value, window) - clauses.append( - ( - observation_count < ibis_types.literal(window_spec.min_periods), - ibis.NA, - ) - ) - if clauses: - case_statement = ibis.case() - for clause in clauses: - case_statement = case_statement.when(clause[0], clause[1]) - case_statement = case_statement.else_(window_op).end() - window_op = case_statement - - result = self._set_or_replace_by_id(output_name or column_name, window_op) - # TODO(tbergeron): Automatically track analytic expression usage and defer reprojection until required for valid query generation. 
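
The min_periods guard in project_window_op (moved in this patch, not changed) is worth illustrating: the windowed aggregate is wrapped in a CASE that nulls it out whenever the window holds fewer than min_periods observations. A standalone sketch under assumed toy data and window bounds:

import ibis

t = ibis.memtable({"key": [1, 2, 3, 4], "x": [1.0, None, 3.0, 4.0]})
w = ibis.window(order_by=t["key"], preceding=2, following=0)

windowed = t["x"].sum().over(w)
# count() skips NULLs, so it doubles as the per-window observation count.
observed = t["x"].count().over(w)
min_periods = 2
guarded = (
    ibis.case()
    .when(observed < ibis.literal(min_periods), ibis.NA)
    .else_(windowed)
    .end()
)
result = t.select(t["key"], guarded.name("rolling_sum"))
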
- return result._reproject_to_table() if not skip_reproject_unsafe else result - def to_sql( self, offset_column: typing.Optional[str] = None, @@ -654,11 +1287,13 @@ def to_sql( def _to_ibis_expr( self, - ordering_mode: Literal["string_encoded", "offset_col", "unordered"], - order_col_name: Optional[str] = ORDER_ID_COLUMN, + *, expose_hidden_cols: bool = False, fraction: Optional[float] = None, col_id_overrides: typing.Mapping[str, str] = {}, + ordering_mode: Literal["string_encoded", "offset_col", "unordered"], + order_col_name: Optional[str] = ORDER_ID_COLUMN, + **kwargs, ): """ Creates an Ibis table expression representing the DataFrame. @@ -677,16 +1312,16 @@ def _to_ibis_expr( column name will be 'bigframes_ordering_id' Args: + expose_hidden_cols: + If True, include the hidden ordering columns in the results. + Only compatible with `order_by` and `unordered` + ``ordering_mode``. ordering_mode: How to construct the Ibis expression from the ArrayValue. See above for details. order_col_name: If the ordering mode outputs a single ordering or offsets column, use this as the column name. - expose_hidden_cols: - If True, include the hidden ordering columns in the results. - Only compatible with `order_by` and `unordered` - ``ordering_mode``. col_id_overrides: overrides the column ids for the result Returns: @@ -716,27 +1351,122 @@ def _to_ibis_expr( order_columns = self._create_order_columns( ordering_mode, order_col_name, expose_hidden_cols ) - columns.extend(order_columns) - - # Special case for empty tables, since we can't create an empty - # projection. - if not columns: - return ibis.memtable([]) - - # Make sure all dtypes are the "canonical" ones for BigFrames. This is - # important for operations like UNION where the schema must match. - table = self._table.select( - bigframes.dtypes.ibis_value_to_canonical_type(column) for column in columns + columns.extend(order_columns) + + # Special case for empty tables, since we can't create an empty + # projection. + if not columns: + return ibis.memtable([]) + + # Make sure all dtypes are the "canonical" ones for BigFrames. This is + # important for operations like UNION where the schema must match. 
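+        # (The same logical column can surface with slightly different ibis
+        # types depending on where it came from; casting everything through
+        # ibis_value_to_canonical_type keeps both sides of a union aligned.)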
+        table = self._table.select(
+            bigframes.dtypes.ibis_value_to_canonical_type(column) for column in columns
+        )
+        base_table = table
+        if self._reduced_predicate is not None:
+            table = table.filter(base_table[PREDICATE_COLUMN])
+        table = table.drop(*columns_to_drop)
+        if col_id_overrides:
+            table = table.relabel(col_id_overrides)
+        if fraction is not None:
+            table = table.filter(ibis.random() < ibis.literal(fraction))
+        return table
+
+    def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> OrderedIR:
+        """Safely assign by id while maintaining ordering integrity."""
+        # TODO: Split into explicit set and replace methods
+        ordering_col_ids = [
+            col_ref.column_id for col_ref in self._ordering.ordering_value_columns
+        ]
+        if id in ordering_col_ids:
+            return self._hide_column(id)._set_or_replace_by_id(id, new_value)
+
+        builder = self.builder()
+        if id in self.column_ids:
+            builder.columns = [
+                val if (col_id != id) else new_value.name(id)
+                for col_id, val in zip(self.column_ids, self._columns)
+            ]
+        else:
+            builder.columns = [*self.columns, new_value.name(id)]
+        return builder.build()
+
+    def filter(self, predicate_id: str, keep_null: bool = False) -> OrderedIR:
+        condition = typing.cast(
+            ibis_types.BooleanValue, self._get_ibis_column(predicate_id)
+        )
+        if keep_null:
+            condition = typing.cast(
+                ibis_types.BooleanValue,
+                condition.fillna(
+                    typing.cast(ibis_types.BooleanScalar, ibis_types.literal(True))
+                ),
+            )
+        return self._filter(condition)
+
+    def _filter(self, predicate_value: ibis_types.BooleanValue) -> OrderedIR:
+        """Filter the table on a given expression; the predicate must be a boolean series aligned with the table expression."""
+        expr = self.builder()
+        expr.ordering = expr.ordering.with_non_sequential()
+        expr.predicates = [*self._predicates, predicate_value]
+        return expr.build()
+
+    ## Ordering specific helpers
+    def _get_any_column(self, key: str) -> ibis_types.Value:
+        """Gets the Ibis expression for a given column. Will also get hidden columns."""
+        all_columns = {**self._column_names, **self._hidden_ordering_column_names}
+        if key not in all_columns.keys():
+            raise ValueError(
+                "Column name {} not in set of values: {}".format(
+                    key, all_columns.keys()
+                )
+            )
+        return typing.cast(ibis_types.Value, all_columns[key])
+
+    def _get_hidden_ordering_column(self, key: str) -> ibis_types.Column:
+        """Gets the Ibis expression for a given hidden column."""
+        if key not in self._hidden_ordering_column_names.keys():
+            raise ValueError(
+                "Column name {} not in set of values: {}".format(
+                    key, self._hidden_ordering_column_names.keys()
+                )
+            )
+        return typing.cast(ibis_types.Column, self._hidden_ordering_column_names[key])
+
+    def _hide_column(self, column_id) -> OrderedIR:
+        """Pushes columns to hidden columns list. Used to hide ordering columns that have been dropped or destructively mutated."""
+        expr_builder = self.builder()
+        # Need to rename the column, as the caller might be creating a new column with the same name but different values.
+        # Could avoid this if callers weren't allowed to choose ids and unique ones were generated in this class instead.
+        new_name = bigframes.core.guid.generate_guid(prefix="bigframes_hidden_")
+        expr_builder.hidden_ordering_columns = [
+            *self._hidden_ordering_columns,
+            self._get_ibis_column(column_id).name(new_name),
+        ]
+        expr_builder.ordering = self._ordering.with_column_remap({column_id: new_name})
+        return expr_builder.build()
+
+    def _project_offsets(self) -> OrderedIR:
+        """Create a new expression that contains offsets. Should only be executed when
+        offsets are needed for an operation. Has no effect on expression semantics."""
+        if self._ordering.is_sequential:
+            return self
+        # TODO(tbergeron): Enforce total ordering
+        table = self._to_ibis_expr(
+            ordering_mode="offset_col", order_col_name=ORDER_ID_COLUMN
+        )
+        columns = [table[column_name] for column_name in self._column_names]
+        ordering = ExpressionOrdering(
+            ordering_value_columns=tuple([OrderingColumnReference(ORDER_ID_COLUMN)]),
+            total_ordering_columns=frozenset([ORDER_ID_COLUMN]),
+            integer_encoding=IntegerEncoding(is_encoded=True, is_sequential=True),
+        )
+        return OrderedIR(
+            table,
+            columns=columns,
+            hidden_ordering_columns=[table[ORDER_ID_COLUMN]],
+            ordering=ordering,
         )
-        base_table = table
-        if self._reduced_predicate is not None:
-            table = table.filter(base_table[PREDICATE_COLUMN])
-        table = table.drop(*columns_to_drop)
-        if col_id_overrides:
-            table = table.relabel(col_id_overrides)
-        if fraction is not None:
-            table = table.filter(ibis.random() < ibis.literal(fraction))
-        return table
 
     def _create_order_columns(
         self,
@@ -789,34 +1519,6 @@ def _create_string_ordering_column(self) -> ibis_types.StringColumn:
         )
         return encode_order_string(row_nums)
 
-    def _reproject_to_table(self) -> CompiledArrayValue:
-        """
-        Internal operators that projects the internal representation into a
-        new ibis table expression where each value column is a direct
-        reference to a column in that table expression. Needed after
-        some operations such as window operations that cannot be used
-        recursively in projections.
-        """
-        table = self._to_ibis_expr(
-            "unordered",
-            expose_hidden_cols=True,
-        )
-        columns = [table[column_name] for column_name in self._column_names]
-        ordering_col_ids = [
-            ref.column_id for ref in self._ordering.all_ordering_columns
-        ]
-        hidden_ordering_columns = [
-            table[column_name]
-            for column_name in self._hidden_ordering_column_names
-            if column_name in ordering_col_ids
-        ]
-        return CompiledArrayValue(
-            table,
-            columns=columns,
-            hidden_ordering_columns=hidden_ordering_columns,
-            ordering=self._ordering,
-        )
-
     def _ibis_window_from_spec(self, window_spec: WindowSpec, allow_ties: bool = False):
         group_by: typing.List[ibis_types.Value] = (
             [
@@ -851,229 +1553,29 @@ def _ibis_window_from_spec(self, window_spec: WindowSpec, allow_ties: bool = Fal
             group_by=group_by,
         )
 
-    def unpivot(
-        self,
-        row_labels: typing.Sequence[typing.Hashable],
-        unpivot_columns: typing.Sequence[
-            typing.Tuple[str, typing.Sequence[typing.Optional[str]]]
-        ],
-        *,
-        passthrough_columns: typing.Sequence[str] = (),
-        index_col_ids: typing.Sequence[str] = ["index"],
-        dtype: typing.Union[
-            bigframes.dtypes.Dtype, typing.Sequence[bigframes.dtypes.Dtype]
-        ] = pandas.Float64Dtype(),
-        how="left",
-    ) -> CompiledArrayValue:
-        """
-        Unpivot ArrayValue columns.
-
-        Args:
-            row_labels: Identifies the source of the row. Must be equal to length to source column list in unpivot_columns argument.
-            unpivot_columns: Mapping of column id to list of input column ids. Lists of input columns may use None.
-            passthrough_columns: Columns that will not be unpivoted. Column id will be preserved.
-            index_col_id (str): The column id to be used for the row labels.
-            dtype (dtype or list of dtype): Dtype to use for the unpivot columns. If list, must be equal in number to unpivot_columns.
- - Returns: - ArrayValue: The unpivoted ArrayValue - """ - if how not in ("left", "right"): - raise ValueError("'how' must be 'left' or 'right'") - table = self._to_ibis_expr("unordered", expose_hidden_cols=True) - row_n = len(row_labels) - hidden_col_ids = self._hidden_ordering_column_names.keys() - if not all( - len(source_columns) == row_n for _, source_columns in unpivot_columns - ): - raise ValueError("Columns and row labels must all be same length.") - - unpivot_offset_id = bigframes.core.guid.generate_guid("unpivot_offsets_") - unpivot_table = table.cross_join( - ibis.memtable({unpivot_offset_id: range(row_n)}) - ) - # Use ibis memtable to infer type of rowlabels (if possible) - # TODO: Allow caller to specify dtype - if isinstance(row_labels[0], tuple): - labels_table = ibis.memtable(row_labels) - labels_ibis_types = [ - labels_table[col].type() for col in labels_table.columns - ] - else: - labels_ibis_types = [ibis.memtable({"col": row_labels})["col"].type()] - labels_dtypes = [ - bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_type) - for ibis_type in labels_ibis_types - ] - - label_columns = [] - for label_part, (col_id, label_dtype) in enumerate( - zip(index_col_ids, labels_dtypes) + class Builder: + def __init__( + self, + table: ibis_types.Table, + ordering: ExpressionOrdering, + columns: Collection[ibis_types.Value] = (), + hidden_ordering_columns: Collection[ibis_types.Value] = (), + predicates: Optional[Collection[ibis_types.BooleanValue]] = None, ): - # interpret as tuples even if it wasn't originally so can apply same logic for multi-column labels - labels_as_tuples = [ - label if isinstance(label, tuple) else (label,) for label in row_labels - ] - cases = [ - ( - i, - bigframes.dtypes.literal_to_ibis_scalar( - label_tuple[label_part], # type:ignore - force_dtype=label_dtype, # type:ignore - ), - ) - for i, label_tuple in enumerate(labels_as_tuples) - ] - labels_value = ( - typing.cast(ibis_types.IntegerColumn, unpivot_table[unpivot_offset_id]) - .cases(cases, default=None) # type:ignore - .name(col_id) - ) - label_columns.append(labels_value) - - unpivot_values = [] - for j in range(len(unpivot_columns)): - col_dtype = dtype[j] if utils.is_list_like(dtype) else dtype - result_col, source_cols = unpivot_columns[j] - null_value = bigframes.dtypes.literal_to_ibis_scalar( - None, force_dtype=col_dtype - ) - ibis_values = [ - ops.AsTypeOp(col_dtype)._as_ibis(unpivot_table[col]) - if col is not None - else null_value - for col in source_cols - ] - cases = [(i, ibis_values[i]) for i in range(len(ibis_values))] - unpivot_value = typing.cast( - ibis_types.IntegerColumn, unpivot_table[unpivot_offset_id] - ).cases( - cases, default=null_value # type:ignore - ) - unpivot_values.append(unpivot_value.name(result_col)) - - unpivot_table = unpivot_table.select( - passthrough_columns, - *label_columns, - *unpivot_values, - *hidden_col_ids, - unpivot_offset_id, - ) - - # Extend the original ordering using unpivot_offset_id - old_ordering = self._ordering - if how == "left": - new_ordering = ExpressionOrdering( - ordering_value_columns=tuple( - [ - *old_ordering.ordering_value_columns, - OrderingColumnReference(unpivot_offset_id), - ] - ), - total_ordering_columns=frozenset( - [*old_ordering.total_ordering_columns, unpivot_offset_id] - ), - ) - else: # how=="right" - new_ordering = ExpressionOrdering( - ordering_value_columns=tuple( - [ - OrderingColumnReference(unpivot_offset_id), - *old_ordering.ordering_value_columns, - ] - ), - total_ordering_columns=frozenset( - 
[*old_ordering.total_ordering_columns, unpivot_offset_id] - ), - ) - value_columns = [ - unpivot_table[value_col_id] for value_col_id, _ in unpivot_columns - ] - passthrough_values = [unpivot_table[col] for col in passthrough_columns] - hidden_ordering_columns = [ - unpivot_table[unpivot_offset_id], - *[unpivot_table[hidden_col] for hidden_col in hidden_col_ids], - ] - return CompiledArrayValue( - table=unpivot_table, - columns=[ - *[unpivot_table[col_id] for col_id in index_col_ids], - *value_columns, - *passthrough_values, - ], - hidden_ordering_columns=hidden_ordering_columns, - ordering=new_ordering, - ) - - def assign(self, source_id: str, destination_id: str) -> CompiledArrayValue: - return self._set_or_replace_by_id( - destination_id, self._get_ibis_column(source_id) - ) - - def assign_constant( - self, - destination_id: str, - value: typing.Any, - dtype: typing.Optional[bigframes.dtypes.Dtype], - ) -> CompiledArrayValue: - # TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis. - ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype) - if ibis_value is None: - raise NotImplementedError( - f"Type not supported as scalar value {type(value)}. {constants.FEEDBACK_LINK}" + self.table = table + self.columns = list(columns) + self.hidden_ordering_columns = list(hidden_ordering_columns) + self.ordering = ordering + self.predicates = list(predicates) if predicates is not None else None + + def build(self) -> OrderedIR: + return OrderedIR( + table=self.table, + columns=self.columns, + hidden_ordering_columns=self.hidden_ordering_columns, + ordering=self.ordering, + predicates=self.predicates, ) - expr = self._set_or_replace_by_id(destination_id, ibis_value) - return expr._reproject_to_table() - - def _set_or_replace_by_id( - self, id: str, new_value: ibis_types.Value - ) -> CompiledArrayValue: - """Safely assign by id while maintaining ordering integrity.""" - # TODO: Split into explicit set and replace methods - ordering_col_ids = [ - col_ref.column_id for col_ref in self._ordering.ordering_value_columns - ] - if id in ordering_col_ids: - return self._hide_column(id)._set_or_replace_by_id(id, new_value) - - builder = self.builder() - if id in self.column_ids: - builder.columns = [ - val if (col_id != id) else new_value.name(id) - for col_id, val in zip(self.column_ids, self._columns) - ] - else: - builder.columns = [*self.columns, new_value.name(id)] - return builder.build() - - -class ArrayValueBuilder: - """Mutable expression class. - Use ArrayValue.builder() to create from a ArrayValue object. 
- """ - - def __init__( - self, - table: ibis_types.Table, - ordering: ExpressionOrdering, - columns: Collection[ibis_types.Value] = (), - hidden_ordering_columns: Collection[ibis_types.Value] = (), - predicates: Optional[Collection[ibis_types.BooleanValue]] = None, - ): - self.table = table - self.columns = list(columns) - self.hidden_ordering_columns = list(hidden_ordering_columns) - self.ordering = ordering - self.predicates = list(predicates) if predicates is not None else None - - def build(self) -> CompiledArrayValue: - return CompiledArrayValue( - table=self.table, - columns=self.columns, - hidden_ordering_columns=self.hidden_ordering_columns, - ordering=self.ordering, - predicates=self.predicates, - ) def _reduce_predicate_list( diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 195d830122..5959695b30 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -19,7 +19,8 @@ import pandas as pd -import bigframes.core.compile as compiled +import bigframes.core.compile.compiled as compiled +import bigframes.core.compile.concat as concat_impl import bigframes.core.compile.single_column import bigframes.core.nodes as nodes @@ -28,120 +29,160 @@ import bigframes.session +def compile_ordered(node: nodes.BigFrameNode) -> compiled.OrderedIR: + return typing.cast(compiled.OrderedIR, compile_node(node, True)) + + +def compile_unordered(node: nodes.BigFrameNode) -> compiled.UnorderedIR: + return typing.cast(compiled.UnorderedIR, compile_node(node, False)) + + @functools.cache -def compile_node(node: nodes.BigFrameNode) -> compiled.CompiledArrayValue: +def compile_node( + node: nodes.BigFrameNode, ordered: bool = True +) -> compiled.UnorderedIR | compiled.OrderedIR: """Compile node into CompileArrayValue. 
Caches result.""" - return _compile_node(node) + return _compile_node(node, ordered) @functools.singledispatch -def _compile_node(node: nodes.BigFrameNode) -> compiled.CompiledArrayValue: +def _compile_node( + node: nodes.BigFrameNode, ordered: bool = True +) -> compiled.UnorderedIR: """Defines transformation but isn't cached, always use compile_node instead""" raise ValueError(f"Can't compile unnrecognized node: {node}") @_compile_node.register -def compile_join(node: nodes.JoinNode): - compiled_left = compile_node(node.left_child) - compiled_right = compile_node(node.right_child) - return bigframes.core.compile.single_column.join_by_column( - compiled_left, - node.left_column_ids, - compiled_right, - node.right_column_ids, - how=node.how, - allow_row_identity_join=node.allow_row_identity_join, - ) +def compile_join(node: nodes.JoinNode, ordered: bool = True): + if ordered: + left_ordered = compile_ordered(node.left_child) + right_ordered = compile_ordered(node.right_child) + return bigframes.core.compile.single_column.join_by_column_ordered( + left_ordered, + node.left_column_ids, + right_ordered, + node.right_column_ids, + how=node.how, + allow_row_identity_join=node.allow_row_identity_join, + ) + else: + left_unordered = compile_unordered(node.left_child) + right_unordered = compile_unordered(node.right_child) + return bigframes.core.compile.single_column.join_by_column_unordered( + left_unordered, + node.left_column_ids, + right_unordered, + node.right_column_ids, + how=node.how, + allow_row_identity_join=node.allow_row_identity_join, + ) @_compile_node.register -def compile_select(node: nodes.SelectNode): - return compile_node(node.child).select_columns(node.column_ids) +def compile_select(node: nodes.SelectNode, ordered: bool = True): + return compile_node(node.child, ordered).select_columns(node.column_ids) @_compile_node.register -def compile_drop(node: nodes.DropColumnsNode): - return compile_node(node.child).drop_columns(node.columns) +def compile_drop(node: nodes.DropColumnsNode, ordered: bool = True): + return compile_node(node.child, ordered).drop_columns(node.columns) @_compile_node.register -def compile_readlocal(node: nodes.ReadLocalNode): +def compile_readlocal(node: nodes.ReadLocalNode, ordered: bool = True): array_as_pd = pd.read_feather(io.BytesIO(node.feather_bytes)) - return compiled.CompiledArrayValue.mem_expr_from_pandas(array_as_pd) + if ordered: + return compiled.OrderedIR.from_pandas(array_as_pd) + else: + return compiled.UnorderedIR.from_pandas(array_as_pd) @_compile_node.register -def compile_readgbq(node: nodes.ReadGbqNode): - return compiled.CompiledArrayValue( - node.table, - node.columns, - node.hidden_ordering_columns, - node.ordering, - ) +def compile_readgbq(node: nodes.ReadGbqNode, ordered: bool = True): + if ordered: + return compiled.OrderedIR( + node.table, + node.columns, + node.hidden_ordering_columns, + node.ordering, + ) + else: + return compiled.UnorderedIR( + node.table, + node.columns, + ) @_compile_node.register -def compile_promote_offsets(node: nodes.PromoteOffsetsNode): - return compile_node(node.child).promote_offsets(node.col_id) +def compile_promote_offsets(node: nodes.PromoteOffsetsNode, ordered: bool = True): + result = compile_node(node.child, True).promote_offsets(node.col_id) + return result if ordered else result.to_unordered() @_compile_node.register -def compile_filter(node: nodes.FilterNode): - return compile_node(node.child).filter(node.predicate_id, node.keep_null) +def compile_filter(node: nodes.FilterNode, ordered: bool = 
True): + return compile_node(node.child, ordered).filter(node.predicate_id, node.keep_null) @_compile_node.register -def compile_orderby(node: nodes.OrderByNode): - return compile_node(node.child).order_by(node.by, node.stable) +def compile_orderby(node: nodes.OrderByNode, ordered: bool = True): + return compile_node(node.child, ordered).order_by(node.by, node.stable) @_compile_node.register -def compile_reversed(node: nodes.ReversedNode): - return compile_node(node.child).reversed() +def compile_reversed(node: nodes.ReversedNode, ordered: bool = True): + return compile_node(node.child, ordered).reversed() @_compile_node.register -def compile_project_unary(node: nodes.ProjectUnaryOpNode): - return compile_node(node.child).project_unary_op( +def compile_project_unary(node: nodes.ProjectUnaryOpNode, ordered: bool = True): + return compile_node(node.child, ordered).project_unary_op( node.input_id, node.op, node.output_id ) @_compile_node.register -def compile_project_binary(node: nodes.ProjectBinaryOpNode): - return compile_node(node.child).project_binary_op( +def compile_project_binary(node: nodes.ProjectBinaryOpNode, ordered: bool = True): + return compile_node(node.child, ordered).project_binary_op( node.left_input_id, node.right_input_id, node.op, node.output_id ) @_compile_node.register -def compile_project_ternary(node: nodes.ProjectTernaryOpNode): - return compile_node(node.child).project_ternary_op( +def compile_project_ternary(node: nodes.ProjectTernaryOpNode, ordered: bool = True): + return compile_node(node.child, ordered).project_ternary_op( node.input_id1, node.input_id2, node.input_id3, node.op, node.output_id ) @_compile_node.register -def compile_concat(node: nodes.ConcatNode): - compiled_nodes = [compile_node(node) for node in node.children] - return compiled_nodes[0].concat(compiled_nodes[1:]) +def compile_concat(node: nodes.ConcatNode, ordered: bool = True): + if ordered: + compiled_ordered = [compile_ordered(node) for node in node.children] + return concat_impl.concat_ordered(compiled_ordered) + else: + compiled_unordered = [compile_unordered(node) for node in node.children] + return concat_impl.concat_unordered(compiled_unordered) @_compile_node.register -def compile_aggregate(node: nodes.AggregateNode): - return compile_node(node.child).aggregate( +def compile_aggregate(node: nodes.AggregateNode, ordered: bool = True): + result = compile_node(node.child, False).aggregate( node.aggregations, node.by_column_ids, node.dropna ) + return result if ordered else result.to_unordered() @_compile_node.register -def compile_corr(node: nodes.CorrNode): - return compile_node(node.child).corr_aggregate(node.corr_aggregations) +def compile_corr(node: nodes.CorrNode, ordered: bool = True): + result = compile_node(node.child, False).corr_aggregate(node.corr_aggregations) + return result if ordered else result.to_unordered() @_compile_node.register -def compile_window(node: nodes.WindowOpNode): - return compile_node(node.child).project_window_op( +def compile_window(node: nodes.WindowOpNode, ordered: bool = True): + result = compile_node(node.child, True).project_window_op( node.column_name, node.op, node.window_spec, @@ -149,16 +190,17 @@ def compile_window(node: nodes.WindowOpNode): never_skip_nulls=node.never_skip_nulls, skip_reproject_unsafe=node.skip_reproject_unsafe, ) + return result if ordered else result.to_unordered() @_compile_node.register -def compile_reproject(node: nodes.ReprojectOpNode): - return compile_node(node.child)._reproject_to_table() +def compile_reproject(node: 
nodes.ReprojectOpNode, ordered: bool = True): + return compile_node(node.child, ordered)._reproject_to_table() @_compile_node.register -def compile_unpivot(node: nodes.UnpivotNode): - return compile_node(node.child).unpivot( +def compile_unpivot(node: nodes.UnpivotNode, ordered: bool = True): + return compile_node(node.child, ordered).unpivot( node.row_labels, node.unpivot_columns, passthrough_columns=node.passthrough_columns, @@ -169,17 +211,17 @@ def compile_unpivot(node: nodes.UnpivotNode): @_compile_node.register -def compile_assign(node: nodes.AssignNode): - return compile_node(node.child).assign(node.source_id, node.destination_id) +def compile_assign(node: nodes.AssignNode, ordered: bool = True): + return compile_node(node.child, ordered).assign(node.source_id, node.destination_id) @_compile_node.register -def compile_assign_constant(node: nodes.AssignConstantNode): - return compile_node(node.child).assign_constant( +def compile_assign_constant(node: nodes.AssignConstantNode, ordered: bool = True): + return compile_node(node.child, ordered).assign_constant( node.destination_id, node.value, node.dtype ) @_compile_node.register -def compiler_random_sample(node: nodes.RandomSampleNode): - return compile_node(node.child)._uniform_sampling(node.fraction) +def compiler_random_sample(node: nodes.RandomSampleNode, ordered: bool = True): + return compile_node(node.child, ordered)._uniform_sampling(node.fraction) diff --git a/bigframes/core/compile/concat.py b/bigframes/core/compile/concat.py new file mode 100644 index 0000000000..d39569370e --- /dev/null +++ b/bigframes/core/compile/concat.py @@ -0,0 +1,100 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +import math +import typing + +import ibis + +import bigframes.core.compile.compiled as compiled +from bigframes.core.ordering import ( + ExpressionOrdering, + OrderingColumnReference, + reencode_order_string, + StringEncoding, +) + +ORDER_ID_COLUMN = "bigframes_ordering_id" + + +def concat_unordered( + items: typing.Sequence[compiled.UnorderedIR], +) -> compiled.UnorderedIR: + """Append together multiple ArrayValue objects.""" + if len(items) == 1: + return items[0] + tables = [] + for expr in items: + table = expr._to_ibis_expr() + # Rename the value columns based on horizontal offset before applying union. 
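+        # (Every input ends up with an identical positional schema such as
+        # ["column_0", "column_1", ...]; ibis.union requires matching schemas,
+        # and callers are expected to have aligned the inputs column-wise.)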
+ table = table.select( + [table[col].name(f"column_{i}") for i, col in enumerate(table.columns)] + ) + tables.append(table) + combined_table = ibis.union(*tables) + return compiled.UnorderedIR( + combined_table, + columns=[combined_table[col] for col in combined_table.columns], + ) + + +def concat_ordered( + items: typing.Sequence[compiled.OrderedIR], +) -> compiled.OrderedIR: + """Append together multiple ArrayValue objects.""" + if len(items) == 1: + return items[0] + + tables = [] + prefix_base = 10 + prefix_size = math.ceil(math.log(len(items), prefix_base)) + # Must normalize all ids to the same encoding size + max_encoding_size = max( + *[expression._ordering.string_encoding.length for expression in items], + ) + for i, expr in enumerate(items): + ordering_prefix = str(i).zfill(prefix_size) + table = expr._to_ibis_expr( + ordering_mode="string_encoded", order_col_name=ORDER_ID_COLUMN + ) + # Rename the value columns based on horizontal offset before applying union. + table = table.select( + [ + table[col].name(f"column_{i}") + if col != ORDER_ID_COLUMN + else ( + ordering_prefix + + reencode_order_string(table[ORDER_ID_COLUMN], max_encoding_size) + ).name(ORDER_ID_COLUMN) + for i, col in enumerate(table.columns) + ] + ) + tables.append(table) + combined_table = ibis.union(*tables) + ordering = ExpressionOrdering( + ordering_value_columns=tuple([OrderingColumnReference(ORDER_ID_COLUMN)]), + total_ordering_columns=frozenset([ORDER_ID_COLUMN]), + string_encoding=StringEncoding(True, prefix_size + max_encoding_size), + ) + return compiled.OrderedIR( + combined_table, + columns=[ + combined_table[col] + for col in combined_table.columns + if col != ORDER_ID_COLUMN + ], + hidden_ordering_columns=[combined_table[ORDER_ID_COLUMN]], + ordering=ordering, + ) diff --git a/bigframes/core/compile/row_identity.py b/bigframes/core/compile/row_identity.py index 2e9bc0527c..71d53f90dc 100644 --- a/bigframes/core/compile/row_identity.py +++ b/bigframes/core/compile/row_identity.py @@ -23,16 +23,76 @@ import ibis.expr.types as ibis_types import bigframes.constants as constants -import bigframes.core.compile as compiled +import bigframes.core.compile.compiled as compiled import bigframes.core.joins.name_resolution as naming import bigframes.core.ordering as orderings SUPPORTED_ROW_IDENTITY_HOW = {"outer", "left", "inner"} -def join_by_row_identity( - left: compiled.CompiledArrayValue, right: compiled.CompiledArrayValue, *, how: str -) -> compiled.CompiledArrayValue: +def join_by_row_identity_unordered( + left: compiled.UnorderedIR, + right: compiled.UnorderedIR, + *, + how: str, +) -> compiled.UnorderedIR: + """Compute join when we are joining by row identity not a specific column.""" + if how not in SUPPORTED_ROW_IDENTITY_HOW: + raise NotImplementedError( + f"Only how='outer','left','inner' currently supported. {constants.FEEDBACK_LINK}" + ) + + if not left._table.equals(right._table): + raise ValueError( + "Cannot combine objects without an explicit join/merge key. 
" + f"Left based on: {left._table.compile()}, but " + f"right based on: {right._table.compile()}" + ) + + left_predicates = left._predicates + right_predicates = right._predicates + # TODO(tbergeron): Skip generating these for inner part of join + ( + left_relative_predicates, + right_relative_predicates, + ) = _get_relative_predicates(left_predicates, right_predicates) + + combined_predicates = [] + if left_predicates or right_predicates: + joined_predicates = _join_predicates( + left_predicates, right_predicates, join_type=how + ) + combined_predicates = list(joined_predicates) # builder expects mutable list + + left_mask = left_relative_predicates if how in ["right", "outer"] else None + right_mask = right_relative_predicates if how in ["left", "outer"] else None + + # Public mapping must use JOIN_NAME_REMAPPER to stay in sync with consumers of join result + map_left_id, map_right_id = naming.JOIN_NAME_REMAPPER( + left.column_ids, right.column_ids + ) + joined_columns = [ + _mask_value(left._get_ibis_column(key), left_mask).name(map_left_id[key]) + for key in left.column_ids + ] + [ + _mask_value(right._get_ibis_column(key), right_mask).name(map_right_id[key]) + for key in right.column_ids + ] + + joined_expr = compiled.UnorderedIR( + left._table, + columns=joined_columns, + predicates=combined_predicates, + ) + return joined_expr + + +def join_by_row_identity_ordered( + left: compiled.OrderedIR, + right: compiled.OrderedIR, + *, + how: str, +) -> compiled.OrderedIR: """Compute join when we are joining by row identity not a specific column.""" if how not in SUPPORTED_ROW_IDENTITY_HOW: raise NotImplementedError( @@ -118,7 +178,7 @@ def join_by_row_identity( if key.column_id in right._hidden_ordering_column_names.keys() ] - joined_expr = compiled.CompiledArrayValue( + joined_expr = compiled.OrderedIR( left._table, columns=joined_columns, hidden_ordering_columns=hidden_ordering_columns, diff --git a/bigframes/core/compile/single_column.py b/bigframes/core/compile/single_column.py index b992aa1d1d..cf206ae95f 100644 --- a/bigframes/core/compile/single_column.py +++ b/bigframes/core/compile/single_column.py @@ -23,16 +23,16 @@ import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types -import bigframes.core.compile as compiled +import bigframes.core.compile.compiled as compiled import bigframes.core.compile.row_identity import bigframes.core.joins as joining import bigframes.core.ordering as orderings -def join_by_column( - left: compiled.CompiledArrayValue, +def join_by_column_ordered( + left: compiled.OrderedIR, left_column_ids: typing.Sequence[str], - right: compiled.CompiledArrayValue, + right: compiled.OrderedIR, right_column_ids: typing.Sequence[str], *, how: Literal[ @@ -42,7 +42,7 @@ def join_by_column( "right", ], allow_row_identity_join: bool = True, -) -> compiled.CompiledArrayValue: +) -> compiled.OrderedIR: """Join two expressions by column equality. Arguments: @@ -67,13 +67,13 @@ def join_by_column( # regards to value its possible that they both have the same names but # were modified in different ways. Ignore differences in the names. 
and all( - left._get_any_column(lcol) + left._get_ibis_column(lcol) .name("index") - .equals(right._get_any_column(rcol).name("index")) + .equals(right._get_ibis_column(rcol).name("index")) for lcol, rcol in zip(left_column_ids, right_column_ids) ) ): - return bigframes.core.compile.row_identity.join_by_row_identity( + return bigframes.core.compile.row_identity.join_by_row_identity_ordered( left, right, how=how ) else: @@ -88,12 +88,12 @@ def join_by_column( r_mapping = {**r_public_mapping, **r_hidden_mapping} left_table = left._to_ibis_expr( - "unordered", + ordering_mode="unordered", expose_hidden_cols=True, col_id_overrides=l_mapping, ) right_table = right._to_ibis_expr( - "unordered", + ordering_mode="unordered", expose_hidden_cols=True, col_id_overrides=r_mapping, ) @@ -134,7 +134,7 @@ def join_by_column( for col in right._hidden_ordering_columns ], ] - return compiled.CompiledArrayValue( + return compiled.OrderedIR( combined_table, columns=columns, hidden_ordering_columns=hidden_ordering_columns, @@ -142,6 +142,87 @@ def join_by_column( ) +def join_by_column_unordered( + left: compiled.UnorderedIR, + left_column_ids: typing.Sequence[str], + right: compiled.UnorderedIR, + right_column_ids: typing.Sequence[str], + *, + how: Literal[ + "inner", + "left", + "outer", + "right", + ], + allow_row_identity_join: bool = True, +) -> compiled.UnorderedIR: + """Join two expressions by column equality. + + Arguments: + left: Expression for left table to join. + left_column_ids: Column IDs (not label) to join by. + right: Expression for right table to join. + right_column_ids: Column IDs (not label) to join by. + how: The type of join to perform. + allow_row_identity_join (bool): + If True, allow matching by row identity. Set to False to always + perform a true JOIN in generated SQL. + Returns: + The joined expression. The resulting columns will be, in order, + first the coalesced join keys, then, all the left columns, and + finally, all the right columns. + """ + if ( + allow_row_identity_join + and how in bigframes.core.compile.row_identity.SUPPORTED_ROW_IDENTITY_HOW + and left._table.equals(right._table) + # Make sure we're joining on exactly the same column(s), at least with + # regards to value its possible that they both have the same names but + # were modified in different ways. Ignore differences in the names. + and all( + left._get_ibis_column(lcol) + .name("index") + .equals(right._get_ibis_column(rcol).name("index")) + for lcol, rcol in zip(left_column_ids, right_column_ids) + ) + ): + return bigframes.core.compile.row_identity.join_by_row_identity_unordered( + left, right, how=how + ) + else: + # Value column mapping must use JOIN_NAME_REMAPPER to stay in sync with consumers of join result + l_mapping, r_mapping = joining.JOIN_NAME_REMAPPER( + left.column_ids, right.column_ids + ) + left_table = left._to_ibis_expr( + col_id_overrides=l_mapping, + ) + right_table = right._to_ibis_expr( + col_id_overrides=r_mapping, + ) + join_conditions = [ + value_to_join_key(left_table[l_mapping[left_index]]) + == value_to_join_key(right_table[r_mapping[right_index]]) + for left_index, right_index in zip(left_column_ids, right_column_ids) + ] + + combined_table = ibis.join( + left_table, + right_table, + predicates=join_conditions, + how=how, + ) + # We could filter out the original join columns, but predicates/ordering + # might still reference them in implicit joins. 
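+        # (The remapped copies of both sides' key columns therefore stay in
+        # the output, addressable through the same JOIN_NAME_REMAPPER ids as
+        # in the ordered variant above.)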
+ columns = [ + combined_table[l_mapping[col.get_name()]] for col in left.columns + ] + [combined_table[r_mapping[col.get_name()]] for col in right.columns] + return compiled.UnorderedIR( + combined_table, + columns=columns, + ) + + def value_to_join_key(value: ibis_types.Value): """Converts nullable values to non-null string SQL will not match null keys together - but pandas does.""" if not value.type().is_string(): diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 01117d3e0a..db68033c51 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -857,6 +857,8 @@ def to_pandas( max_download_size: Optional[int] = None, sampling_method: Optional[str] = None, random_state: Optional[int] = None, + *, + ordered: bool = True, ) -> pandas.DataFrame: """Write DataFrame to pandas DataFrame. @@ -876,6 +878,9 @@ def to_pandas( The seed for the uniform downsampling algorithm. If provided, the uniform method may take longer to execute and require more computation. If set to a value other than None, this will supersede the global config. + ordered (bool, default True): + Determines whether the resulting dataframe will be sorted. In some cases, + unordered may result in a faster-executing query. Returns: pandas.DataFrame: A pandas DataFrame with all rows and columns of this DataFrame if the @@ -887,6 +892,7 @@ def to_pandas( max_download_size=max_download_size, sampling_method=sampling_method, random_state=random_state, + ordered=ordered, ) self._set_internal_query_job(query_job) return df.set_axis(self._block.column_labels, axis=1, copy=False) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 473de62f53..8661678588 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -599,10 +599,8 @@ def _read_gbq_table( is_total_ordering = True ordering = orderings.ExpressionOrdering( ordering_value_columns=tuple( - [ - core.OrderingColumnReference(column_id) - for column_id in total_ordering_cols - ] + core.OrderingColumnReference(column_id) + for column_id in total_ordering_cols ), total_ordering_columns=frozenset(total_ordering_cols), ) diff --git a/tests/system/large/ml/test_cluster.py b/tests/system/large/ml/test_cluster.py index eae6896669..cef167d4ac 100644 --- a/tests/system/large/ml/test_cluster.py +++ b/tests/system/large/ml/test_cluster.py @@ -16,7 +16,7 @@ import pytest from bigframes.ml import cluster -from tests.system.utils import assert_pandas_df_equal_ignore_ordering +from tests.system.utils import assert_pandas_df_equal @pytest.mark.flaky(retries=2, delay=120) @@ -105,7 +105,7 @@ def test_cluster_configure_fit_score_predict( index=pd.Index(["test1", "test2", "test3", "test4"], dtype="string[pyarrow]"), ) expected.index.name = "observation" - assert_pandas_df_equal_ignore_ordering(result, expected) + assert_pandas_df_equal(result, expected) # save, load, check n_clusters to ensure configuration was kept reloaded_model = model.to_gbq( diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py index 6874a9f301..3197320047 100644 --- a/tests/system/large/ml/test_pipeline.py +++ b/tests/system/large/ml/test_pipeline.py @@ -24,7 +24,7 @@ pipeline, preprocessing, ) -from tests.system.utils import assert_pandas_df_equal_ignore_ordering +from tests.system.utils import assert_pandas_df_equal def test_pipeline_linear_regression_fit_score_predict( @@ -555,7 +555,7 @@ def test_pipeline_standard_scaler_kmeans_fit_score_predict( ), ) expected.index.name = "observation" - 
assert_pandas_df_equal_ignore_ordering(result, expected) + assert_pandas_df_equal(result, expected) def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_index): diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py index 730a1dbde4..57188b0470 100644 --- a/tests/system/large/test_remote_function.py +++ b/tests/system/large/test_remote_function.py @@ -31,7 +31,7 @@ get_cloud_function_name, get_remote_function_locations, ) -from tests.system.utils import assert_pandas_df_equal_ignore_ordering +from tests.system.utils import assert_pandas_df_equal # Use this to control the number of cloud functions being deleted in a single # test session. This should help soften the spike of the number of mutations per @@ -356,7 +356,7 @@ def square(x): pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets(session.bqclient, functions_client, square) @@ -400,7 +400,7 @@ def add_one(x): pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( @@ -445,7 +445,7 @@ def square(x): pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets(session.bqclient, functions_client, square) @@ -496,7 +496,7 @@ def sign(num): pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets(session.bqclient, functions_client, remote_sign) @@ -541,7 +541,7 @@ def circumference(radius): pd_result_col = pd_result_col.astype(pandas.Float64Dtype()) pd_result = pd_float64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( @@ -590,7 +590,7 @@ def find_team(num): pd_result_col = pd_result_col.astype(pandas.StringDtype(storage="pyarrow")) pd_result = pd_float64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( @@ -674,7 +674,7 @@ def inner_test(): pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) # Test that the remote function works as expected 
inner_test() @@ -764,7 +764,7 @@ def is_odd(num): pd_result_col = pd_int64_col.mask(is_odd) pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( @@ -807,7 +807,7 @@ def is_odd(num): pd_result_col = pd_int64_col[pd_int64_col.notnull()].mask(is_odd, -1) pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( @@ -851,7 +851,7 @@ def test_remote_udf_lambda( pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( @@ -908,7 +908,7 @@ def square(x): pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( @@ -953,7 +953,7 @@ def pd_np_foo(x): # comparing for the purpose of this test pd_result.result = pd_result.result.astype(pandas.Float64Dtype()) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) finally: # clean up the gcp assets created for the remote function cleanup_remote_function_assets( @@ -997,7 +997,7 @@ def test_internal(rf, udf): pd_result_col = pd_result_col.astype(pandas.Int64Dtype()) pd_result = pd_int64_col.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) # Create an explicit name for the remote function prefixer = test_utils.prefixer.Prefixer("foo", "") diff --git a/tests/system/small/ml/test_cluster.py b/tests/system/small/ml/test_cluster.py index d95a1e1bc2..a3af71892f 100644 --- a/tests/system/small/ml/test_cluster.py +++ b/tests/system/small/ml/test_cluster.py @@ -15,7 +15,7 @@ import pandas as pd from bigframes.ml import cluster -from tests.system.utils import assert_pandas_df_equal_ignore_ordering +from tests.system.utils import assert_pandas_df_equal _PD_NEW_PENGUINS = pd.DataFrame.from_dict( { @@ -68,7 +68,7 @@ def test_kmeans_predict(session, penguins_kmeans_model: cluster.KMeans): dtype="Int64", index=pd.Index(["test1", "test2", "test3", "test4"], dtype="string[pyarrow]"), ) - assert_pandas_df_equal_ignore_ordering(result, expected) + assert_pandas_df_equal(result, expected) def test_kmeans_score(session, penguins_kmeans_model: cluster.KMeans): diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py index f911dd7eeb..cb6507e4e3 100644 --- a/tests/system/small/ml/test_core.py +++ b/tests/system/small/ml/test_core.py @@ -225,7 +225,7 @@ def test_pca_model_principal_component_info(penguins_bqml_pca_model: core.BqmlMo "cumulative_explained_variance_ratio": [0.469357, 0.651283, 0.812383], }, ) - tests.system.utils.assert_pandas_df_equal_ignore_ordering( + tests.system.utils.assert_pandas_df_equal( result, 
expected, check_exact=False, diff --git a/tests/system/small/ml/test_decomposition.py b/tests/system/small/ml/test_decomposition.py index e31681f4a0..b46b3d103d 100644 --- a/tests/system/small/ml/test_decomposition.py +++ b/tests/system/small/ml/test_decomposition.py @@ -130,7 +130,7 @@ def test_pca_explained_variance_(penguins_pca_model: decomposition.PCA): "explained_variance": [3.278657, 1.270829, 1.125354], }, ) - tests.system.utils.assert_pandas_df_equal_ignore_ordering( + tests.system.utils.assert_pandas_df_equal( result, expected, check_exact=False, @@ -149,7 +149,7 @@ def test_pca_explained_variance_ratio_(penguins_pca_model: decomposition.PCA): "explained_variance_ratio": [0.469357, 0.181926, 0.1611], }, ) - tests.system.utils.assert_pandas_df_equal_ignore_ordering( + tests.system.utils.assert_pandas_df_equal( result, expected, check_exact=False, diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index e459e3bee3..9494723ef7 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -29,7 +29,7 @@ import bigframes.dataframe as dataframe import bigframes.series as series from tests.system.utils import ( - assert_pandas_df_equal_ignore_ordering, + assert_pandas_df_equal, assert_series_equal_ignoring_order, ) @@ -246,7 +246,7 @@ def test_drop_with_custom_column_labels(scalars_dfs): pd_result = scalars_pandas_df.rename(columns=rename_mapping).drop( columns=dropped_columns ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_drop_index(scalars_dfs): @@ -420,7 +420,7 @@ def test_filter_df(scalars_dfs): pd_bool_series = scalars_pandas_df["bool_col"] pd_result = scalars_pandas_df[pd_bool_series] - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_assign_new_column(scalars_dfs): @@ -433,7 +433,7 @@ def test_assign_new_column(scalars_dfs): # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. pd_result["new_col"] = pd_result["new_col"].astype("Int64") - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_assign_new_column_w_loc(scalars_dfs): @@ -564,7 +564,7 @@ def test_assign_existing_column(scalars_dfs): # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. 
pd_result["int64_col"] = pd_result["int64_col"].astype("Int64") - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_assign_series(scalars_dfs): @@ -574,7 +574,7 @@ def test_assign_series(scalars_dfs): bf_result = df.to_pandas() pd_result = scalars_pandas_df.assign(new_col=scalars_pandas_df[column_name]) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_assign_series_overwrite(scalars_dfs): @@ -586,7 +586,7 @@ def test_assign_series_overwrite(scalars_dfs): **{column_name: scalars_pandas_df[column_name] + 3} ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_assign_sequential(scalars_dfs): @@ -601,7 +601,7 @@ def test_assign_sequential(scalars_dfs): pd_result["new_col"] = pd_result["new_col"].astype("Int64") pd_result["new_col2"] = pd_result["new_col2"].astype("Int64") - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) # Require an index so that the self-join is consistent each time. @@ -635,7 +635,7 @@ def test_assign_different_df( new_col=scalars_pandas_df_index[column_name] ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_assign_different_df_w_loc( @@ -686,7 +686,7 @@ def test_assign_callable_lambda(scalars_dfs): # Convert default pandas dtypes `int64` to match BigQuery DataFrames dtypes. pd_result["new_col"] = pd_result["new_col"].astype("Int64") - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -852,7 +852,7 @@ def test_df_merge(scalars_dfs, merge_how): sort=True, ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @pytest.mark.parametrize( @@ -885,7 +885,7 @@ def test_df_merge_multi_key(scalars_dfs, left_on, right_on): sort=True, ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @pytest.mark.parametrize( @@ -915,7 +915,7 @@ def test_merge_custom_col_name(scalars_dfs, merge_how): pandas_right_df = scalars_pandas_df[right_columns] pd_result = pandas_left_df.merge(pandas_right_df, merge_how, on, sort=True) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @pytest.mark.parametrize( @@ -948,7 +948,7 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how): sort=True, ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) def test_get_dtypes(scalars_df_default_index): @@ -1240,7 +1240,7 @@ def test_df_abs(scalars_dfs): bf_result = scalars_df[columns].abs().to_pandas() pd_result = scalars_pandas_df[columns].abs() - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_df_isnull(scalars_dfs): @@ -1257,7 +1257,7 @@ def test_df_isnull(scalars_dfs): pd_result["string_col"] = pd_result["string_col"].astype(pd.BooleanDtype()) pd_result["bool_col"] = pd_result["bool_col"].astype(pd.BooleanDtype()) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_df_notnull(scalars_dfs): @@ -1274,7 +1274,7 @@ def test_df_notnull(scalars_dfs): 
pd_result["string_col"] = pd_result["string_col"].astype(pd.BooleanDtype()) pd_result["bool_col"] = pd_result["bool_col"].astype(pd.BooleanDtype()) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -1494,7 +1494,7 @@ def test_scalar_binop(scalars_dfs, op, other_scalar, reverse_operands): bf_result = maybe_reversed_op(scalars_df[columns], other_scalar).to_pandas() pd_result = maybe_reversed_op(scalars_pandas_df[columns], other_scalar) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.parametrize(("other_scalar"), [1, -2]) @@ -1506,7 +1506,7 @@ def test_mod(scalars_dfs, other_scalar): bf_result = (scalars_df[["int64_col", "int64_too"]] % other_scalar).to_pandas() pd_result = scalars_pandas_df[["int64_col", "int64_too"]] % other_scalar - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_scalar_binop_str_exception(scalars_dfs): @@ -1562,7 +1562,7 @@ def test_series_binop_axis_index( bf_result = op(scalars_df[df_columns], scalars_df[series_column]).to_pandas() pd_result = op(scalars_pandas_df[df_columns], scalars_pandas_df[series_column]) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -1623,7 +1623,7 @@ def test_series_binop_add_different_table( scalars_pandas_df_index[series_column], axis="index" ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) # TODO(garrettwu): Test series binop with different index @@ -1649,7 +1649,7 @@ def test_join_same_table(scalars_dfs, how): pd_df_a = pd_df.set_index("int64_too")[["string_col", "int64_col"]] pd_df_b = pd_df.set_index("int64_too")[["float64_col"]] pd_result = pd_df_a.join(pd_df_b, how=how) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @all_joins @@ -1662,7 +1662,7 @@ def test_join_different_table( pd_df_a = scalars_pandas_df_index[["string_col", "int64_col"]] pd_df_b = scalars_pandas_df_index.dropna()[["float64_col"]] pd_result = pd_df_a.join(pd_df_b, how=how) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) def test_join_duplicate_columns_raises_not_implemented(scalars_dfs): @@ -1686,7 +1686,7 @@ def test_join_param_on(scalars_dfs, how): pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2) pd_df_b = pd_df[["float64_col"]] pd_result = pd_df_a.join(pd_df_b, on="rowindex_2", how=how) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @pytest.mark.parametrize( @@ -2553,7 +2553,7 @@ def test_df_rows_filter_items(scalars_df_index, scalars_pandas_df_index): # Pandas uses int64 instead of Int64 (nullable) dtype. 
pd_result.index = pd_result.index.astype(pd.Int64Dtype()) # Ignore ordering as pandas order differently depending on version - assert_pandas_df_equal_ignore_ordering( + assert_pandas_df_equal( bf_result, pd_result, check_names=False, diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 8f5d706f62..a235845937 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -19,10 +19,7 @@ import pyarrow as pa import pytest -from tests.system.utils import ( - assert_pandas_df_equal_ignore_ordering, - convert_pandas_dtypes, -) +from tests.system.utils import assert_pandas_df_equal, convert_pandas_dtypes try: import pandas_gbq # type: ignore @@ -380,7 +377,7 @@ def test_to_sql_query_unnamed_index_included( pd_df = scalars_pandas_df_default_index.reset_index(drop=True) roundtrip = session.read_gbq(sql, index_col=idx_ids) roundtrip.index.names = [None] - assert_pandas_df_equal_ignore_ordering(roundtrip.to_pandas(), pd_df) + assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) def test_to_sql_query_named_index_included( @@ -397,7 +394,7 @@ def test_to_sql_query_named_index_included( pd_df = scalars_pandas_df_default_index.set_index("rowindex_2", drop=True) roundtrip = session.read_gbq(sql, index_col=idx_ids) - assert_pandas_df_equal_ignore_ordering(roundtrip.to_pandas(), pd_df) + assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) def test_to_sql_query_unnamed_index_excluded( @@ -412,7 +409,7 @@ def test_to_sql_query_unnamed_index_excluded( pd_df = scalars_pandas_df_default_index.reset_index(drop=True) roundtrip = session.read_gbq(sql) - assert_pandas_df_equal_ignore_ordering(roundtrip.to_pandas(), pd_df) + assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) def test_to_sql_query_named_index_excluded( @@ -429,4 +426,4 @@ def test_to_sql_query_named_index_excluded( "rowindex_2", drop=True ).reset_index(drop=True) roundtrip = session.read_gbq(sql) - assert_pandas_df_equal_ignore_ordering(roundtrip.to_pandas(), pd_df) + assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index a87dacae04..4eadb6fe86 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ -16,7 +16,7 @@ import pytest import bigframes.pandas as bpd -from tests.system.utils import assert_pandas_df_equal_ignore_ordering +from tests.system.utils import assert_pandas_df_equal # Row Multi-index tests @@ -429,7 +429,7 @@ def test_multi_index_dataframe_join(scalars_dfs, how): (["bool_col", "rowindex_2"]) )[["float64_col"]] pd_result = pd_df_a.join(pd_df_b, how=how) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @all_joins @@ -450,7 +450,7 @@ def test_multi_index_dataframe_join_on(scalars_dfs, how): pd_df_a = pd_df_a.assign(rowindex_2=pd_df_a["rowindex_2"] + 2) pd_df_b = pd_df[["float64_col"]] pd_result = pd_df_a.join(pd_df_b, on="rowindex_2", how=how) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @pytest.mark.parametrize( diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index f8fa78587f..56bea42ad5 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -16,7 +16,7 @@ import pytest import bigframes.pandas as bpd -from tests.system.utils import assert_pandas_df_equal_ignore_ordering +from 
tests.system.utils import assert_pandas_df_equal def test_concat_dataframe(scalars_dfs): @@ -140,7 +140,7 @@ def test_merge(scalars_dfs, merge_how): sort=True, ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -174,7 +174,7 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how): sort=True, ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.parametrize( @@ -208,7 +208,7 @@ def test_merge_series(scalars_dfs, merge_how): sort=True, ) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) def test_cut(scalars_dfs): diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index d024a57ded..853ddff78a 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -21,7 +21,7 @@ import bigframes from bigframes import remote_function as rf import bigframes.pandas as bpd -from tests.system.utils import assert_pandas_df_equal_ignore_ordering +from tests.system.utils import assert_pandas_df_equal @pytest.fixture(scope="module") @@ -155,7 +155,7 @@ def square(x): pd_result_col = pd_result_col.astype(pd.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.flaky(retries=2, delay=120) @@ -204,7 +204,7 @@ def square(x): pd_result_col = pd_result_col.astype(pd.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.flaky(retries=2, delay=120) @@ -280,7 +280,7 @@ def square(x): pd_result_col = pd_result_col.astype(pd.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.flaky(retries=2, delay=120) @@ -341,7 +341,7 @@ def square(x): pd_result_col = pd_result_col.astype(pd.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.flaky(retries=2, delay=120) @@ -378,7 +378,7 @@ def square(x): pd_result_col = pd_result_col.astype(pd.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.flaky(retries=2, delay=120) @@ -417,7 +417,7 @@ def square(x): pd_result_col = pd_result_col.astype(pd.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.flaky(retries=2, delay=120) @@ -463,7 +463,7 @@ def square(x): pd_result_col = pd_result_col.astype(pd.Int64Dtype()) pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col) - assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result) @pytest.mark.flaky(retries=2, delay=120) @@ -493,7 +493,7 @@ def square(x): pd_result_col = pd_result_col.astype(pd.Int64Dtype()) pd_result = 
pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
- assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
+ assert_pandas_df_equal(bf_result, pd_result)
@@ -520,7 +520,7 @@ def add_one(x):
for col in pd_result:
pd_result[col] = pd_result[col].astype(pd_int64_df_filtered[col].dtype)
- assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
+ assert_pandas_df_equal(bf_result, pd_result)
@@ -545,7 +545,7 @@ def add_one(x):
for col in pd_result:
pd_result[col] = pd_result[col].astype(pd_int64_df[col].dtype)
- assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
+ assert_pandas_df_equal(bf_result, pd_result)
@@ -631,7 +631,7 @@ def square1(x):
s2_result_col = int64_col_filtered.apply(square2)
s2_result = int64_col_filtered.to_frame().assign(result=s2_result_col)
- assert_pandas_df_equal_ignore_ordering(s1_result.to_pandas(), s2_result.to_pandas())
+ assert_pandas_df_equal(s1_result.to_pandas(), s2_result.to_pandas())
@@ -679,7 +679,7 @@ def test_read_gbq_function_reads_udfs(bigquery_client, scalars_dfs, dataset_id):
indirect_df = indirect_df.assign(y=indirect_df.x.apply(square))
indirect_df = indirect_df.to_pandas()
- assert_pandas_df_equal_ignore_ordering(direct_df, indirect_df)
+ assert_pandas_df_equal(direct_df, indirect_df)
@pytest.mark.flaky(retries=2, delay=120)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index 05d8b84185..5e494fbd21 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -25,7 +25,7 @@ import bigframes.pandas
import bigframes.series as series
from tests.system.utils import (
- assert_pandas_df_equal_ignore_ordering,
+ assert_pandas_df_equal,
assert_series_equal_ignoring_order,
)
@@ -2256,7 +2256,7 @@ def test_to_frame(scalars_dfs):
bf_result = scalars_df["int64_col"].to_frame().to_pandas()
pd_result = scalars_pandas_df["int64_col"].to_frame()
- assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
+ assert_pandas_df_equal(bf_result, pd_result)
def test_to_json(scalars_df_index, scalars_pandas_df_index):
@@ -2424,7 +2424,7 @@ def test_mask_default_value(scalars_dfs):
pd_col_masked = pd_col.mask(pd_col % 2 == 1)
pd_result = pd_col.to_frame().assign(int64_col_masked=pd_col_masked)
- assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
+ assert_pandas_df_equal(bf_result, pd_result)
def test_mask_custom_value(scalars_dfs):
@@ -2442,7 +2442,7 @@
# odd so should be left as is, but it is being masked in pandas.
# Coincidentally, the bigframes behavior matches, but it should be updated
# after the resolution of https://github.com/pandas-dev/pandas/issues/52955
- assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
+ assert_pandas_df_equal(bf_result, pd_result)
@pytest.mark.parametrize(
@@ -2538,7 +2538,7 @@ def test_loc_bool_series_default_index(
scalars_pandas_df_default_index.bool_col
]
- assert_pandas_df_equal_ignore_ordering(
+ assert_pandas_df_equal(
bf_result.to_frame(),
pd_result.to_frame(),
)
diff --git a/tests/system/utils.py b/tests/system/utils.py
index e2daf3b8bf..c68acf10f3 100644
--- a/tests/system/utils.py
+++ b/tests/system/utils.py
@@ -21,18 +21,19 @@ import pyarrow as pa # type: ignore
-def assert_pandas_df_equal_ignore_ordering(df0, df1, **kwargs):
- # Sort by a column to get consistent results.
- if df0.index.name != "rowindex": - df0 = df0.sort_values( - list(df0.columns.drop("geography_col", errors="ignore")) - ).reset_index(drop=True) - df1 = df1.sort_values( - list(df1.columns.drop("geography_col", errors="ignore")) - ).reset_index(drop=True) - else: - df0 = df0.sort_index() - df1 = df1.sort_index() +def assert_pandas_df_equal(df0, df1, ignore_order: bool = False, **kwargs): + if ignore_order: + # Sort by a column to get consistent results. + if df0.index.name != "rowindex": + df0 = df0.sort_values( + list(df0.columns.drop("geography_col", errors="ignore")) + ).reset_index(drop=True) + df1 = df1.sort_values( + list(df1.columns.drop("geography_col", errors="ignore")) + ).reset_index(drop=True) + else: + df0 = df0.sort_index() + df1 = df1.sort_index() pd.testing.assert_frame_equal(df0, df1, **kwargs) diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index d9672b2635..e7026ebd87 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -49,7 +49,7 @@ def test_arrayvalue_constructor_from_ibis_table_adds_all_columns(): ordering=ordering, hidden_ordering_columns=(), ) - assert actual.compile()._table is ibis_table + assert actual._compile()._table is ibis_table assert len(actual.column_ids) == 3 @@ -83,7 +83,7 @@ def test_arrayvalue_with_get_column(): ), total_ordering_columns=["col1"], ) - col1 = value.compile()._get_ibis_column("col1") + col1 = value._compile()._get_ibis_column("col1") assert isinstance(col1, ibis_types.Value) assert col1.get_name() == "col1" assert col1.type().is_int64() @@ -100,7 +100,7 @@ def test_arrayvalues_to_ibis_expr_with_get_column(): ), total_ordering_columns=["col1"], ) - expr = value.compile()._get_ibis_column("col1") + expr = value._compile()._get_ibis_column("col1") assert expr.get_name() == "col1" assert expr.type().is_int64() @@ -117,7 +117,7 @@ def test_arrayvalues_to_ibis_expr_with_concat(): total_ordering_columns=["col1"], ) expr = value.concat([value]) - actual = expr.compile()._to_ibis_expr("unordered") + actual = expr._compile()._to_ibis_expr("unordered") assert len(actual.columns) == 3 # TODO(ashleyxu, b/299631930): test out the union expression assert actual.columns[0] == "column_0" @@ -136,8 +136,8 @@ def test_arrayvalues_to_ibis_expr_with_project_unary_op(): ), total_ordering_columns=["col1"], ) - expr = value.project_unary_op("col1", ops.AsTypeOp("string")).compile() - assert value.compile().columns[0].type().is_int64() + expr = value.project_unary_op("col1", ops.AsTypeOp("string"))._compile() + assert value._compile().columns[0].type().is_int64() assert expr.columns[0].type().is_string() @@ -152,7 +152,7 @@ def test_arrayvalues_to_ibis_expr_with_project_binary_op(): ), total_ordering_columns=["col1"], ) - expr = value.project_binary_op("col2", "col3", ops.add_op, "col4").compile() + expr = value.project_binary_op("col2", "col3", ops.add_op, "col4")._compile() assert expr.columns[3].type().is_float64() actual = expr._to_ibis_expr("unordered") assert len(expr.columns) == 4 @@ -173,7 +173,7 @@ def test_arrayvalues_to_ibis_expr_with_project_ternary_op(): ) expr = value.project_ternary_op( "col2", "col3", "col4", ops.where_op, "col5" - ).compile() + )._compile() assert expr.columns[4].type().is_float64() actual = expr._to_ibis_expr("unordered") assert len(expr.columns) == 5 @@ -195,7 +195,7 @@ def test_arrayvalue_to_ibis_expr_with_aggregate(): aggregations=(("col1", agg_ops.sum_op, "col4"),), by_column_ids=["col1"], dropna=False, - ).compile() + )._compile() actual = expr._to_ibis_expr("unordered") assert 
len(expr.columns) == 2 assert actual.columns[0] == "col1" @@ -214,7 +214,7 @@ def test_arrayvalue_to_ibis_expr_with_corr_aggregate(): ), total_ordering_columns=["col1"], ) - expr = value.corr_aggregate(corr_aggregations=[("col1", "col3", "col4")]).compile() + expr = value.corr_aggregate(corr_aggregations=[("col1", "col3", "col4")])._compile() actual = expr._to_ibis_expr("unordered") assert len(expr.columns) == 1 assert actual.columns[0] == "col4" From 5aa31372f9a2d22e4d4265f1e7b646b9239ed2b0 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Sat, 28 Oct 2023 00:49:46 +0000 Subject: [PATCH 02/11] add tests --- bigframes/core/__init__.py | 6 ++ bigframes/series.py | 3 + .../system/small/operations/test_datetimes.py | 22 +++--- tests/system/small/operations/test_strings.py | 42 +++++------ tests/system/small/test_dataframe.py | 26 ++++--- tests/system/small/test_series.py | 73 +++++++++---------- tests/system/utils.py | 17 +++-- tests/unit/test_core.py | 10 +-- 8 files changed, 107 insertions(+), 92 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 7f2e231edb..d36a50ff37 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -219,6 +219,12 @@ def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue: nodes.SelectNode(child=self.node, column_ids=tuple(column_ids)) ) + def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue: + """Append together multiple ArrayValue objects.""" + return ArrayValue( + nodes.ConcatNode(children=tuple([self.node, *[val.node for val in other]])) + ) + def project_unary_op( self, column_name: str, op: ops.UnaryOp, output_name=None ) -> ArrayValue: diff --git a/bigframes/series.py b/bigframes/series.py index 37d00d16f3..5b22756d19 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -265,6 +265,8 @@ def to_pandas( max_download_size: Optional[int] = None, sampling_method: Optional[str] = None, random_state: Optional[int] = None, + *, + ordered: bool = True, ) -> pandas.Series: """Writes Series to pandas Series. 
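A minimal usage sketch of the new keyword-only `ordered` flag added above (illustrative only, not part of the patch; the table and column names are stand-ins for any readable BigQuery data):

    import bigframes.pandas as bpd

    # Hypothetical public table; any table readable by the session works.
    df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins")

    # Default: rows come back in a deterministic order, which may force the
    # compiler to generate and sort by hidden ordering columns.
    ordered_series = df["body_mass_g"].to_pandas()

    # Opting out lets the unordered compilation path emit simpler,
    # potentially faster SQL when row order does not matter.
    unordered_series = df["body_mass_g"].to_pandas(ordered=False)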
@@ -294,6 +296,7 @@ def to_pandas( max_download_size=max_download_size, sampling_method=sampling_method, random_state=random_state, + ordered=ordered, ) self._set_internal_query_job(query_job) series = df[self._value_column] diff --git a/tests/system/small/operations/test_datetimes.py b/tests/system/small/operations/test_datetimes.py index 7dc55b9367..177194c7a8 100644 --- a/tests/system/small/operations/test_datetimes.py +++ b/tests/system/small/operations/test_datetimes.py @@ -16,7 +16,7 @@ import pytest import bigframes.series -from tests.system.utils import assert_series_equal_ignoring_order +from tests.system.utils import assert_series_equal DATETIME_COL_NAMES = [("datetime_col",), ("timestamp_col",)] @@ -33,7 +33,7 @@ def test_day(scalars_dfs, col_name): bf_result = bf_series.dt.day.to_pandas() pd_result = scalars_pandas_df[col_name].dt.day - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) @@ -51,7 +51,7 @@ def test_date(scalars_dfs, col_name): bf_result = bf_series.dt.date.to_pandas() pd_result = scalars_pandas_df[col_name].dt.date - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -69,7 +69,7 @@ def test_dayofweek(scalars_dfs, col_name): bf_result = bf_series.dt.dayofweek.to_pandas() pd_result = scalars_pandas_df[col_name].dt.dayofweek - assert_series_equal_ignoring_order(pd_result, bf_result, check_dtype=False) + assert_series_equal(pd_result, bf_result, check_dtype=False) @pytest.mark.parametrize( @@ -84,7 +84,7 @@ def test_hour(scalars_dfs, col_name): bf_result = bf_series.dt.hour.to_pandas() pd_result = scalars_pandas_df[col_name].dt.hour - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) @@ -102,7 +102,7 @@ def test_minute(scalars_dfs, col_name): bf_result = bf_series.dt.minute.to_pandas() pd_result = scalars_pandas_df[col_name].dt.minute - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) @@ -120,7 +120,7 @@ def test_month(scalars_dfs, col_name): bf_result = bf_series.dt.month.to_pandas() pd_result = scalars_pandas_df[col_name].dt.month - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) @@ -138,7 +138,7 @@ def test_quarter(scalars_dfs, col_name): bf_result = bf_series.dt.quarter.to_pandas() pd_result = scalars_pandas_df[col_name].dt.quarter - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) @@ -156,7 +156,7 @@ def test_second(scalars_dfs, col_name): bf_result = bf_series.dt.second.to_pandas() pd_result = scalars_pandas_df[col_name].dt.second - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) @@ -174,7 +174,7 @@ def test_time(scalars_dfs, col_name): bf_result = bf_series.dt.time.to_pandas() pd_result = scalars_pandas_df[col_name].dt.time - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -192,7 +192,7 @@ def test_year(scalars_dfs, col_name): bf_result = bf_series.dt.year.to_pandas() pd_result = scalars_pandas_df[col_name].dt.year - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) diff --git a/tests/system/small/operations/test_strings.py b/tests/system/small/operations/test_strings.py index 241cbd576b..27a35134d4 100644 --- a/tests/system/small/operations/test_strings.py +++ 
b/tests/system/small/operations/test_strings.py @@ -19,7 +19,7 @@ import bigframes.series -from ...utils import assert_series_equal_ignoring_order +from ...utils import assert_series_equal def test_find(scalars_dfs): @@ -31,7 +31,7 @@ def test_find(scalars_dfs): # One of type mismatches to be documented. Here, the `bf_result.dtype` is `Int64` but # the `pd_result.dtype` is `float64`: https://github.com/pandas-dev/pandas/issues/51948 - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) @@ -173,7 +173,7 @@ def test_len(scalars_dfs): # One of dtype mismatches to be documented. Here, the `bf_result.dtype` is `Int64` but # the `pd_result.dtype` is `float64`: https://github.com/pandas-dev/pandas/issues/51948 - assert_series_equal_ignoring_order( + assert_series_equal( pd_result.astype(pd.Int64Dtype()), bf_result, ) @@ -186,7 +186,7 @@ def test_lower(scalars_dfs): bf_result = bf_series.str.lower().to_pandas() pd_result = scalars_pandas_df[col_name].str.lower() - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -205,7 +205,7 @@ def test_reverse(scalars_dfs): else: pd_result.loc[i] = cell[::-1] - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -222,7 +222,7 @@ def test_slice(scalars_dfs, start, stop): pd_series = scalars_pandas_df[col_name] pd_result = pd_series.str.slice(start, stop) - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -235,7 +235,7 @@ def test_strip(scalars_dfs): bf_result = bf_series.str.strip().to_pandas() pd_result = scalars_pandas_df[col_name].str.strip() - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -248,7 +248,7 @@ def test_upper(scalars_dfs): bf_result = bf_series.str.upper().to_pandas() pd_result = scalars_pandas_df[col_name].str.upper() - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -330,7 +330,7 @@ def test_islower(weird_strings, weird_strings_pd): pd_result = weird_strings_pd.str.islower() bf_result = weird_strings.str.islower().to_pandas() - assert_series_equal_ignoring_order( + assert_series_equal( bf_result, pd_result.astype(pd.BooleanDtype()) # the dtype here is a case of intentional diversion from pandas @@ -342,7 +342,7 @@ def test_isupper(weird_strings, weird_strings_pd): pd_result = weird_strings_pd.str.isupper() bf_result = weird_strings.str.isupper().to_pandas() - assert_series_equal_ignoring_order( + assert_series_equal( bf_result, pd_result.astype(pd.BooleanDtype()) # the dtype here is a case of intentional diversion from pandas @@ -357,7 +357,7 @@ def test_rstrip(scalars_dfs): bf_result = bf_series.str.rstrip().to_pandas() pd_result = scalars_pandas_df[col_name].str.rstrip() - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -370,7 +370,7 @@ def test_lstrip(scalars_dfs): bf_result = bf_series.str.lstrip().to_pandas() pd_result = scalars_pandas_df[col_name].str.lstrip() - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -384,7 +384,7 @@ def test_repeat(scalars_dfs, repeats): bf_result = bf_series.str.repeat(repeats).to_pandas() pd_result = scalars_pandas_df[col_name].str.repeat(repeats) - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -397,7 +397,7 @@ def test_capitalize(scalars_dfs): bf_result = bf_series.str.capitalize().to_pandas() pd_result = scalars_pandas_df[col_name].str.capitalize() - 
assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -415,7 +415,7 @@ def test_cat_with_series(scalars_dfs): pd_right = scalars_pandas_df[col_name] pd_result = pd_left.str.cat(others=pd_right) - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -429,7 +429,7 @@ def test_str_match(scalars_dfs): bf_result = bf_series.str.match(pattern).to_pandas() pd_result = scalars_pandas_df[col_name].str.match(pattern) - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -443,7 +443,7 @@ def test_str_fullmatch(scalars_dfs): bf_result = bf_series.str.fullmatch(pattern).to_pandas() pd_result = scalars_pandas_df[col_name].str.fullmatch(pattern) - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -456,7 +456,7 @@ def test_str_get(scalars_dfs): bf_result = bf_series.str.get(8).to_pandas() pd_result = scalars_pandas_df[col_name].str.get(8) - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -469,7 +469,7 @@ def test_str_pad(scalars_dfs): bf_result = bf_series.str.pad(8, side="both", fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.pad(8, side="both", fillchar="%") - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -492,7 +492,7 @@ def test_str_ljust(scalars_dfs): bf_result = bf_series.str.ljust(7, fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.ljust(7, fillchar="%") - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -505,7 +505,7 @@ def test_str_rjust(scalars_dfs): bf_result = bf_series.str.rjust(9, fillchar="%").to_pandas() pd_result = scalars_pandas_df[col_name].str.rjust(9, fillchar="%") - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 9494723ef7..3adda34c40 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -28,10 +28,7 @@ import bigframes._config.display_options as display_options import bigframes.dataframe as dataframe import bigframes.series as series -from tests.system.utils import ( - assert_pandas_df_equal, - assert_series_equal_ignoring_order, -) +from tests.system.utils import assert_pandas_df_equal, assert_series_equal def test_df_construct_copy(scalars_dfs): @@ -98,7 +95,7 @@ def test_get_column(scalars_dfs): series = scalars_df[col_name] bf_result = series.to_pandas() pd_result = scalars_pandas_df[col_name] - assert_series_equal_ignoring_order(bf_result, pd_result) + assert_series_equal(bf_result, pd_result) def test_get_column_nonstring(scalars_dfs): @@ -106,7 +103,7 @@ def test_get_column_nonstring(scalars_dfs): series = scalars_df.rename(columns={"int64_col": 123.1})[123.1] bf_result = series.to_pandas() pd_result = scalars_pandas_df.rename(columns={"int64_col": 123.1})[123.1] - assert_series_equal_ignoring_order(bf_result, pd_result) + assert_series_equal(bf_result, pd_result) def test_hasattr(scalars_dfs): @@ -183,7 +180,7 @@ def test_get_column_by_attr(scalars_dfs): series = scalars_df.int64_col bf_result = series.to_pandas() pd_result = scalars_pandas_df.int64_col - assert_series_equal_ignoring_order(bf_result, pd_result) + assert_series_equal(bf_result, pd_result) def test_get_columns(scalars_dfs): @@ -2279,6 +2276,13 @@ def test_loc_setitem_bool_series_scalar_type_error(scalars_dfs): pd_df.loc[pd_df["int64_too"] == 1, 
"string_col"] = 99 +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) @pytest.mark.parametrize( ("op"), [ @@ -2293,16 +2297,18 @@ def test_loc_setitem_bool_series_scalar_type_error(scalars_dfs): ], ids=["sum", "mean", "min", "max", "std", "var", "count", "nunique"], ) -def test_dataframe_aggregates(scalars_df_index, scalars_pandas_df_index, op): +def test_dataframe_aggregates(scalars_df_index, scalars_pandas_df_index, op, ordered): col_names = ["int64_too", "float64_col", "string_col", "int64_col", "bool_col"] bf_series = op(scalars_df_index[col_names]) pd_series = op(scalars_pandas_df_index[col_names]) - bf_result = bf_series.to_pandas() + bf_result = bf_series.to_pandas(ordered=ordered) # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_series = pd_series.astype("Float64") # Pandas has object index type - pd.testing.assert_series_equal(pd_series, bf_result, check_index_type=False) + assert_series_equal( + pd_series, bf_result, check_index_type=False, ignore_order=not ordered + ) @pytest.mark.parametrize( diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 5e494fbd21..d7578bc985 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -24,10 +24,7 @@ import bigframes.pandas import bigframes.series as series -from tests.system.utils import ( - assert_pandas_df_equal, - assert_series_equal_ignoring_order, -) +from tests.system.utils import assert_pandas_df_equal, assert_series_equal def test_series_construct_copy(scalars_dfs): @@ -210,7 +207,7 @@ def test_abs(scalars_dfs, col_name): bf_result = scalars_df[col_name].abs().to_pandas() pd_result = scalars_pandas_df[col_name].abs() - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) def test_fillna(scalars_dfs): @@ -218,7 +215,7 @@ def test_fillna(scalars_dfs): col_name = "string_col" bf_result = scalars_df[col_name].fillna("Missing").to_pandas() pd_result = scalars_pandas_df[col_name].fillna("Missing") - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -465,7 +462,7 @@ def test_series_int_int_operators_scalar( bf_result = maybe_reversed_op(scalars_df["int64_col"], other_scalar).to_pandas() pd_result = maybe_reversed_op(scalars_pandas_df["int64_col"], other_scalar) - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) def test_series_pow_scalar(scalars_dfs): @@ -474,7 +471,7 @@ def test_series_pow_scalar(scalars_dfs): bf_result = (scalars_df["int64_col"] ** 2).to_pandas() pd_result = scalars_pandas_df["int64_col"] ** 2 - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) def test_series_pow_scalar_reverse(scalars_dfs): @@ -483,7 +480,7 @@ def test_series_pow_scalar_reverse(scalars_dfs): bf_result = (0.8 ** scalars_df["int64_col"]).to_pandas() pd_result = 0.8 ** scalars_pandas_df["int64_col"] - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) @pytest.mark.parametrize( @@ -509,7 +506,7 @@ def test_series_bool_bool_operators_scalar( bf_result = maybe_reversed_op(scalars_df["bool_col"], other_scalar).to_pandas() pd_result = maybe_reversed_op(scalars_pandas_df["bool_col"], other_scalar) - assert_series_equal_ignoring_order(pd_result.astype(pd.BooleanDtype()), bf_result) + assert_series_equal(pd_result.astype(pd.BooleanDtype()), bf_result) @pytest.mark.parametrize( @@ -547,7 +544,7 @@ def 
test_series_int_int_operators_series(scalars_dfs, operator): scalars_df, scalars_pandas_df = scalars_dfs bf_result = operator(scalars_df["int64_col"], scalars_df["int64_too"]).to_pandas() pd_result = operator(scalars_pandas_df["int64_col"], scalars_pandas_df["int64_too"]) - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) @pytest.mark.parametrize( @@ -697,7 +694,7 @@ def test_series_add_scalar(scalars_dfs, other): bf_result = (scalars_df["float64_col"] + other).to_pandas() pd_result = scalars_pandas_df["float64_col"] + other - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) @pytest.mark.parametrize( @@ -713,7 +710,7 @@ def test_series_add_bigframes_series(scalars_dfs, left_col, right_col): bf_result = (scalars_df[left_col] + scalars_df[right_col]).to_pandas() pd_result = scalars_pandas_df[left_col] + scalars_pandas_df[right_col] - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) @pytest.mark.parametrize( @@ -735,7 +732,7 @@ def test_series_add_bigframes_series_nested( scalars_pandas_df[left_col] + scalars_pandas_df[right_col] ) + scalars_pandas_df[righter_col] - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) def test_series_add_different_table_default_index( @@ -893,7 +890,7 @@ def test_isnull(scalars_dfs): # One of dtype mismatches to be documented. Here, the `bf_series.dtype` is `BooleanDtype` but # the `pd_series.dtype` is `bool`. - assert_series_equal_ignoring_order(pd_series.astype(pd.BooleanDtype()), bf_series) + assert_series_equal(pd_series.astype(pd.BooleanDtype()), bf_series) def test_notnull(scalars_dfs): @@ -904,7 +901,7 @@ def test_notnull(scalars_dfs): # One of dtype mismatches to be documented. Here, the `bf_series.dtype` is `BooleanDtype` but # the `pd_series.dtype` is `bool`. 
- assert_series_equal_ignoring_order(pd_series.astype(pd.BooleanDtype()), bf_series) + assert_series_equal(pd_series.astype(pd.BooleanDtype()), bf_series) def test_round(scalars_dfs): @@ -913,7 +910,7 @@ def test_round(scalars_dfs): bf_result = scalars_df[col_name].round().to_pandas() pd_result = scalars_pandas_df[col_name].round() - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) def test_eq_scalar(scalars_dfs): @@ -922,7 +919,7 @@ def test_eq_scalar(scalars_dfs): bf_result = scalars_df[col_name].eq(0).to_pandas() pd_result = scalars_pandas_df[col_name].eq(0) - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) def test_eq_wider_type_scalar(scalars_dfs): @@ -931,7 +928,7 @@ def test_eq_wider_type_scalar(scalars_dfs): bf_result = scalars_df[col_name].eq(1.0).to_pandas() pd_result = scalars_pandas_df[col_name].eq(1.0) - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) def test_ne_scalar(scalars_dfs): @@ -940,7 +937,7 @@ def test_ne_scalar(scalars_dfs): bf_result = (scalars_df[col_name] != 0).to_pandas() pd_result = scalars_pandas_df[col_name] != 0 - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) def test_eq_int_scalar(scalars_dfs): @@ -949,7 +946,7 @@ def test_eq_int_scalar(scalars_dfs): bf_result = (scalars_df[col_name] == 0).to_pandas() pd_result = scalars_pandas_df[col_name] == 0 - assert_series_equal_ignoring_order(pd_result, bf_result) + assert_series_equal(pd_result, bf_result) @pytest.mark.parametrize( @@ -968,7 +965,7 @@ def test_eq_same_type_series(scalars_dfs, col_name): # One of dtype mismatches to be documented. Here, the `bf_series.dtype` is `BooleanDtype` but # the `pd_series.dtype` is `bool`. - assert_series_equal_ignoring_order(pd_result.astype(pd.BooleanDtype()), bf_result) + assert_series_equal(pd_result.astype(pd.BooleanDtype()), bf_result) def test_loc_setitem_cell(scalars_df_index, scalars_pandas_df_index): @@ -994,7 +991,7 @@ def test_ne_obj_series(scalars_dfs): # One of dtype mismatches to be documented. Here, the `bf_series.dtype` is `BooleanDtype` but # the `pd_series.dtype` is `bool`. 
- assert_series_equal_ignoring_order(pd_result.astype(pd.BooleanDtype()), bf_result) + assert_series_equal(pd_result.astype(pd.BooleanDtype()), bf_result) def test_indexing_using_unselected_series(scalars_dfs): @@ -1003,7 +1000,7 @@ def test_indexing_using_unselected_series(scalars_dfs): bf_result = scalars_df[col_name][scalars_df["int64_too"].eq(0)].to_pandas() pd_result = scalars_pandas_df[col_name][scalars_pandas_df["int64_too"].eq(0)] - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -1019,7 +1016,7 @@ def test_indexing_using_selected_series(scalars_dfs): scalars_pandas_df["string_col"].eq("Hello, World!") ] - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -1041,7 +1038,7 @@ def test_nested_filter(scalars_dfs): ) # Convert from nullable bool to nonnullable bool usable as indexer pd_result = pd_string_col[pd_int64_too == 0][~pd_bool_col] - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -1060,7 +1057,7 @@ def test_binop_repeated_application_does_row_identity_joins(scalars_dfs): bf_result = bf_series.to_pandas() pd_result = pd_series - assert_series_equal_ignoring_order( + assert_series_equal( bf_result, pd_result, ) @@ -1082,7 +1079,7 @@ def test_binop_opposite_filters(scalars_dfs): pd_bool_col = scalars_pandas_df["bool_col"] pd_result = pd_int64_col1[pd_bool_col] + pd_int64_col2[pd_bool_col.__invert__()] - assert_series_equal_ignoring_order( + assert_series_equal( bf_result, pd_result, ) @@ -1100,7 +1097,7 @@ def test_binop_left_filtered(scalars_dfs): pd_bool_col = scalars_pandas_df["bool_col"] pd_result = pd_int64_col[pd_bool_col] + pd_float64_col - assert_series_equal_ignoring_order( + assert_series_equal( bf_result, pd_result, ) @@ -1118,7 +1115,7 @@ def test_binop_right_filtered(scalars_dfs): pd_bool_col = scalars_pandas_df["bool_col"] pd_result = pd_float64_col + pd_int64_col[pd_bool_col] - assert_series_equal_ignoring_order( + assert_series_equal( bf_result, pd_result, ) @@ -1223,7 +1220,7 @@ def test_groupby_sum(scalars_dfs): ) # TODO(swast): Update groupby to use index based on group by key(s). bf_result = bf_series.to_pandas() - assert_series_equal_ignoring_order( + assert_series_equal( pd_series, bf_result, check_exact=False, @@ -1241,7 +1238,7 @@ def test_groupby_std(scalars_dfs): .astype(pd.Float64Dtype()) ) bf_result = bf_series.to_pandas() - assert_series_equal_ignoring_order( + assert_series_equal( pd_series, bf_result, check_exact=False, @@ -1256,7 +1253,7 @@ def test_groupby_var(scalars_dfs): scalars_pandas_df[col_name].groupby(scalars_pandas_df["string_col"]).var() ) bf_result = bf_series.to_pandas() - assert_series_equal_ignoring_order( + assert_series_equal( pd_series, bf_result, check_exact=False, @@ -1308,7 +1305,7 @@ def test_groupby_mean(scalars_dfs): ) # TODO(swast): Update groupby to use index based on group by key(s). bf_result = bf_series.to_pandas() - assert_series_equal_ignoring_order( + assert_series_equal( pd_series, bf_result, ) @@ -1346,7 +1343,7 @@ def test_groupby_prod(scalars_dfs): ) # TODO(swast): Update groupby to use index based on group by key(s). 
bf_result = bf_series.to_pandas() - assert_series_equal_ignoring_order( + assert_series_equal( pd_series, bf_result, ) @@ -1556,7 +1553,7 @@ def test_head(scalars_dfs): bf_result = scalars_df["string_col"].head(2).to_pandas() pd_result = scalars_pandas_df["string_col"].head(2) - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -1571,7 +1568,7 @@ def test_tail(scalars_dfs): bf_result = scalars_df["string_col"].tail(2).to_pandas() pd_result = scalars_pandas_df["string_col"].tail(2) - assert_series_equal_ignoring_order( + assert_series_equal( pd_result, bf_result, ) @@ -2039,7 +2036,7 @@ def test_series_filter_items(scalars_df_index, scalars_pandas_df_index): # Pandas uses int64 instead of Int64 (nullable) dtype. pd_result.index = pd_result.index.astype(pd.Int64Dtype()) # Ignore ordering as pandas order differently depending on version - assert_series_equal_ignoring_order( + assert_series_equal( bf_result, pd_result, check_names=False, diff --git a/tests/system/utils.py b/tests/system/utils.py index c68acf10f3..f7831972b8 100644 --- a/tests/system/utils.py +++ b/tests/system/utils.py @@ -38,13 +38,16 @@ def assert_pandas_df_equal(df0, df1, ignore_order: bool = False, **kwargs): pd.testing.assert_frame_equal(df0, df1, **kwargs) -def assert_series_equal_ignoring_order(left: pd.Series, right: pd.Series, **kwargs): - if left.index.name is None: - left = left.sort_values().reset_index(drop=True) - right = right.sort_values().reset_index(drop=True) - else: - left = left.sort_index() - right = right.sort_index() +def assert_series_equal( + left: pd.Series, right: pd.Series, ignore_order: bool = False, **kwargs +): + if ignore_order: + if left.index.name is None: + left = left.sort_values().reset_index(drop=True) + right = right.sort_values().reset_index(drop=True) + else: + left = left.sort_index() + right = right.sort_index() pd.testing.assert_series_equal(left, right, **kwargs) diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index e7026ebd87..f223bd416c 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -117,7 +117,7 @@ def test_arrayvalues_to_ibis_expr_with_concat(): total_ordering_columns=["col1"], ) expr = value.concat([value]) - actual = expr._compile()._to_ibis_expr("unordered") + actual = expr._compile()._to_ibis_expr(ordering_mode="unordered") assert len(actual.columns) == 3 # TODO(ashleyxu, b/299631930): test out the union expression assert actual.columns[0] == "column_0" @@ -154,7 +154,7 @@ def test_arrayvalues_to_ibis_expr_with_project_binary_op(): ) expr = value.project_binary_op("col2", "col3", ops.add_op, "col4")._compile() assert expr.columns[3].type().is_float64() - actual = expr._to_ibis_expr("unordered") + actual = expr._to_ibis_expr(ordering_mode="unordered") assert len(expr.columns) == 4 assert actual.columns[3] == "col4" @@ -175,7 +175,7 @@ def test_arrayvalues_to_ibis_expr_with_project_ternary_op(): "col2", "col3", "col4", ops.where_op, "col5" )._compile() assert expr.columns[4].type().is_float64() - actual = expr._to_ibis_expr("unordered") + actual = expr._to_ibis_expr(ordering_mode="unordered") assert len(expr.columns) == 5 assert actual.columns[4] == "col5" @@ -196,7 +196,7 @@ def test_arrayvalue_to_ibis_expr_with_aggregate(): by_column_ids=["col1"], dropna=False, )._compile() - actual = expr._to_ibis_expr("unordered") + actual = expr._to_ibis_expr(ordering_mode="unordered") assert len(expr.columns) == 2 assert actual.columns[0] == "col1" assert actual.columns[1] == "col4" @@ -215,7 +215,7 @@ def 
test_arrayvalue_to_ibis_expr_with_corr_aggregate(): total_ordering_columns=["col1"], ) expr = value.corr_aggregate(corr_aggregations=[("col1", "col3", "col4")])._compile() - actual = expr._to_ibis_expr("unordered") + actual = expr._to_ibis_expr(ordering_mode="unordered") assert len(expr.columns) == 1 assert actual.columns[0] == "col4" assert expr.columns[0].type().is_float64() From 8edabcf09533fca1b2431227ca6707f8de7eecb4 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Sat, 28 Oct 2023 01:21:35 +0000 Subject: [PATCH 03/11] fix tests --- bigframes/dataframe.py | 4 +- bigframes/series.py | 4 + tests/system/small/ml/test_cluster.py | 2 +- tests/system/small/ml/test_core.py | 1 + tests/system/small/ml/test_decomposition.py | 2 + tests/system/small/test_dataframe.py | 104 +++++++++++++++----- tests/system/small/test_dataframe_io.py | 6 +- tests/system/small/test_pandas.py | 6 +- tests/system/small/test_series.py | 6 +- 9 files changed, 96 insertions(+), 39 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index db68033c51..bd5cb517b6 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -879,8 +879,8 @@ def to_pandas( take longer to execute and require more computation. If set to a value other than None, this will supersede the global config. ordered (bool, default True): - Determines whether the resulting dataframe will be sorted. In some cases, - unordered may result in a faster-executing query. + Determines whether the resulting pandas dataframe will be deterministically ordered. + In some cases, unordered may result in a faster-executing query. Returns: pandas.DataFrame: A pandas DataFrame with all rows and columns of this DataFrame if the diff --git a/bigframes/series.py b/bigframes/series.py index 5b22756d19..ed2868713b 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -286,6 +286,10 @@ def to_pandas( The seed for the uniform downsampling algorithm. If provided, the uniform method may take longer to execute and require more computation. If set to a value other than None, this will supersede the global config. + ordered (bool, default True): + Determines whether the resulting pandas series will be ordered. In some cases, + unordered may result in a faster-executing query. 
+ Returns: pandas.Series: A pandas Series with all rows of this Series if the data_sampling_threshold_mb diff --git a/tests/system/small/ml/test_cluster.py b/tests/system/small/ml/test_cluster.py index a3af71892f..caeffa7768 100644 --- a/tests/system/small/ml/test_cluster.py +++ b/tests/system/small/ml/test_cluster.py @@ -68,7 +68,7 @@ def test_kmeans_predict(session, penguins_kmeans_model: cluster.KMeans): dtype="Int64", index=pd.Index(["test1", "test2", "test3", "test4"], dtype="string[pyarrow]"), ) - assert_pandas_df_equal(result, expected) + assert_pandas_df_equal(result, expected, ignore_order=True) def test_kmeans_score(session, penguins_kmeans_model: cluster.KMeans): diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py index cb6507e4e3..ec1f351d87 100644 --- a/tests/system/small/ml/test_core.py +++ b/tests/system/small/ml/test_core.py @@ -233,6 +233,7 @@ def test_pca_model_principal_component_info(penguins_bqml_pca_model: core.BqmlMo # int64 Index by default in pandas versus Int64 (nullable) Index in BigQuery DataFrame check_index_type=False, check_dtype=False, + ignore_order=True, ) diff --git a/tests/system/small/ml/test_decomposition.py b/tests/system/small/ml/test_decomposition.py index b46b3d103d..cc4d2e5801 100644 --- a/tests/system/small/ml/test_decomposition.py +++ b/tests/system/small/ml/test_decomposition.py @@ -137,6 +137,7 @@ def test_pca_explained_variance_(penguins_pca_model: decomposition.PCA): rtol=0.1, check_index_type=False, check_dtype=False, + ignore_order=True, ) @@ -156,4 +157,5 @@ def test_pca_explained_variance_ratio_(penguins_pca_model: decomposition.PCA): rtol=0.1, check_index_type=False, check_dtype=False, + ignore_order=True, ) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 3adda34c40..fe8d7d917a 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -113,15 +113,24 @@ def test_hasattr(scalars_dfs): assert not hasattr(scalars_df, "not_exist") -def test_head_with_custom_column_labels(scalars_df_index, scalars_pandas_df_index): +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_head_with_custom_column_labels( + scalars_df_index, scalars_pandas_df_index, ordered +): rename_mapping = { "int64_col": "Integer Column", "string_col": "言語列", } bf_df = scalars_df_index.rename(columns=rename_mapping).head(3) - bf_result = bf_df.to_pandas() + bf_result = bf_df.to_pandas(ordered=ordered) pd_result = scalars_pandas_df_index.rename(columns=rename_mapping).head(3) - pandas.testing.assert_frame_equal(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) def test_tail_with_custom_column_labels(scalars_df_index, scalars_pandas_df_index): @@ -564,14 +573,21 @@ def test_assign_existing_column(scalars_dfs): assert_pandas_df_equal(bf_result, pd_result) -def test_assign_series(scalars_dfs): +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_assign_series(scalars_dfs, ordered): scalars_df, scalars_pandas_df = scalars_dfs column_name = "int64_col" df = scalars_df.assign(new_col=scalars_df[column_name]) - bf_result = df.to_pandas() + bf_result = df.to_pandas(ordered=ordered) pd_result = scalars_pandas_df.assign(new_col=scalars_pandas_df[column_name]) - assert_pandas_df_equal(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) def test_assign_series_overwrite(scalars_dfs): @@ -849,7 +865,9 @@ def 
test_df_merge(scalars_dfs, merge_how):
sort=True,
)
- assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+ assert_pandas_df_equal(
+ bf_result, pd_result, ignore_order=True, check_index_type=False
+ )
@@ -882,7 +900,9 @@ def test_df_merge_multi_key(scalars_dfs, left_on, right_on):
sort=True,
)
- assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+ assert_pandas_df_equal(
+ bf_result, pd_result, ignore_order=True, check_index_type=False
+ )
@@ -912,7 +932,9 @@ def test_merge_custom_col_name(scalars_dfs, merge_how):
pandas_right_df = scalars_pandas_df[right_columns]
pd_result = pandas_left_df.merge(pandas_right_df, merge_how, on, sort=True)
- assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+ assert_pandas_df_equal(
+ bf_result, pd_result, ignore_order=True, check_index_type=False
+ )
@@ -945,7 +967,9 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how):
sort=True,
)
- assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
+ assert_pandas_df_equal(
+ bf_result, pd_result, ignore_order=True, check_index_type=False
+ )
def test_get_dtypes(scalars_df_default_index):
@@ -1605,8 +1629,15 @@ def test_binop_df_df_binary_op(
# Different table will only work for explicit index, since default index orders are arbitrary.
+@pytest.mark.parametrize(
+ ("ordered"),
+ [
+ (True),
+ (False),
+ ],
+)
def test_series_binop_add_different_table(
- scalars_df_index, scalars_pandas_df_index, scalars_df_2_index
+ scalars_df_index, scalars_pandas_df_index, scalars_df_2_index, ordered
):
df_columns = ["int64_col", "float64_col"]
series_column = "int64_too"
@@ -1614,13 +1645,13 @@
bf_result = (
scalars_df_index[df_columns]
.add(scalars_df_2_index[series_column], axis="index")
- .to_pandas()
+ .to_pandas(ordered=ordered)
)
pd_result = scalars_pandas_df_index[df_columns].add(
scalars_pandas_df_index[series_column], axis="index"
)
- assert_pandas_df_equal(bf_result, pd_result)
+ assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered)
# TODO(garrettwu): Test series binop with different index
@@ -1899,7 +1930,14 @@ def test_df_describe(scalars_dfs):
).all()
-def test_df_stack(scalars_dfs):
+@pytest.mark.parametrize(
+ ("ordered"),
+ [
+ (True),
+ (False),
+ ],
+)
+def test_df_stack(scalars_dfs, ordered):
if pandas.__version__.startswith("1.") or pandas.__version__.startswith("2.0"):
pytest.skip("pandas <2.1 uses different stack implementation")
scalars_df, scalars_pandas_df = scalars_dfs
@@ -1909,14 +1947,23 @@
# Can only stack identically-typed columns
columns = ["int64_col", "int64_too", "rowindex_2"]
- bf_result = scalars_df[columns].stack().to_pandas()
+ bf_result = scalars_df[columns].stack().to_pandas(ordered=ordered)
pd_result = scalars_pandas_df[columns].stack(future_stack=True)
# Pandas produces NaN, where bq dataframes produces pd.NA
- pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
+ assert_series_equal(
+ bf_result, pd_result, check_dtype=False, ignore_order=not ordered
+ )
-def test_df_unstack(scalars_dfs):
+@pytest.mark.parametrize(
+ ("ordered"),
+ [
+ (True),
+ (False),
+ ],
+)
+def test_df_unstack(scalars_dfs, ordered):
scalars_df, scalars_pandas_df = scalars_dfs
# To match bigquery dataframes
scalars_pandas_df = scalars_pandas_df.copy()
@@ -1929,11 +1976,13 @@
]
# unstack on mono-index produces series
- bf_result = 
scalars_df[columns].unstack().to_pandas() + bf_result = scalars_df[columns].unstack().to_pandas(ordered=ordered) pd_result = scalars_pandas_df[columns].unstack() # Pandas produces NaN, where bq dataframes produces pd.NA - pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + assert_series_equal( + bf_result, pd_result, check_dtype=False, ignore_order=not ordered + ) @pytest.mark.parametrize( @@ -2078,14 +2127,18 @@ def test_iloc_slice_zero_step(scalars_df_index): scalars_df_index.iloc[0:0:0] -def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index): - bf_result = scalars_df_index.iloc[1:].iloc[1:].to_pandas() +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_iloc_slice_nested(scalars_df_index, scalars_pandas_df_index, ordered): + bf_result = scalars_df_index.iloc[1:].iloc[1:].to_pandas(ordered=ordered) pd_result = scalars_pandas_df_index.iloc[1:].iloc[1:] - pd.testing.assert_frame_equal( - bf_result, - pd_result, - ) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) @pytest.mark.parametrize( @@ -2562,6 +2615,7 @@ def test_df_rows_filter_items(scalars_df_index, scalars_pandas_df_index): assert_pandas_df_equal( bf_result, pd_result, + ignore_order=True, check_names=False, ) diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index a235845937..3600dda56d 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -377,7 +377,7 @@ def test_to_sql_query_unnamed_index_included( pd_df = scalars_pandas_df_default_index.reset_index(drop=True) roundtrip = session.read_gbq(sql, index_col=idx_ids) roundtrip.index.names = [None] - assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) + assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False) def test_to_sql_query_named_index_included( @@ -409,7 +409,7 @@ def test_to_sql_query_unnamed_index_excluded( pd_df = scalars_pandas_df_default_index.reset_index(drop=True) roundtrip = session.read_gbq(sql) - assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) + assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False) def test_to_sql_query_named_index_excluded( @@ -426,4 +426,4 @@ def test_to_sql_query_named_index_excluded( "rowindex_2", drop=True ).reset_index(drop=True) roundtrip = session.read_gbq(sql) - assert_pandas_df_equal(roundtrip.to_pandas(), pd_df) + assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False) diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index 56bea42ad5..17ba905c9f 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -140,7 +140,7 @@ def test_merge(scalars_dfs, merge_how): sort=True, ) - assert_pandas_df_equal(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @pytest.mark.parametrize( @@ -174,7 +174,7 @@ def test_merge_left_on_right_on(scalars_dfs, merge_how): sort=True, ) - assert_pandas_df_equal(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) @pytest.mark.parametrize( @@ -208,7 +208,7 @@ def test_merge_series(scalars_dfs, merge_how): sort=True, ) - assert_pandas_df_equal(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=True) def test_cut(scalars_dfs): diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index d7578bc985..dc6669c695 100644 --- a/tests/system/small/test_series.py +++ 
b/tests/system/small/test_series.py
@@ -2036,11 +2036,7 @@ def test_series_filter_items(scalars_df_index, scalars_pandas_df_index):
    # Pandas uses int64 instead of Int64 (nullable) dtype.
    pd_result.index = pd_result.index.astype(pd.Int64Dtype())
    # Ignore ordering as pandas orders differently depending on version
-    assert_series_equal(
-        bf_result,
-        pd_result,
-        check_names=False,
-    )
+    assert_series_equal(bf_result, pd_result, check_names=False, ignore_order=True)


def test_series_filter_like(scalars_df_index, scalars_pandas_df_index):

From 449ca103bbde0fdfc85fb603083a1ba2e075fcba Mon Sep 17 00:00:00 2001
From: Trevor Bergeron
Date: Sat, 28 Oct 2023 01:21:35 +0000
Subject: [PATCH 04/11] fix tests

---
 tests/system/small/test_dataframe_io.py    |  8 ++++++--
 tests/system/small/test_remote_function.py |  4 +++-
 tests/system/small/test_series.py          | 14 ++++++--------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py
index 3600dda56d..d700d93be9 100644
--- a/tests/system/small/test_dataframe_io.py
+++ b/tests/system/small/test_dataframe_io.py
@@ -409,7 +409,9 @@ def test_to_sql_query_unnamed_index_excluded(
    pd_df = scalars_pandas_df_default_index.reset_index(drop=True)
    roundtrip = session.read_gbq(sql)
-    assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False)
+    assert_pandas_df_equal(
+        roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
+    )


def test_to_sql_query_named_index_excluded(
@@ -426,4 +428,6 @@ def test_to_sql_query_named_index_excluded(
        "rowindex_2", drop=True
    ).reset_index(drop=True)
    roundtrip = session.read_gbq(sql)
-    assert_pandas_df_equal(roundtrip.to_pandas(), pd_df, check_index_type=False)
+    assert_pandas_df_equal(
+        roundtrip.to_pandas(), pd_df, check_index_type=False, ignore_order=True
+    )
diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py
index 6cbe7eee21..3d8532a13b 100644
--- a/tests/system/small/test_remote_function.py
+++ b/tests/system/small/test_remote_function.py
@@ -583,7 +583,9 @@ def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id):
    indirect_df = indirect_df.assign(y=indirect_df.x.apply(square))
    indirect_df = indirect_df.to_pandas()

-    assert_pandas_df_equal(direct_df, indirect_df)
+    assert_pandas_df_equal(
+        direct_df, indirect_df, ignore_order=True, check_index_type=False
+    )


@pytest.mark.flaky(retries=2, delay=120)
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index dc6669c695..e0f4416c73 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1079,10 +1079,9 @@ def test_binop_opposite_filters(scalars_dfs):
    pd_bool_col = scalars_pandas_df["bool_col"]
    pd_result = pd_int64_col1[pd_bool_col] + pd_int64_col2[pd_bool_col.__invert__()]

-    assert_series_equal(
-        bf_result,
-        pd_result,
-    )
+    # Passes with ignore_order=False only with some dependency sets
+    # TODO: Determine desired behavior and make test more strict
+    assert_series_equal(bf_result, pd_result, ignore_order=True)


def test_binop_left_filtered(scalars_dfs):
@@ -1097,10 +1096,9 @@ def test_binop_left_filtered(scalars_dfs):
    pd_bool_col = scalars_pandas_df["bool_col"]
    pd_result = pd_int64_col[pd_bool_col] + pd_float64_col

-    assert_series_equal(
-        bf_result,
-        pd_result,
-    )
+    # Passes with ignore_order=False only with some dependency sets
+    # TODO: Determine desired behavior and make test more strict
+    assert_series_equal(bf_result, pd_result, 
ignore_order=True) def test_binop_right_filtered(scalars_dfs): From 7b382804bd02a2fc48b044d124776931e4838dcb Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Sat, 28 Oct 2023 22:45:12 +0000 Subject: [PATCH 05/11] more tests, prettier sql --- bigframes/core/compile/compiled.py | 10 ++++---- bigframes/series.py | 4 +-- tests/system/large/ml/test_cluster.py | 2 +- tests/system/small/test_groupby.py | 35 ++++++++++++++++++++------- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 0feecae5b6..681a841e1c 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -1277,11 +1277,11 @@ def to_sql( ) if sorted: sql = textwrap.dedent( - f""" - SELECT * EXCEPT (`{offsets_id}`) - FROM ({sql}) - ORDER BY `{offsets_id}` - """ + f"SELECT * EXCEPT (`{offsets_id}`)\n" + "FROM (\n" + f"{sql}\n" + ")\n" + f"ORDER BY `{offsets_id}`\n" ) return typing.cast(str, sql) diff --git a/bigframes/series.py b/bigframes/series.py index ed2868713b..52df00ef87 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -287,8 +287,8 @@ def to_pandas( take longer to execute and require more computation. If set to a value other than None, this will supersede the global config. ordered (bool, default True): - Determines whether the resulting pandas series will be ordered. In some cases, - unordered may result in a faster-executing query. + Determines whether the resulting pandas series will be deterministically ordered. + In some cases, unordered may result in a faster-executing query. Returns: diff --git a/tests/system/large/ml/test_cluster.py b/tests/system/large/ml/test_cluster.py index cef167d4ac..f01116665f 100644 --- a/tests/system/large/ml/test_cluster.py +++ b/tests/system/large/ml/test_cluster.py @@ -105,7 +105,7 @@ def test_cluster_configure_fit_score_predict( index=pd.Index(["test1", "test2", "test3", "test4"], dtype="string[pyarrow]"), ) expected.index.name = "observation" - assert_pandas_df_equal(result, expected) + assert_pandas_df_equal(result, expected, ignore_order=True) # save, load, check n_clusters to ensure configuration was kept reloaded_model = model.to_gbq( diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 05154f7ab7..a24713c2b3 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -16,6 +16,7 @@ import pytest import bigframes.pandas as bpd +from tests.system.utils import assert_pandas_df_equal @pytest.mark.parametrize( @@ -88,16 +89,23 @@ def test_dataframe_groupby_aggregate( pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) -def test_dataframe_groupby_agg_string(scalars_df_index, scalars_pandas_df_index): +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_dataframe_groupby_agg_string( + scalars_df_index, scalars_pandas_df_index, ordered +): col_names = ["int64_too", "float64_col", "int64_col", "bool_col", "string_col"] bf_result = scalars_df_index[col_names].groupby("string_col").agg("count") pd_result = scalars_pandas_df_index[col_names].groupby("string_col").agg("count") - bf_result_computed = bf_result.to_pandas() + bf_result_computed = bf_result.to_pandas(ordered=ordered) - pd.testing.assert_frame_equal( - pd_result, - bf_result_computed, - check_dtype=False, + assert_pandas_df_equal( + pd_result, bf_result_computed, check_dtype=False, ignore_order=not ordered ) @@ -270,13 +278,22 @@ def 
test_dataframe_groupby_kurt(scalars_df_index, scalars_pandas_df_index): pd.testing.assert_frame_equal(pd_result, bf_result, check_dtype=False) -def test_dataframe_groupby_diff(scalars_df_index, scalars_pandas_df_index): +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_dataframe_groupby_diff(scalars_df_index, scalars_pandas_df_index, ordered): col_names = ["float64_col", "int64_col", "string_col"] bf_result = scalars_df_index[col_names].groupby("string_col").diff(-1) pd_result = scalars_pandas_df_index[col_names].groupby("string_col").diff(-1) - bf_result_computed = bf_result.to_pandas() + bf_result_computed = bf_result.to_pandas(ordered=ordered) - pd.testing.assert_frame_equal(pd_result, bf_result_computed, check_dtype=False) + assert_pandas_df_equal( + pd_result, bf_result_computed, check_dtype=False, ignore_order=not ordered + ) def test_dataframe_groupby_getitem( From 1ab8e30a3af067c8c8360872dfa5c88134b73baa Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Mon, 6 Nov 2023 19:16:02 +0000 Subject: [PATCH 06/11] simplify ir classes --- bigframes/core/__init__.py | 4 +- bigframes/core/compile/__init__.py | 8 +- bigframes/core/compile/compiled.py | 532 ++++++++--------------------- bigframes/core/compile/compiler.py | 23 +- 4 files changed, 173 insertions(+), 394 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index d36a50ff37..1c291a69c2 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -99,7 +99,9 @@ def _compile_unordered(self) -> compiled.UnorderedIR: def shape(self) -> typing.Tuple[int, int]: """Returns dimensions as (length, width) tuple.""" width = len(self._compile().columns) - count_expr = self._compile()._to_ibis_expr(ordering_mode="unordered").count() + count_expr = ( + self._compile_unordered()._to_ibis_expr(ordering_mode="unordered").count() + ) # Support in-memory engines for hermetic unit tests. if not self.node.session: diff --git a/bigframes/core/compile/__init__.py b/bigframes/core/compile/__init__.py index af3f32aefb..761fd9a465 100644 --- a/bigframes/core/compile/__init__.py +++ b/bigframes/core/compile/__init__.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from bigframes.core.compile.compiled import CompiledArrayValue -from bigframes.core.compile.compiler import compile_ordered +from bigframes.core.compile.compiled import OrderedIR, UnorderedIR +from bigframes.core.compile.compiler import compile_ordered, compile_unordered __all__ = [ "compile_ordered", - "CompiledArrayValue", + "compile_unordered", + "OrderedIR", + "UnorderedIR", ] diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 681a841e1c..6fad1a7645 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -13,6 +13,7 @@ # limitations under the License. 
from __future__ import annotations +import abc import functools import textwrap import typing @@ -41,108 +42,63 @@ ORDER_ID_COLUMN = "bigframes_ordering_id" PREDICATE_COLUMN = "bigframes_predicate" +T = typing.TypeVar("T", bound="BaseIbisIR") + + +class BaseIbisIR(abc.ABC): + """Implementation detail, contains common logic between ordered and unordered IR""" + + def __init__( + self, + table: ibis_types.Table, + columns: Sequence[ibis_types.Value], + predicates: Optional[Collection[ibis_types.BooleanValue]] = None, + ): + self._table = table + self._predicates = tuple(predicates) if predicates is not None else () + # Allow creating a DataFrame directly from an Ibis table expression. + # TODO(swast): Validate that each column references the same table (or + # no table for literal values). + self._columns = tuple(columns) + # To allow for more efficient lookup by column name, create a + # dictionary mapping names to column values. + self._column_names = {column.get_name(): column for column in self._columns} -class CompiledArrayValue(typing.Protocol): @property - def column_ids(self) -> typing.Sequence[str]: - ... + def columns(self) -> typing.Tuple[ibis_types.Value, ...]: + return self._columns - def to_sql(self) -> str: - ... + @property + def column_ids(self) -> typing.Sequence[str]: + return tuple(self._column_names.keys()) - def _to_ibis_expr(self, *args, **kwargs) -> str: - """Exposed for testing purposes only.""" - ... + @property + def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]: + """Returns the frame's predicates as an equivalent boolean value, useful where a single predicate value is preferred.""" + return ( + _reduce_predicate_list(self._predicates).name(PREDICATE_COLUMN) + if self._predicates + else None + ) - def select_columns(self, column_ids: typing.Sequence[str]) -> CompiledArrayValue: + @abc.abstractmethod + def select_columns(self: T, column_ids: typing.Sequence[str]) -> T: + """Creates a new expression based on this expression with new columns.""" ... - def drop_columns(self, columns: Iterable[str]) -> CompiledArrayValue: + def drop_columns(self: T, columns: Iterable[str]) -> T: return self.select_columns( [col for col in self.column_ids if col not in columns] ) - def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: - ... - - def filter(self, predicate_id: str, keep_null: bool = False) -> CompiledArrayValue: + @abc.abstractmethod + def filter(self: T, predicate_id: str, keep_null: bool = False) -> T: """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression.""" ... - def order_by( - self, by: Sequence[OrderingColumnReference], stable: bool = False - ) -> CompiledArrayValue: - ... - - def reversed(self) -> CompiledArrayValue: - ... - - def project_unary_op( - self, column_name: str, op: ops.UnaryOp, output_name=None - ) -> CompiledArrayValue: - """Creates a new expression based on this expression with unary operation applied to one column.""" - ... - - def project_binary_op( - self, - left_column_id: str, - right_column_id: str, - op: ops.BinaryOp, - output_column_id: str, - ) -> CompiledArrayValue: - """Creates a new expression based on this expression with binary operation applied to two columns.""" - ... - - def project_ternary_op( - self, - col_id_1: str, - col_id_2: str, - col_id_3: str, - op: ops.TernaryOp, - output_column_id: str, - ) -> CompiledArrayValue: - """Creates a new expression based on this expression with ternary operation applied to three columns.""" - ... 
- - def aggregate( - self, - aggregations: typing.Sequence[typing.Tuple[str, agg_ops.AggregateOp, str]], - by_column_ids: typing.Sequence[str] = (), - dropna: bool = True, - ) -> CompiledArrayValue: - """ - Apply aggregations to the expression. - Arguments: - aggregations: input_column_id, operation, output_column_id tuples - by_column_id: column id of the aggregation key, this is preserved through the transform - dropna: whether null keys should be dropped - """ - ... - - def corr_aggregate( - self, corr_aggregations: typing.Sequence[typing.Tuple[str, str, str]] - ) -> CompiledArrayValue: - """ - Get correlations between each lef_column_id and right_column_id, stored in the respective output_column_id. - This uses BigQuery's CORR under the hood, and thus only Pearson's method is used. - Arguments: - corr_aggregations: left_column_id, right_column_id, output_column_id tuples - """ - ... - - def assign(self, source_id: str, destination_id: str) -> CompiledArrayValue: - ... - - def assign_constant( - self, - destination_id: str, - value: typing.Any, - dtype: typing.Optional[bigframes.dtypes.Dtype], - ) -> CompiledArrayValue: - ... - + @abc.abstractmethod def unpivot( - self, + self: T, row_labels: typing.Sequence[typing.Hashable], unpivot_columns: typing.Sequence[ typing.Tuple[str, typing.Sequence[typing.Optional[str]]] @@ -154,7 +110,7 @@ def unpivot( bigframes.dtypes.Dtype, typing.Sequence[bigframes.dtypes.Dtype] ] = pandas.Float64Dtype(), how="left", - ) -> CompiledArrayValue: + ) -> T: """ Unpivot ArrayValue columns. @@ -170,7 +126,8 @@ def unpivot( """ ... - def _reproject_to_table(self) -> CompiledArrayValue: + @abc.abstractmethod + def _reproject_to_table(self: T) -> T: """ Internal operators that projects the internal representation into a new ibis table expression where each value column is a direct @@ -180,78 +137,66 @@ def _reproject_to_table(self) -> CompiledArrayValue: """ ... - def _uniform_sampling(self, fraction: float) -> CompiledArrayValue: - """Sampling the table on given fraction. - - .. warning:: - The row numbers of result is non-deterministic, avoid to use. - """ - ... - - # Always ordered operations - def project_window_op( - self, - column_name: str, - op: agg_ops.WindowOp, - window_spec: WindowSpec, - output_name=None, - *, - never_skip_nulls=False, - skip_reproject_unsafe: bool = False, - ) -> OrderedIR: - """ - Creates a new expression based on this expression with unary operation applied to one column. - column_name: the id of the input column present in the expression - op: the windowable operator to apply to the input column - window_spec: a specification of the window over which to apply the operator - output_name: the id to assign to the output of the operator, by default will replace input col if distinct output id not provided - never_skip_nulls: will disable null skipping for operators that would otherwise do so - skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection - """ - ... - - def promote_offsets(self, col_id: str): - """ - Convenience function to promote copy of column offsets to a value column. Can be used to reset index. - """ - ... 
- + def project_unary_op( + self: T, column_name: str, op: ops.UnaryOp, output_name=None + ) -> T: + """Creates a new expression based on this expression with unary operation applied to one column.""" + value = op._as_ibis(self._get_ibis_column(column_name)).name(output_name) + return self._set_or_replace_by_id(output_name, value) -class BaseIbisIR: - """Implementation detail, contains common logic between ordered and unordered IR""" + def project_binary_op( + self: T, + left_column_id: str, + right_column_id: str, + op: ops.BinaryOp, + output_column_id: str, + ) -> T: + """Creates a new expression based on this expression with binary operation applied to two columns.""" + value = op( + self._get_ibis_column(left_column_id), + self._get_ibis_column(right_column_id), + ).name(output_column_id) + return self._set_or_replace_by_id(output_column_id, value) - def __init__( - self, - table: ibis_types.Table, - columns: Sequence[ibis_types.Value], - predicates: Optional[Collection[ibis_types.BooleanValue]] = None, - ): - self._table = table - self._predicates = tuple(predicates) if predicates is not None else () - # Allow creating a DataFrame directly from an Ibis table expression. - # TODO(swast): Validate that each column references the same table (or - # no table for literal values). - self._columns = tuple(columns) - # To allow for more efficient lookup by column name, create a - # dictionary mapping names to column values. - self._column_names = {column.get_name(): column for column in self._columns} + def project_ternary_op( + self: T, + col_id_1: str, + col_id_2: str, + col_id_3: str, + op: ops.TernaryOp, + output_column_id: str, + ) -> T: + """Creates a new expression based on this expression with ternary operation applied to three columns.""" + value = op( + self._get_ibis_column(col_id_1), + self._get_ibis_column(col_id_2), + self._get_ibis_column(col_id_3), + ).name(output_column_id) + return self._set_or_replace_by_id(output_column_id, value) - @property - def columns(self) -> typing.Tuple[ibis_types.Value, ...]: - return self._columns + def assign(self: T, source_id: str, destination_id: str) -> T: + return self._set_or_replace_by_id( + destination_id, self._get_ibis_column(source_id) + ) - @property - def column_ids(self) -> typing.Sequence[str]: - return tuple(self._column_names.keys()) + def assign_constant( + self: T, + destination_id: str, + value: typing.Any, + dtype: typing.Optional[bigframes.dtypes.Dtype], + ) -> T: + # TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis. + ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype) + if ibis_value is None: + raise NotImplementedError( + f"Type not supported as scalar value {type(value)}. {constants.FEEDBACK_LINK}" + ) + expr = self._set_or_replace_by_id(destination_id, ibis_value) + return expr._reproject_to_table() - @property - def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]: - """Returns the frame's predicates as an equivalent boolean value, useful where a single predicate value is preferred.""" - return ( - _reduce_predicate_list(self._predicates).name(PREDICATE_COLUMN) - if self._predicates - else None - ) + @abc.abstractmethod + def _set_or_replace_by_id(self: T, id: str, new_value: ibis_types.Value) -> T: + ... 
def _get_ibis_column(self, key: str) -> ibis_types.Value: """Gets the Ibis expression for a given column.""" @@ -272,7 +217,7 @@ def get_column_type(self, key: str) -> bigframes.dtypes.Dtype: # Ibis Implementations -class UnorderedIR(BaseIbisIR, CompiledArrayValue): +class UnorderedIR(BaseIbisIR): def __init__( self, table: ibis_types.Table, @@ -281,58 +226,6 @@ def __init__( ): super().__init__(table, columns, predicates) - @classmethod - def from_pandas( - cls, - pd_df: pandas.DataFrame, - ) -> UnorderedIR: - """ - Builds an in-memory only (SQL only) expr from a pandas dataframe. - """ - # We can't include any hidden columns in the ArrayValue constructor, so - # grab the column names before we add the hidden ordering column. - column_names = [str(column) for column in pd_df.columns] - # Make sure column names are all strings. - pd_df = pd_df.set_axis(column_names, axis="columns") - - # ibis memtable cannot handle NA, must convert to None - pd_df = pd_df.astype("object") # type: ignore - pd_df = pd_df.where(pandas.notnull(pd_df), None) - - # NULL type isn't valid in BigQuery, so retry with an explicit schema in these cases. - keys_memtable = ibis.memtable(pd_df) - schema = keys_memtable.schema() - new_schema = [] - for column_index, column in enumerate(schema): - column_type = schema[column] - # The autodetected type might not be one we can support, such - # as NULL type for empty rows, so convert to a type we do - # support. - new_type = bigframes.dtypes.bigframes_dtype_to_ibis_dtype( - bigframes.dtypes.ibis_dtype_to_bigframes_dtype(column_type) - ) - # TODO(swast): Ibis memtable doesn't use backticks in struct - # field names, so spaces and other characters aren't allowed in - # the memtable context. Blocked by - # https://github.com/ibis-project/ibis/issues/7187 - column = f"col_{column_index}" - new_schema.append((column, new_type)) - - # must set non-null column labels. 
these are not the user-facing labels - pd_df = pd_df.set_axis( - [column for column, _ in new_schema], - axis="columns", - ) - keys_memtable = ibis.memtable(pd_df, schema=ibis.schema(new_schema)) - - return cls( - keys_memtable, - columns=[ - keys_memtable[f"col_{column_index}"].name(column) - for column_index, column in enumerate(column_names) - ], - ) - def builder(self): """Creates a mutable builder for expressions.""" # Since ArrayValue is intended to be immutable (immutability offers @@ -432,7 +325,7 @@ def select_columns(self, column_ids: typing.Sequence[str]) -> UnorderedIR: new_expr = builder.build() return new_expr - def filter(self, predicate_id: str, keep_null: bool = False) -> CompiledArrayValue: + def filter(self, predicate_id: str, keep_null: bool = False) -> UnorderedIR: condition = typing.cast( ibis_types.BooleanValue, self._get_ibis_column(predicate_id) ) @@ -445,76 +338,12 @@ def filter(self, predicate_id: str, keep_null: bool = False) -> CompiledArrayVal ) return self._filter(condition) - def _filter(self, predicate_value: ibis_types.BooleanValue) -> CompiledArrayValue: + def _filter(self, predicate_value: ibis_types.BooleanValue) -> UnorderedIR: """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression.""" expr = self.builder() expr.predicates = [*self._predicates, predicate_value] return expr.build() - def order_by( - self, by: Sequence[OrderingColumnReference], stable: bool = False - ) -> UnorderedIR: - return self - - def reversed(self) -> UnorderedIR: - return self - - def project_unary_op( - self, column_name: str, op: ops.UnaryOp, output_name=None - ) -> UnorderedIR: - value = op._as_ibis(self._get_ibis_column(column_name)).name( - output_name or column_name - ) - return self._set_or_replace_by_id(output_name or column_name, value) - - def project_binary_op( - self, - left_column_id: str, - right_column_id: str, - op: ops.BinaryOp, - output_column_id: str, - ) -> UnorderedIR: - value = op( - self._get_ibis_column(left_column_id), - self._get_ibis_column(right_column_id), - ).name(output_column_id) - return self._set_or_replace_by_id(output_column_id, value) - - def project_ternary_op( - self, - col_id_1: str, - col_id_2: str, - col_id_3: str, - op: ops.TernaryOp, - output_column_id: str, - ) -> UnorderedIR: - value = op( - self._get_ibis_column(col_id_1), - self._get_ibis_column(col_id_2), - self._get_ibis_column(col_id_3), - ).name(output_column_id) - return self._set_or_replace_by_id(output_column_id, value) - - def assign(self, source_id: str, destination_id: str) -> UnorderedIR: - return self._set_or_replace_by_id( - destination_id, self._get_ibis_column(source_id) - ) - - def assign_constant( - self, - destination_id: str, - value: typing.Any, - dtype: typing.Optional[bigframes.dtypes.Dtype], - ) -> UnorderedIR: - # TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis. - ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype) - if ibis_value is None: - raise NotImplementedError( - f"Type not supported as scalar value {type(value)}. {constants.FEEDBACK_LINK}" - ) - expr = self._set_or_replace_by_id(destination_id, ibis_value) - return expr._reproject_to_table() - def unpivot( self, row_labels: typing.Sequence[typing.Hashable], @@ -628,6 +457,13 @@ def aggregate( by_column_ids: typing.Sequence[str] = (), dropna: bool = True, ) -> OrderedIR: + """ + Apply aggregations to the expression. 
+
+        Arguments:
+            aggregations: input_column_id, operation, output_column_id tuples
+            by_column_id: column id of the aggregation key, this is preserved through the transform
+            dropna: whether null keys should be dropped
+        """
        table = self._to_ibis_expr()
        stats = {
            col_out: agg_op._as_ibis(table[col_in])
@@ -675,6 +511,12 @@ def aggregate(
    def corr_aggregate(
        self, corr_aggregations: typing.Sequence[typing.Tuple[str, str, str]]
    ) -> OrderedIR:
+        """
+        Get correlations between each left_column_id and right_column_id, stored in the respective output_column_id.
+        This uses BigQuery's CORR under the hood, and thus only Pearson's method is used.
+        Arguments:
+            corr_aggregations: left_column_id, right_column_id, output_column_id tuples
+        """
        table = self._to_ibis_expr()
        stats = {
            col_out: table[col_left].corr(table[col_right], how="pop")
@@ -708,27 +550,10 @@ def _uniform_sampling(self, fraction: float) -> UnorderedIR:
            columns=columns,
        )

-    # Unsupported operations, need ordering
-    def project_window_op(
-        self,
-        column_name: str,
-        op: agg_ops.WindowOp,
-        window_spec: WindowSpec,
-        output_name=None,
-        *,
-        never_skip_nulls=False,
-        skip_reproject_unsafe: bool = False,
-    ) -> OrderedIR:
-        raise ValueError("Window ops must be compiled in ordered mode")
-
-    def promote_offsets(self, col_id: str):
-        raise ValueError("Window ops must be compiled in ordered mode")
-
    ## Helpers
    def _set_or_replace_by_id(
        self, id: str, new_value: ibis_types.Value
    ) -> UnorderedIR:
-        """Safely assign by id while maintaining ordering integrity."""
        builder = self.builder()
        if id in self.column_ids:
            builder.columns = [
@@ -773,7 +598,7 @@ def build(self) -> UnorderedIR:
        )


-class OrderedIR(BaseIbisIR, CompiledArrayValue):
+class OrderedIR(BaseIbisIR):
    """Immutable BigQuery DataFrames expression tree.

    Note: Usage of this class is considered to be private and subject to change
@@ -935,62 +760,6 @@ def reversed(self) -> OrderedIR:
        expr_builder.ordering = self._ordering.with_reverse()
        return expr_builder.build()

-    def project_unary_op(
-        self, column_name: str, op: ops.UnaryOp, output_name=None
-    ) -> OrderedIR:
-        value = op._as_ibis(self._get_ibis_column(column_name)).name(
-            output_name or column_name
-        )
-        return self._set_or_replace_by_id(output_name or column_name, value)
-
-    def project_binary_op(
-        self,
-        left_column_id: str,
-        right_column_id: str,
-        op: ops.BinaryOp,
-        output_column_id: str,
-    ) -> OrderedIR:
-        value = op(
-            self._get_ibis_column(left_column_id),
-            self._get_ibis_column(right_column_id),
-        ).name(output_column_id)
-        return self._set_or_replace_by_id(output_column_id, value)
-
-    def project_ternary_op(
-        self,
-        col_id_1: str,
-        col_id_2: str,
-        col_id_3: str,
-        op: ops.TernaryOp,
-        output_column_id: str,
-    ) -> OrderedIR:
-        value = op(
-            self._get_ibis_column(col_id_1),
-            self._get_ibis_column(col_id_2),
-            self._get_ibis_column(col_id_3),
-        ).name(output_column_id)
-        return self._set_or_replace_by_id(output_column_id, value)
-
-    def assign(self, source_id: str, destination_id: str) -> OrderedIR:
-        return self._set_or_replace_by_id(
-            destination_id, self._get_ibis_column(source_id)
-        )
-
-    def assign_constant(
-        self,
-        destination_id: str,
-        value: typing.Any,
-        dtype: typing.Optional[bigframes.dtypes.Dtype],
-    ) -> OrderedIR:
-        # TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis.
-        ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype)
-        if ibis_value is None:
-            raise NotImplementedError(
-                f"Type not supported as scalar value {type(value)}. 
{constants.FEEDBACK_LINK}" - ) - expr = self._set_or_replace_by_id(destination_id, ibis_value) - return expr._reproject_to_table() - def _uniform_sampling(self, fraction: float) -> OrderedIR: """Sampling the table on given fraction. @@ -1012,6 +781,9 @@ def _uniform_sampling(self, fraction: float) -> OrderedIR: ) def promote_offsets(self, col_id: str) -> OrderedIR: + """ + Convenience function to promote copy of column offsets to a value column. Can be used to reset index. + """ # Special case: offsets already exist ordering = self._ordering @@ -1038,19 +810,6 @@ def select_columns(self, column_ids: typing.Sequence[str]) -> OrderedIR: new_expr = builder.build() return new_expr - def aggregate( - self, - aggregations: typing.Sequence[typing.Tuple[str, agg_ops.AggregateOp, str]], - by_column_ids: typing.Sequence[str] = (), - dropna: bool = True, - ) -> OrderedIR: - return self.to_unordered().aggregate(aggregations, by_column_ids, dropna) - - def corr_aggregate( - self, corr_aggregations: typing.Sequence[typing.Tuple[str, str, str]] - ) -> OrderedIR: - return self.to_unordered().corr_aggregate(corr_aggregations) - ## Methods that only work with ordering def project_window_op( self, @@ -1062,6 +821,15 @@ def project_window_op( never_skip_nulls=False, skip_reproject_unsafe: bool = False, ) -> OrderedIR: + """ + Creates a new expression based on this expression with unary operation applied to one column. + column_name: the id of the input column present in the expression + op: the windowable operator to apply to the input column + window_spec: a specification of the window over which to apply the operator + output_name: the id to assign to the output of the operator, by default will replace input col if distinct output id not provided + never_skip_nulls: will disable null skipping for operators that would otherwise do so + skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection + """ column = typing.cast(ibis_types.Column, self._get_ibis_column(column_name)) window = self._ibis_window_from_spec(window_spec, allow_ties=op.handles_ties) @@ -1373,26 +1141,7 @@ def _to_ibis_expr( table = table.filter(ibis.random() < ibis.literal(fraction)) return table - def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> OrderedIR: - """Safely assign by id while maintaining ordering integrity.""" - # TODO: Split into explicit set and replace methods - ordering_col_ids = [ - col_ref.column_id for col_ref in self._ordering.ordering_value_columns - ] - if id in ordering_col_ids: - return self._hide_column(id)._set_or_replace_by_id(id, new_value) - - builder = self.builder() - if id in self.column_ids: - builder.columns = [ - val if (col_id != id) else new_value.name(id) - for col_id, val in zip(self.column_ids, self._columns) - ] - else: - builder.columns = [*self.columns, new_value.name(id)] - return builder.build() - - def filter(self, predicate_id: str, keep_null: bool = False) -> CompiledArrayValue: + def filter(self, predicate_id: str, keep_null: bool = False) -> OrderedIR: condition = typing.cast( ibis_types.BooleanValue, self._get_ibis_column(predicate_id) ) @@ -1412,6 +1161,25 @@ def _filter(self, predicate_value: ibis_types.BooleanValue) -> OrderedIR: expr.predicates = [*self._predicates, predicate_value] return expr.build() + def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> 
OrderedIR: + """Safely assign by id while maintaining ordering integrity.""" + # TODO: Split into explicit set and replace methods + ordering_col_ids = [ + col_ref.column_id for col_ref in self._ordering.ordering_value_columns + ] + if id in ordering_col_ids: + return self._hide_column(id)._set_or_replace_by_id(id, new_value) + + builder = self.builder() + if id in self.column_ids: + builder.columns = [ + val if (col_id != id) else new_value.name(id) + for col_id, val in zip(self.column_ids, self._columns) + ] + else: + builder.columns = [*self.columns, new_value.name(id)] + return builder.build() + ## Ordering specific helpers def _get_any_column(self, key: str) -> ibis_types.Value: """Gets the Ibis expression for a given column. Will also get hidden columns.""" diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 5959695b30..7202042869 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -92,10 +92,11 @@ def compile_drop(node: nodes.DropColumnsNode, ordered: bool = True): @_compile_node.register def compile_readlocal(node: nodes.ReadLocalNode, ordered: bool = True): array_as_pd = pd.read_feather(io.BytesIO(node.feather_bytes)) + ordered_ir = compiled.OrderedIR.from_pandas(array_as_pd) if ordered: - return compiled.OrderedIR.from_pandas(array_as_pd) + return ordered_ir else: - return compiled.UnorderedIR.from_pandas(array_as_pd) + ordered_ir.to_unordered() @_compile_node.register @@ -116,7 +117,7 @@ def compile_readgbq(node: nodes.ReadGbqNode, ordered: bool = True): @_compile_node.register def compile_promote_offsets(node: nodes.PromoteOffsetsNode, ordered: bool = True): - result = compile_node(node.child, True).promote_offsets(node.col_id) + result = compile_ordered(node.child).promote_offsets(node.col_id) return result if ordered else result.to_unordered() @@ -127,12 +128,18 @@ def compile_filter(node: nodes.FilterNode, ordered: bool = True): @_compile_node.register def compile_orderby(node: nodes.OrderByNode, ordered: bool = True): - return compile_node(node.child, ordered).order_by(node.by, node.stable) + if ordered: + return compile_ordered(node.child).order_by(node.by, node.stable) + else: + return compile_unordered(node.child) @_compile_node.register def compile_reversed(node: nodes.ReversedNode, ordered: bool = True): - return compile_node(node.child, ordered).reversed() + if ordered: + return compile_ordered(node.child).reversed() + else: + return compile_unordered(node.child) @_compile_node.register @@ -168,7 +175,7 @@ def compile_concat(node: nodes.ConcatNode, ordered: bool = True): @_compile_node.register def compile_aggregate(node: nodes.AggregateNode, ordered: bool = True): - result = compile_node(node.child, False).aggregate( + result = compile_unordered(node.child).aggregate( node.aggregations, node.by_column_ids, node.dropna ) return result if ordered else result.to_unordered() @@ -176,13 +183,13 @@ def compile_aggregate(node: nodes.AggregateNode, ordered: bool = True): @_compile_node.register def compile_corr(node: nodes.CorrNode, ordered: bool = True): - result = compile_node(node.child, False).corr_aggregate(node.corr_aggregations) + result = compile_unordered(node.child).corr_aggregate(node.corr_aggregations) return result if ordered else result.to_unordered() @_compile_node.register def compile_window(node: nodes.WindowOpNode, ordered: bool = True): - result = compile_node(node.child, True).project_window_op( + result = compile_ordered(node.child).project_window_op( node.column_name, 
node.op, node.window_spec, From 57f8bc23570ea190cda6a4673d1c191dc67d3f52 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 7 Nov 2023 01:42:38 +0000 Subject: [PATCH 07/11] fix missing return statement --- bigframes/core/compile/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index 7202042869..feba392305 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -96,7 +96,7 @@ def compile_readlocal(node: nodes.ReadLocalNode, ordered: bool = True): if ordered: return ordered_ir else: - ordered_ir.to_unordered() + return ordered_ir.to_unordered() @_compile_node.register From 43430fc3920187c1459480430247d5b6627fd3cd Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 7 Nov 2023 01:57:27 +0000 Subject: [PATCH 08/11] fix unary op bug --- bigframes/core/__init__.py | 4 +--- bigframes/core/compile/compiled.py | 14 +++++++++----- bigframes/core/compile/compiler.py | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 1c291a69c2..931174cb56 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -99,9 +99,7 @@ def _compile_unordered(self) -> compiled.UnorderedIR: def shape(self) -> typing.Tuple[int, int]: """Returns dimensions as (length, width) tuple.""" width = len(self._compile().columns) - count_expr = ( - self._compile_unordered()._to_ibis_expr(ordering_mode="unordered").count() - ) + count_expr = self._compile_unordered()._to_ibis_expr().count() # Support in-memory engines for hermetic unit tests. if not self.node.session: diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 6fad1a7645..4ba5e6bd08 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -138,11 +138,17 @@ def _reproject_to_table(self: T) -> T: ... def project_unary_op( - self: T, column_name: str, op: ops.UnaryOp, output_name=None + self: T, + input_column_id: str, + op: ops.UnaryOp, + output_column_id: typing.Optional[str] = None, ) -> T: """Creates a new expression based on this expression with unary operation applied to one column.""" - value = op._as_ibis(self._get_ibis_column(column_name)).name(output_name) - return self._set_or_replace_by_id(output_name, value) + result_id = ( + output_column_id or input_column_id + ) # overwrite input if not output id provided + value = op._as_ibis(self._get_ibis_column(input_column_id)).name(result_id) + return self._set_or_replace_by_id(result_id, value) def project_binary_op( self: T, @@ -258,7 +264,6 @@ def _to_ibis_expr( expose_hidden_cols: bool = False, fraction: Optional[float] = None, col_id_overrides: typing.Mapping[str, str] = {}, - **kwargs, ): """ Creates an Ibis table expression representing the DataFrame. @@ -1061,7 +1066,6 @@ def _to_ibis_expr( col_id_overrides: typing.Mapping[str, str] = {}, ordering_mode: Literal["string_encoded", "offset_col", "unordered"], order_col_name: Optional[str] = ORDER_ID_COLUMN, - **kwargs, ): """ Creates an Ibis table expression representing the DataFrame. 
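Note on the unary-op fix above: with the "output_column_id or input_column_id" fallback restored, omitting the output id overwrites the input column in place, while passing a distinct id appends a new column. A minimal sketch of the two call shapes, reusing the AsTypeOp exercised later in this series' unit tests; here "ir" stands for any already-compiled IR and the column ids are hypothetical, so this is illustrative rather than code from the patch:

    import bigframes.operations as ops

    # No output id: result_id falls back to "col1", which is replaced in
    # place, so the set of column ids is unchanged.
    replaced = ir.project_unary_op("col1", ops.AsTypeOp("string"))
    assert replaced.column_ids == ir.column_ids

    # Distinct output id: "col1" survives and "col1_str" is appended.
    appended = ir.project_unary_op("col1", ops.AsTypeOp("string"), "col1_str")
    assert "col1_str" in appended.column_ids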
diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index feba392305..662e73a433 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -50,7 +50,7 @@ def _compile_node( node: nodes.BigFrameNode, ordered: bool = True ) -> compiled.UnorderedIR: """Defines transformation but isn't cached, always use compile_node instead""" - raise ValueError(f"Can't compile unnrecognized node: {node}") + raise ValueError(f"Can't compile unrecognized node: {node}") @_compile_node.register From 5fc62fce8f764e5d6ad93ac3b1df68bba6df0d40 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 7 Nov 2023 02:40:34 +0000 Subject: [PATCH 09/11] fix flaky test, add more unordered tests --- tests/system/large/ml/test_pipeline.py | 2 +- tests/system/small/test_dataframe.py | 15 ++++++++++++--- tests/system/small/test_pandas.py | 13 ++++++++++--- tests/system/small/test_series.py | 16 ++++++++++------ 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py index 3197320047..3e56954058 100644 --- a/tests/system/large/ml/test_pipeline.py +++ b/tests/system/large/ml/test_pipeline.py @@ -555,7 +555,7 @@ def test_pipeline_standard_scaler_kmeans_fit_score_predict( ), ) expected.index.name = "observation" - assert_pandas_df_equal(result, expected) + assert_pandas_df_equal(result, expected, ignore_order=True) def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_index): diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 269f1bfc65..e5dae4b250 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -2560,16 +2560,25 @@ def test_df_skew_too_few_values(scalars_dfs): pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) -def test_df_skew(scalars_dfs): +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_df_skew(scalars_dfs, ordered): columns = ["float64_col", "int64_col"] scalars_df, scalars_pandas_df = scalars_dfs - bf_result = scalars_df[columns].skew().to_pandas() + bf_result = scalars_df[columns].skew().to_pandas(ordered=ordered) pd_result = scalars_pandas_df[columns].skew() # Pandas may produce narrower numeric types, but bigframes always produces Float64 pd_result = pd_result.astype("Float64") - pd.testing.assert_series_equal(pd_result, bf_result, check_index_type=False) + assert_series_equal( + pd_result, bf_result, check_index_type=False, ignore_order=not ordered + ) def test_df_kurt_too_few_values(scalars_dfs): diff --git a/tests/system/small/test_pandas.py b/tests/system/small/test_pandas.py index eafe5e00b5..8795a67e2a 100644 --- a/tests/system/small/test_pandas.py +++ b/tests/system/small/test_pandas.py @@ -19,13 +19,20 @@ from tests.system.utils import assert_pandas_df_equal -def test_concat_dataframe(scalars_dfs): +@pytest.mark.parametrize( + ("ordered"), + [ + (True), + (False), + ], +) +def test_concat_dataframe(scalars_dfs, ordered): scalars_df, scalars_pandas_df = scalars_dfs bf_result = bpd.concat(11 * [scalars_df]) - bf_result = bf_result.to_pandas() + bf_result = bf_result.to_pandas(ordered=ordered) pd_result = pd.concat(11 * [scalars_pandas_df]) - pd.testing.assert_frame_equal(bf_result, pd_result) + assert_pandas_df_equal(bf_result, pd_result, ignore_order=not ordered) def test_concat_series(scalars_dfs): diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 
96293fb7bb..66d1f02a45 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -2188,21 +2188,25 @@ def test_where_with_default(scalars_df_index, scalars_pandas_df_index):
    )


-def test_clip(scalars_df_index, scalars_pandas_df_index):
+@pytest.mark.parametrize(
+    ("ordered"),
+    [
+        (True),
+        (False),
+    ],
+)
+def test_clip(scalars_df_index, scalars_pandas_df_index, ordered):
    col_bf = scalars_df_index["int64_col"]
    lower_bf = scalars_df_index["int64_too"] - 1
    upper_bf = scalars_df_index["int64_too"] + 1
-    bf_result = col_bf.clip(lower_bf, upper_bf).to_pandas()
+    bf_result = col_bf.clip(lower_bf, upper_bf).to_pandas(ordered=ordered)

    col_pd = scalars_pandas_df_index["int64_col"]
    lower_pd = scalars_pandas_df_index["int64_too"] - 1
    upper_pd = scalars_pandas_df_index["int64_too"] + 1
    pd_result = col_pd.clip(lower_pd, upper_pd)

-    pd.testing.assert_series_equal(
-        bf_result,
-        pd_result,
-    )
+    assert_series_equal(bf_result, pd_result, ignore_order=not ordered)


def test_clip_filtered_two_sided(scalars_df_index, scalars_pandas_df_index):

From 8b7afb5d60747d4c53a6faaee5247c0b0e1e3be0 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron
Date: Wed, 8 Nov 2023 22:46:55 +0000
Subject: [PATCH 10/11] rename _compile to _compile_ordered for arrayvalue

---
 bigframes/core/__init__.py | 12 ++++++------
 tests/unit/test_core.py    | 24 ++++++++++++++----------
 2 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py
index dd5ee5351b..63f36d4ddd 100644
--- a/bigframes/core/__init__.py
+++ b/bigframes/core/__init__.py
@@ -79,7 +79,7 @@ def from_pandas(cls, pd_df: pandas.DataFrame):

    @property
    def column_ids(self) -> typing.Sequence[str]:
-        return self._compile().column_ids
+        return self._compile_ordered().column_ids

    @property
    def session(self) -> Session:
@@ -89,9 +89,9 @@ def session(self) -> Session:
        return self.node.session[0] if required_session else get_global_session()

    def get_column_type(self, key: str) -> bigframes.dtypes.Dtype:
-        return self._compile().get_column_type(key)
+        return self._compile_ordered().get_column_type(key)

-    def _compile(self) -> compiled.OrderedIR:
+    def _compile_ordered(self) -> compiled.OrderedIR:
        return compiler.compile_ordered(self.node)

    def _compile_unordered(self) -> compiled.UnorderedIR:
@@ -99,7 +99,7 @@ def _compile_unordered(self) -> compiled.UnorderedIR:

    def shape(self) -> typing.Tuple[int, int]:
        """Returns dimensions as (length, width) tuple."""
-        width = len(self._compile().columns)
+        width = len(self._compile_unordered().columns)
        count_expr = self._compile_unordered()._to_ibis_expr().count()

        # Support in-memory engines for hermetic unit tests.
@@ -126,7 +126,7 @@ def to_sql( sorted: bool = False, ) -> str: if sorted or offset_column: - return self._compile().to_sql( + return self._compile_ordered().to_sql( offset_column=offset_column, col_id_overrides=col_id_overrides, sorted=sorted, @@ -161,7 +161,7 @@ def start_query( def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue: """Write the ArrayValue to a session table and create a new block object that references it.""" - compiled_value = self._compile() + compiled_value = self._compile_ordered() ibis_expr = compiled_value._to_ibis_expr( ordering_mode="unordered", expose_hidden_cols=True ) diff --git a/tests/unit/test_core.py b/tests/unit/test_core.py index f223bd416c..623448b3aa 100644 --- a/tests/unit/test_core.py +++ b/tests/unit/test_core.py @@ -49,7 +49,7 @@ def test_arrayvalue_constructor_from_ibis_table_adds_all_columns(): ordering=ordering, hidden_ordering_columns=(), ) - assert actual._compile()._table is ibis_table + assert actual._compile_ordered()._table is ibis_table assert len(actual.column_ids) == 3 @@ -83,7 +83,7 @@ def test_arrayvalue_with_get_column(): ), total_ordering_columns=["col1"], ) - col1 = value._compile()._get_ibis_column("col1") + col1 = value._compile_ordered()._get_ibis_column("col1") assert isinstance(col1, ibis_types.Value) assert col1.get_name() == "col1" assert col1.type().is_int64() @@ -100,7 +100,7 @@ def test_arrayvalues_to_ibis_expr_with_get_column(): ), total_ordering_columns=["col1"], ) - expr = value._compile()._get_ibis_column("col1") + expr = value._compile_ordered()._get_ibis_column("col1") assert expr.get_name() == "col1" assert expr.type().is_int64() @@ -117,7 +117,7 @@ def test_arrayvalues_to_ibis_expr_with_concat(): total_ordering_columns=["col1"], ) expr = value.concat([value]) - actual = expr._compile()._to_ibis_expr(ordering_mode="unordered") + actual = expr._compile_ordered()._to_ibis_expr(ordering_mode="unordered") assert len(actual.columns) == 3 # TODO(ashleyxu, b/299631930): test out the union expression assert actual.columns[0] == "column_0" @@ -136,8 +136,8 @@ def test_arrayvalues_to_ibis_expr_with_project_unary_op(): ), total_ordering_columns=["col1"], ) - expr = value.project_unary_op("col1", ops.AsTypeOp("string"))._compile() - assert value._compile().columns[0].type().is_int64() + expr = value.project_unary_op("col1", ops.AsTypeOp("string"))._compile_ordered() + assert value._compile_ordered().columns[0].type().is_int64() assert expr.columns[0].type().is_string() @@ -152,7 +152,9 @@ def test_arrayvalues_to_ibis_expr_with_project_binary_op(): ), total_ordering_columns=["col1"], ) - expr = value.project_binary_op("col2", "col3", ops.add_op, "col4")._compile() + expr = value.project_binary_op( + "col2", "col3", ops.add_op, "col4" + )._compile_ordered() assert expr.columns[3].type().is_float64() actual = expr._to_ibis_expr(ordering_mode="unordered") assert len(expr.columns) == 4 @@ -173,7 +175,7 @@ def test_arrayvalues_to_ibis_expr_with_project_ternary_op(): ) expr = value.project_ternary_op( "col2", "col3", "col4", ops.where_op, "col5" - )._compile() + )._compile_ordered() assert expr.columns[4].type().is_float64() actual = expr._to_ibis_expr(ordering_mode="unordered") assert len(expr.columns) == 5 @@ -195,7 +197,7 @@ def test_arrayvalue_to_ibis_expr_with_aggregate(): aggregations=(("col1", agg_ops.sum_op, "col4"),), by_column_ids=["col1"], dropna=False, - )._compile() + )._compile_ordered() actual = expr._to_ibis_expr(ordering_mode="unordered") assert len(expr.columns) == 2 assert actual.columns[0] == "col1" 
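These unit tests pin down the ordered path through _compile_ordered. The unordered path can be exercised the same way; a sketch of an equivalent assertion, assuming a value built like the ArrayValues above (this sketch is not part of the patch):

    expr = value._compile_unordered()
    # UnorderedIR._to_ibis_expr takes no ordering_mode and carries no hidden
    # ordering columns, so the Ibis table's columns match the IR's one-to-one.
    actual = expr._to_ibis_expr()
    assert len(actual.columns) == len(expr.columns)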
@@ -214,7 +216,9 @@ def test_arrayvalue_to_ibis_expr_with_corr_aggregate(): ), total_ordering_columns=["col1"], ) - expr = value.corr_aggregate(corr_aggregations=[("col1", "col3", "col4")])._compile() + expr = value.corr_aggregate( + corr_aggregations=[("col1", "col3", "col4")] + )._compile_ordered() actual = expr._to_ibis_expr(ordering_mode="unordered") assert len(expr.columns) == 1 assert actual.columns[0] == "col4" From 146b74cec1583c12e0761d478bfb882900c117f5 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 8 Nov 2023 23:05:42 +0000 Subject: [PATCH 11/11] fix mypy issue on join how arg --- bigframes/core/compile/single_column.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/compile/single_column.py b/bigframes/core/compile/single_column.py index c193624e6d..a9088feb49 100644 --- a/bigframes/core/compile/single_column.py +++ b/bigframes/core/compile/single_column.py @@ -212,7 +212,7 @@ def join_by_column_unordered( left_table, right_table, predicates=join_conditions, - how=how, + how=how, # type: ignore ) # We could filter out the original join columns, but predicates/ordering # might still reference them in implicit joins.
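Taken end to end, the series lets callers trade deterministic row order for cheaper queries: to_pandas(ordered=False) routes through _compile_unordered and UnorderedIR, so the generated SQL skips the hidden ordering column and the final ORDER BY wrapper. A hedged usage sketch of that public surface; the table name is a placeholder, and the performance note depends on the query:

    import bigframes.pandas as bpd

    df = bpd.read_gbq("my-project.my_dataset.my_table")  # placeholder table

    # Default ordered=True: compiled via OrderedIR; when sorted output is
    # requested, to_sql wraps the query in
    # SELECT * EXCEPT (ordering id) ... ORDER BY ordering id.
    ordered_frame = df.to_pandas()

    # ordered=False: compiled via UnorderedIR; row order is arbitrary, but
    # the query avoids the ordering bookkeeping entirely.
    unordered_frame = df.to_pandas(ordered=False)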