From 21b044f63d58c73d26080e6e1559d70f808fc70d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 28 Sep 2023 17:28:20 -0500 Subject: [PATCH 01/11] feat: support STRUCT data type with `Series.struct.field` to extract subfields --- bigframes/dtypes.py | 66 +++++++++++++++++-- bigframes/operations/structs.py | 47 +++++++++++++ .../pandas/core/arrays/__init__.py | 0 .../pandas/core/arrays/arrow/__init__.py | 0 .../pandas/core/arrays/arrow/accessors.py | 63 ++++++++++++++++++ 5 files changed, 172 insertions(+), 4 deletions(-) create mode 100644 bigframes/operations/structs.py create mode 100644 third_party/bigframes_vendored/pandas/core/arrays/__init__.py create mode 100644 third_party/bigframes_vendored/pandas/core/arrays/arrow/__init__.py create mode 100644 third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 271b8aa2f2..644fda0913 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -84,10 +84,10 @@ BIDIRECTIONAL_MAPPINGS: Iterable[Tuple[IbisDtype, Dtype]] = ( (ibis_dtypes.boolean, pd.BooleanDtype()), + (ibis_dtypes.date, pd.ArrowDtype(pa.date32())), (ibis_dtypes.float64, pd.Float64Dtype()), (ibis_dtypes.int64, pd.Int64Dtype()), (ibis_dtypes.string, pd.StringDtype(storage="pyarrow")), - (ibis_dtypes.date, pd.ArrowDtype(pa.date32())), (ibis_dtypes.time, pd.ArrowDtype(pa.time64("us"))), (ibis_dtypes.Timestamp(timezone=None), pd.ArrowDtype(pa.timestamp("us"))), ( @@ -100,6 +100,19 @@ pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS } +IBIS_TO_ARROW: Dict[ibis_dtypes.DataType, pa.DataType] = { + ibis_dtypes.boolean: pa.bool_(), + ibis_dtypes.date: pa.date32(), + ibis_dtypes.float64: pa.float64(), + ibis_dtypes.int64: pa.int64(), + ibis_dtypes.string: pa.string(), + ibis_dtypes.time: pa.time64("us"), + ibis_dtypes.Timestamp(timezone=None): pa.timestamp("us"), + ibis_dtypes.Timestamp(timezone="UTC"): pa.timestamp("us", tz="UTC"), +} + +ARROW_TO_IBIS = {arrow: ibis for ibis, arrow in IBIS_TO_ARROW.items()} + IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, Union[Dtype, np.dtype[Any]]] = { ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS } @@ -148,11 +161,12 @@ def ibis_dtype_to_bigframes_dtype( # Special cases: Ibis supports variations on these types, but currently # our IO returns them as objects. Eventually, we should support them as # ArrowDType (and update the IO accordingly) - if isinstance(ibis_dtype, ibis_dtypes.Array) or isinstance( - ibis_dtype, ibis_dtypes.Struct - ): + if isinstance(ibis_dtype, ibis_dtypes.Array): return np.dtype("O") + if isinstance(ibis_dtype, ibis_dtypes.Struct): + return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype)) + if ibis_dtype in IBIS_TO_BIGFRAMES: return IBIS_TO_BIGFRAMES[ibis_dtype] elif isinstance(ibis_dtype, ibis_dtypes.Null): @@ -164,6 +178,29 @@ def ibis_dtype_to_bigframes_dtype( ) +def ibis_dtype_to_arrow_dtype(ibis_dtype: ibis_dtypes.DataType) -> pa.DataType: + if isinstance(ibis_dtype, ibis_dtypes.Array): + return pa.list_(ibis_dtype_to_arrow_dtype(ibis_dtype.value_type)) + + if isinstance(ibis_dtype, ibis_dtypes.Struct): + return pa.struct( + [ + (name, ibis_dtype_to_arrow_dtype(dtype)) + for name, dtype in ibis_dtype.fields.items() + ] + ) + + if ibis_dtype in IBIS_TO_ARROW: + return IBIS_TO_ARROW[ibis_dtype] + elif isinstance(ibis_dtype, ibis_dtypes.Null): + # Fallback to STRING for NULL values for most flexibility in SQL. + return IBIS_TO_ARROW[ibis_dtypes.string] + else: + raise ValueError( + f"Unexpected Ibis data type {ibis_dtype}. 
{constants.FEEDBACK_LINK}" + ) + + def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value: """Converts an Ibis expression to canonical type. @@ -187,6 +224,24 @@ def ibis_table_to_canonical_types(table: ibis_types.Table) -> ibis_types.Table: return table.select(*casted_columns) +def arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType: + if pa.types.is_struct(arrow_dtype): + struct_dtype = typing.cast(pa.StructType, arrow_dtype) + return ibis_dtypes.Struct.from_tuples( + [ + (field.name, arrow_dtype_to_ibis_dtype(field.type)) + for field in struct_dtype + ] + ) + + if arrow_dtype in ARROW_TO_IBIS: + return ARROW_TO_IBIS[arrow_dtype] + else: + raise ValueError( + f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}" + ) + + def bigframes_dtype_to_ibis_dtype( bigframes_dtype: Union[DtypeString, Dtype, np.dtype[Any]] ) -> ibis_dtypes.DataType: @@ -202,6 +257,9 @@ def bigframes_dtype_to_ibis_dtype( Raises: ValueError: If passed a dtype not supported by BigQuery DataFrames. """ + if isinstance(bigframes_dtype, pd.ArrowDtype): + return arrow_dtype_to_ibis_dtype(bigframes_dtype.pyarrow_dtype) + type_string = str(bigframes_dtype) if type_string in BIGFRAMES_STRING_TO_BIGFRAMES: bigframes_dtype = BIGFRAMES_STRING_TO_BIGFRAMES[ diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py new file mode 100644 index 0000000000..f12c97981b --- /dev/null +++ b/bigframes/operations/structs.py @@ -0,0 +1,47 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import annotations + +import typing + +import ibis.expr.types as ibis_types + +import bigframes.dataframe +import bigframes.operations +import bigframes.operations.base +import bigframes.series +import third_party.bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors + + +class StructField(bigframes.operations.UnaryOp): + def __init__(self, name_or_index: str | int): + self._name_or_index = name_or_index + + def _as_ibis(self, x: ibis_types.Value): + struct_value = typing.cast(ibis_types.StructValue, x) + if isinstance(self._name_or_index, str): + name = self._name_or_index + else: + name = struct_value.names[self._name_or_index] + return struct_value[name] + + +class StructAccessor( + bigframes.operations.base.SeriesMethods, vendoracessors.StructAccessor +): + __doc__ = vendoracessors.StructAccessor.__doc__ + + def field(self, name_or_index: str | int) -> bigframes.series.Series: + return self._apply_unary_op(StructField(name_or_index)) diff --git a/third_party/bigframes_vendored/pandas/core/arrays/__init__.py b/third_party/bigframes_vendored/pandas/core/arrays/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/__init__.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py new file mode 100644 index 0000000000..cabb3566ee --- /dev/null +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -0,0 +1,63 @@ +# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/arrays/arrow/accessors.py +"""Accessors for arrow-backed data.""" + +from __future__ import annotations + +from bigframes import constants + + +class StructAccessor: + """ + Accessor object for structured data properties of the Series values. + """ + + def field(self, name_or_index: str | int): + """ + Extract a child field of a struct as a Series. + + Parameters + ---------- + name_or_index : str | int + Name or index of the child field to extract. + + Returns + ------- + pandas.Series + The data corresponding to the selected child field. + + See Also + -------- + Series.struct.explode : Return all child fields as a DataFrame. + + Examples + -------- + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> s = bpd.Series( + ... [ + ... {"version": 1, "project": "pandas"}, + ... {"version": 2, "project": "pandas"}, + ... {"version": 1, "project": "numpy"}, + ... ], + ... dtype=bpd.ArrowDtype(pa.struct( + ... [("version", pa.int64()), ("project", pa.string())] + ... )) + ... ) + + Extract by field name. + + >>> s.struct.field("project") + 0 pandas + 1 pandas + 2 numpy + Name: project, dtype: string[pyarrow] + + Extract by field index. 
+ + >>> s.struct.field(0) + 0 1 + 1 2 + 2 1 + Name: version, dtype: int64[pyarrow] + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 05105a8de1bd9e1510fa62def8e16849a725c8d8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 29 Sep 2023 10:55:02 -0500 Subject: [PATCH 02/11] implement explode --- bigframes/dataframe.py | 10 +++++- bigframes/operations/base.py | 10 +++++- bigframes/operations/structs.py | 18 ++++++++-- bigframes/series.py | 5 +++ .../pandas/core/arrays/arrow/accessors.py | 36 +++++++++++++++++++ 5 files changed, 75 insertions(+), 4 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 0d357e7c3d..5a3834f84f 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -161,7 +161,15 @@ def __init__( columns=columns, # type:ignore dtype=dtype, # type:ignore ) - if pd_dataframe.size < MAX_INLINE_DF_SIZE: + if ( + pd_dataframe.size < MAX_INLINE_DF_SIZE + # TODO(swast): Workaround data types limitation in inline data. + and not any( + dt.pyarrow_dtype + for dt in pd_dataframe.dtypes + if isinstance(dt, pandas.ArrowDtype) + ) + ): self._block = blocks.block_from_local( pd_dataframe, session or bigframes.pandas.get_global_session() ) diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index add6af57f4..51eaad18b9 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -86,7 +86,15 @@ def __init__( if pd_series.name is None: # to_frame will set default numeric column label if unnamed, but we do not support int column label, so must rename pd_dataframe = pd_dataframe.set_axis(["unnamed_col"], axis=1) - if pd_dataframe.size < MAX_INLINE_SERIES_SIZE: + if ( + pd_dataframe.size < MAX_INLINE_SERIES_SIZE + # TODO(swast): Workaround data types limitation in inline data. 
+ and not any( + dt.pyarrow_dtype + for dt in pd_dataframe.dtypes + if isinstance(dt, pd.ArrowDtype) + ) + ): self._block = blocks.block_from_local( pd_dataframe, session or bigframes.pandas.get_global_session() ) diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py index f12c97981b..80d51115d0 100644 --- a/bigframes/operations/structs.py +++ b/bigframes/operations/structs.py @@ -35,7 +35,7 @@ def _as_ibis(self, x: ibis_types.Value): name = self._name_or_index else: name = struct_value.names[self._name_or_index] - return struct_value[name] + return struct_value[name].name(name) class StructAccessor( @@ -44,4 +44,18 @@ class StructAccessor( __doc__ = vendoracessors.StructAccessor.__doc__ def field(self, name_or_index: str | int) -> bigframes.series.Series: - return self._apply_unary_op(StructField(name_or_index)) + series = self._apply_unary_op(StructField(name_or_index)) + if isinstance(name_or_index, str): + name = name_or_index + else: + struct_field = self._dtype.pyarrow_dtype[name_or_index] + name = struct_field.name + return series.rename(name) + + def explode(self) -> bigframes.dataframe.DataFrame: + import bigframes.pandas + + pa_type = self._dtype.pyarrow_dtype + return bigframes.pandas.concat( + [self.field(i) for i in range(pa_type.num_fields)], axis="columns" + ) diff --git a/bigframes/series.py b/bigframes/series.py index c1c0cb0537..5efe7b3365 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -51,6 +51,7 @@ import bigframes.operations.base import bigframes.operations.datetimes as dt import bigframes.operations.strings as strings +import bigframes.operations.structs as structs import third_party.bigframes_vendored.pandas.core.series as vendored_pandas_series LevelType = typing.Union[str, int] @@ -118,6 +119,10 @@ def query_job(self) -> Optional[bigquery.QueryJob]: self._set_internal_query_job(self._compute_dry_run()) return self._query_job + @property + def struct(self) -> structs.StructAccessor: + return structs.StructAccessor(self._block) + def _set_internal_query_job(self, query_job: bigquery.QueryJob): self._query_job = query_job diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index cabb3566ee..7268775f25 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -61,3 +61,39 @@ def field(self, name_or_index: str | int): Name: version, dtype: int64[pyarrow] """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def explode(self): + """ + Extract all child fields of a struct as a DataFrame. + + Returns + ------- + pandas.DataFrame + The data corresponding to all child fields. + + See Also + -------- + Series.struct.field : Return a single child field as a Series. + + Examples + -------- + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> s = bpd.Series( + ... [ + ... {"version": 1, "project": "pandas"}, + ... {"version": 2, "project": "pandas"}, + ... {"version": 1, "project": "numpy"}, + ... ], + ... dtype=bpd.ArrowDtype(pa.struct( + ... [("version", pa.int64()), ("project", pa.string())] + ... )) + ... 
) + + >>> s.struct.explode() + version project + 0 1 pandas + 1 2 pandas + 2 1 numpy + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 31290425af4b161fc0c395d133540d4592b1c2e4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Oct 2023 10:46:23 -0500 Subject: [PATCH 03/11] fix docstrings --- noxfile.py | 2 +- .../pandas/core/arrays/arrow/accessors.py | 129 +++++++++--------- .../bigframes_vendored/sklearn/__init__.py | 0 .../sklearn/ensemble/__init__.py | 0 .../bigframes_vendored/xgboost/__init__.py | 0 5 files changed, 63 insertions(+), 68 deletions(-) create mode 100644 third_party/bigframes_vendored/sklearn/__init__.py create mode 100644 third_party/bigframes_vendored/sklearn/ensemble/__init__.py create mode 100644 third_party/bigframes_vendored/xgboost/__init__.py diff --git a/noxfile.py b/noxfile.py index 033bbfefe4..da9dff92fe 100644 --- a/noxfile.py +++ b/noxfile.py @@ -362,7 +362,7 @@ def doctest(session: nox.sessions.Session): run_system( session=session, prefix_name="doctest", - extra_pytest_options=("--doctest-modules",), + extra_pytest_options=("--doctest-modules", "third_party"), test_folder="bigframes", check_cov=True, ) diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py index 7268775f25..8e3ea06a3d 100644 --- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -15,50 +15,45 @@ def field(self, name_or_index: str | int): """ Extract a child field of a struct as a Series. - Parameters - ---------- - name_or_index : str | int - Name or index of the child field to extract. - - Returns - ------- - pandas.Series - The data corresponding to the selected child field. - - See Also - -------- - Series.struct.explode : Return all child fields as a DataFrame. - - Examples - -------- - >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> s = bpd.Series( - ... [ - ... {"version": 1, "project": "pandas"}, - ... {"version": 2, "project": "pandas"}, - ... {"version": 1, "project": "numpy"}, - ... ], - ... dtype=bpd.ArrowDtype(pa.struct( - ... [("version", pa.int64()), ("project", pa.string())] - ... )) - ... ) + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series( + ... [ + ... {"version": 1, "project": "pandas"}, + ... {"version": 2, "project": "pandas"}, + ... {"version": 1, "project": "numpy"}, + ... ], + ... dtype=bpd.ArrowDtype(pa.struct( + ... [("version", pa.int64()), ("project", pa.string())] + ... )) + ... ) Extract by field name. - >>> s.struct.field("project") - 0 pandas - 1 pandas - 2 numpy - Name: project, dtype: string[pyarrow] + >>> s.struct.field("project") + 0 pandas + 1 pandas + 2 numpy + Name: project, dtype: string Extract by field index. - >>> s.struct.field(0) - 0 1 - 1 2 - 2 1 - Name: version, dtype: int64[pyarrow] + >>> s.struct.field(0) + 0 1 + 1 2 + 2 1 + Name: version, dtype: Int64 + + Args: + name_or_index: + Name (str) or index (int) of the child field to extract. + + Returns: + Series: + The data corresponding to the selected child field. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) @@ -66,34 +61,34 @@ def explode(self): """ Extract all child fields of a struct as a DataFrame. - Returns - ------- - pandas.DataFrame - The data corresponding to all child fields. 
- - See Also - -------- - Series.struct.field : Return a single child field as a Series. - - Examples - -------- - >>> import bigframes.pandas as bpd - >>> import pyarrow as pa - >>> s = bpd.Series( - ... [ - ... {"version": 1, "project": "pandas"}, - ... {"version": 2, "project": "pandas"}, - ... {"version": 1, "project": "numpy"}, - ... ], - ... dtype=bpd.ArrowDtype(pa.struct( - ... [("version", pa.int64()), ("project", pa.string())] - ... )) - ... ) - - >>> s.struct.explode() - version project - 0 1 pandas - 1 2 pandas - 2 1 numpy + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series( + ... [ + ... {"version": 1, "project": "pandas"}, + ... {"version": 2, "project": "pandas"}, + ... {"version": 1, "project": "numpy"}, + ... ], + ... dtype=bpd.ArrowDtype(pa.struct( + ... [("version", pa.int64()), ("project", pa.string())] + ... )) + ... ) + + Extract all child fields. + + >>> s.struct.explode() + version project + 0 1 pandas + 1 2 pandas + 2 1 numpy + + [3 rows x 2 columns] + + Returns: + DataFrame: + The data corresponding to all child fields. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/sklearn/__init__.py b/third_party/bigframes_vendored/sklearn/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/bigframes_vendored/sklearn/ensemble/__init__.py b/third_party/bigframes_vendored/sklearn/ensemble/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/bigframes_vendored/xgboost/__init__.py b/third_party/bigframes_vendored/xgboost/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From f4671fce4134bbcd88715a6beff639f47978f478 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Oct 2023 10:57:52 -0500 Subject: [PATCH 04/11] add unit tests --- tests/unit/test_dtypes.py | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index bb8ae570dc..3baff2e1f5 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -85,6 +85,70 @@ def test_ibis_float32_raises_unexpected_datatype(): bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_dtypes.float32) +IBIS_ARROW_DTYPES = ( + (ibis_dtypes.boolean, pa.bool_()), + (ibis_dtypes.date, pa.date32()), + (ibis_dtypes.Timestamp(), pa.timestamp("us")), + (ibis_dtypes.float64, pa.float64()), + ( + ibis_dtypes.Timestamp(timezone="UTC"), + pa.timestamp("us", tz="UTC"), + ), + ( + ibis_dtypes.Struct.from_tuples( + [ + ("name", ibis_dtypes.string()), + ("version", ibis_dtypes.int64()), + ] + ), + pa.struct( + [ + ("name", pa.string()), + ("version", pa.int64()), + ] + ), + ), + ( + ibis_dtypes.Struct.from_tuples( + [ + ( + "nested", + ibis_dtypes.Struct.from_tuples( + [ + ("field", ibis_dtypes.string()), + ] + ), + ), + ] + ), + pa.struct( + [ + ( + "nested", + pa.struct( + [ + ("field", pa.string()), + ] + ), + ), + ] + ), + ), +) + + +@pytest.mark.parametrize(("ibis_dtype", "arrow_dtype"), IBIS_ARROW_DTYPES) +def test_arrow_dtype_to_ibis_dtype(ibis_dtype, arrow_dtype): + result = bigframes.dtypes.arrow_dtype_to_ibis_dtype(arrow_dtype) + assert result == ibis_dtype + + +@pytest.mark.parametrize(("ibis_dtype", "arrow_dtype"), IBIS_ARROW_DTYPES) +def test_ibis_dtype_to_arrow_dtype(ibis_dtype, arrow_dtype): + result = bigframes.dtypes.ibis_dtype_to_arrow_dtype(ibis_dtype) + assert result == arrow_dtype + + 
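These parametrized cases exercise the two new helpers as inverses over the shared IBIS_ARROW_DTYPES pairs. A minimal round-trip sketch of the property under test, assuming only the public helpers added earlier in this series (`ibis_dtype_to_arrow_dtype` and `arrow_dtype_to_ibis_dtype`):

# Sketch: converting a nested STRUCT type from Arrow to Ibis and back
# should be lossless for every pair listed in IBIS_ARROW_DTYPES.
import pyarrow as pa

import bigframes.dtypes

arrow_type = pa.struct([("name", pa.string()), ("version", pa.int64())])
ibis_type = bigframes.dtypes.arrow_dtype_to_ibis_dtype(arrow_type)
assert bigframes.dtypes.ibis_dtype_to_arrow_dtype(ibis_type) == arrow_type
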
@pytest.mark.parametrize( ["bigframes_dtype", "ibis_dtype"], [ From a18370888ef720235b2b75d9bbc788ff727cf123 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Oct 2023 11:37:09 -0500 Subject: [PATCH 05/11] update struct dtype tests --- tests/system/small/test_dataframe.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index e71b1430e6..43dfbed426 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -884,7 +884,19 @@ def test_get_dtypes_array_struct(session): dtypes = df.dtypes pd.testing.assert_series_equal( dtypes, - pd.Series({"array_column": np.dtype("O"), "struct_column": np.dtype("O")}), + pd.Series( + { + "array_column": np.dtype("O"), + "struct_column": pd.ArrowDtype( + pa.struct( + [ + ("string_field", pa.string()), + ("float_field", pa.float64()), + ] + ) + ), + } + ), ) From d600a1c7e53a686f14f3959f150be78b5edb4241 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Oct 2023 12:49:36 -0500 Subject: [PATCH 06/11] cleanup before doctest --- noxfile.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/noxfile.py b/noxfile.py index a113e1fcde..1ce3965d6e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -275,6 +275,20 @@ def install_systemtest_dependencies(session, install_test_extra, *constraints): session.install("-e", ".", *constraints) +def clean_pycache(): + paths = CURRENT_DIRECTORY.glob("**/__pycache__/**/*") + for path in paths: + path.unlink() + + paths = CURRENT_DIRECTORY.glob("**/__pycache__") + for path in paths: + path.rmdir() + + paths = CURRENT_DIRECTORY.glob("**/*.pyc") + for path in paths: + path.unlink() + + def run_system( session: nox.sessions.Session, prefix_name, @@ -286,6 +300,7 @@ def run_system( extra_pytest_options=(), ): """Run the system test suite.""" + clean_pycache() constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) From 9e90b1919401dd9a3869f07a317f2797b7868423 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Oct 2023 12:58:49 -0500 Subject: [PATCH 07/11] alternative workaround for mismatch import error --- noxfile.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1ce3965d6e..15c87746f4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -275,20 +275,6 @@ def install_systemtest_dependencies(session, install_test_extra, *constraints): session.install("-e", ".", *constraints) -def clean_pycache(): - paths = CURRENT_DIRECTORY.glob("**/__pycache__/**/*") - for path in paths: - path.unlink() - - paths = CURRENT_DIRECTORY.glob("**/__pycache__") - for path in paths: - path.rmdir() - - paths = CURRENT_DIRECTORY.glob("**/*.pyc") - for path in paths: - path.unlink() - - def run_system( session: nox.sessions.Session, prefix_name, @@ -300,7 +286,6 @@ def run_system( extra_pytest_options=(), ): """Run the system test suite.""" - clean_pycache() constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) @@ -374,6 +359,9 @@ def system_noextras(session: nox.sessions.Session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS[-1]) def doctest(session: nox.sessions.Session): """Run the system test suite.""" + # Workaround https://github.com/pytest-dev/pytest/issues/9567 + os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1" + run_system( session=session, prefix_name="doctest", From e55be81e42a8c8a7f92fe15241ce6929947a558e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: 
Mon, 2 Oct 2023 13:00:30 -0500 Subject: [PATCH 08/11] alternative workaround for mismatch import error --- .kokoro/build.sh | 3 +++ noxfile.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.kokoro/build.sh b/.kokoro/build.sh index a0fa4bc787..58eaa7fedf 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -26,6 +26,9 @@ cd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 +# Workaround https://github.com/pytest-dev/pytest/issues/9567 +export PY_IGNORE_IMPORTMISMATCH=1 + # Debug: show build environment env | grep KOKORO diff --git a/noxfile.py b/noxfile.py index 15c87746f4..a113e1fcde 100644 --- a/noxfile.py +++ b/noxfile.py @@ -359,9 +359,6 @@ def system_noextras(session: nox.sessions.Session): @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS[-1]) def doctest(session: nox.sessions.Session): """Run the system test suite.""" - # Workaround https://github.com/pytest-dev/pytest/issues/9567 - os.environ["PY_IGNORE_IMPORTMISMATCH"] = "1" - run_system( session=session, prefix_name="doctest", From 6c133143f450949295424da5c03b3f96eac7529a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 2 Oct 2023 17:07:30 -0500 Subject: [PATCH 09/11] remove dead ibis null to arrow check --- bigframes/dtypes.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 85b473ebbd..46a7a1cb50 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -192,9 +192,6 @@ def ibis_dtype_to_arrow_dtype(ibis_dtype: ibis_dtypes.DataType) -> pa.DataType: if ibis_dtype in IBIS_TO_ARROW: return IBIS_TO_ARROW[ibis_dtype] - elif isinstance(ibis_dtype, ibis_dtypes.Null): - # Fallback to STRING for NULL values for most flexibility in SQL. - return IBIS_TO_ARROW[ibis_dtypes.string] else: raise ValueError( f"Unexpected Ibis data type {ibis_dtype}. {constants.FEEDBACK_LINK}" From e70084995f88ebc990dad2e9172047218c62baf3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 3 Oct 2023 15:54:59 -0500 Subject: [PATCH 10/11] feat: use ArrowDtype for STRUCT columns in `to_pandas` --- bigframes/core/blocks.py | 2 ++ bigframes/dtypes.py | 17 +++++++++++++++++ tests/system/small/test_dataframe_io.py | 23 ++++++++++++++++++++--- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index 0161d17361..262682b0b0 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -374,7 +374,9 @@ def _to_dataframe( cls, result, schema: typing.Mapping[str, bigframes.dtypes.Dtype] ) -> pd.DataFrame: """Convert BigQuery data to pandas DataFrame with specific dtypes.""" + dtypes = bigframes.dtypes.to_pandas_dtypes_overrides(result.schema) df = result.to_dataframe( + dtypes=dtypes, bool_dtype=pd.BooleanDtype(), int_dtype=pd.Int64Dtype(), float_dtype=pd.Float64Dtype(), diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 46a7a1cb50..d8b40c6a5a 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -19,6 +19,8 @@ from typing import Any, Dict, Iterable, Literal, Tuple, Union import geopandas as gpd # type: ignore +import google.cloud.bigquery as bigquery +import google.cloud.bigquery._pandas_helpers import ibis import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types @@ -401,3 +403,18 @@ def cast_ibis_value( raise TypeError( f"Unsupported cast {value.type()} to {to_type}. 
{constants.FEEDBACK_LINK}" ) + + +def to_pandas_dtypes_overrides(schema: Iterable[bigquery.SchemaField]) -> Dict: + """For each STRUCT field, make sure we specify the full type to use.""" + # TODO(swast): Also override ARRAY fields. + dtypes = {} + for field in schema: + if field.field_type == "RECORD" and field.mode != "REPEATED": + # TODO(swast): We're using a private API here. Would likely be + # better if we called `to_arrow()` and converted to a pandas + # DataFrame ourselves from that. + dtypes[field.name] = pd.ArrowDtype( + google.cloud.bigquery._pandas_helpers.bq_to_arrow_data_type(field) + ) + return dtypes diff --git a/tests/system/small/test_dataframe_io.py b/tests/system/small/test_dataframe_io.py index 3886b85f40..d60083a837 100644 --- a/tests/system/small/test_dataframe_io.py +++ b/tests/system/small/test_dataframe_io.py @@ -16,6 +16,7 @@ import google.api_core.exceptions import pandas as pd +import pyarrow as pa import pytest from tests.system.utils import ( @@ -44,7 +45,7 @@ def test_to_pandas_w_correct_dtypes(scalars_df_default_index): def test_to_pandas_array_struct_correct_result(session): - """In future, we should support arrays and structs with arrow types. + """In future, we should support arrays with arrow types. For now we fall back to the current connector behavior of converting to Python objects""" df = session.read_gbq( @@ -59,11 +60,27 @@ def test_to_pandas_array_struct_correct_result(session): expected = pd.DataFrame( { "array_column": [[1, 3, 2]], - "struct_column": [{"string_field": "a", "float_field": 1.2}], + "struct_column": pd.Series( + [{"string_field": "a", "float_field": 1.2}], + dtype=pd.ArrowDtype( + pa.struct( + [ + ("string_field", pa.string()), + ("float_field", pa.float64()), + ] + ) + ), + ), } ) expected.index = expected.index.astype("Int64") - pd.testing.assert_frame_equal(result, expected) + pd.testing.assert_series_equal(result.dtypes, expected.dtypes) + pd.testing.assert_series_equal(result["array_column"], expected["array_column"]) + # assert_series_equal not implemented for struct columns yet. Compare + # values as Python objects, instead. 
+ pd.testing.assert_series_equal( + result["struct_column"].astype("O"), expected["struct_column"].astype("O") + ) @pytest.mark.parametrize( From 884ab1647823d5e44fcc2fddf0855f35e18a026c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 18 Oct 2023 16:07:21 +0000 Subject: [PATCH 11/11] pull private method into third-party --- bigframes/dtypes.py | 4 +- noxfile.py | 2 + .../google_cloud_bigquery/LICENSE | 202 +++++++++ .../google_cloud_bigquery/__init__.py | 13 + .../google_cloud_bigquery/_pandas_helpers.py | 158 +++++++ .../google_cloud_bigquery/tests/__init__.py | 13 + .../tests/unit/__init__.py | 13 + .../tests/unit/test_pandas_helpers.py | 413 ++++++++++++++++++ 8 files changed, 816 insertions(+), 2 deletions(-) create mode 100644 third_party/bigframes_vendored/google_cloud_bigquery/LICENSE create mode 100644 third_party/bigframes_vendored/google_cloud_bigquery/__init__.py create mode 100644 third_party/bigframes_vendored/google_cloud_bigquery/_pandas_helpers.py create mode 100644 third_party/bigframes_vendored/google_cloud_bigquery/tests/__init__.py create mode 100644 third_party/bigframes_vendored/google_cloud_bigquery/tests/unit/__init__.py create mode 100644 third_party/bigframes_vendored/google_cloud_bigquery/tests/unit/test_pandas_helpers.py diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index d8b40c6a5a..da221a95ac 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -20,7 +20,6 @@ import geopandas as gpd # type: ignore import google.cloud.bigquery as bigquery -import google.cloud.bigquery._pandas_helpers import ibis import ibis.expr.datatypes as ibis_dtypes import ibis.expr.types as ibis_types @@ -29,6 +28,7 @@ import pyarrow as pa import bigframes.constants as constants +import third_party.bigframes_vendored.google_cloud_bigquery._pandas_helpers as gcb3p_pandas_helpers # Type hints for Pandas dtypes supported by BigQuery DataFrame Dtype = Union[ @@ -415,6 +415,6 @@ def to_pandas_dtypes_overrides(schema: Iterable[bigquery.SchemaField]) -> Dict: # better if we called `to_arrow()` and converted to a pandas # DataFrame ourselves from that. dtypes[field.name] = pd.ArrowDtype( - google.cloud.bigquery._pandas_helpers.bq_to_arrow_data_type(field) + gcb3p_pandas_helpers.bq_to_arrow_data_type(field) ) return dtypes diff --git a/noxfile.py b/noxfile.py index 54ccdb9a87..1864da9fe7 100644 --- a/noxfile.py +++ b/noxfile.py @@ -185,6 +185,7 @@ def run_unit(session, install_test_extra): # Run py.test against the unit tests. tests_path = os.path.join("tests", "unit") + third_party_tests_path = os.path.join("third_party", "bigframes_vendored") session.run( "py.test", "--quiet", @@ -196,6 +197,7 @@ def run_unit(session, install_test_extra): "--cov-report=term-missing", "--cov-fail-under=0", tests_path, + third_party_tests_path, *session.posargs, ) diff --git a/third_party/bigframes_vendored/google_cloud_bigquery/LICENSE b/third_party/bigframes_vendored/google_cloud_bigquery/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/third_party/bigframes_vendored/google_cloud_bigquery/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
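Vendoring `bq_to_arrow_data_type` below removes the dependency on the private `google.cloud.bigquery._pandas_helpers` module that patch 10 imported. A rough sketch of how `to_pandas_dtypes_overrides` applies the vendored helper to a STRUCT column; the schema field here is illustrative:

import google.cloud.bigquery as bigquery
import pandas as pd

import third_party.bigframes_vendored.google_cloud_bigquery._pandas_helpers as helpers

# Illustrative RECORD column, mirroring struct_column from the system tests.
field = bigquery.SchemaField(
    "struct_column",
    "RECORD",
    fields=(
        bigquery.SchemaField("string_field", "STRING"),
        bigquery.SchemaField("float_field", "FLOAT"),
    ),
)
# For non-repeated RECORD fields, the override is an ArrowDtype wrapping
# the full pyarrow struct type.
dtype_override = pd.ArrowDtype(helpers.bq_to_arrow_data_type(field))
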
diff --git a/third_party/bigframes_vendored/google_cloud_bigquery/__init__.py b/third_party/bigframes_vendored/google_cloud_bigquery/__init__.py new file mode 100644 index 0000000000..1dc90d1848 --- /dev/null +++ b/third_party/bigframes_vendored/google_cloud_bigquery/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/third_party/bigframes_vendored/google_cloud_bigquery/_pandas_helpers.py b/third_party/bigframes_vendored/google_cloud_bigquery/_pandas_helpers.py new file mode 100644 index 0000000000..5e2a7a7ef0 --- /dev/null +++ b/third_party/bigframes_vendored/google_cloud_bigquery/_pandas_helpers.py @@ -0,0 +1,158 @@ +# Original: https://github.com/googleapis/python-bigquery/blob/main/google/cloud/bigquery/_pandas_helpers.py +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for connecting BigQuery and pandas.""" + +import warnings + +import google.cloud.bigquery.schema as schema +import pyarrow + + +def pyarrow_datetime(): + return pyarrow.timestamp("us", tz=None) + + +def pyarrow_numeric(): + return pyarrow.decimal128(38, 9) + + +def pyarrow_bignumeric(): + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + return pyarrow.decimal256(76, 38) + + +def pyarrow_time(): + return pyarrow.time64("us") + + +def pyarrow_timestamp(): + return pyarrow.timestamp("us", tz="UTC") + + +# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py +# When modifying it be sure to update it there as well. 
+BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, + "BIGNUMERIC": pyarrow_bignumeric, +} +ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter. Only the type ID matters, + # and it's the same for all decimal128/decimal256 instances. + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", +} + + +BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { + "GEOGRAPHY": { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, +} + + +def bq_to_arrow_struct_data_type(field): + arrow_fields = [] + for subfield in field.fields: + arrow_subfield = bq_to_arrow_field(subfield) + if arrow_subfield: + arrow_fields.append(arrow_subfield) + else: + # Could not determine a subfield type. Fallback to type + # inference. + return None + return pyarrow.struct(arrow_fields) + + +def bq_to_arrow_data_type(field): + """Return the Arrow data type, corresponding to a given BigQuery column. + + Returns: + None: if default Arrow type inspection should be used. + """ + if field.mode is not None and field.mode.upper() == "REPEATED": + inner_type = bq_to_arrow_data_type( + schema.SchemaField(field.name, field.field_type, fields=field.fields) + ) + if inner_type: + return pyarrow.list_(inner_type) + return None + + field_type_upper = field.field_type.upper() if field.field_type else "" + if field_type_upper in schema._STRUCT_TYPES: + return bq_to_arrow_struct_data_type(field) + + data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper) + if data_type_constructor is None: + return None + return data_type_constructor() + + +def bq_to_arrow_field(bq_field, array_type=None): + """Return the Arrow field, corresponding to a given BigQuery column. + + Returns: + None: if the Arrow type cannot be determined. 
+ """ + arrow_type = bq_to_arrow_data_type(bq_field) + if arrow_type is not None: + if array_type is not None: + arrow_type = array_type # For GEOGRAPHY, at least initially + is_nullable = bq_field.mode.upper() == "NULLABLE" + metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get( + bq_field.field_type.upper() if bq_field.field_type else "" + ) + return pyarrow.field( + bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata + ) + + warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name)) + return None diff --git a/third_party/bigframes_vendored/google_cloud_bigquery/tests/__init__.py b/third_party/bigframes_vendored/google_cloud_bigquery/tests/__init__.py new file mode 100644 index 0000000000..1dc90d1848 --- /dev/null +++ b/third_party/bigframes_vendored/google_cloud_bigquery/tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/third_party/bigframes_vendored/google_cloud_bigquery/tests/unit/__init__.py b/third_party/bigframes_vendored/google_cloud_bigquery/tests/unit/__init__.py new file mode 100644 index 0000000000..1dc90d1848 --- /dev/null +++ b/third_party/bigframes_vendored/google_cloud_bigquery/tests/unit/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/third_party/bigframes_vendored/google_cloud_bigquery/tests/unit/test_pandas_helpers.py b/third_party/bigframes_vendored/google_cloud_bigquery/tests/unit/test_pandas_helpers.py new file mode 100644 index 0000000000..dc4a09cc54 --- /dev/null +++ b/third_party/bigframes_vendored/google_cloud_bigquery/tests/unit/test_pandas_helpers.py @@ -0,0 +1,413 @@ +# Original: https://github.com/googleapis/python-bigquery/blob/main/tests/unit/test__pandas_helpers.py +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import functools +import warnings + +from google.cloud.bigquery import schema +import pyarrow +import pyarrow.parquet +import pyarrow.types +import pytest + + +@pytest.fixture +def module_under_test(): + from third_party.bigframes_vendored.google_cloud_bigquery import _pandas_helpers + + return _pandas_helpers + + +def is_none(value): + return value is None + + +def is_datetime(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime-type + return all_( + pyarrow.types.is_timestamp, + lambda type_: type_.unit == "us", + lambda type_: type_.tz is None, + )(type_) + + +def is_numeric(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type + return all_( + pyarrow.types.is_decimal, + lambda type_: type_.precision == 38, + lambda type_: type_.scale == 9, + )(type_) + + +def is_bignumeric(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type + return all_( + pyarrow.types.is_decimal, + lambda type_: type_.precision == 76, + lambda type_: type_.scale == 38, + )(type_) + + +def is_timestamp(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type + return all_( + pyarrow.types.is_timestamp, + lambda type_: type_.unit == "us", + lambda type_: type_.tz == "UTC", + )(type_) + + +def do_all(functions, value): + return all((func(value) for func in functions)) + + +def all_(*functions): + return functools.partial(do_all, functions) + + +def test_is_datetime(): + assert is_datetime(pyarrow.timestamp("us", tz=None)) + assert not is_datetime(pyarrow.timestamp("ms", tz=None)) + assert not is_datetime(pyarrow.timestamp("us", tz="UTC")) + assert not is_datetime(pyarrow.timestamp("ns", tz="UTC")) + assert not is_datetime(pyarrow.string()) + + +def test_do_all(): + assert do_all((lambda _: True, lambda _: True), None) + assert not do_all((lambda _: True, lambda _: False), None) + assert not do_all((lambda _: False,), None) + + +def test_all_(): + assert all_(lambda _: True, lambda _: True)(None) + assert not all_(lambda _: True, lambda _: False)(None) + + +@pytest.mark.parametrize( + "bq_type,bq_mode,is_correct_type", + [ + ("STRING", "NULLABLE", pyarrow.types.is_string), + ("STRING", None, pyarrow.types.is_string), + ("string", "NULLABLE", pyarrow.types.is_string), + ("StRiNg", "NULLABLE", pyarrow.types.is_string), + ("BYTES", "NULLABLE", pyarrow.types.is_binary), + ("INTEGER", "NULLABLE", pyarrow.types.is_int64), + ("INT64", "NULLABLE", pyarrow.types.is_int64), + ("FLOAT", "NULLABLE", pyarrow.types.is_float64), + ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), + ("NUMERIC", "NULLABLE", is_numeric), + pytest.param( + "BIGNUMERIC", + "NULLABLE", + is_bignumeric, + ), + ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), + ("BOOL", "NULLABLE", pyarrow.types.is_boolean), + ("TIMESTAMP", "NULLABLE", is_timestamp), + ("DATE", "NULLABLE", pyarrow.types.is_date32), + ("TIME", "NULLABLE", pyarrow.types.is_time64), + ("DATETIME", "NULLABLE", is_datetime), + ("GEOGRAPHY", "NULLABLE", pyarrow.types.is_string), + ("UNKNOWN_TYPE", "NULLABLE", is_none), + # Use pyarrow.list_(item_type) for repeated (array) fields. 
+ ( + "STRING", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_string(type_.value_type), + ), + ), + ( + "STRING", + "repeated", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_string(type_.value_type), + ), + ), + ( + "STRING", + "RePeAtEd", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_string(type_.value_type), + ), + ), + ( + "BYTES", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_binary(type_.value_type), + ), + ), + ( + "INTEGER", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_int64(type_.value_type), + ), + ), + ( + "INT64", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_int64(type_.value_type), + ), + ), + ( + "FLOAT", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_float64(type_.value_type), + ), + ), + ( + "FLOAT64", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_float64(type_.value_type), + ), + ), + ( + "NUMERIC", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), + ), + pytest.param( + "BIGNUMERIC", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + ), + ( + "BOOLEAN", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_boolean(type_.value_type), + ), + ), + ( + "BOOL", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_boolean(type_.value_type), + ), + ), + ( + "TIMESTAMP", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_timestamp(type_.value_type)), + ), + ( + "DATE", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_date32(type_.value_type), + ), + ), + ( + "TIME", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_time64(type_.value_type), + ), + ), + ( + "DATETIME", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_datetime(type_.value_type)), + ), + ( + "GEOGRAPHY", + "REPEATED", + all_( + pyarrow.types.is_list, + lambda type_: pyarrow.types.is_string(type_.value_type), + ), + ), + ("RECORD", "REPEATED", is_none), + ("UNKNOWN_TYPE", "REPEATED", is_none), + ], +) +def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): + field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) + actual = module_under_test.bq_to_arrow_data_type(field) + assert is_correct_type(actual) + + +@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): + fields = ( + schema.SchemaField("field01", "STRING"), + schema.SchemaField("field02", "BYTES"), + schema.SchemaField("field03", "INTEGER"), + schema.SchemaField("field04", "INT64"), + schema.SchemaField("field05", "FLOAT"), + schema.SchemaField("field06", "FLOAT64"), + schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), + ) + + field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) + actual = module_under_test.bq_to_arrow_data_type(field) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + 
pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), + ) + expected = pyarrow.struct(expected) + + assert pyarrow.types.is_struct(actual) + assert actual.num_fields == len(fields) + assert actual.equals(expected) + + +@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): + fields = ( + schema.SchemaField("field01", "STRING"), + schema.SchemaField("field02", "BYTES"), + schema.SchemaField("field03", "INTEGER"), + schema.SchemaField("field04", "INT64"), + schema.SchemaField("field05", "FLOAT"), + schema.SchemaField("field06", "FLOAT64"), + schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), + ) + + field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) + actual = module_under_test.bq_to_arrow_data_type(field) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), + ) + expected_value_type = pyarrow.struct(expected) + + assert pyarrow.types.is_list(actual) + assert pyarrow.types.is_struct(actual.value_type) + assert actual.value_type.num_fields == len(fields) + assert actual.value_type.equals(expected_value_type) + + +def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): + fields = ( + schema.SchemaField("field1", "STRING"), + schema.SchemaField("field2", "INTEGER"), + # Don't know what to convert UNKNOWN_TYPE to, let type inference work, + # instead. 
+ schema.SchemaField("field3", "UNKNOWN_TYPE"), + ) + field = schema.SchemaField("ignored_name", "RECORD", mode="NULLABLE", fields=fields) + + with warnings.catch_warnings(record=True) as warned: + actual = module_under_test.bq_to_arrow_data_type(field) + + assert actual is None + assert len(warned) == 1 + warning = warned[0] + assert "field3" in str(warning) + + +def test_bq_to_arrow_field_type_override(module_under_test): + # When loading pandas data, we may need to override the type + # decision based on data contents, because GEOGRAPHY data can be + # stored as either text or binary. + + assert ( + module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type + == pyarrow.string() + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", "GEOGRAPHY"), + pyarrow.binary(), + ).type + == pyarrow.binary() + ) + + +@pytest.mark.parametrize( + "field_type, metadata", + [ + ("datetime", {b"ARROW:extension:name": b"google:sqlType:datetime"}), + ( + "geography", + { + b"ARROW:extension:name": b"google:sqlType:geography", + b"ARROW:extension:metadata": b'{"encoding": "WKT"}', + }, + ), + ], +) +def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", field_type) + ).metadata + == metadata + )
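End to end, the series gives STRUCT columns a proper `ArrowDtype` and a `Series.struct` accessor. A usage sketch consistent with the docstring examples above:

import bigframes.pandas as bpd
import pyarrow as pa

bpd.options.display.progress_bar = None
s = bpd.Series(
    [
        {"version": 1, "project": "pandas"},
        {"version": 2, "project": "pandas"},
        {"version": 1, "project": "numpy"},
    ],
    dtype=bpd.ArrowDtype(
        pa.struct([("version", pa.int64()), ("project", pa.string())])
    ),
)

projects = s.struct.field("project")  # one child field, renamed to "project"
versions = s.struct.field(0)          # fields may also be selected by index
exploded = s.struct.explode()         # all child fields as a DataFrame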