From 26b50754d72784b4da0701207f13764b66fac1f2 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Thu, 30 Nov 2023 00:08:45 +0000
Subject: [PATCH 1/2] feat: add DataFrame from_dict and from_records methods

---
 bigframes/dataframe.py                        | 26 +++++++
 tests/system/small/test_dataframe.py          | 48 ++++++++++++
 .../bigframes_vendored/pandas/core/frame.py   | 75 +++++++++++++++++++
 3 files changed, 149 insertions(+)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index f7796291b9..fe5a18b3bd 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2368,6 +2368,32 @@ def _split(
         blocks = self._block._split(ns=ns, fracs=fracs, random_state=random_state)
         return [DataFrame(block) for block in blocks]
 
+    @classmethod
+    def from_dict(
+        cls,
+        data: dict,
+        orient: str = "columns",
+        dtype=None,
+        columns=None,
+    ) -> DataFrame:
+        return cls(pandas.DataFrame.from_dict(data, orient, dtype, columns))
+
+    @classmethod
+    def from_records(
+        cls,
+        data,
+        index=None,
+        exclude=None,
+        columns=None,
+        coerce_float: bool = False,
+        nrows: int | None = None,
+    ) -> DataFrame:
+        return cls(
+            pandas.DataFrame.from_records(
+                data, index, exclude, columns, coerce_float, nrows
+            )
+        )
+
     def to_csv(
         self, path_or_buf: str, sep=",", *, header: bool = True, index: bool = True
     ) -> None:
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 9744d3f6e9..b6834dd2b5 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3289,6 +3289,54 @@ def test_df_duplicated(scalars_df_index, scalars_pandas_df_index, keep, subset):
     pd.testing.assert_series_equal(pd_series, bf_series, check_dtype=False)
 
 
+def test_df_from_dict_columns_orient():
+    data = {"a": [1, 2], "b": [3.3, 2.4]}
+    bf_result = dataframe.DataFrame.from_dict(data, orient="columns").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="columns")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_dict_index_orient():
+    data = {"a": [1, 2], "b": [3.3, 2.4]}
+    bf_result = dataframe.DataFrame.from_dict(
+        data, orient="index", columns=["col1", "col2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="index", columns=["col1", "col2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_dict_tight_orient():
+    data = {
+        "index": [("i1", "i2"), ("i3", "i4")],
+        "columns": ["col1", "col2"],
+        "data": [[1, 2.6], [3, 4.5]],
+        "index_names": ["in1", "in2"],
+        "column_names": ["column_axis"],
+    }
+
+    bf_result = dataframe.DataFrame.from_dict(data, orient="tight").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="tight")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_records():
+    records = ((1, "a"), (2.5, "b"), (3.3, "c"), (4.9, "d"))
+
+    bf_result = dataframe.DataFrame.from_records(
+        records, columns=["c1", "c2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_records(records, columns=["c1", "c2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
 def test_df_to_dict(scalars_df_index, scalars_pandas_df_index):
     unsupported = ["numeric_col"]  # formatted differently
     bf_result = scalars_df_index.drop(columns=unsupported).to_dict()
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 099d8b8e66..384036681a 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -160,6 +160,81 @@ def memory_usage(self, index: bool = True):
 
     # ----------------------------------------------------------------------
     # IO methods (to / from other formats)
+    @classmethod
+    def from_dict(
+        cls,
+        data: dict,
+        orient="columns",
+        dtype=None,
+        columns=None,
+    ) -> DataFrame:
+        """
+        Construct DataFrame from dict of array-like or dicts.
+
+        Creates DataFrame object from dictionary by columns or by index
+        allowing dtype specification.
+
+        Args:
+            data (dict):
+                Of the form {field : array-like} or {field : dict}.
+            orient ({'columns', 'index', 'tight'}, default 'columns'):
+                The "orientation" of the data. If the keys of the passed dict
+                should be the columns of the resulting DataFrame, pass 'columns'
+                (default). Otherwise if the keys should be rows, pass 'index'.
+                If 'tight', assume a dict with keys ['index', 'columns', 'data',
+                'index_names', 'column_names'].
+            dtype (dtype, default None):
+                Data type to force after DataFrame construction, otherwise infer.
+            columns (list, default None):
+                Column labels to use when ``orient='index'``. Raises a ValueError
+                if used with ``orient='columns'`` or ``orient='tight'``.
+
+        Returns:
+            DataFrame
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @classmethod
+    def from_records(
+        cls,
+        data,
+        index=None,
+        exclude=None,
+        columns=None,
+        coerce_float: bool = False,
+        nrows: int | None = None,
+    ) -> DataFrame:
+        """
+        Convert structured or record ndarray to DataFrame.
+
+        Creates a DataFrame object from a structured ndarray, sequence of
+        tuples or dicts, or DataFrame.
+
+        Args:
+            data (structured ndarray, sequence of tuples or dicts):
+                Structured input data.
+            index (str, list of fields, array-like):
+                Field of array to use as the index, alternately a specific set of
+                input labels to use.
+            exclude (sequence, default None):
+                Columns or fields to exclude.
+            columns (sequence, default None):
+                Column names to use. If the passed data do not have names
+                associated with them, this argument provides names for the
+                columns. Otherwise this argument indicates the order of the columns
+                in the result (any names not found in the data will become all-NA
+                columns).
+            coerce_float (bool, default False):
+                Attempt to convert values of non-string, non-numeric objects (like
+                decimal.Decimal) to floating point, useful for SQL result sets.
+            nrows (int, default None):
+                Number of rows to read if data is an iterator.
+
+        Returns:
+            DataFrame
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def to_numpy(self, dtype=None, copy=False, na_value=None, **kwargs) -> np.ndarray:
         """
         Convert the DataFrame to a NumPy array.

From 8e7e5e238ca2426c2d0b78fe4e5ebd2adbdb5629 Mon Sep 17 00:00:00 2001
From: Trevor Bergeron <tbergeron@google.com>
Date: Thu, 30 Nov 2023 17:32:11 +0000
Subject: [PATCH 2/2] ignore mypy error

---
 bigframes/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index fe5a18b3bd..79714b02e5 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2376,7 +2376,7 @@ def from_dict(
         dtype=None,
         columns=None,
     ) -> DataFrame:
-        return cls(pandas.DataFrame.from_dict(data, orient, dtype, columns))
+        return cls(pandas.DataFrame.from_dict(data, orient, dtype, columns))  # type: ignore
 
     @classmethod
     def from_records(