diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index c6b28f1b01..3b0fd7008a 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2381,6 +2381,41 @@ def _split(
         blocks = self._block._split(ns=ns, fracs=fracs, random_state=random_state)
         return [DataFrame(block) for block in blocks]
 
+    @classmethod
+    def from_dict(
+        cls,
+        data: dict,
+        orient: str = "columns",
+        dtype=None,
+        columns=None,
+    ) -> DataFrame:
+        # Build the frame locally with pandas, then wrap it. Arguments are forwarded
+        # by keyword so the call does not depend on pandas' positional order.
+        return cls(pandas.DataFrame.from_dict(data, orient=orient, dtype=dtype, columns=columns))  # type: ignore
+
+    @classmethod
+    def from_records(
+        cls,
+        data,
+        index=None,
+        exclude=None,
+        columns=None,
+        coerce_float: bool = False,
+        nrows: int | None = None,
+    ) -> DataFrame:
+        # Same approach as from_dict: pandas parses the records, keyword forwarding
+        # keeps each argument bound to the intended pandas parameter.
+        return cls(
+            pandas.DataFrame.from_records(
+                data,
+                index=index,
+                exclude=exclude,
+                columns=columns,
+                coerce_float=coerce_float,
+                nrows=nrows,
+            )
+        )
+
     def to_csv(
         self, path_or_buf: str, sep=",", *, header: bool = True, index: bool = True
     ) -> None:
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 5940df590c..9318a5d9d2 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3309,6 +3309,54 @@ def test_df_duplicated(scalars_df_index, scalars_pandas_df_index, keep, subset):
     pd.testing.assert_series_equal(pd_series, bf_series, check_dtype=False)
 
 
+def test_df_from_dict_columns_orient():
+    data = {"a": [1, 2], "b": [3.3, 2.4]}
+    bf_result = dataframe.DataFrame.from_dict(data, orient="columns").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="columns")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_dict_index_orient():
+    data = {"a": [1, 2], "b": [3.3, 2.4]}
+    bf_result = dataframe.DataFrame.from_dict(
+        data, orient="index", columns=["col1", "col2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="index", columns=["col1", "col2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_dict_tight_orient():
+    data = {
+        "index": [("i1", "i2"), ("i3", "i4")],
+        "columns": ["col1", "col2"],
+        "data": [[1, 2.6], [3, 4.5]],
+        "index_names": ["in1", "in2"],
+        "column_names": ["column_axis"],
+    }
+
+    bf_result = dataframe.DataFrame.from_dict(data, orient="tight").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="tight")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_records():
+    records = ((1, "a"), (2.5, "b"), (3.3, "c"), (4.9, "d"))
+
+    bf_result = dataframe.DataFrame.from_records(
+        records, columns=["c1", "c2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_records(records, columns=["c1", "c2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
 def test_df_to_dict(scalars_df_index, scalars_pandas_df_index):
     unsupported = ["numeric_col"]  # formatted differently
     bf_result = scalars_df_index.drop(columns=unsupported).to_dict()
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 6b5a580e99..08fe8e2de0 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -196,6 +196,81 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
     # ----------------------------------------------------------------------
     # IO methods (to / from other formats)
 
+    @classmethod
+    def from_dict(
+        cls,
+        data: dict,
+        orient: str = "columns",
+        dtype=None,
+        columns=None,
+    ) -> DataFrame:
+        """
+        Construct DataFrame from dict of array-like or dicts.
+
+        Creates DataFrame object from dictionary by columns or by index
+        allowing dtype specification.
+
+        Args:
+            data (dict):
+                Of the form {field : array-like} or {field : dict}.
+            orient ({'columns', 'index', 'tight'}, default 'columns'):
+                The "orientation" of the data. If the keys of the passed dict
+                should be the columns of the resulting DataFrame, pass 'columns'
+                (default). Otherwise if the keys should be rows, pass 'index'.
+                If 'tight', assume a dict with keys ['index', 'columns', 'data',
+                'index_names', 'column_names'].
+            dtype (dtype, default None):
+                Data type to force after DataFrame construction, otherwise infer.
+            columns (list, default None):
+                Column labels to use when ``orient='index'``. Raises a ValueError
+                if used with ``orient='columns'`` or ``orient='tight'``.
+
+        Returns:
+            DataFrame
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @classmethod
+    def from_records(
+        cls,
+        data,
+        index=None,
+        exclude=None,
+        columns=None,
+        coerce_float: bool = False,
+        nrows: int | None = None,
+    ) -> DataFrame:
+        """
+        Convert structured or record ndarray to DataFrame.
+
+        Creates a DataFrame object from a structured ndarray, sequence of
+        tuples or dicts, or DataFrame.
+
+        Args:
+            data (structured ndarray, sequence of tuples or dicts):
+                Structured input data.
+            index (str, list of fields, array-like):
+                Field of array to use as the index, alternately a specific set of
+                input labels to use.
+            exclude (sequence, default None):
+                Columns or fields to exclude.
+            columns (sequence, default None):
+                Column names to use. If the passed data do not have names
+                associated with them, this argument provides names for the
+                columns. Otherwise this argument indicates the order of the columns
+                in the result (any names not found in the data will become all-NA
+                columns).
+            coerce_float (bool, default False):
+                Attempt to convert values of non-string, non-numeric objects (like
+                decimal.Decimal) to floating point, useful for SQL result sets.
+            nrows (int, default None):
+                Number of rows to read if data is an iterator.
+
+        Returns:
+            DataFrame
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def to_numpy(self, dtype=None, copy=False, na_value=None, **kwargs) -> np.ndarray:
         """
         Convert the DataFrame to a NumPy array.