diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
index acaad4a5b7..dfee41c90b 100644
--- a/bigframes/session/__init__.py
+++ b/bigframes/session/__init__.py
@@ -794,13 +794,14 @@ def _read_pandas(
         )
 
         if write_engine == "default":
-            inline_df = self._read_pandas_inline(pandas_dataframe, should_raise=False)
-            if inline_df is not None:
+            try:
+                inline_df = self._read_pandas_inline(pandas_dataframe)
                 return inline_df
+            except ValueError:
+                pass
             return self._read_pandas_load_job(pandas_dataframe, api_name)
         elif write_engine == "bigquery_inline":
-            # Regarding the type: ignore, with should_raise=True, this should never return None.
-            return self._read_pandas_inline(pandas_dataframe, should_raise=True)  # type: ignore
+            return self._read_pandas_inline(pandas_dataframe)
         elif write_engine == "bigquery_load":
             return self._read_pandas_load_job(pandas_dataframe, api_name)
         elif write_engine == "bigquery_streaming":
@@ -809,12 +810,16 @@ def _read_pandas(
         raise ValueError(f"Got unexpected write_engine '{write_engine}'")
 
     def _read_pandas_inline(
-        self, pandas_dataframe: pandas.DataFrame, should_raise=False
-    ) -> Optional[dataframe.DataFrame]:
+        self, pandas_dataframe: pandas.DataFrame
+    ) -> dataframe.DataFrame:
         import bigframes.dataframe as dataframe
 
-        if pandas_dataframe.memory_usage(deep=True).sum() > MAX_INLINE_DF_BYTES:
-            return None
+        memory_usage = pandas_dataframe.memory_usage(deep=True).sum()
+        if memory_usage > MAX_INLINE_DF_BYTES:
+            raise ValueError(
+                f"DataFrame size ({memory_usage} bytes) exceeds the maximum allowed "
+                f"for inline data ({MAX_INLINE_DF_BYTES} bytes)."
+            )
 
         try:
             local_block = blocks.Block.from_local(pandas_dataframe, self)
@@ -825,29 +830,22 @@ def _read_pandas_inline(
             ValueError,  # Thrown by ibis for some unhandled types
             TypeError,  # Not all types handleable by local code path
         ) as exc:
-            if should_raise:
-                raise ValueError(
-                    f"Could not convert with a BigQuery type: `{exc}`. "
-                ) from exc
-            else:
-                return None
-
-        inline_types = inline_df._block.expr.schema.dtypes
+            raise ValueError(
+                f"Could not convert with a BigQuery type: `{exc}`. "
+            ) from exc
 
         # Make sure all types are inlinable to avoid escaping errors.
+        inline_types = inline_df._block.expr.schema.dtypes
         noninlinable_types = [
             dtype for dtype in inline_types if dtype not in INLINABLE_DTYPES
         ]
-        if len(noninlinable_types) == 0:
-            return inline_df
-
-        if should_raise:
+        if len(noninlinable_types) != 0:
             raise ValueError(
                 f"Could not inline with a BigQuery type: `{noninlinable_types}`. "
                 f"{constants.FEEDBACK_LINK}"
             )
-        else:
-            return None
+
+        return inline_df
 
     def _read_pandas_load_job(
         self,
diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py
index d024d332d4..b35449f291 100644
--- a/tests/unit/session/test_session.py
+++ b/tests/unit/session/test_session.py
@@ -22,6 +22,8 @@
 import google.api_core.exceptions
 import google.cloud.bigquery
 import google.cloud.bigquery.table
+import pandas as pd
+import pyarrow as pa
 import pytest
 
 import bigframes
@@ -458,3 +460,34 @@ def today(cls):
 
     with pytest.warns(bigframes.exceptions.ObsoleteVersionWarning):
         resources.create_bigquery_session()
+
+
+@mock.patch("bigframes.session.MAX_INLINE_DF_BYTES", 1)
+def test_read_pandas_inline_exceeds_limit_raises_error():
+    session = resources.create_bigquery_session()
+    pd_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
+    with pytest.raises(
+        ValueError,
+        match=r"DataFrame size \(.* bytes\) exceeds the maximum allowed for inline data \(1 bytes\)\.",
+    ):
+        session.read_pandas(pd_df, write_engine="bigquery_inline")
+
+
+def test_read_pandas_inline_w_interval_type_raises_error():
+    session = resources.create_bigquery_session()
+    df = pd.DataFrame(pd.arrays.IntervalArray.from_breaks([0, 10, 20, 30, 40, 50]))
+    with pytest.raises(ValueError, match="Could not convert with a BigQuery type: "):
+        session.read_pandas(df, write_engine="bigquery_inline")
+
+
+def test_read_pandas_inline_w_noninlineable_type_raises_error():
+    session = resources.create_bigquery_session()
+    data = [
+        [1, 2, 3],
+        [4, 5],
+        None,
+        [6, 7, 8, 9],
+    ]
+    s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
+    with pytest.raises(ValueError, match="Could not inline with a BigQuery type:"):
+        session.read_pandas(s, write_engine="bigquery_inline")