fix: read_pandas inline returns None when the DataFrame exceeds the size limit (#1525)

chelsea-lin · web-flow · commit 578081e978f2 · 2025-03-24T10:41:45.000-07:00
diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py
@@ -794,13 +794,14 @@ def _read_pandas(
             )
 
         if write_engine == "default":
-            inline_df = self._read_pandas_inline(pandas_dataframe, should_raise=False)
-            if inline_df is not None:
+            try:
+                inline_df = self._read_pandas_inline(pandas_dataframe)
                 return inline_df
+            except ValueError:
+                pass
             return self._read_pandas_load_job(pandas_dataframe, api_name)
         elif write_engine == "bigquery_inline":
-            # Regarding the type: ignore, with should_raise=True, this should never return None.
-            return self._read_pandas_inline(pandas_dataframe, should_raise=True)  # type: ignore
+            return self._read_pandas_inline(pandas_dataframe)
         elif write_engine == "bigquery_load":
             return self._read_pandas_load_job(pandas_dataframe, api_name)
         elif write_engine == "bigquery_streaming":
@@ -809,12 +810,16 @@ def _read_pandas(
             raise ValueError(f"Got unexpected write_engine '{write_engine}'")
 
     def _read_pandas_inline(
-        self, pandas_dataframe: pandas.DataFrame, should_raise=False
-    ) -> Optional[dataframe.DataFrame]:
+        self, pandas_dataframe: pandas.DataFrame
+    ) -> dataframe.DataFrame:
         import bigframes.dataframe as dataframe
 
-        if pandas_dataframe.memory_usage(deep=True).sum() > MAX_INLINE_DF_BYTES:
-            return None
+        memory_usage = pandas_dataframe.memory_usage(deep=True).sum()
+        if memory_usage > MAX_INLINE_DF_BYTES:
+            raise ValueError(
+                f"DataFrame size ({memory_usage} bytes) exceeds the maximum allowed "
+                f"for inline data ({MAX_INLINE_DF_BYTES} bytes)."
+            )
 
         try:
             local_block = blocks.Block.from_local(pandas_dataframe, self)
@@ -825,29 +830,22 @@ def _read_pandas_inline(
             ValueError,  # Thrown by ibis for some unhandled types
             TypeError,  # Not all types handleable by local code path
         ) as exc:
-            if should_raise:
-                raise ValueError(
-                    f"Could not convert with a BigQuery type: `{exc}`. "
-                ) from exc
-            else:
-                return None
-
-        inline_types = inline_df._block.expr.schema.dtypes
+            raise ValueError(
+                f"Could not convert with a BigQuery type: `{exc}`. "
+            ) from exc
 
         # Make sure all types are inlinable to avoid escaping errors.
+        inline_types = inline_df._block.expr.schema.dtypes
         noninlinable_types = [
             dtype for dtype in inline_types if dtype not in INLINABLE_DTYPES
         ]
-        if len(noninlinable_types) == 0:
-            return inline_df
-
-        if should_raise:
+        if len(noninlinable_types) != 0:
             raise ValueError(
                 f"Could not inline with a BigQuery type: `{noninlinable_types}`. "
                 f"{constants.FEEDBACK_LINK}"
             )
-        else:
-            return None
+
+        return inline_df
 
     def _read_pandas_load_job(
         self,
diff --git a/tests/unit/session/test_session.py b/tests/unit/session/test_session.py
@@ -22,6 +22,8 @@
 import google.api_core.exceptions
 import google.cloud.bigquery
 import google.cloud.bigquery.table
+import pandas as pd
+import pyarrow as pa
 import pytest
 
 import bigframes
@@ -458,3 +460,34 @@ def today(cls):
 
     with pytest.warns(bigframes.exceptions.ObsoleteVersionWarning):
         resources.create_bigquery_session()
+
+
+@mock.patch("bigframes.session.MAX_INLINE_DF_BYTES", 1)
+def test_read_pandas_inline_exceeds_limit_raises_error():
+    session = resources.create_bigquery_session()
+    pd_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
+    with pytest.raises(
+        ValueError,
+        match=r"DataFrame size \(.* bytes\) exceeds the maximum allowed for inline data \(1 bytes\)\.",
+    ):
+        session.read_pandas(pd_df, write_engine="bigquery_inline")
+
+
+def test_read_pandas_inline_w_interval_type_raises_error():
+    session = resources.create_bigquery_session()
+    df = pd.DataFrame(pd.arrays.IntervalArray.from_breaks([0, 10, 20, 30, 40, 50]))
+    with pytest.raises(ValueError, match="Could not convert with a BigQuery type: "):
+        session.read_pandas(df, write_engine="bigquery_inline")
+
+
+def test_read_pandas_inline_w_noninlineable_type_raises_error():
+    session = resources.create_bigquery_session()
+    data = [
+        [1, 2, 3],
+        [4, 5],
+        None,
+        [6, 7, 8, 9],
+    ]
+    s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
+    with pytest.raises(ValueError, match="Could not inline with a BigQuery type:"):
+        session.read_pandas(s, write_engine="bigquery_inline")