Skip to content

Commit 578081e

Browse files
authored
fix: read_pandas inline returns None when exceeds limit (#1525)
1 parent 499e00a commit 578081e

File tree

2 files changed

+53
-22
lines changed

2 files changed

+53
-22
lines changed

bigframes/session/__init__.py

+20 -22
Original file line number | Diff line number | Diff line change
@@ -794,13 +794,14 @@ def _read_pandas(
794794
)
795795

796796
if write_engine == "default":
797-
inline_df = self._read_pandas_inline(pandas_dataframe, should_raise=False)
798-
if inline_df is not None:
797+
try:
798+
inline_df = self._read_pandas_inline(pandas_dataframe)
799799
return inline_df
800+
except ValueError:
801+
pass
800802
return self._read_pandas_load_job(pandas_dataframe, api_name)
801803
elif write_engine == "bigquery_inline":
802-
# Regarding the type: ignore, with should_raise=True, this should never return None.
803-
return self._read_pandas_inline(pandas_dataframe, should_raise=True) # type: ignore
804+
return self._read_pandas_inline(pandas_dataframe)
804805
elif write_engine == "bigquery_load":
805806
return self._read_pandas_load_job(pandas_dataframe, api_name)
806807
elif write_engine == "bigquery_streaming":
@@ -809,12 +810,16 @@ def _read_pandas(
809810
raise ValueError(f"Got unexpected write_engine '{write_engine}'")
810811

811812
def _read_pandas_inline(
812-
self, pandas_dataframe: pandas.DataFrame, should_raise=False
813-
) -> Optional[dataframe.DataFrame]:
813+
self, pandas_dataframe: pandas.DataFrame
814+
) -> dataframe.DataFrame:
814815
import bigframes.dataframe as dataframe
815816

816-
if pandas_dataframe.memory_usage(deep=True).sum() > MAX_INLINE_DF_BYTES:
817-
return None
817+
memory_usage = pandas_dataframe.memory_usage(deep=True).sum()
818+
if memory_usage > MAX_INLINE_DF_BYTES:
819+
raise ValueError(
820+
f"DataFrame size ({memory_usage} bytes) exceeds the maximum allowed "
821+
f"for inline data ({MAX_INLINE_DF_BYTES} bytes)."
822+
)
818823

819824
try:
820825
local_block = blocks.Block.from_local(pandas_dataframe, self)
@@ -825,29 +830,22 @@ def _read_pandas_inline(
825830
ValueError, # Thrown by ibis for some unhandled types
826831
TypeError, # Not all types handleable by local code path
827832
) as exc:
828-
if should_raise:
829-
raise ValueError(
830-
f"Could not convert with a BigQuery type: `{exc}`. "
831-
) from exc
832-
else:
833-
return None
834-
835-
inline_types = inline_df._block.expr.schema.dtypes
833+
raise ValueError(
834+
f"Could not convert with a BigQuery type: `{exc}`. "
835+
) from exc
836836

837837
# Make sure all types are inlinable to avoid escaping errors.
838+
inline_types = inline_df._block.expr.schema.dtypes
838839
noninlinable_types = [
839840
dtype for dtype in inline_types if dtype not in INLINABLE_DTYPES
840841
]
841-
if len(noninlinable_types) == 0:
842-
return inline_df
843-
844-
if should_raise:
842+
if len(noninlinable_types) != 0:
845843
raise ValueError(
846844
f"Could not inline with a BigQuery type: `{noninlinable_types}`. "
847845
f"{constants.FEEDBACK_LINK}"
848846
)
849-
else:
850-
return None
847+
848+
return inline_df
851849

852850
def _read_pandas_load_job(
853851
self,

tests/unit/session/test_session.py

+33
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222
import google.api_core.exceptions
2323
import google.cloud.bigquery
2424
import google.cloud.bigquery.table
25+
import pandas as pd
26+
import pyarrow as pa
2527
import pytest
2628

2729
import bigframes
@@ -458,3 +460,34 @@ def today(cls):
458460

459461
with pytest.warns(bigframes.exceptions.ObsoleteVersionWarning):
460462
resources.create_bigquery_session()
463+
464+
465+
@mock.patch("bigframes.session.MAX_INLINE_DF_BYTES", 1)
466+
def test_read_pandas_inline_exceeds_limit_raises_error():
467+
session = resources.create_bigquery_session()
468+
pd_df = pd.DataFrame([[1, 2, 3], [4, 5, 6]])
469+
with pytest.raises(
470+
ValueError,
471+
match=r"DataFrame size \(.* bytes\) exceeds the maximum allowed for inline data \(1 bytes\)\.",
472+
):
473+
session.read_pandas(pd_df, write_engine="bigquery_inline")
474+
475+
476+
def test_read_pandas_inline_w_interval_type_raises_error():
477+
session = resources.create_bigquery_session()
478+
df = pd.DataFrame(pd.arrays.IntervalArray.from_breaks([0, 10, 20, 30, 40, 50]))
479+
with pytest.raises(ValueError, match="Could not convert with a BigQuery type: "):
480+
session.read_pandas(df, write_engine="bigquery_inline")
481+
482+
483+
def test_read_pandas_inline_w_noninlineable_type_raises_error():
484+
session = resources.create_bigquery_session()
485+
data = [
486+
[1, 2, 3],
487+
[4, 5],
488+
None,
489+
[6, 7, 8, 9],
490+
]
491+
s = pd.Series(data, dtype=pd.ArrowDtype(pa.list_(pa.int64())))
492+
with pytest.raises(ValueError, match="Could not inline with a BigQuery type:"):
493+
session.read_pandas(s, write_engine="bigquery_inline")

0 commit comments

Comments
 (0)