Skip to content

Commit 02de9e7

Browse files
feat: Series binary ops compatible with more types
1 parent 458bfb2 commit 02de9e7

File tree

5 files changed

+82
-38
lines changed

5 files changed

+82
-38
lines changed

bigframes/core/convert.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,22 @@
2121
import bigframes.series as series
2222

2323

24-
def to_bf_series(obj, default_index: Optional[index.Index]) -> series.Series:
24+
def is_series_convertible(obj) -> bool:
25+
if isinstance(obj, series.Series):
26+
return True
27+
if isinstance(obj, pd.Series):
28+
return True
29+
if isinstance(obj, index.Index):
30+
return True
31+
if isinstance(obj, pd.Index):
32+
return True
33+
if pd.api.types.is_list_like(obj):
34+
return True
35+
else:
36+
return False
37+
38+
39+
def to_bf_series(obj, default_index: Optional[index.Index], session) -> series.Series:
2540
"""
2641
Convert an object to a bigframes series
2742
@@ -37,13 +52,13 @@ def to_bf_series(obj, default_index: Optional[index.Index]) -> series.Series:
3752
if isinstance(obj, series.Series):
3853
return obj
3954
if isinstance(obj, pd.Series):
40-
return series.Series(obj)
55+
return series.Series(obj, session=session)
4156
if isinstance(obj, index.Index):
42-
return series.Series(obj, default_index)
57+
return series.Series(obj, default_index, session=session)
4358
if isinstance(obj, pd.Index):
44-
return series.Series(obj, default_index)
59+
return series.Series(obj, default_index, session=session)
4560
if pd.api.types.is_list_like(obj):
46-
return series.Series(obj, default_index)
61+
return series.Series(obj, default_index, session=session)
4762
else:
4863
raise TypeError(f"Cannot interpret {obj} as series.")
4964

bigframes/dataframe.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,9 @@ def _apply_binop(
668668
DataFrame(other), op, how=how, reverse=reverse
669669
)
670670
elif utils.get_axis_number(axis) == 0:
671-
bf_series = bigframes.core.convert.to_bf_series(other, self.index)
671+
bf_series = bigframes.core.convert.to_bf_series(
672+
other, self.index, self._session
673+
)
672674
return self._apply_series_binop_axis_0(bf_series, op, how, reverse)
673675
elif utils.get_axis_number(axis) == 1:
674676
pd_series = bigframes.core.convert.to_pd_series(other, self.columns)

bigframes/operations/base.py

+39-29
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import bigframes.constants as constants
2323
import bigframes.core.blocks as blocks
24+
import bigframes.core.convert
2425
import bigframes.core.expression as ex
2526
import bigframes.core.indexes as indexes
2627
import bigframes.core.scalar as scalars
@@ -44,7 +45,15 @@ def __init__(
4445
*,
4546
session: typing.Optional[bigframes.session.Session] = None,
4647
):
47-
block = None
48+
import bigframes.pandas
49+
50+
read_pandas_func = (
51+
session.read_pandas
52+
if (session is not None)
53+
else (lambda x: bigframes.pandas.read_pandas(x))
54+
)
55+
56+
block: typing.Optional[blocks.Block] = None
4857
if copy is not None and not copy:
4958
raise ValueError(
5059
f"Series constructor only supports copy=True. {constants.FEEDBACK_LINK}"
@@ -55,29 +64,36 @@ def __init__(
5564
assert index is None
5665
block = data
5766

58-
elif isinstance(data, SeriesMethods):
59-
block = data._block
67+
elif isinstance(data, SeriesMethods) or isinstance(data, pd.Series):
68+
if isinstance(data, pd.Series):
69+
data = read_pandas_func(data)
70+
data_block = data._block
6071
if index is not None:
6172
# reindex
62-
bf_index = indexes.Index(index)
73+
bf_index = indexes.Index(index, session=session)
6374
idx_block = bf_index._block
6475
idx_cols = idx_block.value_columns
65-
block_idx, _ = idx_block.join(block, how="left")
66-
block = block_idx.with_index_labels(bf_index.names)
76+
block_idx, _ = idx_block.join(data_block, how="left")
77+
data_block = block_idx.with_index_labels(bf_index.names)
78+
block = data_block
6779

68-
elif isinstance(data, indexes.Index):
80+
elif isinstance(data, indexes.Index) or pd.api.types.is_list_like(data):
81+
data = indexes.Index(data, session=session)
6982
if data.nlevels != 1:
7083
raise NotImplementedError("Cannot interpret multi-index as Series.")
7184
# Reset index to promote index columns to value columns, set default index
72-
block = data._block.reset_index(drop=False)
85+
data_block = data._block.reset_index(drop=False)
7386
if index is not None:
7487
# Align by offset
75-
bf_index = indexes.Index(index)
76-
idx_block = bf_index._block.reset_index(drop=False)
88+
bf_index = indexes.Index(index, session=session)
89+
idx_block = bf_index._block.reset_index(
90+
drop=False
91+
) # reset to align by offsets, and then reset back
7792
idx_cols = idx_block.value_columns
78-
block, (l_mapping, _) = idx_block.join(block, how="left")
79-
block = block.set_index([l_mapping[col] for col in idx_cols])
80-
block = block.with_index_labels(bf_index.names)
93+
data_block, (l_mapping, _) = idx_block.join(data_block, how="left")
94+
data_block = data_block.set_index([l_mapping[col] for col in idx_cols])
95+
data_block = data_block.with_index_labels(bf_index.names)
96+
block = data_block
8197

8298
if block:
8399
if name:
@@ -91,22 +107,17 @@ def __init__(
91107
block.value_columns, ops.AsTypeOp(to_type=dtype)
92108
)
93109
else:
94-
import bigframes.pandas
95-
96110
pd_series = pd.Series(
97111
data=data, index=index, dtype=dtype, name=name # type:ignore
98112
)
99113
pd_dataframe = pd_series.to_frame()
100114
if pd_series.name is None:
101115
# to_frame will set default numeric column label if unnamed, but we do not support int column label, so must rename
102116
pd_dataframe = pd_dataframe.set_axis(["unnamed_col"], axis=1)
103-
if session:
104-
block = session.read_pandas(pd_dataframe)._get_block()
105-
else:
106-
# Uses default global session
107-
block = bigframes.pandas.read_pandas(pd_dataframe)._get_block()
117+
block = read_pandas_func(pd_dataframe)._get_block() # type: ignore
108118
if pd_series.name is None:
109-
block = block.with_column_labels([None])
119+
block = block.with_column_labels([None]) # type: ignore
120+
assert block is not None
110121
self._block: blocks.Block = block
111122

112123
@property
@@ -145,17 +156,16 @@ def _apply_binary_op(
145156
reverse: bool = False,
146157
) -> series.Series:
147158
"""Applies a binary operator to the series and other."""
148-
if isinstance(other, pd.Series):
149-
# TODO: Convert to BigQuery DataFrames series
150-
raise NotImplementedError(
151-
f"Pandas series not supported as operand. {constants.FEEDBACK_LINK}"
159+
if bigframes.core.convert.is_series_convertible(other):
160+
self_index = indexes.Index(self._block)
161+
other_series = bigframes.core.convert.to_bf_series(
162+
other, self_index, self._block.session
152163
)
153-
if isinstance(other, series.Series):
154-
(self_col, other_col, block) = self._align(other, how=alignment)
164+
(self_col, other_col, block) = self._align(other_series, how=alignment)
155165

156166
name = self._name
157167
if (
158-
isinstance(other, series.Series)
168+
hasattr(other, "name")
159169
and other.name != self._name
160170
and alignment == "outer"
161171
):
@@ -166,7 +176,7 @@ def _apply_binary_op(
166176
block, result_id = block.project_expr(expr, name)
167177
return series.Series(block.select_column(result_id))
168178

169-
else:
179+
else: # Scalar binop
170180
name = self._name
171181
expr = op.as_expr(
172182
ex.const(other) if reverse else self._value_column,

bigframes/series.py

-3
Original file line numberDiff line numberDiff line change
@@ -812,9 +812,6 @@ def combine_first(self, other: Series) -> Series:
812812
return result
813813

814814
def update(self, other: Union[Series, Sequence, Mapping]) -> None:
815-
import bigframes.core.convert
816-
817-
other = bigframes.core.convert.to_bf_series(other, default_index=None)
818815
result = self._apply_binary_op(
819816
other, ops.coalesce_op, reverse=True, alignment="left"
820817
)

tests/system/small/test_series.py

+20
Original file line numberDiff line numberDiff line change
@@ -1269,6 +1269,26 @@ def test_binop_right_filtered(scalars_dfs):
12691269
)
12701270

12711271

1272+
@pytest.mark.parametrize(
1273+
("other",),
1274+
[
1275+
([-1.4, 2.3, None],),
1276+
(pd.Index([-1.4, 2.3, None]),),
1277+
(pd.Series([-1.4, 2.3, None], index=[44, 2, 1]),),
1278+
],
1279+
)
1280+
def test_series_binop_w_other_types(scalars_dfs, other):
1281+
scalars_df, scalars_pandas_df = scalars_dfs
1282+
1283+
bf_result = (scalars_df["int64_col"].head(3) + other).to_pandas()
1284+
pd_result = scalars_pandas_df["int64_col"].head(3) + other
1285+
1286+
assert_series_equal(
1287+
bf_result,
1288+
pd_result,
1289+
)
1290+
1291+
12721292
@skip_legacy_pandas
12731293
def test_series_combine_first(scalars_dfs):
12741294
scalars_df, scalars_pandas_df = scalars_dfs

0 commit comments

Comments
 (0)