
Commit adde263

refactor: unify row operators to same interface
1 parent ec10c4a commit adde263
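
This commit moves ibis compilation out of the row operators themselves: the old `op._as_ibis(...)` and direct `op(...)` calls disappear, the ops become plain parameter objects, and a single entry point, `ops_ibis.ibis_compiler.compile_row_op(op, inputs)`, turns an op plus a tuple of input columns into an ibis expression. A minimal sketch of that shape, using a registry-based dispatcher; the class and function bodies below are illustrative assumptions, not the actual bigframes implementation:

    # Illustrative sketch only -- not the bigframes source. It mirrors the call
    # shape seen in this diff: compile_row_op(op, (input_column, ...)).
    import dataclasses
    import typing

    import ibis.expr.types as ibis_types


    @dataclasses.dataclass(frozen=True)
    class AsTypeOp:
        # The op carries parameters only; no ibis logic lives on the op itself.
        to_type: str


    class RowOpCompiler:
        """Dispatches each op type to a function that builds an ibis expression."""

        def __init__(self) -> None:
            self._impls: typing.Dict[type, typing.Callable] = {}

        def register(self, op_type: type):
            def decorator(impl):
                self._impls[op_type] = impl
                return impl

            return decorator

        def compile_row_op(
            self, op, inputs: typing.Tuple[ibis_types.Value, ...]
        ) -> ibis_types.Value:
            # Look up the implementation by op type and hand it the op's
            # parameters plus the already-resolved ibis input columns.
            return self._impls[type(op)](op, inputs)


    ibis_compiler = RowOpCompiler()


    @ibis_compiler.register(AsTypeOp)
    def _astype_impl(op: AsTypeOp, inputs) -> ibis_types.Value:
        (column,) = inputs
        return column.cast(op.to_type)

With this shape, arity stops mattering at the call site: unary, binary, and ternary projections all pass a tuple of input columns, which is what the three `project_*_op` changes in `bigframes/core/__init__.py` below rely on.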

File tree

11 files changed: +1452 -1100 lines


bigframes/core/__init__.py

+19-10
@@ -40,6 +40,7 @@
 import bigframes.dtypes
 import bigframes.operations as ops
 import bigframes.operations.aggregations as agg_ops
+import bigframes.operations.ibis_mappings as ops_ibis

 if typing.TYPE_CHECKING:
     from bigframes.session import Session
@@ -509,9 +510,10 @@ def project_unary_op(
         self, column_name: str, op: ops.UnaryOp, output_name=None
     ) -> ArrayValue:
         """Creates a new expression based on this expression with unary operation applied to one column."""
-        value = op._as_ibis(self._get_ibis_column(column_name)).name(
-            output_name or column_name
+        ibis_compiled = ops_ibis.ibis_compiler.compile_row_op(
+            op, (self._get_ibis_column(column_name),)
         )
+        value = ibis_compiled.name(output_name or column_name)
         return self._set_or_replace_by_id(output_name or column_name, value)

     def project_binary_op(
@@ -522,10 +524,12 @@ def project_binary_op(
         output_column_id: str,
     ) -> ArrayValue:
         """Creates a new expression based on this expression with binary operation applied to two columns."""
-        value = op(
-            self._get_ibis_column(left_column_id),
-            self._get_ibis_column(right_column_id),
-        ).name(output_column_id)
+        left_input = self._get_ibis_column(left_column_id)
+        right_input = self._get_ibis_column(right_column_id)
+        ibis_compiled = ops_ibis.ibis_compiler.compile_row_op(
+            op, (left_input, right_input)
+        )
+        value = ibis_compiled.name(output_column_id)
         return self._set_or_replace_by_id(output_column_id, value)

     def project_ternary_op(
@@ -537,11 +541,14 @@ def project_ternary_op(
         output_column_id: str,
     ) -> ArrayValue:
         """Creates a new expression based on this expression with ternary operation applied to three columns."""
-        value = op(
+        ibis_inputs = (
             self._get_ibis_column(col_id_1),
             self._get_ibis_column(col_id_2),
             self._get_ibis_column(col_id_3),
-        ).name(output_column_id)
+        )
+        value = ops_ibis.ibis_compiler.compile_row_op(op, ibis_inputs).name(
+            output_column_id
+        )
         return self._set_or_replace_by_id(output_column_id, value)

     def aggregate(
@@ -574,10 +581,11 @@ def aggregate(
         )
         columns = tuple(result[key] for key in result.columns)
         expr = ArrayValue(self._session, result, columns=columns, ordering=ordering)
+
         if dropna:
             for column_id in by_column_ids:
                 expr = expr._filter(
-                    ops.notnull_op._as_ibis(expr._get_ibis_column(column_id))
+                    ops_ibis.notnull_op_impl(expr._get_ibis_column(column_id))
                 )
         # Can maybe remove this as Ordering id is redundant as by_column is unique after aggregation
         return expr._project_offsets()
@@ -1023,8 +1031,9 @@ def unpivot(
             null_value = bigframes.dtypes.literal_to_ibis_scalar(
                 None, force_dtype=col_dtype
            )
+            cast_op = ops.AsTypeOp(to_type=col_dtype)
             ibis_values = [
-                ops.AsTypeOp(col_dtype)._as_ibis(unpivot_table[col])
+                ops_ibis.astype_op_impl(unpivot_table[col], cast_op)
                 if col is not None
                 else null_value
                 for col in source_cols
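
Besides the generic `compile_row_op` path, the hunks above also call per-op helpers (`ops_ibis.notnull_op_impl`, `ops_ibis.astype_op_impl`) directly. A rough sketch of what such helpers might look like, assuming the op objects expose their parameters as attributes; the actual `bigframes.operations.ibis_mappings` module may be organized differently:

    # Hedged sketch of per-op implementation helpers; the names follow the
    # calls in the diff above, the bodies are assumptions.
    import ibis.expr.types as ibis_types


    def notnull_op_impl(column: ibis_types.Value) -> ibis_types.BooleanValue:
        # No parameters needed: NULL-ness is checked directly on the ibis column.
        return column.notnull()


    def astype_op_impl(column: ibis_types.Value, op) -> ibis_types.Value:
        # The target type travels with the op (ops.AsTypeOp(to_type=...)); a real
        # implementation would first map the BigQuery DataFrames dtype to an
        # ibis dtype before casting.
        return column.cast(op.to_type)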

bigframes/core/block_transforms.py

+2-2
@@ -43,7 +43,7 @@ def equals(block1: blocks.Block, block2: blocks.Block) -> bool:
         lcolmapped = lmap[lcol]
         rcolmapped = rmap[rcol]
         joined_block, result_id = joined_block.apply_binary_op(
-            lcolmapped, rcolmapped, ops.eq_nulls_match_op
+            lcolmapped, rcolmapped, ops.eq_null_match_op
         )
         joined_block, result_id = joined_block.apply_unary_op(
             result_id, ops.partial_right(ops.fillna_op, False)
@@ -239,7 +239,7 @@ def rank(
     if method in ["min", "max", "first", "dense"]:
         # Pandas rank always produces Float64, so must cast for aggregation types that produce ints
         block = block.multi_apply_unary_op(
-            rownum_col_ids, ops.AsTypeOp(pd.Float64Dtype())
+            rownum_col_ids, ops.AsTypeOp(to_type=pd.Float64Dtype())
         )
     if na_option == "keep":
         # For na_option "keep", null inputs must produce null outputs

bigframes/core/blocks.py

+6-6
@@ -554,12 +554,12 @@ def _split(
         # Create an ordering col and convert to string
         block, ordering_col = block.promote_offsets()
         block, string_ordering_col = block.apply_unary_op(
-            ordering_col, ops.AsTypeOp("string[pyarrow]")
+            ordering_col, ops.AsTypeOp(to_type="string[pyarrow]")
         )

         # Apply hash method to sum col and order by it.
         block, string_sum_col = block.apply_binary_op(
-            string_ordering_col, random_state_col, ops.concat_op
+            string_ordering_col, random_state_col, ops.strconcat_op
         )
         block, hash_string_sum_col = block.apply_unary_op(string_sum_col, ops.hash_op)
         block = block.order_by([ordering.OrderingColumnReference(hash_string_sum_col)])
@@ -1223,8 +1223,8 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block:
         if axis_number == 0:
             expr = self._expr
             for index_col in self._index_columns:
-                expr = expr.project_unary_op(index_col, ops.AsTypeOp("string"))
-                prefix_op = ops.BinopPartialLeft(ops.add_op, prefix)
+                expr = expr.project_unary_op(index_col, ops.AsTypeOp(to_type="string"))
+                prefix_op = ops.ApplyLeft(base_op=ops.add_op, left_scalar=prefix)
                 expr = expr.project_unary_op(index_col, prefix_op)
             return Block(
                 expr,
@@ -1242,8 +1242,8 @@ def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block:
         if axis_number == 0:
             expr = self._expr
             for index_col in self._index_columns:
-                expr = expr.project_unary_op(index_col, ops.AsTypeOp("string"))
-                prefix_op = ops.BinopPartialRight(ops.add_op, suffix)
+                expr = expr.project_unary_op(index_col, ops.AsTypeOp(to_type="string"))
+                prefix_op = ops.ApplyRight(base_op=ops.add_op, right_scalar=suffix)
                 expr = expr.project_unary_op(index_col, prefix_op)
             return Block(
                 expr,
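
`BinopPartialLeft`/`BinopPartialRight` become `ApplyLeft`/`ApplyRight`, constructed with keyword arguments. As far as this diff shows, the idea is unchanged: bind one scalar operand so a binary op can be pushed through the unary-op code path (e.g. `project_unary_op` in `add_prefix`/`add_suffix` above). A hedged sketch of that wrapper pattern; the field names follow the call sites, while `compile_binary` and the use of `ibis.literal` are assumptions about how a compiler would expand it:

    # Sketch only. ApplyRight wraps a binary op plus a bound right-hand scalar.
    import dataclasses
    import typing

    import ibis
    import ibis.expr.types as ibis_types


    @dataclasses.dataclass(frozen=True)
    class ApplyRight:
        base_op: typing.Any       # the underlying binary op, e.g. ops.add_op
        right_scalar: typing.Any  # the operand bound at construction time


    def compile_apply_right(
        op: ApplyRight,
        inputs: typing.Tuple[ibis_types.Value, ...],
        compile_binary: typing.Callable,  # hypothetical: compiles the wrapped binary op
    ) -> ibis_types.Value:
        (column,) = inputs
        scalar = ibis.literal(op.right_scalar)
        # Delegate to the wrapped binary op with the scalar bound on the right.
        return compile_binary(op.base_op, (column, scalar))

`_apply_scalar_binop` in `bigframes/dataframe.py` further down uses the same `ApplyRight` wrapper for scalar arithmetic across columns.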

bigframes/core/indexes/index.py

+5-5
@@ -179,7 +179,7 @@ def astype(
     ) -> Index:
         if self.nlevels > 1:
             raise TypeError("Multiindex does not support 'astype'")
-        return self._apply_unary_op(ops.AsTypeOp(dtype))
+        return self._apply_unary_op(ops.AsTypeOp(to_type=dtype))

     def all(self) -> bool:
         if self.nlevels > 1:
@@ -271,7 +271,7 @@ def drop(
         level_id = self._block.index_columns[0]
         if utils.is_list_like(labels):
             block, inverse_condition_id = block.apply_unary_op(
-                level_id, ops.IsInOp(labels, match_nulls=True)
+                level_id, ops.IsInOp(values=tuple(labels), match_nulls=True)
             )
             block, condition_id = block.apply_unary_op(
                 inverse_condition_id, ops.invert_op
@@ -301,9 +301,9 @@ def isin(self, values) -> Index:
                 f"isin(), you passed a [{type(values).__name__}]"
             )

-        return self._apply_unary_op(ops.IsInOp(values, match_nulls=True)).fillna(
-            value=False
-        )
+        return self._apply_unary_op(
+            ops.IsInOp(values=tuple(values), match_nulls=True)
+        ).fillna(value=False)

     def _apply_unary_op(
         self,
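
Note that every `IsInOp` call site now wraps its values in `tuple(...)` and passes them by keyword. A plausible reason (an assumption, not stated in the commit) is that the unified ops are immutable parameter objects, so list-valued arguments are converted to hashable tuples at construction. The resulting call shape, taken directly from the hunks above:

    block, inverse_condition_id = block.apply_unary_op(
        level_id, ops.IsInOp(values=tuple(labels), match_nulls=True)
    )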

bigframes/dataframe.py

+15-13
@@ -148,7 +148,7 @@ def __init__(
             block = block.select_columns(list(columns)) # type:ignore
         if dtype:
             block = block.multi_apply_unary_op(
-                block.value_columns, ops.AsTypeOp(dtype)
+                block.value_columns, ops.AsTypeOp(to_type=dtype)
             )
         self._block = block

@@ -309,7 +309,7 @@ def astype(
         self,
         dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype],
     ) -> DataFrame:
-        return self._apply_unary_op(ops.AsTypeOp(dtype))
+        return self._apply_unary_op(ops.AsTypeOp(to_type=dtype))

     def _to_sql_query(
         self, include_index: bool
@@ -536,7 +536,7 @@ def _apply_binop(

     def _apply_scalar_binop(self, other: float | int, op: ops.BinaryOp) -> DataFrame:
         block = self._block
-        partial_op = ops.BinopPartialRight(op, other)
+        partial_op = ops.ApplyRight(base_op=op, right_scalar=other)
         for column_id, label in zip(
             self._block.value_columns, self._block.column_labels
         ):
@@ -951,7 +951,7 @@ def drop(

         if utils.is_list_like(index):
             block, inverse_condition_id = block.apply_unary_op(
-                level_id, ops.IsInOp(index, match_nulls=True)
+                level_id, ops.IsInOp(values=tuple(index), match_nulls=True)
             )
             block, condition_id = block.apply_unary_op(
                 inverse_condition_id, ops.invert_op
@@ -1318,16 +1318,16 @@ def _filter_rows(
         block = self._block
         block, label_string_id = block.apply_unary_op(
             self._block.index_columns[0],
-            ops.AsTypeOp(pandas.StringDtype(storage="pyarrow")),
+            ops.AsTypeOp(to_type=pandas.StringDtype(storage="pyarrow")),
         )
         if like is not None:
             block, mask_id = block.apply_unary_op(
-                label_string_id, ops.ContainsStringOp(pat=like)
+                label_string_id, ops.StrContainsOp(pat=like)
             )
         else: # regex
             assert regex is not None
             block, mask_id = block.apply_unary_op(
-                label_string_id, ops.ContainsRegexOp(pat=regex)
+                label_string_id, ops.StrContainsRegexOp(pat=regex)
             )

         block = block.filter(mask_id)
@@ -1337,7 +1337,7 @@ def _filter_rows(
         # Behavior matches pandas 2.1+, older pandas versions would reindex
         block = self._block
         block, mask_id = block.apply_unary_op(
-            self._block.index_columns[0], ops.IsInOp(values=list(items))
+            self._block.index_columns[0], ops.IsInOp(values=tuple(items))
         )
         block = block.filter(mask_id)
         block = block.select_columns(self._block.value_columns)
@@ -1467,7 +1467,9 @@ def isin(self, values) -> DataFrame:
                 if label in values.keys():
                     value_for_key = values[label]
                     block, result_id = block.apply_unary_op(
-                        col, ops.IsInOp(value_for_key, match_nulls=True), label
+                        col,
+                        ops.IsInOp(values=tuple(value_for_key), match_nulls=True),
+                        label,
                     )
                     result_ids.append(result_id)
                 else:
@@ -1477,9 +1479,9 @@ def isin(self, values) -> DataFrame:
                     result_ids.append(result_id)
             return DataFrame(block.select_columns(result_ids)).fillna(value=False)
         elif utils.is_list_like(values):
-            return self._apply_unary_op(ops.IsInOp(values, match_nulls=True)).fillna(
-                value=False
-            )
+            return self._apply_unary_op(
+                ops.IsInOp(values=tuple(values), match_nulls=True)
+            ).fillna(value=False)
         else:
             raise TypeError(
                 "only list-like objects are allowed to be passed to "
@@ -2478,7 +2480,7 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame:
         # inputs causing errors.
         reprojected_df = DataFrame(self._block._force_reproject())
         return reprojected_df._apply_unary_op(
-            ops.RemoteFunctionOp(func, apply_on_null=(na_action is None))
+            ops.RemoteFunctionOp(func=func, apply_on_null=(na_action is None))
         )

     def apply(self, func, *, args: typing.Tuple = (), **kwargs):