Skip to content

Commit 20d9738

Browse files
committed
feat: add GroupBy.size() to get number of rows in each group
1 parent 718a00c commit 20d9738

File tree

6 files changed

+89
-1
lines changed

6 files changed

+89
-1
lines changed

bigframes/core/blocks.py

+28
Original file line numberDiff line numberDiff line change
@@ -1129,6 +1129,34 @@ def corr(self):
11291129
index_labels=self.column_labels.names,
11301130
)
11311131

1132+
def size(self):
    """Build a block that computes the size of this block.

    Returns:
        A ``(block, column_ids)`` tuple: a ``Block`` wrapping the aggregated
        expression, and the list of output column ids (currently just
        ``"size"``).
    """
    size_aggregation = ex.NullaryAggregation(agg_ops.SizeOp())
    agg_specs = [(size_aggregation, "size")]
    result_expr = self.expr.aggregate(agg_specs)
    output_col_ids = [col_id for _, col_id in agg_specs]

    # TODO: where do group by columns come in if as_index=False?
    # Index columns, column labels and index labels are not passed to the
    # resulting Block yet; only the aggregated expression is.
    return Block(result_expr), output_col_ids
1159+
11321160
def _standard_stats(self, column_id) -> typing.Sequence[agg_ops.UnaryAggregateOp]:
11331161
"""
11341162
Gets a standard set of stats to preemptively fetch for a column if

bigframes/core/compile/aggregate_compiler.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ def compile_aggregate(
3535
aggregate: ex.Aggregation,
3636
bindings: typing.Dict[str, ibis_types.Value],
3737
) -> ibis_types.Value:
38+
if isinstance(aggregate, ex.NullaryAggregation):
39+
return compile_nullary_agg(aggregate.op)
3840
if isinstance(aggregate, ex.UnaryAggregation):
3941
input = scalar_compiler.compile_expression(aggregate.arg, bindings=bindings)
4042
return compile_unary_agg(
@@ -54,7 +56,9 @@ def compile_analytic(
5456
window: window_spec.WindowSpec,
5557
bindings: typing.Dict[str, ibis_types.Value],
5658
) -> ibis_types.Value:
57-
if isinstance(aggregate, ex.UnaryAggregation):
59+
if isinstance(aggregate, ex.NullaryAggregation):
60+
return compile_nullary_agg(aggregate.op, window)
61+
elif isinstance(aggregate, ex.UnaryAggregation):
5862
input = scalar_compiler.compile_expression(aggregate.arg, bindings=bindings)
5963
return compile_unary_agg(aggregate.op, input, window)
6064
elif isinstance(aggregate, ex.BinaryAggregation):
@@ -81,6 +85,14 @@ def compile_unary_agg(
8185
raise ValueError(f"Can't compile unrecognized operation: {op}")
8286

8387

88+
@functools.singledispatch
def compile_nullary_agg(
    op: agg_ops.WindowOp,
    window: Optional[window_spec.WindowSpec] = None,
) -> ibis_types.Value:
    """Fallback compiler for aggregations that take no input column.

    Concrete ops register their own implementation through
    ``compile_nullary_agg.register``; reaching this body means no
    implementation was registered for the type of ``op``.

    Args:
        op: The nullary operation to compile.
        window: Optional window specification to compile the op over.

    Raises:
        ValueError: Always, because ``op`` is not a recognized operation.
    """
    message = f"Can't compile unrecognized operation: {op}"
    raise ValueError(message)
94+
95+
8496
def numeric_op(operation):
8597
@functools.wraps(operation)
8698
def constrained_op(op, column: ibis_types.Column, window=None):
@@ -101,6 +113,12 @@ def constrained_op(op, column: ibis_types.Column, window=None):
101113
### Specific Op implementations Below
102114

103115

116+
@compile_nullary_agg.register
def _(op: agg_ops.SizeOp, window=None) -> ibis_types.NumericValue:
    """Compile ``SizeOp`` as a count over the constant ``1``.

    Args:
        op: The size operation being compiled (used only for dispatch).
        window: Optional window specification; forwarded to
            ``_apply_window_if_present``.

    Returns:
        The ibis value produced by ``_apply_window_if_present`` for the count.
    """
    # Deliberately NOT wrapped in ``numeric_op``: that decorator replaces the
    # function with a wrapper whose signature is ``(op, column, window=None)``,
    # while nullary aggregations are invoked as ``compile_nullary_agg(op)`` or
    # ``compile_nullary_agg(op, window)`` -- there is no input column to pass.
    return _apply_window_if_present(vendored_ibis_ops.count(1), window)
120+
121+
104122
@compile_unary_agg.register
105123
@numeric_op
106124
def _(

bigframes/core/expression.py

+5
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ class Aggregation(abc.ABC):
4040
op: agg_ops.WindowOp = dataclasses.field()
4141

4242

43+
@dataclasses.dataclass(frozen=True)
class NullaryAggregation(Aggregation):
    """Aggregation expression whose operation takes no input arguments."""

    # Narrows the inherited ``op`` field to nullary window operations.
    op: agg_ops.NullaryWindowOp = dataclasses.field()
46+
47+
4348
@dataclasses.dataclass(frozen=True)
4449
class UnaryAggregation(Aggregation):
4550
op: agg_ops.UnaryWindowOp = dataclasses.field()

bigframes/core/groupby/__init__.py

+10
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,16 @@ def __getitem__(
102102
dropna=self._dropna,
103103
)
104104

105+
def size(self) -> df.DataFrame:
    """Aggregate the grouped block and return the result as a DataFrame.

    Returns:
        The aggregated DataFrame as-is when ``as_index`` is set, otherwise
        the result of passing it through ``_convert_index``.
    """
    # NOTE(review): no ``aggregations`` argument is passed here (it is still
    # commented out upstream) and the column labels are not set -- confirm
    # that ``Block.aggregate`` supports this call before relying on it.
    agg_block, _ = self._block.aggregate(
        by_column_ids=self._by_col_ids,
        dropna=self._dropna,
    )
    dataframe = df.DataFrame(agg_block)
    if self._as_index:
        return dataframe
    return self._convert_index(dataframe)
114+
105115
def sum(self, numeric_only: bool = False, *args) -> df.DataFrame:
106116
if not numeric_only:
107117
self._raise_on_non_numeric("sum")

bigframes/operations/aggregations.py

+20
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ def handles_ties(self):
3333
return False
3434

3535

36+
@dataclasses.dataclass(frozen=True)
class NullaryWindowOp(WindowOp):
    """Window operation that takes no input arguments."""

    @property
    def arguments(self) -> int:
        """Number of arguments this operation takes (always zero)."""
        return 0
41+
42+
3643
@dataclasses.dataclass(frozen=True)
3744
class UnaryWindowOp(WindowOp):
3845
@property
@@ -55,6 +62,13 @@ def arguments(self) -> int:
5562
...
5663

5764

65+
@dataclasses.dataclass(frozen=True)
class NullaryAggregateOp(AggregateOp, NullaryWindowOp):
    """Aggregate operation that takes no input arguments."""

    @property
    def arguments(self) -> int:
        """Number of arguments this operation takes (always zero)."""
        # Defined explicitly rather than inherited, mirroring the sibling
        # aggregate-op classes that each declare their own arity.
        return 0
70+
71+
5872
@dataclasses.dataclass(frozen=True)
5973
class UnaryAggregateOp(AggregateOp, UnaryWindowOp):
6074
@property
@@ -69,6 +83,11 @@ def arguments(self) -> int:
6983
return 2
7084

7185

86+
@dataclasses.dataclass(frozen=True)
class SizeOp(NullaryAggregateOp):
    """Nullary aggregate operation named ``"size"``."""

    name: ClassVar[str] = "size"
89+
90+
7291
@dataclasses.dataclass(frozen=True)
7392
class SumOp(UnaryAggregateOp):
7493
name: ClassVar[str] = "sum"
@@ -270,6 +289,7 @@ class CovOp(BinaryAggregateOp):
270289
name: ClassVar[str] = "cov"
271290

272291

292+
size_op = SizeOp()
273293
sum_op = SumOp()
274294
mean_op = MeanOp()
275295
median_op = MedianOp()

third_party/bigframes_vendored/ibis/expr/operations/analytic.py

+7
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,17 @@
22

33
from __future__ import annotations
44

5+
import ibis
56
import ibis.expr.operations as ops
67
import ibis.expr.rules as rlz
78

89

10+
@ibis.udf.agg.builtin
def count(value: int) -> int:
    """Count of a scalar.

    Declared as a builtin aggregate UDF; the Python body below is only a
    placeholder and is excluded from coverage.
    """
    return 0  # pragma: NO COVER
14+
15+
916
class FirstNonNullValue(ops.Analytic):
1017
"""Retrieve the first element."""
1118

0 commit comments

Comments
 (0)