Skip to content

Commit 412f28b

Browse files
authored
feat: bigframes.bigquery.array_agg(SeriesGroupBy|DataFrameGroupby) (#663)
1 parent ab1bc04 commit 412f28b

File tree

10 files changed

+506
-85
lines changed

10 files changed

+506
-85
lines changed

bigframes/bigquery/__init__.py

+61
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,13 @@
2222

2323
import typing
2424

25+
import bigframes.constants as constants
26+
import bigframes.core.groupby as groupby
2527
import bigframes.operations as ops
28+
import bigframes.operations.aggregations as agg_ops
2629

2730
if typing.TYPE_CHECKING:
31+
import bigframes.dataframe as dataframe
2832
import bigframes.series as series
2933

3034

@@ -52,9 +56,66 @@ def array_length(series: series.Series) -> series.Series:
5256
2 2
5357
dtype: Int64
5458
59+
Args:
60+
series (bigframes.series.Series):
61+
A Series with array columns.
62+
5563
Returns:
5664
bigframes.series.Series: A Series of integer values indicating
5765
the length of each element in the Series.
5866
5967
"""
6068
return series._apply_unary_op(ops.len_op)
69+
70+
71+
def array_agg(
    obj: groupby.SeriesGroupBy | groupby.DataFrameGroupBy,
) -> series.Series | dataframe.DataFrame:
    """Collect the values of each group into an array, leaving out NULLs.

    NULL entries are dropped because BigQuery does not allow NULLs inside
    arrays and would otherwise raise an error.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> import numpy as np
        >>> bpd.options.display.progress_bar = None

    For a SeriesGroupBy object:

        >>> lst = ['a', 'a', 'b', 'b', 'a']
        >>> s = bpd.Series([1, 2, 3, 4, np.nan], index=lst)
        >>> bbq.array_agg(s.groupby(level=0))
        a    [1. 2.]
        b    [3. 4.]
        dtype: list<item: double>[pyarrow]

    For a DataFrameGroupBy object:

        >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
        >>> df = bpd.DataFrame(l, columns=["a", "b", "c"])
        >>> bbq.array_agg(df.groupby(by=["b"]))
                 a      c
        b
        1.0    [2]    [3]
        2.0  [1 1]  [3 2]
        <BLANKLINE>
        [2 rows x 2 columns]

    Args:
        obj (groupby.SeriesGroupBy | groupby.DataFrameGroupBy):
            The GroupBy object whose groups are aggregated into arrays.

    Returns:
        bigframes.series.Series | bigframes.dataframe.DataFrame: A Series or
            DataFrame containing aggregated array columns, and indexed by the
            original group columns.

    Raises:
        ValueError: If ``obj`` is neither a SeriesGroupBy nor a
            DataFrameGroupBy.
    """
    # Guard clause: reject anything that is not one of the two supported
    # GroupBy flavours before dispatching.
    if not isinstance(obj, (groupby.SeriesGroupBy, groupby.DataFrameGroupBy)):
        raise ValueError(
            f"Unsupported type {type(obj)} to apply `array_agg` function. {constants.FEEDBACK_LINK}"
        )

    # The Series check stays first so dispatch precedence matches the
    # original if/elif ordering.
    if isinstance(obj, groupby.SeriesGroupBy):
        return obj._aggregate(agg_ops.ArrayAggOp())

    # Only the DataFrameGroupBy case remains; every column is aggregated,
    # not just the numeric ones.
    return obj._aggregate_all(agg_ops.ArrayAggOp(), numeric_only=False)

0 commit comments

Comments
 (0)