|
22 | 22 |
|
23 | 23 | import typing
|
24 | 24 |
|
| 25 | +import bigframes.constants as constants |
| 26 | +import bigframes.core.groupby as groupby |
25 | 27 | import bigframes.operations as ops
|
| 28 | +import bigframes.operations.aggregations as agg_ops |
26 | 29 |
|
27 | 30 | if typing.TYPE_CHECKING:
|
| 31 | + import bigframes.dataframe as dataframe |
28 | 32 | import bigframes.series as series
|
29 | 33 |
|
30 | 34 |
|
@@ -52,9 +56,66 @@ def array_length(series: series.Series) -> series.Series:
|
52 | 56 | 2 2
|
53 | 57 | dtype: Int64
|
54 | 58 |
|
| 59 | + Args: |
| 60 | + series (bigframes.series.Series): |
| 61 | + A Series with array columns. |
| 62 | +
|
55 | 63 | Returns:
|
56 | 64 | bigframes.series.Series: A Series of integer values indicating
|
57 | 65 | the length of each element in the Series.
|
58 | 66 |
|
59 | 67 | """
|
60 | 68 | return series._apply_unary_op(ops.len_op)
|
| 69 | + |
| 70 | + |
def array_agg(
    obj: groupby.SeriesGroupBy | groupby.DataFrameGroupBy,
) -> series.Series | dataframe.DataFrame:
    """Aggregate each group of a GroupBy object into arrays.

    The selected columns are collected into one array per group. NULL values
    are omitted, because BigQuery does not allow NULLs inside arrays and would
    otherwise raise an error.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> import numpy as np
        >>> bpd.options.display.progress_bar = None

    For a SeriesGroupBy object:

        >>> lst = ['a', 'a', 'b', 'b', 'a']
        >>> s = bpd.Series([1, 2, 3, 4, np.nan], index=lst)
        >>> bbq.array_agg(s.groupby(level=0))
        a    [1. 2.]
        b    [3. 4.]
        dtype: list<item: double>[pyarrow]

    For a DataFrameGroupBy object:

        >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
        >>> df = bpd.DataFrame(l, columns=["a", "b", "c"])
        >>> bbq.array_agg(df.groupby(by=["b"]))
                 a      c
        b
        1.0    [2]    [3]
        2.0  [1 1]  [3 2]
        <BLANKLINE>
        [2 rows x 2 columns]

    Args:
        obj (groupby.SeriesGroupBy | groupby.DataFrameGroupBy):
            The GroupBy object whose groups are aggregated into arrays.

    Returns:
        bigframes.series.Series | bigframes.dataframe.DataFrame: A Series or
            DataFrame containing aggregated array columns, and indexed by the
            original group columns.

    Raises:
        ValueError: If ``obj`` is neither a SeriesGroupBy nor a
            DataFrameGroupBy.
    """
    # Series groupings aggregate their single value column directly.
    if isinstance(obj, groupby.SeriesGroupBy):
        series_op = agg_ops.ArrayAggOp()
        return obj._aggregate(series_op)

    # DataFrame groupings aggregate every column; numeric_only=False is passed
    # so the aggregation is not restricted to numeric columns.
    if isinstance(obj, groupby.DataFrameGroupBy):
        frame_op = agg_ops.ArrayAggOp()
        return obj._aggregate_all(frame_op, numeric_only=False)

    message = f"Unsupported type {type(obj)} to apply `array_agg` function. {constants.FEEDBACK_LINK}"
    raise ValueError(message)
0 commit comments