googleapis · chelsea-lin · May 30, 2024 · May 29, 2024 · tswast · May 30, 2024
@@ -57,8 +57,7 @@ def array_length(series: series.Series) -> series.Series:
         dtype: Int64
 
     Args:
-        series (bigframes.series.Series):
-                A Series with array columns.
+        series (bigframes.series.Series): A Series with array columns.
 
     Returns:
         bigframes.series.Series: A Series of integer values indicating
@@ -104,7 +103,7 @@ def array_agg(
 
     Args:
         obj (groupby.SeriesGroupBy | groupby.DataFrameGroupBy):
-                A GroupBy object to be applied the function.
+            A GroupBy object to which the function is applied.
 
     Returns:
         bigframes.series.Series | bigframes.dataframe.DataFrame: A Series or
@@ -119,3 +118,33 @@ def array_agg(
         raise ValueError(
             f"Unsupported type {type(obj)} to apply `array_agg` function. {constants.FEEDBACK_LINK}"
         )
+
+
+def array_to_string(series: series.Series, delimiter: str) -> series.Series:
+    """Converts array elements within a Series into delimited strings.
+
+    **Examples:**
+
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> import numpy as np
+        >>> bpd.options.display.progress_bar = None
+
+        >>> s = bpd.Series([["H", "i", "!"], ["Hello", "World"], np.nan, [], ["Hi"]])
+        >>> bbq.array_to_string(s, delimiter=", ")
+            0         H, i, !
+            1    Hello, World
+            2
+            3
+            4              Hi
+            dtype: string
+
+    Args:
+        series (bigframes.series.Series): A Series containing arrays of
+            strings.
+        delimiter (str): The string used to separate array elements.
+
+    Returns:
+        bigframes.series.Series: A Series containing delimited strings.
+
+    Raises:
+        TypeError: If the Series does not hold arrays of strings (for
+            example, a Series of integer arrays or of plain strings).
+    """
+    # Delegate to the ArrayToStringOp unary operation, which carries the
+    # delimiter and validates the input dtype in its `output_type`.
+    return series._apply_unary_op(ops.ArrayToStringOp(delimiter=delimiter))
@@ -875,6 +875,12 @@ def map_op_impl(x: ibis_types.Value, op: ops.MapOp):
     return case.else_(x).end()
 
 
+# Array Ops
+@scalar_op_compiler.register_unary_op(ops.ArrayToStringOp, pass_op=True)
+def array_to_string_op_impl(x: ibis_types.Value, op: ops.ArrayToStringOp):
+    """Compile ``ArrayToStringOp`` by joining the array value ``x`` with ``op.delimiter``.
+
+    Registered with ``pass_op=True`` so the op instance (and therefore its
+    ``delimiter`` field) is handed to this implementation.
+    """
+    # typing.cast has no runtime effect; it only narrows the static type to
+    # ArrayValue so that `.join` is visible to the type checker.
+    return typing.cast(ibis_types.ArrayValue, x).join(op.delimiter)
+
+
 ### Binary Ops
 def short_circuit_nulls(type_override: typing.Optional[ibis_dtypes.DataType] = None):
     """Wraps a binary operator to generate nulls of the expected type if either input is a null scalar."""

@@ -132,6 +132,14 @@ def is_array_like(type: ExpressionType) -> bool:
     )
 
 
+def is_array_string_like(type: ExpressionType) -> bool:
+    """Return True if ``type`` is an Arrow-backed list dtype whose elements are strings.
+
+    All three conditions must hold: ``type`` is a ``pd.ArrowDtype``, its
+    ``pyarrow_dtype`` is a ``pa.ListType``, and the list's ``value_type``
+    satisfies ``pa.types.is_string``.
+    """
+    return (
+        isinstance(type, pd.ArrowDtype)
+        and isinstance(type.pyarrow_dtype, pa.ListType)
+        and pa.types.is_string(type.pyarrow_dtype.value_type)
+    )
+
+
 def is_struct_like(type: ExpressionType) -> bool:
     return isinstance(type, pd.ArrowDtype) and isinstance(
         type.pyarrow_dtype, pa.StructType

@@ -578,6 +578,19 @@ def output_type(self, *input_types):
         return input_types[0]
 
 
+## Array Ops
+@dataclasses.dataclass(frozen=True)
+class ArrayToStringOp(UnaryOp):
+    """Unary op named ``array_to_string`` that is parameterized by a delimiter.
+
+    The op accepts only array-of-string inputs and produces the string dtype.
+    """
+
+    name: typing.ClassVar[str] = "array_to_string"
+    # String passed through to the compiled join of the array elements.
+    delimiter: str
+
+    def output_type(self, *input_types):
+        """Validate the single input dtype and return the resulting dtype.
+
+        Only ``input_types[0]`` is inspected.
+
+        Returns:
+            ``dtypes.STRING_DTYPE``.
+
+        Raises:
+            TypeError: If the input dtype is not an array of strings, as
+                determined by ``dtypes.is_array_string_like``.
+        """
+        input_type = input_types[0]
+        if not dtypes.is_array_string_like(input_type):
+            raise TypeError("Input type must be an array of string type.")
+        return dtypes.STRING_DTYPE
+
+
 # Binary Ops
 fillna_op = create_binary_op(name="fillna", type_signature=op_typing.COERCE)
 maximum_op = create_binary_op(name="maximum", type_signature=op_typing.COERCE)

@@ -139,3 +139,16 @@ def test_array_agg_matches_after_explode():
         result.to_pandas(),  # type: ignore
         df.to_pandas(),
     )
+
+
+@pytest.mark.parametrize(
+    ("data"),
+    [
+        pytest.param([[1, 2], [3, 4], [5]], id="int_array"),
+        pytest.param(["hello", "world"], id="string"),
+    ],
+)
+def test_array_to_string_w_type_checks(data):
+    """``array_to_string`` raises ``TypeError`` for inputs that are not arrays of strings.
+
+    Covers an array of integers and a plain (non-array) string Series.
+    """
+    series = bpd.Series(data)
+    with pytest.raises(TypeError):
+        bbq.array_to_string(series, delimiter=", ")