Skip to content

Commit 9963f85

Browse files
chelsea-lin and tswast authored
feat: add the bigframes.bigquery sub-package with a bigframes.bigquery.array_length function (#630)
* feat: creates bigquery namespace and adds bigquery.array_length function * add docs * minor fix * fixing docs * add more doc tests * sentence-case * TODO for null arrays --------- Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent 9a9f2bc commit 9963f85

File tree

7 files changed

+120
-0
lines changed

7 files changed

+120
-0
lines changed

bigframes/bigquery/__init__.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
"""This module integrates BigQuery built-in functions for use with DataFrame objects,
17+
such as array functions:
18+
https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions. """
19+
20+
21+
from __future__ import annotations
22+
23+
import typing
24+
25+
import bigframes.operations as ops
26+
27+
if typing.TYPE_CHECKING:
28+
import bigframes.series as series
29+
30+
31+
def array_length(series: series.Series) -> series.Series:
    """Compute the length of each array element in the Series.

    The work is delegated to the Series' unary-operation machinery using
    the ``len_op`` operator from ``bigframes.operations``.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series([[1, 2, 8, 3], [], [3, 4]])
        >>> bbq.array_length(s)
        0    4
        1    0
        2    2
        dtype: Int64

    You can also apply this function directly to Series.

        >>> s.apply(bbq.array_length, by_row=False)
        0    4
        1    0
        2    2
        dtype: Int64

    Args:
        series (bigframes.series.Series): The Series whose elements are
            arrays to be measured.

    Returns:
        bigframes.series.Series: A Series of integer values indicating
            the length of each element in the Series.

    """
    # Name the operator first so the delegation reads as a single step.
    length_op = ops.len_op
    return series._apply_unary_op(length_op)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
2+
===========================
3+
BigQuery Built-in Functions
4+
===========================
5+
6+
.. automodule:: bigframes.bigquery
7+
:members:
8+
:inherited-members:
9+
:undoc-members:

docs/reference/bigframes/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11

2+
============
23
Core objects
34
============
45

docs/reference/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ packages.
1010
bigframes/index
1111
bigframes.pandas/index
1212
bigframes.ml/index
13+
bigframes.bigquery/index

docs/templates/toc.yml

+4
Original file line numberDiff line numberDiff line change
@@ -189,5 +189,9 @@
189189
uid: bigframes.ml.remote.VertexAIModel
190190
name: remote
191191
name: bigframes.ml
192+
- items:
193+
- name: BigQuery built-in functions
194+
uid: bigframes.bigquery
195+
name: bigframes.bigquery
192196
name: BigQuery DataFrames
193197
status: beta
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import numpy as np
16+
import pandas as pd
17+
18+
import bigframes.bigquery as bbq
19+
import bigframes.pandas as bpd
20+
21+
22+
def test_array_length():
    """Check array_length against hand-counted lengths, including an empty
    array and a NaN entry (which is currently expected to yield 0)."""
    input_arrays = bpd.Series(
        [["A", "AA", "AAA"], ["BB", "B"], np.nan, [], ["C"]]
    )
    actual_lengths = bbq.array_length(input_arrays).to_pandas()

    # TODO(b/336880368): Allow for NULL values to be input for ARRAY columns.
    # Once we actually store NULL values, this will be NULL where the input is NULL.
    expected_lengths = pd.Series([3, 2, 0, 0, 1])

    # Only the values are compared; dtype and index type are not checked.
    pd.testing.assert_series_equal(
        actual_lengths,
        expected_lengths,
        check_dtype=False,
        check_index_type=False,
    )

0 commit comments

Comments
 (0)