@@ -145,7 +145,13 @@ def __init__(
145
145
self ._cloud_function_docker_repository = cloud_function_docker_repository
146
146
147
147
def create_bq_remote_function (
148
- self , input_args , input_types , output_type , endpoint , bq_function_name
148
+ self ,
149
+ input_args ,
150
+ input_types ,
151
+ output_type ,
152
+ endpoint ,
153
+ bq_function_name ,
154
+ max_batching_rows ,
149
155
):
150
156
"""Create a BigQuery remote function given the artifacts of a user defined
151
157
function and the http endpoint of a corresponding cloud function."""
@@ -169,14 +175,25 @@ def create_bq_remote_function(
169
175
bq_function_args .append (
170
176
f"{ name } { third_party_ibis_bqtypes .BigQueryType .from_ibis (input_types [idx ])} "
171
177
)
178
+
179
+ remote_function_options = {
180
+ "endpoint" : endpoint ,
181
+ "max_batching_rows" : max_batching_rows ,
182
+ }
183
+
184
+ remote_function_options_str = ", " .join (
185
+ [
186
+ f'{ key } ="{ val } "' if isinstance (val , str ) else f"{ key } ={ val } "
187
+ for key , val in remote_function_options .items ()
188
+ if val is not None
189
+ ]
190
+ )
191
+
172
192
create_function_ddl = f"""
173
193
CREATE OR REPLACE FUNCTION `{ self ._gcp_project_id } .{ self ._bq_dataset } `.{ bq_function_name } ({ ',' .join (bq_function_args )} )
174
194
RETURNS { bq_function_return_type }
175
195
REMOTE WITH CONNECTION `{ self ._gcp_project_id } .{ self ._bq_location } .{ self ._bq_connection_id } `
176
- OPTIONS (
177
- endpoint = "{ endpoint } ",
178
- max_batching_rows = 1000
179
- )"""
196
+ OPTIONS ({ remote_function_options_str } )"""
180
197
181
198
logger .info (f"Creating BQ remote function: { create_function_ddl } " )
182
199
@@ -438,6 +455,7 @@ def provision_bq_remote_function(
438
455
reuse ,
439
456
name ,
440
457
package_requirements ,
458
+ max_batching_rows ,
441
459
):
442
460
"""Provision a BigQuery remote function."""
443
461
# If reuse of any existing function with the same name (indicated by the
@@ -485,7 +503,12 @@ def provision_bq_remote_function(
485
503
"Exactly one type should be provided for every input arg."
486
504
)
487
505
self .create_bq_remote_function (
488
- input_args , input_types , output_type , cf_endpoint , remote_function_name
506
+ input_args ,
507
+ input_types ,
508
+ output_type ,
509
+ cf_endpoint ,
510
+ remote_function_name ,
511
+ max_batching_rows ,
489
512
)
490
513
else :
491
514
logger .info (f"Remote function { remote_function_name } already exists." )
@@ -607,6 +630,7 @@ def remote_function(
607
630
cloud_function_service_account : Optional [str ] = None ,
608
631
cloud_function_kms_key_name : Optional [str ] = None ,
609
632
cloud_function_docker_repository : Optional [str ] = None ,
633
+ max_batching_rows : Optional [int ] = 1000 ,
610
634
):
611
635
"""Decorator to turn a user defined function into a BigQuery remote function.
612
636
@@ -723,6 +747,15 @@ def remote_function(
723
747
projects/PROJECT_ID/locations/LOCATION/repositories/REPOSITORY_NAME.
724
748
For more details see
725
749
https://cloud.google.com/functions/docs/securing/cmek#before_you_begin.
750
+ max_batching_rows (int, Optional):
751
+ The maximum number of rows to be batched for processing in the
752
+ BQ remote function. Default value is 1000. A lower number can be
753
+ passed to avoid timeouts in case the user code is too complex to
754
+ process large number of rows fast enough. A higher number can be
755
+ used to increase throughput in case the user code is fast enough.
756
+ `None` can be passed to let BQ remote functions service apply
757
+ default batching. For more details, see
758
+ https://cloud.google.com/bigquery/docs/remote-functions#limiting_number_of_rows_in_a_batch_request.
726
759
"""
727
760
import bigframes .pandas as bpd
728
761
@@ -846,6 +879,7 @@ def wrapper(f):
846
879
reuse ,
847
880
name ,
848
881
packages ,
882
+ max_batching_rows ,
849
883
)
850
884
851
885
# TODO: Move ibis logic to compiler step
0 commit comments