Commit 9ca92d0

feat: support gcf vpc connector in remote_function (#677)

1 parent 21bd3e4 · commit 9ca92d0
4 files changed, +97 -3 lines

bigframes/functions/remote_function.py (+13 -1)
@@ -441,6 +441,7 @@ def create_cloud_function(
         timeout_seconds=600,
         max_instance_count=None,
         is_row_processor=False,
+        vpc_connector=None,
     ):
         """Create a cloud function from the given user defined function."""

@@ -519,6 +520,8 @@ def create_cloud_function(
         function.service_config.timeout_seconds = timeout_seconds
         if max_instance_count is not None:
             function.service_config.max_instance_count = max_instance_count
+        if vpc_connector is not None:
+            function.service_config.vpc_connector = vpc_connector
         function.service_config.service_account_email = (
             self._cloud_function_service_account
         )
@@ -568,6 +571,7 @@ def provision_bq_remote_function(
         cloud_function_timeout,
         cloud_function_max_instance_count,
         is_row_processor,
+        cloud_function_vpc_connector,
     ):
         """Provision a BigQuery remote function."""
         # If reuse of any existing function with the same name (indicated by the
@@ -595,6 +599,7 @@ def provision_bq_remote_function(
                 cloud_function_timeout,
                 cloud_function_max_instance_count,
                 is_row_processor,
+                cloud_function_vpc_connector,
             )
         else:
             logger.info(f"Cloud function {cloud_function_name} already exists.")
@@ -750,6 +755,7 @@ def remote_function(
     max_batching_rows: Optional[int] = 1000,
     cloud_function_timeout: Optional[int] = 600,
     cloud_function_max_instances: Optional[int] = None,
+    cloud_function_vpc_connector: Optional[str] = None,
 ):
     """Decorator to turn a user defined function into a BigQuery remote function.

@@ -894,7 +900,12 @@ def remote_function(
             control the spike in the billing. Higher setting can help
             support processing larger scale data. When not specified, cloud
             function's default setting applies. For more details see
-            https://cloud.google.com/functions/docs/configuring/max-instances
+            https://cloud.google.com/functions/docs/configuring/max-instances.
+        cloud_function_vpc_connector (str, Optional):
+            The VPC connector you would like to configure for your cloud
+            function. This is useful if your code needs access to data or
+            service(s) that are on a VPC network. See for more details
+            https://cloud.google.com/functions/docs/networking/connecting-vpc.
     """
     is_row_processor = False

@@ -1041,6 +1052,7 @@ def wrapper(f):
             cloud_function_timeout,
             cloud_function_max_instances,
             is_row_processor,
+            cloud_function_vpc_connector,
         )

         # TODO: Move ibis logic to compiler step

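For context on the mechanism: the new vpc_connector argument is simply copied onto the Cloud Functions v2 ServiceConfig before the function is deployed. The standalone sketch below is not part of this diff; it only illustrates that shape against the google-cloud-functions client, with a placeholder function name and connector.

# Sketch only: shows where a VPC connector lands on a Cloud Functions 2nd-gen
# resource, mirroring the service_config assignment added in this commit.
from typing import Optional

from google.cloud import functions_v2


def build_function_config(name: str, vpc_connector: Optional[str]) -> functions_v2.Function:
    function = functions_v2.Function(name=name)  # placeholder function name
    function.service_config.timeout_seconds = 600
    if vpc_connector is not None:
        # Connector as accepted by the Cloud Functions API, e.g. the short name
        # used in the tests below ("bigframes-vpc") or a full resource name of
        # the form projects/<project>/locations/<region>/connectors/<connector>.
        function.service_config.vpc_connector = vpc_connector
    return function
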
bigframes/pandas/__init__.py (+2)
@@ -654,6 +654,7 @@ def remote_function(
     max_batching_rows: Optional[int] = 1000,
     cloud_function_timeout: Optional[int] = 600,
     cloud_function_max_instances: Optional[int] = None,
+    cloud_function_vpc_connector: Optional[str] = None,
 ):
     return global_session.with_default_session(
         bigframes.session.Session.remote_function,
@@ -670,6 +671,7 @@ def remote_function(
         max_batching_rows=max_batching_rows,
         cloud_function_timeout=cloud_function_timeout,
         cloud_function_max_instances=cloud_function_max_instances,
+        cloud_function_vpc_connector=cloud_function_vpc_connector,
     )

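Because bigframes.pandas.remote_function just forwards the new keyword to the session implementation, end users can set it from the pandas-style entry point. A minimal usage sketch, assuming a project already configured for BigQuery remote functions; the connector name is a placeholder.

import bigframes.pandas as bpd

# "my-vpc-connector" is a placeholder for a Serverless VPC Access connector
# that already exists in the cloud function's region.
@bpd.remote_function(
    [int],
    int,
    reuse=False,
    cloud_function_vpc_connector="my-vpc-connector",
)
def add_one(x):
    if x is None:
        return x
    return x + 1
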
bigframes/session/__init__.py (+8 -1)
@@ -1412,6 +1412,7 @@ def remote_function(
         max_batching_rows: Optional[int] = 1000,
         cloud_function_timeout: Optional[int] = 600,
         cloud_function_max_instances: Optional[int] = None,
+        cloud_function_vpc_connector: Optional[str] = None,
     ):
         """Decorator to turn a user defined function into a BigQuery remote function. Check out
         the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
@@ -1537,7 +1538,12 @@ def remote_function(
                 control the spike in the billing. Higher setting can help
                 support processing larger scale data. When not specified, cloud
                 function's default setting applies. For more details see
-                https://cloud.google.com/functions/docs/configuring/max-instances
+                https://cloud.google.com/functions/docs/configuring/max-instances.
+            cloud_function_vpc_connector (str, Optional):
+                The VPC connector you would like to configure for your cloud
+                function. This is useful if your code needs access to data or
+                service(s) that are on a VPC network. See for more details
+                https://cloud.google.com/functions/docs/networking/connecting-vpc.
         Returns:
             callable: A remote function object pointing to the cloud assets created
             in the background to support the remote execution. The cloud assets can be
@@ -1562,6 +1568,7 @@ def remote_function(
             max_batching_rows=max_batching_rows,
             cloud_function_timeout=cloud_function_timeout,
             cloud_function_max_instances=cloud_function_max_instances,
+            cloud_function_vpc_connector=cloud_function_vpc_connector,
         )

     def read_gbq_function(

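At the session level the pattern looks the same, which is what the new system test below exercises. A sketch with placeholder project and connector names, including the check that the deployed cloud function picked up the connector, mirroring the test's assertion.

import bigframes

session = bigframes.Session(
    context=bigframes.BigQueryOptions(project="my-project")  # placeholder project
)

@session.remote_function(
    [int],
    int,
    reuse=False,
    cloud_function_vpc_connector="my-vpc-connector",  # placeholder connector
)
def square_num(x):
    if x is None:
        return x
    return x * x

# Confirm the deployed GCF carries the requested connector, as the test does.
gcf = session.cloudfunctionsclient.get_function(
    name=square_num.bigframes_cloud_function
)
assert gcf.service_config.vpc_connector == "my-vpc-connector"
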
tests/system/large/test_remote_function.py (+74 -1)
@@ -21,7 +21,7 @@
 import tempfile
 import textwrap

-from google.api_core.exceptions import BadRequest, NotFound
+from google.api_core.exceptions import BadRequest, InvalidArgument, NotFound
 from google.cloud import bigquery, storage
 import pandas
 import pytest
@@ -1333,6 +1333,79 @@ def square_num(x):
     )


+@pytest.mark.flaky(retries=2, delay=120)
+def test_remote_function_via_session_vpc(scalars_dfs):
+    # TODO(shobs): Automate the following set-up during testing in the test project.
+    #
+    # For upfront convenience, the following set up has been statically created
+    # in the project bigfrmames-dev-perf via cloud console:
+    #
+    # 1. Create a vpc connector as per
+    # https://cloud.google.com/vpc/docs/configure-serverless-vpc-access#gcloud
+    #
+    # $ gcloud compute networks vpc-access connectors create bigframes-vpc --project=bigframes-dev-perf --region=us-central1 --range 10.8.0.0/28
+    # Create request issued for: [bigframes-vpc]
+    # Waiting for operation [projects/bigframes-dev-perf/locations/us-central1/operations/f9f90df6-7cf4-4420-8c2f-b3952775dcfb] to complete...done.
+    # Created connector [bigframes-vpc].
+    #
+    # $ gcloud compute networks vpc-access connectors list --project=bigframes-dev-perf --region=us-central1
+    # CONNECTOR_ID   REGION       NETWORK  IP_CIDR_RANGE  SUBNET  SUBNET_PROJECT  MACHINE_TYPE  MIN_INSTANCES  MAX_INSTANCES  MIN_THROUGHPUT  MAX_THROUGHPUT  STATE
+    # bigframes-vpc  us-central1  default  10.8.0.0/28                            e2-micro      2              10             200             1000            READY
+
+    project = "bigframes-dev-perf"
+    gcf_vpc_connector = "bigframes-vpc"
+
+    rf_session = bigframes.Session(context=bigframes.BigQueryOptions(project=project))
+
+    try:
+
+        def square_num(x):
+            if x is None:
+                return x
+            return x * x
+
+        square_num_remote = rf_session.remote_function(
+            [int], int, reuse=False, cloud_function_vpc_connector=gcf_vpc_connector
+        )(square_num)
+
+        scalars_df, scalars_pandas_df = scalars_dfs
+
+        bf_int64_col = scalars_df["int64_col"]
+        bf_result_col = bf_int64_col.apply(square_num_remote)
+        bf_result = bf_int64_col.to_frame().assign(result=bf_result_col).to_pandas()
+
+        pd_int64_col = scalars_pandas_df["int64_col"]
+        pd_result_col = pd_int64_col.apply(square_num)
+        pd_result = pd_int64_col.to_frame().assign(result=pd_result_col)
+
+        assert_pandas_df_equal(bf_result, pd_result, check_dtype=False)
+
+        # Assert that the GCF is created with the intended vpc connector
+        gcf = rf_session.cloudfunctionsclient.get_function(
+            name=square_num_remote.bigframes_cloud_function
+        )
+        assert gcf.service_config.vpc_connector == gcf_vpc_connector
+    finally:
+        # clean up the gcp assets created for the remote function
+        cleanup_remote_function_assets(
+            rf_session.bqclient, rf_session.cloudfunctionsclient, square_num_remote
+        )
+
+
+def test_remote_function_via_session_vpc_invalid(session):
+    with pytest.raises(
+        InvalidArgument, match="400.*Serverless VPC Access connector is not found"
+    ):
+
+        @session.remote_function(
+            [int], int, reuse=False, cloud_function_vpc_connector="does-not-exist"
+        )
+        def square_num(x):
+            if x is None:
+                return x
+            return x * x
+
+
 @pytest.mark.parametrize(
     ("max_batching_rows"),
     [
