Skip to content

Commit 2fd69f4

Browse files
authored
feat: support CMEK for remote_function cloud functions (#430)
* feat: support CMEK for `remote_function` cloud functions * add retry in test * bump up min version of google-cloud-functions for CMEK compliance
1 parent e502e6b commit 2fd69f4

File tree

6 files changed

+129
-4
lines changed

6 files changed

+129
-4
lines changed

bigframes/functions/remote_function.py

+39-1
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ def __init__(
130130
bq_connection_id,
131131
cloud_resource_manager_client,
132132
cloud_function_service_account,
133+
cloud_function_kms_key_name,
134+
cloud_function_docker_repository,
133135
):
134136
self._gcp_project_id = gcp_project_id
135137
self._cloud_function_region = cloud_function_region
@@ -142,6 +144,8 @@ def __init__(
142144
bq_connection_client, cloud_resource_manager_client
143145
)
144146
self._cloud_function_service_account = cloud_function_service_account
147+
self._cloud_function_kms_key_name = cloud_function_kms_key_name
148+
self._cloud_function_docker_repository = cloud_function_docker_repository
145149

146150
def create_bq_remote_function(
147151
self, input_args, input_types, output_type, endpoint, bq_function_name
@@ -344,7 +348,9 @@ def create_cloud_function(self, def_, cf_name, package_requirements=None):
344348
)
345349

346350
# Determine an upload URL for user code
347-
upload_url_request = functions_v2.GenerateUploadUrlRequest()
351+
upload_url_request = functions_v2.GenerateUploadUrlRequest(
352+
kms_key_name=self._cloud_function_kms_key_name
353+
)
348354
upload_url_request.parent = self.get_cloud_function_fully_qualified_parent()
349355
upload_url_response = self._cloud_functions_client.generate_upload_url(
350356
request=upload_url_request
@@ -383,12 +389,16 @@ def create_cloud_function(self, def_, cf_name, package_requirements=None):
383389
function.build_config.source.storage_source.object_ = (
384390
upload_url_response.storage_source.object_
385391
)
392+
function.build_config.docker_repository = (
393+
self._cloud_function_docker_repository
394+
)
386395
function.service_config = functions_v2.ServiceConfig()
387396
function.service_config.available_memory = "1024M"
388397
function.service_config.timeout_seconds = 600
389398
function.service_config.service_account_email = (
390399
self._cloud_function_service_account
391400
)
401+
function.kms_key_name = self._cloud_function_kms_key_name
392402
create_function_request.function = function
393403

394404
# Create the cloud function and wait for it to be ready to use
@@ -597,6 +607,8 @@ def remote_function(
597607
name: Optional[str] = None,
598608
packages: Optional[Sequence[str]] = None,
599609
cloud_function_service_account: Optional[str] = None,
610+
cloud_function_kms_key_name: Optional[str] = None,
611+
cloud_function_docker_repository: Optional[str] = None,
600612
):
601613
"""Decorator to turn a user defined function into a BigQuery remote function.
602614
@@ -699,6 +711,20 @@ def remote_function(
699711
for more details. Please make sure the service account has the
700712
necessary IAM permissions configured as described in
701713
https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration.
714+
cloud_function_kms_key_name (str, Optional):
715+
Customer managed encryption key to protect cloud functions and
716+
related data at rest. This is of the format
717+
projects/PROJECT_ID/locations/LOCATION/keyRings/KEYRING/cryptoKeys/KEY.
718+
Read https://cloud.google.com/functions/docs/securing/cmek for
719+
more details including granting necessary service accounts
720+
access to the key.
721+
cloud_function_docker_repository (str, Optional):
722+
Docker repository created with the same encryption key as
723+
`cloud_function_kms_key_name` to store encrypted artifacts
724+
created to support the cloud function. This is of the format
725+
projects/PROJECT_ID/locations/LOCATION/repositories/REPOSITORY_NAME.
726+
For more details see
727+
https://cloud.google.com/functions/docs/securing/cmek#before_you_begin.
702728
"""
703729
import bigframes.pandas as bpd
704730

@@ -780,6 +806,16 @@ def remote_function(
780806
f"{bq_location}."
781807
)
782808

809+
# If any CMEK is intended then check that a docker repository is also specified
810+
if (
811+
cloud_function_kms_key_name is not None
812+
and cloud_function_docker_repository is None
813+
):
814+
raise ValueError(
815+
"cloud_function_docker_repository must be specified with cloud_function_kms_key_name."
816+
" For more details see https://cloud.google.com/functions/docs/securing/cmek#before_you_begin"
817+
)
818+
783819
def wrapper(f):
784820
if not callable(f):
785821
raise TypeError("f must be callable, got {}".format(f))
@@ -800,6 +836,8 @@ def wrapper(f):
800836
bq_connection_id,
801837
resource_manager_client,
802838
cloud_function_service_account,
839+
cloud_function_kms_key_name,
840+
cloud_function_docker_repository,
803841
)
804842

805843
rf_name, cf_name = remote_function_client.provision_bq_remote_function(

bigframes/pandas/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,8 @@ def remote_function(
620620
name: Optional[str] = None,
621621
packages: Optional[Sequence[str]] = None,
622622
cloud_function_service_account: Optional[str] = None,
623+
cloud_function_kms_key_name: Optional[str] = None,
624+
cloud_function_docker_repository: Optional[str] = None,
623625
):
624626
return global_session.with_default_session(
625627
bigframes.session.Session.remote_function,
@@ -631,6 +633,8 @@ def remote_function(
631633
name=name,
632634
packages=packages,
633635
cloud_function_service_account=cloud_function_service_account,
636+
cloud_function_kms_key_name=cloud_function_kms_key_name,
637+
cloud_function_docker_repository=cloud_function_docker_repository,
634638
)
635639

636640

bigframes/session/__init__.py

+18
Original file line numberDiff line numberDiff line change
@@ -1364,6 +1364,8 @@ def remote_function(
13641364
name: Optional[str] = None,
13651365
packages: Optional[Sequence[str]] = None,
13661366
cloud_function_service_account: Optional[str] = None,
1367+
cloud_function_kms_key_name: Optional[str] = None,
1368+
cloud_function_docker_repository: Optional[str] = None,
13671369
):
13681370
"""Decorator to turn a user defined function into a BigQuery remote function. Check out
13691371
the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
@@ -1444,6 +1446,20 @@ def remote_function(
14441446
for more details. Please make sure the service account has the
14451447
necessary IAM permissions configured as described in
14461448
https://cloud.google.com/functions/docs/reference/iam/roles#additional-configuration.
1449+
cloud_function_kms_key_name (str, Optional):
1450+
Customer managed encryption key to protect cloud functions and
1451+
related data at rest. This is of the format
1452+
projects/PROJECT_ID/locations/LOCATION/keyRings/KEYRING/cryptoKeys/KEY.
1453+
Read https://cloud.google.com/functions/docs/securing/cmek for
1454+
more details including granting necessary service accounts
1455+
access to the key.
1456+
cloud_function_docker_repository (str, Optional):
1457+
Docker repository created with the same encryption key as
1458+
`cloud_function_kms_key_name` to store encrypted artifacts
1459+
created to support the cloud function. This is of the format
1460+
projects/PROJECT_ID/locations/LOCATION/repositories/REPOSITORY_NAME.
1461+
For more details see
1462+
https://cloud.google.com/functions/docs/securing/cmek#before_you_begin.
14471463
Returns:
14481464
callable: A remote function object pointing to the cloud assets created
14491465
in the background to support the remote execution. The cloud assets can be
@@ -1463,6 +1479,8 @@ def remote_function(
14631479
name=name,
14641480
packages=packages,
14651481
cloud_function_service_account=cloud_function_service_account,
1482+
cloud_function_kms_key_name=cloud_function_kms_key_name,
1483+
cloud_function_docker_repository=cloud_function_docker_repository,
14661484
)
14671485

14681486
def read_gbq_function(

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
"geopandas >=0.12.2",
4040
"google-auth >=2.15.0,<3.0dev",
4141
"google-cloud-bigquery[bqstorage,pandas] >=3.10.0",
42-
"google-cloud-functions >=1.10.1",
42+
"google-cloud-functions >=1.12.0",
4343
"google-cloud-bigquery-connection >=1.12.0",
4444
"google-cloud-iam >=2.12.1",
4545
"google-cloud-resource-manager >=1.10.3",

testing/constraints-3.9.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ gcsfs==2023.3.0
55
geopandas==0.12.2
66
google-auth==2.15.0
77
google-cloud-bigquery==3.10.0
8-
google-cloud-functions==1.10.1
8+
google-cloud-functions==1.12.0
99
google-cloud-bigquery-connection==1.12.0
1010
google-cloud-iam==2.12.1
1111
google-cloud-resource-manager==1.10.3

tests/system/large/test_remote_function.py

+66-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
import textwrap
2323

2424
from google.api_core.exceptions import BadRequest, NotFound, ResourceExhausted
25-
from google.cloud import bigquery, functions_v2
25+
from google.cloud import bigquery, functions_v2, storage
2626
import pandas
2727
import pytest
2828
import test_utils.prefixer
@@ -1322,3 +1322,68 @@ def square_num(x):
13221322
cleanup_remote_function_assets(
13231323
rf_session.bqclient, rf_session.cloudfunctionsclient, square_num
13241324
)
1325+
1326+
1327+
@pytest.mark.flaky(retries=2, delay=120)
1328+
def test_remote_function_with_gcf_cmek():
1329+
# TODO(shobs): Automate the following set-up during testing in the test project.
1330+
#
1331+
# For upfront convenience, the following set up has been statically created
1332+
# in the project bigframes-dev-perf via cloud console:
1333+
#
1334+
# 1. Created an encryption key and granted the necessary service accounts
1335+
# the required IAM permissions as per https://cloud.google.com/kms/docs/create-key
1336+
# 2. Created a docker repository with CMEK (created in step 1) enabled as per
1337+
# https://cloud.google.com/artifact-registry/docs/repositories/create-repos#overview
1338+
#
1339+
project = "bigframes-dev-perf"
1340+
cmek = "projects/bigframes-dev-perf/locations/us-central1/keyRings/bigframesKeyRing/cryptoKeys/bigframesKey"
1341+
docker_repository = (
1342+
"projects/bigframes-dev-perf/locations/us-central1/repositories/rf-artifacts"
1343+
)
1344+
1345+
session = bigframes.Session(context=bigframes.BigQueryOptions(project=project))
1346+
try:
1347+
1348+
@session.remote_function(
1349+
[int],
1350+
int,
1351+
reuse=False,
1352+
cloud_function_kms_key_name=cmek,
1353+
cloud_function_docker_repository=docker_repository,
1354+
)
1355+
def square_num(x):
1356+
if x is None:
1357+
return x
1358+
return x * x
1359+
1360+
df = pandas.DataFrame({"num": [-1, 0, None, 1]}, dtype="Int64")
1361+
bf = session.read_pandas(df)
1362+
1363+
bf_result_col = bf["num"].apply(square_num)
1364+
bf_result = bf.assign(result=bf_result_col).to_pandas()
1365+
1366+
pd_result_col = df["num"].apply(lambda x: x if x is None else x * x)
1367+
pd_result = df.assign(result=pd_result_col)
1368+
1369+
assert_pandas_df_equal(
1370+
bf_result, pd_result, check_dtype=False, check_index_type=False
1371+
)
1372+
1373+
# Assert that the GCF is created with the intended CMEK
1374+
gcf = session.cloudfunctionsclient.get_function(
1375+
name=square_num.bigframes_cloud_function
1376+
)
1377+
assert gcf.kms_key_name == cmek
1378+
1379+
# Assert that GCS artifact has CMEK applied
1380+
storage_client = storage.Client()
1381+
bucket = storage_client.bucket(gcf.build_config.source.storage_source.bucket)
1382+
blob = bucket.get_blob(gcf.build_config.source.storage_source.object_)
1383+
assert blob.kms_key_name.startswith(cmek)
1384+
1385+
finally:
1386+
# clean up the gcp assets created for the remote function
1387+
cleanup_remote_function_assets(
1388+
session.bqclient, session.cloudfunctionsclient, square_num
1389+
)

0 commit comments

Comments
 (0)