
feat: expose gcf max timeout in remote_function #639

Merged: 3 commits, Apr 25, 2024
28 changes: 25 additions & 3 deletions bigframes/functions/remote_function.py
@@ -341,7 +341,9 @@ def generate_cloud_function_code(self, def_, dir, package_requirements=None):
        entry_point = self.generate_cloud_function_main_code(def_, dir)
        return entry_point

    def create_cloud_function(self, def_, cf_name, package_requirements=None):
    def create_cloud_function(
        self, def_, cf_name, package_requirements=None, cloud_function_timeout=600
    ):
        """Create a cloud function from the given user defined function."""

        # Build and deploy folder structure containing cloud function
@@ -409,7 +411,14 @@ def create_cloud_function(self, def_, cf_name, package_requirements=None):
        )
        function.service_config = functions_v2.ServiceConfig()
        function.service_config.available_memory = "1024M"
        function.service_config.timeout_seconds = 600
        if cloud_function_timeout is not None:
            if cloud_function_timeout > 1200:
                raise ValueError(
                    "BigQuery remote function can wait only up to 20 minutes"
                    ", see for more details "
                    "https://cloud.google.com/bigquery/quotas#remote_function_limits."
                )
            function.service_config.timeout_seconds = cloud_function_timeout
        function.service_config.service_account_email = (
            self._cloud_function_service_account
        )
@@ -456,6 +465,7 @@ def provision_bq_remote_function(
        name,
        package_requirements,
        max_batching_rows,
        cloud_function_timeout,
    ):
        """Provision a BigQuery remote function."""
        # If reuse of any existing function with the same name (indicated by the
@@ -477,7 +487,7 @@ def provision_bq_remote_function(
        # Create the cloud function if it does not exist
        if not cf_endpoint:
            cf_endpoint = self.create_cloud_function(
                def_, cloud_function_name, package_requirements
                def_, cloud_function_name, package_requirements, cloud_function_timeout
            )
        else:
            logger.info(f"Cloud function {cloud_function_name} already exists.")
@@ -631,6 +641,7 @@ def remote_function(
    cloud_function_kms_key_name: Optional[str] = None,
    cloud_function_docker_repository: Optional[str] = None,
    max_batching_rows: Optional[int] = 1000,
    cloud_function_timeout: Optional[int] = 600,
):
    """Decorator to turn a user defined function into a BigQuery remote function.

@@ -756,6 +767,16 @@ def remote_function(
            `None` can be passed to let BQ remote functions service apply
            default batching. See for more details
            https://cloud.google.com/bigquery/docs/remote-functions#limiting_number_of_rows_in_a_batch_request.
        cloud_function_timeout (int, Optional):
            The maximum amount of time (in seconds) BigQuery should wait for
            the cloud function to return a response. See for more details
            https://cloud.google.com/functions/docs/configuring/timeout.
            Please note that even though the cloud function (2nd gen) itself
            allows setting up to 60 minutes of timeout, BigQuery remote
            function can wait only up to 20 minutes, see for more details
            https://cloud.google.com/bigquery/quotas#remote_function_limits.
            By default BigQuery DataFrames uses a 10 minute timeout. `None`
            can be passed to let the cloud function's default timeout take effect.
    """
    import bigframes.pandas as bpd

@@ -880,6 +901,7 @@ def wrapper(f):
            name,
            packages,
            max_batching_rows,
            cloud_function_timeout,
        )

        # TODO: Move ibis logic to compiler step
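The new `cloud_function_timeout` argument flows from the public `remote_function` decorator down to `create_cloud_function`, which deploys the backing Cloud Function with the requested `timeout_seconds` and rejects anything above BigQuery's 20-minute remote function limit. A minimal usage sketch follows; it assumes an already-configured BigQuery DataFrames environment (project, dataset, BigQuery connection), and the wrapped function body is purely illustrative:

```python
import bigframes.pandas as bpd

# Deploy the backing Cloud Function with the maximum timeout that BigQuery
# remote functions can wait for (1200 seconds = 20 minutes) instead of the
# 600-second default introduced by this PR.
@bpd.remote_function([int], int, reuse=False, cloud_function_timeout=1200)
def square(x):
    return x * x

# cloud_function_timeout=None would leave the Cloud Functions service
# default timeout in effect, and any value above 1200 raises ValueError.
```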
2 changes: 2 additions & 0 deletions bigframes/pandas/__init__.py
@@ -644,6 +644,7 @@ def remote_function(
    cloud_function_kms_key_name: Optional[str] = None,
    cloud_function_docker_repository: Optional[str] = None,
    max_batching_rows: Optional[int] = 1000,
    cloud_function_timeout: Optional[int] = 600,
):
    return global_session.with_default_session(
        bigframes.session.Session.remote_function,
@@ -658,6 +659,7 @@ def remote_function(
        cloud_function_kms_key_name=cloud_function_kms_key_name,
        cloud_function_docker_repository=cloud_function_docker_repository,
        max_batching_rows=max_batching_rows,
        cloud_function_timeout=cloud_function_timeout,
    )


12 changes: 12 additions & 0 deletions bigframes/session/__init__.py
@@ -1545,6 +1545,7 @@ def remote_function(
        cloud_function_kms_key_name: Optional[str] = None,
        cloud_function_docker_repository: Optional[str] = None,
        max_batching_rows: Optional[int] = 1000,
        cloud_function_timeout: Optional[int] = 600,
    ):
        """Decorator to turn a user defined function into a BigQuery remote function. Check out
        the code samples at: https://cloud.google.com/bigquery/docs/remote-functions#bigquery-dataframes.
@@ -1648,6 +1649,16 @@ def remote_function(
                `None` can be passed to let BQ remote functions service apply
                default batching. See for more details
                https://cloud.google.com/bigquery/docs/remote-functions#limiting_number_of_rows_in_a_batch_request.
            cloud_function_timeout (int, Optional):
                The maximum amount of time (in seconds) BigQuery should wait for
                the cloud function to return a response. See for more details
                https://cloud.google.com/functions/docs/configuring/timeout.
                Please note that even though the cloud function (2nd gen) itself
                allows setting up to 60 minutes of timeout, BigQuery remote
                function can wait only up to 20 minutes, see for more details
                https://cloud.google.com/bigquery/quotas#remote_function_limits.
                By default BigQuery DataFrames uses a 10 minute timeout. `None`
                can be passed to let the cloud function's default timeout take effect.
        Returns:
            callable: A remote function object pointing to the cloud assets created
            in the background to support the remote execution. The cloud assets can be
@@ -1670,6 +1681,7 @@ def remote_function(
            cloud_function_kms_key_name=cloud_function_kms_key_name,
            cloud_function_docker_repository=cloud_function_docker_repository,
            max_batching_rows=max_batching_rows,
            cloud_function_timeout=cloud_function_timeout,
        )

    def read_gbq_function(
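The same parameter is also available directly on `Session.remote_function`, in the non-decorator form the new system test uses. A short sketch; how the session is obtained and the wrapped function are illustrative assumptions, only `cloud_function_timeout` and the call shape come from this PR:

```python
import bigframes.pandas as bpd

# Obtaining a Session is environment-specific; any configured Session works.
session = bpd.get_global_session()

def add_one(x):
    return x + 1

# Ask for a 15-minute Cloud Function timeout; values above 1200 seconds are
# rejected because BigQuery remote functions wait at most 20 minutes.
add_one_remote = session.remote_function(
    [int], int, reuse=False, cloud_function_timeout=900
)(add_one)
```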
49 changes: 49 additions & 0 deletions tests/system/large/test_remote_function.py
@@ -1336,3 +1336,52 @@ def square(x):
        cleanup_remote_function_assets(
            session.bqclient, session.cloudfunctionsclient, square_remote
        )


@pytest.mark.parametrize(
    ("timeout_args", "effective_gcf_timeout"),
    [
        pytest.param({}, 600, id="no-set"),
        pytest.param({"cloud_function_timeout": None}, 60, id="set-None"),
        pytest.param({"cloud_function_timeout": 1200}, 1200, id="set-max-allowed"),
    ],
)
@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_gcf_timeout(
    session, scalars_dfs, timeout_args, effective_gcf_timeout
):
    try:

        def square(x):
            return x * x

        square_remote = session.remote_function(
            [int], int, reuse=False, **timeout_args
        )(square)

        # Assert that the GCF is created with the intended maximum timeout
        gcf = session.cloudfunctionsclient.get_function(
            name=square_remote.bigframes_cloud_function
        )
        assert gcf.service_config.timeout_seconds == effective_gcf_timeout

        scalars_df, scalars_pandas_df = scalars_dfs

        bf_result = scalars_df["int64_too"].apply(square_remote).to_pandas()
        pd_result = scalars_pandas_df["int64_too"].apply(square)

        pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False)
    finally:
        # clean up the gcp assets created for the remote function
        cleanup_remote_function_assets(
            session.bqclient, session.cloudfunctionsclient, square_remote
        )


@pytest.mark.flaky(retries=2, delay=120)
def test_remote_function_gcf_timeout_max_supported_exceeded(session):
    with pytest.raises(ValueError):

        @session.remote_function([int], int, reuse=False, cloud_function_timeout=1201)
        def square(x):
            return x * x