Skip to content

feat: add bigframes.options.compute.maximum_bytes_billed option that sets maximum bytes billed on query jobs #133

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion bigframes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

"""BigQuery DataFrames provides a DataFrame API scaled by the BigQuery engine."""

from bigframes._config import options
from bigframes._config import option_context, options
from bigframes._config.bigquery_options import BigQueryOptions
from bigframes.core.global_session import close_session, get_global_session
from bigframes.session import connect, Session
Expand All @@ -28,4 +28,5 @@
"connect",
"Session",
"__version__",
"option_context",
]
11 changes: 11 additions & 0 deletions bigframes/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
"""

import bigframes._config.bigquery_options as bigquery_options
import bigframes._config.compute_options as compute_options
import bigframes._config.display_options as display_options
import bigframes._config.sampling_options as sampling_options
import third_party.bigframes_vendored.pandas._config.config as pandas_config


class Options:
Expand All @@ -29,6 +31,7 @@ def __init__(self):
self._bigquery_options = bigquery_options.BigQueryOptions()
self._display_options = display_options.DisplayOptions()
self._sampling_options = sampling_options.SamplingOptions()
self._compute_options = compute_options.ComputeOptions()

@property
def bigquery(self) -> bigquery_options.BigQueryOptions:
Expand All @@ -49,6 +52,11 @@ def sampling(self) -> sampling_options.SamplingOptions:
parameters in specific functions."""
return self._sampling_options

@property
def compute(self) -> compute_options.ComputeOptions:
    """Options controlling object computation.

    Returns:
        The ``ComputeOptions`` instance held by this ``Options`` object.
        The same instance is returned on every access, so attribute
        assignments made through it are visible to later readers.
    """
    return self._compute_options


options = Options()
"""Global options for default session."""
Expand All @@ -58,3 +66,6 @@ def sampling(self) -> sampling_options.SamplingOptions:
"Options",
"options",
)


option_context = pandas_config.option_context
35 changes: 35 additions & 0 deletions bigframes/_config/compute_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Options for controlling computation."""

import dataclasses
from typing import Optional


@dataclasses.dataclass
class ComputeOptions:
    """
    Encapsulates configuration for compute options.

    Attributes:
        maximum_bytes_billed (int, Optional):
            Limits the bytes billed for query jobs. Queries that will have
            bytes billed beyond this limit will fail (without incurring a
            charge). If unspecified, this will be set to your project default.
            See `maximum_bytes_billed <https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed>`_.

    """

    # None means "no limit configured here".
    maximum_bytes_billed: Optional[int] = None
23 changes: 9 additions & 14 deletions bigframes/_config/display_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,12 @@ def pandas_repr(display_options: DisplayOptions):
This context manager makes sure we reset the pandas options when we're done
so that we don't override pandas behavior.
"""
original_max_cols = pd.options.display.max_columns
original_max_rows = pd.options.display.max_rows
original_show_dimensions = pd.options.display.show_dimensions

pd.options.display.max_columns = display_options.max_columns
pd.options.display.max_rows = display_options.max_rows
pd.options.display.show_dimensions = True # type: ignore

try:
yield
finally:
pd.options.display.max_columns = original_max_cols
pd.options.display.max_rows = original_max_rows
pd.options.display.show_dimensions = original_show_dimensions
with pd.option_context(
"display.max_columns",
display_options.max_columns,
"display.max_rows",
display_options.max_rows,
"display.show_dimensions",
True,
) as pandas_context:
yield (pandas_context)
4 changes: 4 additions & 0 deletions bigframes/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,9 @@ def read_gbq_function(function_name: str):
options = config.options
"""Global :class:`~bigframes._config.Options` to configure BigQuery DataFrames."""

option_context = config.option_context
"""Global :class:`~bigframes._config.option_context` to configure BigQuery DataFrames."""

# Session management APIs
get_global_session = global_session.get_global_session
close_session = global_session.close_session
Expand Down Expand Up @@ -494,6 +497,7 @@ def read_gbq_function(function_name: str):
# Other public pandas attributes
"NamedAgg",
"options",
"option_context",
# Session management APIs
"get_global_session",
"close_session",
Expand Down
19 changes: 14 additions & 5 deletions bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1504,12 +1504,10 @@ def _start_query(
max_results: Optional[int] = None,
) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]:
"""
Starts query job and waits for results
Starts query job and waits for results.
"""
if job_config is not None:
query_job = self.bqclient.query(sql, job_config=job_config)
else:
query_job = self.bqclient.query(sql)
job_config = self._prepare_job_config(job_config)
query_job = self.bqclient.query(sql, job_config=job_config)

opts = bigframes.options.display
if opts.progress_bar is not None and not query_job.configuration.dry_run:
Expand Down Expand Up @@ -1538,6 +1536,17 @@ def _start_generic_job(self, job: formatting_helpers.GenericJob):
else:
job.result()

def _prepare_job_config(
    self, job_config: Optional[bigquery.QueryJobConfig] = None
) -> bigquery.QueryJobConfig:
    """Build the job config for a query with the global compute options applied.

    The returned config is always a new object: neither the caller's
    ``job_config`` nor the client's ``default_query_job_config`` is
    modified. This keeps a ``maximum_bytes_billed`` value that was set
    temporarily (for example inside ``option_context``) from leaking into
    later queries.

    Args:
        job_config: Optional caller-supplied configuration to start from.

    Returns:
        A ``QueryJobConfig`` carrying ``maximum_bytes_billed`` from
        ``bigframes.options.compute`` when that option is not ``None``.
    """
    import copy

    if job_config is None:
        # Start from an empty config instead of the client's shared default.
        # Client.query() fills unset fields from default_query_job_config,
        # so the defaults still apply without being mutated here.
        job_config = bigquery.QueryJobConfig()
    else:
        # Copy so the caller's object can be reused safely afterwards.
        job_config = copy.deepcopy(job_config)

    maximum_bytes_billed = bigframes.options.compute.maximum_bytes_billed
    if maximum_bytes_billed is not None:
        job_config.maximum_bytes_billed = maximum_bytes_billed
    return job_config


def connect(context: Optional[bigquery_options.BigQueryOptions] = None) -> Session:
    """Create a new :class:`Session`.

    Args:
        context: BigQuery options passed unchanged to the ``Session``
            constructor. May be ``None``.

    Returns:
        The newly constructed ``Session``.
    """
    return Session(context)
Expand Down
2 changes: 2 additions & 0 deletions docs/reference/bigframes/options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ Options and settings
.. autoclass:: bigframes._config.display_options.DisplayOptions

.. autoclass:: bigframes._config.sampling_options.SamplingOptions

.. autoclass:: bigframes._config.compute_options.ComputeOptions
2 changes: 2 additions & 0 deletions docs/templates/toc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
uid: bigframes._config.display_options.DisplayOptions
- name: SamplingOptions
uid: bigframes._config.sampling_options.SamplingOptions
- name: ComputeOptions
uid: bigframes._config.compute_options.ComputeOptions
name: Options and settings
- items:
- name: Session
Expand Down
7 changes: 0 additions & 7 deletions tests/system/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,13 +898,6 @@ def usa_names_grouped_table(
return session.bqclient.get_table(table_id)


@pytest.fixture()
def deferred_repr():
bigframes.options.display.repr_mode = "deferred"
yield
bigframes.options.display.repr_mode = "head"


@pytest.fixture()
def restore_sampling_settings():
enable_downsampling = bigframes.options.sampling.enable_downsampling
Expand Down
17 changes: 8 additions & 9 deletions tests/system/small/test_progress_bar.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,11 @@ def test_query_job_repr(penguins_df_default_index: bf.dataframe.DataFrame):
assert string in query_job_repr


def test_query_job_dry_run(
penguins_df_default_index: bf.dataframe.DataFrame, capsys, deferred_repr
):
repr(penguins_df_default_index)
repr(penguins_df_default_index["body_mass_g"])
lines = capsys.readouterr().out.split("\n")
lines = filter(None, lines)
for line in lines:
assert "Computation deferred. Computation will process" in line
def test_query_job_dry_run(penguins_df_default_index: bf.dataframe.DataFrame, capsys):
    """Every non-empty line printed in deferred repr mode is the deferred notice."""
    with bf.option_context("display.repr_mode", "deferred"):
        repr(penguins_df_default_index)
        repr(penguins_df_default_index["body_mass_g"])
        captured_output = capsys.readouterr().out
        for printed_line in captured_output.split("\n"):
            # Blank lines carry no message; skip them.
            if not printed_line:
                continue
            assert "Computation deferred. Computation will process" in printed_line
30 changes: 30 additions & 0 deletions tests/unit/test_compute_options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import bigframes as bf

from . import resources


def test_maximum_bytes_option():
    """``compute.maximum_bytes_billed`` is forwarded to every query job config."""
    session = resources.create_bigquery_session()
    with bf.option_context("compute.maximum_bytes_billed", 10000):
        # clear initial method calls
        session.bqclient.method_calls = []
        session._start_query("query")
        # Each recorded call unpacks as (name, args, kwargs). Keep only the
        # query submissions so an unrelated client call cannot raise a
        # KeyError on "job_config".
        query_calls = [
            call for call in session.bqclient.method_calls if call[0] == "query"
        ]

    assert len(query_calls) > 0
    for _, _, kwargs in query_calls:
        assert kwargs["job_config"].maximum_bytes_billed == 10000

    # The option must be back at its default once the context has exited.
    assert bf.options.compute.maximum_bytes_billed is None
45 changes: 45 additions & 0 deletions third_party/bigframes_vendored/pandas/_config/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/_config/config.py
import contextlib
import operator

import bigframes


class option_context(contextlib.ContextDecorator):
    """
    Context manager to temporarily set options in the `with` statement context.

    You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.

    Each ``pat`` is a dotted attribute path resolved against
    ``bigframes.options`` (for example ``"display.max_rows"``). The current
    values are captured on entry and written back on exit.

    Raises
    ------
    ValueError
        If the arguments are not non-empty ``(pat, val)`` pairs, or if a
        ``pat`` has no ``"."`` separating the option group from the option.

    Examples
    --------
    >>> import bigframes
    >>> with bigframes.option_context('display.max_rows', 10, 'display.max_columns', 5):
    ...     pass
    """

    def __init__(self, *args) -> None:
        if len(args) % 2 != 0 or len(args) < 2:
            raise ValueError(
                "Need to invoke as option_context(pat, val, [(pat, val), ...])."
            )

        # Pair up the flat argument list: (pat0, val0), (pat1, val1), ...
        self.ops = list(zip(args[::2], args[1::2]))

    def __enter__(self) -> None:
        # Snapshot every current value before changing anything, so an
        # unknown option name raises while the options are still untouched.
        self.undo = [
            (pat, operator.attrgetter(pat)(bigframes.options)) for pat, _ in self.ops
        ]

        applied = 0
        try:
            for pat, val in self.ops:
                self._set_option(pat, val)
                applied += 1
        except BaseException:
            # __exit__ is not called when __enter__ raises, so undo the
            # options that were already applied before propagating.
            for pat, val in self.undo[:applied]:
                self._set_option(pat, val)
            raise

    def __exit__(self, *args) -> None:
        # Restore the values captured in __enter__.
        for pat, val in self.undo:
            self._set_option(pat, val)

    def _set_option(self, pat, val):
        """Assign ``val`` to the option at dotted path ``pat``."""
        root, sep, attr = pat.rpartition(".")
        if not sep:
            raise ValueError(
                f"Option name must be of the form 'group.option', got {pat!r}."
            )
        parent = operator.attrgetter(root)(bigframes.options)
        setattr(parent, attr, val)