-
Notifications
You must be signed in to change notification settings - Fork 315
feat: reconfigure tqdm progress bar in %%bigquery magic #1355
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ac10e76
9db15fc
6ed2f03
c3ec846
f50bf8b
0c41c9e
d679134
87212a5
be991b7
c6317a4
13331b9
d86ae58
a6c3e88
adcef3b
f50600e
7162f0e
cef0491
46c3d95
11966b8
dfd965f
e785c84
2932a52
54b90e7
3ea255f
b94c863
4114221
09a6608
ef809a0
9f174bb
eabc228
de9ee35
bd35f96
f471b8c
99e108b
3f53ad5
4e901d4
30a3688
122c5a5
bf4b1c2
84a5073
fd6b101
9bedca6
a7e361c
0dfc48d
ef3fc98
846e66a
05423ff
f06e44d
beb9675
f84f75c
b0c40c5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,13 +15,15 @@ | |
"""Shared helper functions for tqdm progress bar.""" | ||
|
||
import concurrent.futures | ||
import sys | ||
import time | ||
import typing | ||
from typing import Optional | ||
import warnings | ||
|
||
try: | ||
import tqdm # type: ignore | ||
import tqdm.notebook as notebook # type: ignore | ||
|
||
except ImportError: # pragma: NO COVER | ||
tqdm = None | ||
|
@@ -47,9 +49,22 @@ def get_progress_bar(progress_bar_type, description, total, unit): | |
|
||
try: | ||
if progress_bar_type == "tqdm": | ||
return tqdm.tqdm(desc=description, total=total, unit=unit) | ||
return tqdm.tqdm( | ||
bar_format="{l_bar}{bar}|", | ||
colour="green", | ||
desc=description, | ||
file=sys.stdout, | ||
total=total, | ||
unit=unit, | ||
) | ||
elif progress_bar_type == "tqdm_notebook": | ||
return tqdm.notebook.tqdm(desc=description, total=total, unit=unit) | ||
return notebook.tqdm( | ||
bar_format="{l_bar}{bar}|", | ||
desc=description, | ||
file=sys.stdout, | ||
total=total, | ||
unit=unit, | ||
) | ||
elif progress_bar_type == "tqdm_gui": | ||
return tqdm.tqdm_gui(desc=description, total=total, unit=unit) | ||
except (KeyError, TypeError): | ||
|
@@ -80,7 +95,7 @@ def wait_for_query( | |
""" | ||
default_total = 1 | ||
current_stage = None | ||
start_time = time.time() | ||
start_time = time.perf_counter() | ||
|
||
progress_bar = get_progress_bar( | ||
progress_bar_type, "Query is running", default_total, "query" | ||
|
@@ -95,19 +110,15 @@ def wait_for_query( | |
current_stage = query_job.query_plan[i] | ||
progress_bar.total = len(query_job.query_plan) | ||
progress_bar.set_description( | ||
"Query executing stage {} and status {} : {:0.2f}s".format( | ||
current_stage.name, | ||
current_stage.status, | ||
time.time() - start_time, | ||
), | ||
f"Query executing stage {current_stage.name} and status {current_stage.status} : {time.perf_counter() - start_time:.2f}s" | ||
) | ||
try: | ||
query_result = query_job.result( | ||
timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=max_results | ||
) | ||
progress_bar.update(default_total) | ||
progress_bar.set_description( | ||
"Query complete after {:0.2f}s".format(time.time() - start_time), | ||
f"Job ID {query_job.job_id} successfully executed", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Might want to include the location in the output too, formatted like the bq CLI as location:id. https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_location I believe the location is required to fetch the job metadata for a non-default location. |
||
) | ||
break | ||
except concurrent.futures.TimeoutError: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -125,7 +125,7 @@ def __init__(self): | |
self._default_query_job_config = bigquery.QueryJobConfig() | ||
self._bigquery_client_options = client_options.ClientOptions() | ||
self._bqstorage_client_options = client_options.ClientOptions() | ||
self._progress_bar_type = "tqdm" | ||
self._progress_bar_type = "tqdm_notebook" | ||
|
||
@property | ||
def credentials(self): | ||
|
@@ -269,7 +269,7 @@ def progress_bar_type(self): | |
Manually setting the progress_bar_type: | ||
|
||
>>> from google.cloud.bigquery import magics | ||
>>> magics.context.progress_bar_type = "tqdm" | ||
>>> magics.context.progress_bar_type = "tqdm_notebook" | ||
""" | ||
return self._progress_bar_type | ||
|
||
|
@@ -286,7 +286,7 @@ def _handle_error(error, destination_var=None): | |
|
||
Args: | ||
error (Exception): | ||
An exception that ocurred during the query exectution. | ||
An exception that ocurred during the query execution. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Change the word "ocurred" to "occurred".
|
||
destination_var (Optional[str]): | ||
The name of the IPython session variable to store the query job. | ||
""" | ||
|
@@ -329,22 +329,25 @@ def _run_query(client, query, job_config=None): | |
Query complete after 2.07s | ||
'bf633912-af2c-4780-b568-5d868058632b' | ||
""" | ||
start_time = time.time() | ||
start_time = time.perf_counter() | ||
query_job = client.query(query, job_config=job_config) | ||
|
||
if job_config and job_config.dry_run: | ||
return query_job | ||
|
||
print("Executing query with job ID: {}".format(query_job.job_id)) | ||
print(f"Executing query with job ID: {query_job.job_id}") | ||
|
||
while True: | ||
print("\rQuery executing: {:0.2f}s".format(time.time() - start_time), end="") | ||
print( | ||
f"\rQuery executing: {time.perf_counter() - start_time:.2f}s".format(), | ||
end="", | ||
) | ||
try: | ||
query_job.result(timeout=0.5) | ||
break | ||
except futures.TimeoutError: | ||
continue | ||
print("\nQuery complete after {:0.2f}s".format(time.time() - start_time)) | ||
print(f"\nJob ID {query_job.job_id} successfully executed") | ||
return query_job | ||
|
||
|
||
|
@@ -365,7 +368,7 @@ def _create_dataset_if_necessary(client, dataset_id): | |
pass | ||
dataset = bigquery.Dataset(dataset_reference) | ||
dataset.location = client.location | ||
print("Creating dataset: {}".format(dataset_id)) | ||
print(f"Creating dataset: {dataset_id}") | ||
dataset = client.create_dataset(dataset) | ||
|
||
|
||
|
@@ -500,7 +503,7 @@ def _create_dataset_if_necessary(client, dataset_id): | |
default=None, | ||
help=( | ||
"Sets progress bar type to display a progress bar while executing the query." | ||
"Defaults to use tqdm. Install the ``tqdm`` package to use this feature." | ||
"Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature." | ||
), | ||
) | ||
def _cell_magic(line, query): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,8 +71,7 @@ def test_bigquery_magic(ipython_interactive): | |
# Removes blanks & terminal code (result of display clearing) | ||
updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) | ||
assert re.match("Executing query with job ID: .*", updates[0]) | ||
assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) | ||
assert re.match("Query complete after .*s", updates[-1]) | ||
assert (re.match("Query executing: .*s", line) for line in updates[1:-1]) | ||
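There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Using `assert` on a bare generator expression always passes, because a generator object is truthy regardless of its contents; keep the `all(...)` wrapper so that every line in `updates[1:-1]` is actually checked against the pattern.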
|
||
assert isinstance(result, pandas.DataFrame) | ||
assert len(result) == 10 # verify row count | ||
assert list(result) == ["url", "view_count"] # verify column names | ||
|
Uh oh!
There was an error while loading. Please reload this page.