Skip to content

Commit 2e841cf

Browse files
authored
Fix list_rows() max results with BQ storage client (#9178)
If `max_results` is set, the BigQuery Storage (streaming) API is not used: a `UserWarning` is issued and rows are fetched with the `tabledata.list` endpoint instead.
1 parent f00b60b commit 2e841cf

File tree

3 files changed

+65
-0
lines changed

3 files changed

+65
-0
lines changed

bigquery/google/cloud/bigquery/table.py

+8
Original file line numberDiff line numberDiff line change
@@ -1630,6 +1630,14 @@ def to_dataframe(self, bqstorage_client=None, dtypes=None, progress_bar_type=Non
16301630
if dtypes is None:
16311631
dtypes = {}
16321632

1633+
if bqstorage_client and self.max_results is not None:
1634+
warnings.warn(
1635+
"Cannot use bqstorage_client if max_results is set, "
1636+
"reverting to fetching data with the tabledata.list endpoint.",
1637+
stacklevel=2,
1638+
)
1639+
bqstorage_client = None
1640+
16331641
progress_bar = self._get_progress_bar(progress_bar_type)
16341642

16351643
frames = []

bigquery/tests/system.py

+21
Original file line numberDiff line numberDiff line change
@@ -2315,6 +2315,27 @@ def test_list_rows_page_size(self):
23152315
page = next(pages)
23162316
self.assertEqual(page.num_items, num_last_page)
23172317

2318+
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(
    bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`"
)
def test_list_rows_max_results_w_bqstorage(self):
    """Check that ``max_results`` is honoured when a storage client is passed.

    Lists the ``country_name`` column of the public
    ``bigquery-public-data.utility_us.country_code_iso`` table with
    ``max_results=100``, converts the iterator to a dataframe while
    supplying a BigQuery Storage client, and asserts that the dataframe
    holds exactly 100 rows.
    """
    expected_row_count = 100

    public_dataset = DatasetReference("bigquery-public-data", "utility_us")
    table_ref = public_dataset.table("country_code_iso")
    storage_client = bigquery_storage_v1beta1.BigQueryStorageClient(
        credentials=Config.CLIENT._credentials
    )

    # Only a single column is needed to count rows.
    name_column_only = [bigquery.SchemaField("country_name", "STRING")]
    limited_rows = Config.CLIENT.list_rows(
        table_ref,
        selected_fields=name_column_only,
        max_results=expected_row_count,
    )
    dataframe = limited_rows.to_dataframe(bqstorage_client=storage_client)

    self.assertEqual(len(dataframe.index), expected_row_count)
2338+
23182339
def temp_dataset(self, dataset_id, location=None):
23192340
dataset = Dataset(Config.CLIENT.dataset(dataset_id))
23202341
if location:

bigquery/tests/unit/test_table.py

+36
Original file line numberDiff line numberDiff line change
@@ -2208,6 +2208,42 @@ def test_to_dataframe_error_if_pandas_is_none(self):
22082208
with self.assertRaises(ValueError):
22092209
row_iterator.to_dataframe()
22102210

2211+
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_max_results_w_bqstorage_warning(self):
    """Check that ``to_dataframe`` warns when ``max_results`` meets a storage client.

    Builds a row iterator with ``max_results=42`` backed by a mocked
    ``api_request``, calls ``to_dataframe`` with a mock BigQuery Storage
    client, and asserts that exactly one ``UserWarning`` is recorded whose
    message mentions both "cannot use bqstorage_client" (case-insensitive)
    and "tabledata.list".
    """
    from google.cloud.bigquery.table import SchemaField

    schema = [
        SchemaField("name", "STRING", mode="REQUIRED"),
        SchemaField("age", "INTEGER", mode="REQUIRED"),
    ]
    rows = [
        {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
        {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
    ]
    path = "/foo"
    api_request = mock.Mock(return_value={"rows": rows})
    bqstorage_client = mock.Mock()

    row_iterator = self._make_one(
        client=_mock_client(),
        api_request=api_request,
        path=path,
        schema=schema,
        max_results=42,
    )

    with warnings.catch_warnings(record=True) as warned:
        # Record every warning regardless of the ambient filter configuration
        # (e.g. "ignore"/"error" filters or once-per-location suppression),
        # so the assertion below does not depend on the test environment.
        warnings.simplefilter("always")
        row_iterator.to_dataframe(bqstorage_client=bqstorage_client)

    # Match on the warning's own message text rather than on the repr-like
    # string of the recorded WarningMessage wrapper, which also embeds the
    # category, file name and line number.
    matches = [
        warning
        for warning in warned
        if warning.category is UserWarning
        and "cannot use bqstorage_client" in str(warning.message).lower()
        and "tabledata.list" in str(warning.message)
    ]
    self.assertEqual(len(matches), 1, msg="User warning was not emitted.")
2246+
22112247
@unittest.skipIf(pandas is None, "Requires `pandas`")
22122248
@unittest.skipIf(
22132249
bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`"

0 commit comments

Comments
 (0)