Skip to content

Commit d843100

Browse files
tswast, abdelmegahedgoogle
authored and committed
feat: add QueryJob.schema property for dry run queries (googleapis#1014)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass.
- [ ] Code coverage does not decrease (if any source code was changed).
- [ ] Appropriate docs were updated (if necessary).

Issue discovered while investigating what properties are needed in googleapis#967.
1 parent 0742cf2 commit d843100

File tree

4 files changed

+89
-24
lines changed

4 files changed

+89
-24
lines changed

google/cloud/bigquery/job/base.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -1005,7 +1005,9 @@ def from_api_repr(cls, resource: dict, client) -> "UnknownJob":
10051005
Returns:
10061006
UnknownJob: Job corresponding to the resource.
10071007
"""
1008-
job_ref_properties = resource.get("jobReference", {"projectId": client.project})
1008+
job_ref_properties = resource.get(
1009+
"jobReference", {"projectId": client.project, "jobId": None}
1010+
)
10091011
job_ref = _JobReference._from_api_repr(job_ref_properties)
10101012
job = cls(job_ref, client)
10111013
# Populate the job reference with the project, even if it has been

google/cloud/bigquery/job/query.py

+20-4
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
import copy
1919
import re
2020
import typing
21-
from typing import Any, Dict, Optional, Union
21+
from typing import Any, Dict, List, Optional, Union
2222

2323
from google.api_core import exceptions
2424
from google.api_core.future import polling as polling_future
@@ -38,6 +38,7 @@
3838
from google.cloud.bigquery.query import UDFResource
3939
from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY
4040
from google.cloud.bigquery.routine import RoutineReference
41+
from google.cloud.bigquery.schema import SchemaField
4142
from google.cloud.bigquery.table import _EmptyRowIterator
4243
from google.cloud.bigquery.table import RangePartitioning
4344
from google.cloud.bigquery.table import _table_arg_to_table_ref
@@ -57,6 +58,7 @@
5758
import pyarrow
5859
from google.api_core import retry as retries
5960
from google.cloud import bigquery_storage
61+
from google.cloud.bigquery.client import Client
6062
from google.cloud.bigquery.table import RowIterator
6163

6264

@@ -853,7 +855,7 @@ def to_api_repr(self):
853855
}
854856

855857
@classmethod
856-
def from_api_repr(cls, resource: dict, client) -> "QueryJob":
858+
def from_api_repr(cls, resource: dict, client: "Client") -> "QueryJob":
857859
"""Factory: construct a job given its API representation
858860
859861
Args:
@@ -866,8 +868,10 @@ def from_api_repr(cls, resource: dict, client) -> "QueryJob":
866868
Returns:
867869
google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``.
868870
"""
869-
cls._check_resource_config(resource)
870-
job_ref = _JobReference._from_api_repr(resource["jobReference"])
871+
job_ref_properties = resource.setdefault(
872+
"jobReference", {"projectId": client.project, "jobId": None}
873+
)
874+
job_ref = _JobReference._from_api_repr(job_ref_properties)
871875
job = cls(job_ref, None, client=client)
872876
job._set_properties(resource)
873877
return job
@@ -887,6 +891,18 @@ def query_plan(self):
887891
plan_entries = self._job_statistics().get("queryPlan", ())
888892
return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries]
889893

894+
@property
895+
def schema(self) -> Optional[List[SchemaField]]:
896+
"""The schema of the results.
897+
898+
Present only for successful dry run of non-legacy SQL queries.
899+
"""
900+
resource = self._job_statistics().get("schema")
901+
if resource is None:
902+
return None
903+
fields = resource.get("fields", [])
904+
return [SchemaField.from_api_repr(field) for field in fields]
905+
890906
@property
891907
def timeline(self):
892908
"""List(TimelineEntry): Return the query execution timeline

tests/system/test_query.py

+29
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,29 @@
1+
# Copyright 2021 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from google.cloud import bigquery
16+
17+
18+
def test_dry_run(bigquery_client: bigquery.Client, scalars_table: str):
19+
query_config = bigquery.QueryJobConfig()
20+
query_config.dry_run = True
21+
22+
query_string = f"SELECT * FROM {scalars_table}"
23+
query_job = bigquery_client.query(query_string, job_config=query_config,)
24+
25+
# Note: `query_job.result()` is not necessary on a dry run query. All
26+
# necessary information is returned in the initial response.
27+
assert query_job.dry_run is True
28+
assert query_job.total_bytes_processed > 0
29+
assert len(query_job.schema) > 0

tests/unit/job/test_query.py

+37-19
Original file line number | Diff line number | Diff line change
@@ -269,25 +269,6 @@ def test_ctor_w_query_parameters(self):
269269
job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config)
270270
self.assertEqual(job.query_parameters, query_parameters)
271271

272-
def test_from_api_repr_missing_identity(self):
273-
self._setUpConstants()
274-
client = _make_client(project=self.PROJECT)
275-
RESOURCE = {}
276-
klass = self._get_target_class()
277-
with self.assertRaises(KeyError):
278-
klass.from_api_repr(RESOURCE, client=client)
279-
280-
def test_from_api_repr_missing_config(self):
281-
self._setUpConstants()
282-
client = _make_client(project=self.PROJECT)
283-
RESOURCE = {
284-
"id": "%s:%s" % (self.PROJECT, self.DS_ID),
285-
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
286-
}
287-
klass = self._get_target_class()
288-
with self.assertRaises(KeyError):
289-
klass.from_api_repr(RESOURCE, client=client)
290-
291272
def test_from_api_repr_bare(self):
292273
self._setUpConstants()
293274
client = _make_client(project=self.PROJECT)
@@ -1405,6 +1386,43 @@ def test_result_transport_timeout_error(self):
14051386
with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError):
14061387
job.result(timeout=1)
14071388

1389+
def test_no_schema(self):
1390+
client = _make_client(project=self.PROJECT)
1391+
resource = {}
1392+
klass = self._get_target_class()
1393+
job = klass.from_api_repr(resource, client=client)
1394+
assert job.schema is None
1395+
1396+
def test_schema(self):
1397+
client = _make_client(project=self.PROJECT)
1398+
resource = {
1399+
"statistics": {
1400+
"query": {
1401+
"schema": {
1402+
"fields": [
1403+
{"mode": "NULLABLE", "name": "bool_col", "type": "BOOLEAN"},
1404+
{
1405+
"mode": "NULLABLE",
1406+
"name": "string_col",
1407+
"type": "STRING",
1408+
},
1409+
{
1410+
"mode": "NULLABLE",
1411+
"name": "timestamp_col",
1412+
"type": "TIMESTAMP",
1413+
},
1414+
]
1415+
},
1416+
},
1417+
},
1418+
}
1419+
klass = self._get_target_class()
1420+
job = klass.from_api_repr(resource, client=client)
1421+
assert len(job.schema) == 3
1422+
assert job.schema[0].field_type == "BOOLEAN"
1423+
assert job.schema[1].field_type == "STRING"
1424+
assert job.schema[2].field_type == "TIMESTAMP"
1425+
14081426
def test__begin_error(self):
14091427
from google.cloud import exceptions
14101428

0 commit comments

Comments
 (0)