Skip to content

Commit 36fe86f

Browse files
authored
feat: add support for more detailed DML stats (#758)
* feat: add support for more detailed DML stats * Move is None check of DmlStats one level higher
1 parent c45a738 commit 36fe86f

File tree

7 files changed

+199
-0
lines changed

7 files changed

+199
-0
lines changed

docs/reference.rst

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ Job-Related Types
5858
job.Compression
5959
job.CreateDisposition
6060
job.DestinationFormat
61+
job.DmlStats
6162
job.Encoding
6263
job.OperationType
6364
job.QueryPlanEntry

google/cloud/bigquery/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
from google.cloud.bigquery.job import CopyJobConfig
5757
from google.cloud.bigquery.job import CreateDisposition
5858
from google.cloud.bigquery.job import DestinationFormat
59+
from google.cloud.bigquery.job import DmlStats
5960
from google.cloud.bigquery.job import Encoding
6061
from google.cloud.bigquery.job import ExtractJob
6162
from google.cloud.bigquery.job import ExtractJobConfig
@@ -142,6 +143,7 @@
142143
"BigtableOptions",
143144
"BigtableColumnFamily",
144145
"BigtableColumn",
146+
"DmlStats",
145147
"CSVOptions",
146148
"GoogleSheetsOptions",
147149
"ParquetOptions",

google/cloud/bigquery/job/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from google.cloud.bigquery.job.load import LoadJob
3232
from google.cloud.bigquery.job.load import LoadJobConfig
3333
from google.cloud.bigquery.job.query import _contains_order_by
34+
from google.cloud.bigquery.job.query import DmlStats
3435
from google.cloud.bigquery.job.query import QueryJob
3536
from google.cloud.bigquery.job.query import QueryJobConfig
3637
from google.cloud.bigquery.job.query import QueryPlanEntry
@@ -66,6 +67,7 @@
6667
"LoadJob",
6768
"LoadJobConfig",
6869
"_contains_order_by",
70+
"DmlStats",
6971
"QueryJob",
7072
"QueryJobConfig",
7173
"QueryPlanEntry",

google/cloud/bigquery/job/query.py

+37
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,35 @@ def _to_api_repr_table_defs(value):
114114
return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()}
115115

116116

117+
class DmlStats(typing.NamedTuple):
    """Detailed statistics for DML statements.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/DmlStats
    """

    inserted_row_count: int = 0
    """Number of inserted rows. Populated by DML INSERT and MERGE statements."""

    deleted_row_count: int = 0
    """Number of deleted rows. Populated by DML DELETE, MERGE and TRUNCATE statements.
    """

    updated_row_count: int = 0
    """Number of updated rows. Populated by DML UPDATE and MERGE statements."""

    @classmethod
    def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats":
        """Build a ``DmlStats`` instance from its API resource representation.

        Args:
            stats:
                Mapping that may contain the ``insertedRowCount``,
                ``deletedRowCount`` and ``updatedRowCount`` keys. Each value
                present is converted with ``int()``; a missing key falls back
                to the default declared on the corresponding field.

        Returns:
            A ``DmlStats`` populated from ``stats``.
        """
        # Pair every API key with its attribute by name and construct by
        # keyword, so the result does not depend on the declaration order of
        # the fields above.
        api_to_attribute = (
            ("insertedRowCount", "inserted_row_count"),
            ("deletedRowCount", "deleted_row_count"),
            ("updatedRowCount", "updated_row_count"),
        )
        defaults = cls._field_defaults

        return cls(
            **{
                attribute: int(stats.get(api_field, defaults[attribute]))
                for api_field, attribute in api_to_attribute
            }
        )
144+
145+
117146
class ScriptOptions:
118147
"""Options controlling the execution of scripts.
119148
@@ -1079,6 +1108,14 @@ def estimated_bytes_processed(self):
10791108
result = int(result)
10801109
return result
10811110

1111+
@property
def dml_stats(self) -> Optional[DmlStats]:
    """Detailed DML statistics of the job, if any were reported.

    Returns:
        ``None`` when the job statistics carry no ``dmlStats`` entry,
        otherwise the entry parsed with ``DmlStats.from_api_repr``.
    """
    raw_stats = self._job_statistics().get("dmlStats")
    return None if raw_stats is None else DmlStats.from_api_repr(raw_stats)
1118+
10821119
def _blocking_poll(self, timeout=None, **kwargs):
10831120
self._done_timeout = timeout
10841121
self._transport_timeout = timeout

tests/system/test_client.py

+56
Original file line numberDiff line numberDiff line change
@@ -1521,6 +1521,62 @@ def test_query_statistics(self):
15211521
self.assertGreater(stages_with_inputs, 0)
15221522
self.assertGreater(len(plan), stages_with_inputs)
15231523

1524+
def test_dml_statistics(self):
    """Run INSERT, UPDATE and DELETE statements and check each job's DML stats."""
    schema = (
        bigquery.SchemaField("foo", "STRING"),
        bigquery.SchemaField("bar", "INTEGER"),
    )

    dataset_id = _make_dataset_id("bq_system_test")
    self.temp_dataset(dataset_id)
    table_id = f"{Config.CLIENT.project}.{dataset_id}.test_dml_statistics"

    # Create the table before loading so that the column order is deterministic.
    create_table = helpers.retry_403(Config.CLIENT.create_table)
    table = create_table(Table(table_id, schema=schema))
    self.to_delete.insert(0, table)

    def run_and_check(sql, *, inserted, updated, deleted):
        # Execute one statement, wait for it, then compare all three counters.
        query_job = Config.CLIENT.query(sql)
        query_job.result()

        assert query_job.dml_stats is not None
        assert query_job.dml_stats.inserted_row_count == inserted
        assert query_job.dml_stats.updated_row_count == updated
        assert query_job.dml_stats.deleted_row_count == deleted

    # Insert a few rows and check the stats.
    run_and_check(
        f"""
        INSERT INTO `{table_id}`
        VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4);
        """,
        inserted=4,
        updated=0,
        deleted=0,
    )

    # Update some of the rows.
    run_and_check(
        f"""
        UPDATE `{table_id}`
        SET bar = bar + 1
        WHERE bar > 2;
        """,
        inserted=0,
        updated=2,
        deleted=0,
    )

    # Now delete a few rows and check the stats.
    run_and_check(
        f"""
        DELETE FROM `{table_id}`
        WHERE foo != "two";
        """,
        inserted=0,
        updated=0,
        deleted=3,
    )
1579+
15241580
def test_dbapi_w_standard_sql_types(self):
15251581
for sql, expected in helpers.STANDARD_SQL_EXAMPLES:
15261582
Config.CURSOR.execute(sql)

tests/unit/job/test_query.py

+64
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,24 @@ def _verify_table_definitions(self, job, config):
110110
self.assertIsNotNone(expected_ec)
111111
self.assertEqual(found_ec.to_api_repr(), expected_ec)
112112

113+
def _verify_dml_stats_resource_properties(self, job, resource):
    """Assert that ``job.dml_stats`` agrees with the ``dmlStats`` in ``resource``.

    When the resource's query statistics have no ``dmlStats`` entry, the job
    must report ``None``. Otherwise each row count on the job, rendered as a
    string, must equal the resource value, with ``"0"`` assumed for any key
    missing from the resource.
    """
    query_stats = resource.get("statistics", {}).get("query", {})

    if "dmlStats" not in query_stats:
        assert job.dml_stats is None
        return

    expected = query_stats["dmlStats"]
    actual = job.dml_stats

    attribute_to_api_key = (
        ("inserted_row_count", "insertedRowCount"),
        ("updated_row_count", "updatedRowCount"),
        ("deleted_row_count", "deletedRowCount"),
    )
    for attribute, api_key in attribute_to_api_key:
        assert str(getattr(actual, attribute)) == expected.get(api_key, "0")
130+
113131
def _verify_configuration_properties(self, job, configuration):
114132
if "dryRun" in configuration:
115133
self.assertEqual(job.dry_run, configuration["dryRun"])
@@ -118,6 +136,7 @@ def _verify_configuration_properties(self, job, configuration):
118136

119137
def _verifyResourceProperties(self, job, resource):
120138
self._verifyReadonlyResourceProperties(job, resource)
139+
self._verify_dml_stats_resource_properties(job, resource)
121140

122141
configuration = resource.get("configuration", {})
123142
self._verify_configuration_properties(job, configuration)
@@ -130,16 +149,19 @@ def _verifyResourceProperties(self, job, resource):
130149
self._verify_table_definitions(job, query_config)
131150

132151
self.assertEqual(job.query, query_config["query"])
152+
133153
if "createDisposition" in query_config:
134154
self.assertEqual(job.create_disposition, query_config["createDisposition"])
135155
else:
136156
self.assertIsNone(job.create_disposition)
157+
137158
if "defaultDataset" in query_config:
138159
ds_ref = job.default_dataset
139160
ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id}
140161
self.assertEqual(ds_ref, query_config["defaultDataset"])
141162
else:
142163
self.assertIsNone(job.default_dataset)
164+
143165
if "destinationTable" in query_config:
144166
table = job.destination
145167
tb_ref = {
@@ -150,14 +172,17 @@ def _verifyResourceProperties(self, job, resource):
150172
self.assertEqual(tb_ref, query_config["destinationTable"])
151173
else:
152174
self.assertIsNone(job.destination)
175+
153176
if "priority" in query_config:
154177
self.assertEqual(job.priority, query_config["priority"])
155178
else:
156179
self.assertIsNone(job.priority)
180+
157181
if "writeDisposition" in query_config:
158182
self.assertEqual(job.write_disposition, query_config["writeDisposition"])
159183
else:
160184
self.assertIsNone(job.write_disposition)
185+
161186
if "destinationEncryptionConfiguration" in query_config:
162187
self.assertIsNotNone(job.destination_encryption_configuration)
163188
self.assertEqual(
@@ -166,6 +191,7 @@ def _verifyResourceProperties(self, job, resource):
166191
)
167192
else:
168193
self.assertIsNone(job.destination_encryption_configuration)
194+
169195
if "schemaUpdateOptions" in query_config:
170196
self.assertEqual(
171197
job.schema_update_options, query_config["schemaUpdateOptions"]
@@ -190,6 +216,7 @@ def test_ctor_defaults(self):
190216
self.assertIsNone(job.create_disposition)
191217
self.assertIsNone(job.default_dataset)
192218
self.assertIsNone(job.destination)
219+
self.assertIsNone(job.dml_stats)
193220
self.assertIsNone(job.flatten_results)
194221
self.assertIsNone(job.priority)
195222
self.assertIsNone(job.use_query_cache)
@@ -278,6 +305,26 @@ def test_from_api_repr_with_encryption(self):
278305
self.assertIs(job._client, client)
279306
self._verifyResourceProperties(job, RESOURCE)
280307

308+
def test_from_api_repr_with_dml_stats(self):
    """A job built from a resource carrying ``dmlStats`` exposes those stats."""
    self._setUpConstants()
    client = _make_client(project=self.PROJECT)
    dml_stats = {"insertedRowCount": "15", "updatedRowCount": "2"}
    resource = {
        "id": self.JOB_ID,
        "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
        "configuration": {"query": {"query": self.QUERY}},
        "statistics": {"query": {"dmlStats": dml_stats}},
    }
    target_class = self._get_target_class()

    job = target_class.from_api_repr(resource, client=client)

    self.assertIs(job._client, client)
    self._verifyResourceProperties(job, resource)
327+
281328
def test_from_api_repr_w_properties(self):
282329
from google.cloud.bigquery.job import CreateDisposition
283330
from google.cloud.bigquery.job import SchemaUpdateOption
@@ -815,6 +862,23 @@ def test_estimated_bytes_processed(self):
815862
query_stats["estimatedBytesProcessed"] = str(est_bytes)
816863
self.assertEqual(job.estimated_bytes_processed, est_bytes)
817864

865+
def test_dml_stats(self):
    """``dml_stats`` stays ``None`` until a ``dmlStats`` entry is present."""
    from google.cloud.bigquery.job.query import DmlStats

    client = _make_client(project=self.PROJECT)
    job = self._make_one(self.JOB_ID, self.QUERY, client)
    assert job.dml_stats is None

    # An empty "statistics" mapping is not enough.
    statistics = {}
    job._properties["statistics"] = statistics
    assert job.dml_stats is None

    # Neither is an empty "query" sub-mapping.
    query_statistics = {}
    statistics["query"] = query_statistics
    assert job.dml_stats is None

    query_statistics["dmlStats"] = {"insertedRowCount": "35"}
    assert isinstance(job.dml_stats, DmlStats)
    assert job.dml_stats.inserted_row_count == 35
881+
818882
def test_result(self):
819883
from google.cloud.bigquery.table import RowIterator
820884

tests/unit/job/test_query_stats.py

+37
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,43 @@
1515
from .helpers import _Base
1616

1717

18+
class TestDmlStats:
    """Unit tests for the ``DmlStats`` named tuple."""

    @staticmethod
    def _get_target_class():
        """Import and return the class under test."""
        from google.cloud.bigquery.job import DmlStats

        return DmlStats

    def _make_one(self, *args, **kw):
        """Instantiate the class under test with the given arguments."""
        target_class = self._get_target_class()
        return target_class(*args, **kw)

    def test_ctor_defaults(self):
        stats = self._make_one()

        assert stats.inserted_row_count == 0
        assert stats.deleted_row_count == 0
        assert stats.updated_row_count == 0

    def test_from_api_repr_partial_stats(self):
        target_class = self._get_target_class()
        api_repr = {"deletedRowCount": "12"}

        stats = target_class.from_api_repr(api_repr)

        assert isinstance(stats, target_class)
        observed = (
            stats.inserted_row_count,
            stats.deleted_row_count,
            stats.updated_row_count,
        )
        assert observed == (0, 12, 0)

    def test_from_api_repr_full_stats(self):
        target_class = self._get_target_class()
        api_repr = {
            "updatedRowCount": "4",
            "insertedRowCount": "7",
            "deletedRowCount": "25",
        }

        stats = target_class.from_api_repr(api_repr)

        assert isinstance(stats, target_class)
        observed = (
            stats.inserted_row_count,
            stats.deleted_row_count,
            stats.updated_row_count,
        )
        assert observed == (7, 25, 4)
53+
54+
1855
class TestQueryPlanEntryStep(_Base):
1956
KIND = "KIND"
2057
SUBSTEPS = ("SUB1", "SUB2")

0 commit comments

Comments
 (0)