Skip to content

Commit f83c00a

Browse files
authored
fix: disambiguate missing policy tags from explicitly unset policy tags (#983)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #981
Fixes #982
Towards googleapis/python-bigquery-pandas#387 🦕
1 parent ee1e25c commit f83c00a

File tree

8 files changed

+139
-219
lines changed

8 files changed

+139
-219
lines changed

docs/reference.rst

+1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ Schema
128128
:toctree: generated
129129

130130
schema.SchemaField
131+
schema.PolicyTagList
131132

132133

133134
Query

google/cloud/bigquery/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
from google.cloud.bigquery.routine import RoutineReference
8989
from google.cloud.bigquery.routine import RoutineType
9090
from google.cloud.bigquery.schema import SchemaField
91+
from google.cloud.bigquery.schema import PolicyTagList
9192
from google.cloud.bigquery.table import PartitionRange
9293
from google.cloud.bigquery.table import RangePartitioning
9394
from google.cloud.bigquery.table import Row
@@ -140,6 +141,7 @@
140141
"RoutineReference",
141142
# Shared helpers
142143
"SchemaField",
144+
"PolicyTagList",
143145
"UDFResource",
144146
"ExternalConfig",
145147
"BigtableOptions",

google/cloud/bigquery/schema.py

+48-52
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@
1515
"""Schemas for BigQuery tables / queries."""
1616

1717
import collections
18-
from typing import Optional
18+
import enum
19+
from typing import Iterable, Union
1920

2021
from google.cloud.bigquery_v2 import types
2122

2223

23-
_DEFAULT_VALUE = object()
2424
_STRUCT_TYPES = ("RECORD", "STRUCT")
2525

2626
# SQL types reference:
@@ -49,47 +49,62 @@
4949
"""String names of the legacy SQL types to integer codes of Standard SQL types."""
5050

5151

52+
class _DefaultSentinel(enum.Enum):
53+
"""Object used as 'sentinel' indicating default value should be used.
54+
55+
Uses enum so that pytype/mypy knows that this is the only possible value.
56+
https://stackoverflow.com/a/60605919/101923
57+
58+
Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8.
59+
https://docs.python.org/3/library/typing.html#typing.Literal
60+
"""
61+
62+
DEFAULT_VALUE = object()
63+
64+
65+
_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE
66+
67+
5268
class SchemaField(object):
5369
"""Describe a single field within a table schema.
5470
5571
Args:
56-
name (str): The name of the field.
72+
name: The name of the field.
5773
58-
field_type (str): The type of the field. See
74+
field_type:
75+
The type of the field. See
5976
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
6077
61-
mode (Optional[str]): The mode of the field. See
78+
mode:
79+
Defaults to ``'NULLABLE'``. The mode of the field. See
6280
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
6381
64-
description (Optional[str]): Description for the field.
82+
description: Description for the field.
6583
66-
fields (Optional[Tuple[google.cloud.bigquery.schema.SchemaField]]):
67-
Subfields (requires ``field_type`` of 'RECORD').
84+
fields: Subfields (requires ``field_type`` of 'RECORD').
6885
69-
policy_tags (Optional[PolicyTagList]): The policy tag list for the field.
86+
policy_tags: The policy tag list for the field.
7087
71-
precision (Optional[int]):
88+
precision:
7289
Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.
7390
74-
scale (Optional[int]):
91+
scale:
7592
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
7693
77-
max_length (Optional[int]):
78-
Maximim length of fields with STRING or BYTES type.
79-
94+
max_length: Maximum length of fields with STRING or BYTES type.
8095
"""
8196

8297
def __init__(
8398
self,
84-
name,
85-
field_type,
86-
mode="NULLABLE",
87-
description=_DEFAULT_VALUE,
88-
fields=(),
89-
policy_tags=None,
90-
precision=_DEFAULT_VALUE,
91-
scale=_DEFAULT_VALUE,
92-
max_length=_DEFAULT_VALUE,
99+
name: str,
100+
field_type: str,
101+
mode: str = "NULLABLE",
102+
description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
103+
fields: Iterable["SchemaField"] = (),
104+
policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
105+
precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
106+
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
107+
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
93108
):
94109
self._properties = {
95110
"name": name,
@@ -105,28 +120,12 @@ def __init__(
105120
self._properties["scale"] = scale
106121
if max_length is not _DEFAULT_VALUE:
107122
self._properties["maxLength"] = max_length
123+
if policy_tags is not _DEFAULT_VALUE:
124+
self._properties["policyTags"] = (
125+
policy_tags.to_api_repr() if policy_tags is not None else None
126+
)
108127
self._fields = tuple(fields)
109128

110-
self._policy_tags = self._determine_policy_tags(field_type, policy_tags)
111-
112-
@staticmethod
113-
def _determine_policy_tags(
114-
field_type: str, given_policy_tags: Optional["PolicyTagList"]
115-
) -> Optional["PolicyTagList"]:
116-
"""Return the given policy tags, or their suitable representation if `None`.
117-
118-
Args:
119-
field_type: The type of the schema field.
120-
given_policy_tags: The policy tags to maybe ajdust.
121-
"""
122-
if given_policy_tags is not None:
123-
return given_policy_tags
124-
125-
if field_type is not None and field_type.upper() in _STRUCT_TYPES:
126-
return None
127-
128-
return PolicyTagList()
129-
130129
@staticmethod
131130
def __get_int(api_repr, name):
132131
v = api_repr.get(name, _DEFAULT_VALUE)
@@ -152,10 +151,10 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
152151
mode = api_repr.get("mode", "NULLABLE")
153152
description = api_repr.get("description", _DEFAULT_VALUE)
154153
fields = api_repr.get("fields", ())
154+
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
155155

156-
policy_tags = cls._determine_policy_tags(
157-
field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags"))
158-
)
156+
if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
157+
policy_tags = PolicyTagList.from_api_repr(policy_tags)
159158

160159
return cls(
161160
field_type=field_type,
@@ -230,7 +229,8 @@ def policy_tags(self):
230229
"""Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list
231230
definition for this field.
232231
"""
233-
return self._policy_tags
232+
resource = self._properties.get("policyTags")
233+
return PolicyTagList.from_api_repr(resource) if resource is not None else None
234234

235235
def to_api_repr(self) -> dict:
236236
"""Return a dictionary representing this schema field.
@@ -244,10 +244,6 @@ def to_api_repr(self) -> dict:
244244
# add this to the serialized representation.
245245
if self.field_type.upper() in _STRUCT_TYPES:
246246
answer["fields"] = [f.to_api_repr() for f in self.fields]
247-
else:
248-
# Explicitly include policy tag definition (we must not do it for RECORD
249-
# fields, because those are not leaf fields).
250-
answer["policyTags"] = self.policy_tags.to_api_repr()
251247

252248
# Done; return the serialized dictionary.
253249
return answer
@@ -272,7 +268,7 @@ def _key(self):
272268
field_type = f"{field_type}({self.precision})"
273269

274270
policy_tags = (
275-
() if self._policy_tags is None else tuple(sorted(self._policy_tags.names))
271+
() if self.policy_tags is None else tuple(sorted(self.policy_tags.names))
276272
)
277273

278274
return (

tests/system/test_client.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -673,14 +673,15 @@ def test_unset_table_schema_attributes(self):
673673
mode=old_field.mode,
674674
description=None,
675675
fields=old_field.fields,
676-
policy_tags=None,
676+
policy_tags=PolicyTagList(),
677677
)
678678

679679
table.schema = new_schema
680680
updated_table = Config.CLIENT.update_table(table, ["schema"])
681681

682682
self.assertFalse(updated_table.schema[1].description) # Empty string or None.
683-
self.assertEqual(updated_table.schema[1].policy_tags.names, ())
683+
# policyTags key expected to be missing from response.
684+
self.assertIsNone(updated_table.schema[1].policy_tags)
684685

685686
def test_update_table_clustering_configuration(self):
686687
dataset = self.temp_dataset(_make_dataset_id("update_table"))

tests/unit/job/test_load_config.py

-4
Original file line numberDiff line numberDiff line change
@@ -484,13 +484,11 @@ def test_schema_setter_fields(self):
484484
"name": "full_name",
485485
"type": "STRING",
486486
"mode": "REQUIRED",
487-
"policyTags": {"names": []},
488487
}
489488
age_repr = {
490489
"name": "age",
491490
"type": "INTEGER",
492491
"mode": "REQUIRED",
493-
"policyTags": {"names": []},
494492
}
495493
self.assertEqual(
496494
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
@@ -503,13 +501,11 @@ def test_schema_setter_valid_mappings_list(self):
503501
"name": "full_name",
504502
"type": "STRING",
505503
"mode": "REQUIRED",
506-
"policyTags": {"names": []},
507504
}
508505
age_repr = {
509506
"name": "age",
510507
"type": "INTEGER",
511508
"mode": "REQUIRED",
512-
"policyTags": {"names": []},
513509
}
514510
schema = [full_name_repr, age_repr]
515511
config.schema = schema

0 commit comments

Comments
 (0)