Skip to content

Commit 1a9431d

Browse files
authored
feat: add AvroOptions to configure AVRO external data (#994)
* feat: add `AvroOptions` to configure AVRO external data

Also:

* Unify `ExternalConfig` class to use `_properties` for everything. This does result in more code, but it should make maintenance easier as it aligns with our other mutable resource classes.
* Adds `bigtable_options`, `csv_options`, and `google_sheets_options` properties. This aligns with `parquet_options`.
* remove unnecessary check for options in to_api_repr
* add missing tests for to_api_repr
* remove redundant type identifiers
1 parent d9a03b4 commit 1a9431d

File tree

7 files changed

+518
-48
lines changed

7 files changed

+518
-48
lines changed

docs/format_options.rst

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
BigQuery Format Options
2+
=======================
3+
4+
.. automodule:: google.cloud.bigquery.format_options
5+
:members:
6+
:undoc-members:

docs/reference.rst

+5
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,11 @@ External Configuration
167167
external_config.CSVOptions
168168
external_config.GoogleSheetsOptions
169169

170+
.. toctree::
171+
:maxdepth: 2
172+
173+
format_options
174+
170175

171176
Magics
172177
======

google/cloud/bigquery/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
from google.cloud.bigquery.external_config import CSVOptions
5151
from google.cloud.bigquery.external_config import GoogleSheetsOptions
5252
from google.cloud.bigquery.external_config import ExternalSourceFormat
53+
from google.cloud.bigquery.format_options import AvroOptions
5354
from google.cloud.bigquery.format_options import ParquetOptions
5455
from google.cloud.bigquery.job import Compression
5556
from google.cloud.bigquery.job import CopyJob
@@ -144,6 +145,7 @@
144145
"PolicyTagList",
145146
"UDFResource",
146147
"ExternalConfig",
148+
"AvroOptions",
147149
"BigtableOptions",
148150
"BigtableColumnFamily",
149151
"BigtableColumn",

google/cloud/bigquery/external_config.py

+123-26
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,13 @@
2222

2323
import base64
2424
import copy
25-
from typing import FrozenSet, Iterable, Optional
25+
from typing import FrozenSet, Iterable, Optional, Union
2626

2727
from google.cloud.bigquery._helpers import _to_bytes
2828
from google.cloud.bigquery._helpers import _bytes_to_json
2929
from google.cloud.bigquery._helpers import _int_or_none
3030
from google.cloud.bigquery._helpers import _str_or_none
31-
from google.cloud.bigquery.format_options import ParquetOptions
31+
from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
3232
from google.cloud.bigquery.schema import SchemaField
3333

3434

@@ -548,7 +548,13 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
548548
return config
549549

550550

551-
_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions)
551+
_OPTION_CLASSES = (
552+
AvroOptions,
553+
BigtableOptions,
554+
CSVOptions,
555+
GoogleSheetsOptions,
556+
ParquetOptions,
557+
)
552558

553559

554560
class HivePartitioningOptions(object):
@@ -646,11 +652,6 @@ class ExternalConfig(object):
646652

647653
def __init__(self, source_format):
648654
self._properties = {"sourceFormat": source_format}
649-
self._options = None
650-
for optcls in _OPTION_CLASSES:
651-
if source_format == optcls._SOURCE_FORMAT:
652-
self._options = optcls()
653-
break
654655

655656
@property
656657
def source_format(self):
@@ -663,9 +664,17 @@ def source_format(self):
663664
return self._properties["sourceFormat"]
664665

665666
@property
666-
def options(self):
667-
"""Optional[Dict[str, Any]]: Source-specific options."""
668-
return self._options
667+
def options(self) -> Optional[Union[_OPTION_CLASSES]]:
668+
"""Source-specific options."""
669+
for optcls in _OPTION_CLASSES:
670+
if self.source_format == optcls._SOURCE_FORMAT:
671+
options = optcls()
672+
self._properties.setdefault(optcls._RESOURCE_NAME, {})
673+
options._properties = self._properties[optcls._RESOURCE_NAME]
674+
return options
675+
676+
# No matching source format found.
677+
return None
669678

670679
@property
671680
def autodetect(self):
@@ -815,23 +824,120 @@ def schema(self, value):
815824
self._properties["schema"] = prop
816825

817826
@property
818-
def parquet_options(self):
819-
"""Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional
820-
properties to set if ``sourceFormat`` is set to PARQUET.
827+
def avro_options(self) -> Optional[AvroOptions]:
828+
"""Additional properties to set if ``sourceFormat`` is set to AVRO.
829+
830+
See:
831+
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options
832+
"""
833+
if self.source_format == ExternalSourceFormat.AVRO:
834+
self._properties.setdefault(AvroOptions._RESOURCE_NAME, {})
835+
resource = self._properties.get(AvroOptions._RESOURCE_NAME)
836+
if resource is None:
837+
return None
838+
options = AvroOptions()
839+
options._properties = resource
840+
return options
841+
842+
@avro_options.setter
843+
def avro_options(self, value):
844+
if self.source_format != ExternalSourceFormat.AVRO:
845+
msg = f"Cannot set Avro options, source format is {self.source_format}"
846+
raise TypeError(msg)
847+
self._properties[AvroOptions._RESOURCE_NAME] = value._properties
848+
849+
@property
850+
def bigtable_options(self) -> Optional[BigtableOptions]:
851+
"""Additional properties to set if ``sourceFormat`` is set to BIGTABLE.
852+
853+
See:
854+
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options
855+
"""
856+
if self.source_format == ExternalSourceFormat.BIGTABLE:
857+
self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {})
858+
resource = self._properties.get(BigtableOptions._RESOURCE_NAME)
859+
if resource is None:
860+
return None
861+
options = BigtableOptions()
862+
options._properties = resource
863+
return options
864+
865+
@bigtable_options.setter
866+
def bigtable_options(self, value):
867+
if self.source_format != ExternalSourceFormat.BIGTABLE:
868+
msg = f"Cannot set Bigtable options, source format is {self.source_format}"
869+
raise TypeError(msg)
870+
self._properties[BigtableOptions._RESOURCE_NAME] = value._properties
871+
872+
@property
873+
def csv_options(self) -> Optional[CSVOptions]:
874+
"""Additional properties to set if ``sourceFormat`` is set to CSV.
875+
876+
See:
877+
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options
878+
"""
879+
if self.source_format == ExternalSourceFormat.CSV:
880+
self._properties.setdefault(CSVOptions._RESOURCE_NAME, {})
881+
resource = self._properties.get(CSVOptions._RESOURCE_NAME)
882+
if resource is None:
883+
return None
884+
options = CSVOptions()
885+
options._properties = resource
886+
return options
887+
888+
@csv_options.setter
889+
def csv_options(self, value):
890+
if self.source_format != ExternalSourceFormat.CSV:
891+
msg = f"Cannot set CSV options, source format is {self.source_format}"
892+
raise TypeError(msg)
893+
self._properties[CSVOptions._RESOURCE_NAME] = value._properties
894+
895+
@property
896+
def google_sheets_options(self) -> Optional[GoogleSheetsOptions]:
897+
"""Additional properties to set if ``sourceFormat`` is set to
898+
GOOGLE_SHEETS.
899+
900+
See:
901+
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options
902+
"""
903+
if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS:
904+
self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {})
905+
resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME)
906+
if resource is None:
907+
return None
908+
options = GoogleSheetsOptions()
909+
options._properties = resource
910+
return options
911+
912+
@google_sheets_options.setter
913+
def google_sheets_options(self, value):
914+
if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS:
915+
msg = f"Cannot set Google Sheets options, source format is {self.source_format}"
916+
raise TypeError(msg)
917+
self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties
918+
919+
@property
920+
def parquet_options(self) -> Optional[ParquetOptions]:
921+
"""Additional properties to set if ``sourceFormat`` is set to PARQUET.
821922
822923
See:
823924
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options
824925
"""
825-
if self.source_format != ExternalSourceFormat.PARQUET:
926+
if self.source_format == ExternalSourceFormat.PARQUET:
927+
self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {})
928+
resource = self._properties.get(ParquetOptions._RESOURCE_NAME)
929+
if resource is None:
826930
return None
827-
return self._options
931+
options = ParquetOptions()
932+
options._properties = resource
933+
return options
828934

829935
@parquet_options.setter
830936
def parquet_options(self, value):
831937
if self.source_format != ExternalSourceFormat.PARQUET:
832938
msg = f"Cannot set Parquet options, source format is {self.source_format}"
833939
raise TypeError(msg)
834-
self._options = value
940+
self._properties[ParquetOptions._RESOURCE_NAME] = value._properties
835941

836942
def to_api_repr(self) -> dict:
837943
"""Build an API representation of this object.
@@ -841,10 +947,6 @@ def to_api_repr(self) -> dict:
841947
A dictionary in the format used by the BigQuery API.
842948
"""
843949
config = copy.deepcopy(self._properties)
844-
if self.options is not None:
845-
r = self.options.to_api_repr()
846-
if r != {}:
847-
config[self.options._RESOURCE_NAME] = r
848950
return config
849951

850952
@classmethod
@@ -862,10 +964,5 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig":
862964
ExternalConfig: Configuration parsed from ``resource``.
863965
"""
864966
config = cls(resource["sourceFormat"])
865-
for optcls in _OPTION_CLASSES:
866-
opts = resource.get(optcls._RESOURCE_NAME)
867-
if opts is not None:
868-
config._options = optcls.from_api_repr(opts)
869-
break
870967
config._properties = copy.deepcopy(resource)
871968
return config

google/cloud/bigquery/format_options.py

+53-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,59 @@
1313
# limitations under the License.
1414

1515
import copy
16-
from typing import Dict
16+
from typing import Dict, Optional
17+
18+
19+
class AvroOptions:
20+
"""Options if source format is set to AVRO."""
21+
22+
_SOURCE_FORMAT = "AVRO"
23+
_RESOURCE_NAME = "avroOptions"
24+
25+
def __init__(self):
26+
self._properties = {}
27+
28+
@property
29+
def use_avro_logical_types(self) -> Optional[bool]:
30+
"""[Optional] If sourceFormat is set to 'AVRO', indicates whether to
31+
interpret logical types as the corresponding BigQuery data type (for
32+
example, TIMESTAMP), instead of using the raw type (for example,
33+
INTEGER).
34+
35+
See
36+
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#AvroOptions.FIELDS.use_avro_logical_types
37+
"""
38+
return self._properties.get("useAvroLogicalTypes")
39+
40+
@use_avro_logical_types.setter
41+
def use_avro_logical_types(self, value):
42+
self._properties["useAvroLogicalTypes"] = value
43+
44+
@classmethod
45+
def from_api_repr(cls, resource: Dict[str, bool]) -> "AvroOptions":
46+
"""Factory: construct an instance from a resource dict.
47+
48+
Args:
49+
resource (Dict[str, bool]):
50+
Definition of a :class:`~.format_options.AvroOptions` instance in
51+
the same representation as is returned from the API.
52+
53+
Returns:
54+
:class:`~.format_options.AvroOptions`:
55+
Configuration parsed from ``resource``.
56+
"""
57+
config = cls()
58+
config._properties = copy.deepcopy(resource)
59+
return config
60+
61+
def to_api_repr(self) -> dict:
62+
"""Build an API representation of this object.
63+
64+
Returns:
65+
Dict[str, bool]:
66+
A dictionary in the format used by the BigQuery API.
67+
"""
68+
return copy.deepcopy(self._properties)
1769

1870

1971
class ParquetOptions:

0 commit comments

Comments
 (0)