diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7672b49b6..10cf433a8 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 -# created: 2024-10-31T01:41:07.349286254Z + digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a +# created: 2025-01-09T12:01:16.422459506Z diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml index 4bb79e58e..b975c190d 100644 --- a/.github/release-trigger.yml +++ b/.github/release-trigger.yml @@ -1,2 +1,2 @@ enabled: true -multiScmName: +multiScmName: python-bigquery diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 66eacc82f..f99a5c4aa 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -1,16 +1,16 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes requirements.in +# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox -colorlog==6.8.2 \ - --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ - --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 +colorlog==6.9.0 \ + --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ + --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 # via nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ @@ -23,20 +23,50 @@ filelock==3.16.1 \ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 + # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in +packaging==24.2 \ + --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ + --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f # via nox platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed +tomli==2.2.1 \ + 
--hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ + --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ + --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ + --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ + --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ + --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ + --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ + --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ + --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ + --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ + --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ + --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ + --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ + --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ + --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ + --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ + --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ + --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ + --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ + --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ + --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ + --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ + --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ + --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ + --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ + --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ + --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ + --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ + --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ + --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ + --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ + --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.26.6 \ - --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ - --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 006d8ef93..16db448c1 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -254,9 +254,9 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.4 \ - --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ - --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +jinja2==3.1.5 \ + --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb 
\ + --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb # via gcp-releasetool keyring==25.4.1 \ --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ diff --git a/CHANGELOG.md b/CHANGELOG.md index 989b7f020..6a7ff5641 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.28.0](https://github.com/googleapis/python-bigquery/compare/v3.27.0...v3.28.0) (2025-01-15) + + +### Features + +* Add property for `allowNonIncrementalDefinition` for materialized view ([#2084](https://github.com/googleapis/python-bigquery/issues/2084)) ([3359ef3](https://github.com/googleapis/python-bigquery/commit/3359ef37b90243bea2d9e68bb996fe5d736f304c)) +* Add property for maxStaleness in table definitions ([#2087](https://github.com/googleapis/python-bigquery/issues/2087)) ([729322c](https://github.com/googleapis/python-bigquery/commit/729322c2288a30464f2f135ba18b9c4aa7d2f0da)) +* Add type hints to Client ([#2044](https://github.com/googleapis/python-bigquery/issues/2044)) ([40529de](https://github.com/googleapis/python-bigquery/commit/40529de923e25c41c6728c121b9c82a042967ada)) +* Adds ExternalCatalogDatasetOptions and tests ([#2111](https://github.com/googleapis/python-bigquery/issues/2111)) ([b929a90](https://github.com/googleapis/python-bigquery/commit/b929a900d49e2c15897134209ed9de5fc7f238cd)) +* Adds ForeignTypeInfo class and tests ([#2110](https://github.com/googleapis/python-bigquery/issues/2110)) ([55ca63c](https://github.com/googleapis/python-bigquery/commit/55ca63c23fcb56573e2de67e4f7899939628c4a1)) +* Adds new input validation function similar to isinstance. 
([#2107](https://github.com/googleapis/python-bigquery/issues/2107)) ([a2bebb9](https://github.com/googleapis/python-bigquery/commit/a2bebb95c5ef32ac7c7cbe19c3e7a9412cbee60d)) +* Adds StorageDescriptor and tests ([#2109](https://github.com/googleapis/python-bigquery/issues/2109)) ([6be0272](https://github.com/googleapis/python-bigquery/commit/6be0272ff25dac97a38ae4ee5aa02016dc82a0d8)) +* Adds the SerDeInfo class and tests ([#2108](https://github.com/googleapis/python-bigquery/issues/2108)) ([62960f2](https://github.com/googleapis/python-bigquery/commit/62960f255d05b15940a8d2cdc595592175fada11)) +* Migrate to pyproject.toml ([#2041](https://github.com/googleapis/python-bigquery/issues/2041)) ([1061611](https://github.com/googleapis/python-bigquery/commit/106161180ead01aca1ead909cf06ca559f68666d)) +* Preserve unknown fields from the REST API representation in `SchemaField` ([#2097](https://github.com/googleapis/python-bigquery/issues/2097)) ([aaf1eb8](https://github.com/googleapis/python-bigquery/commit/aaf1eb85ada95ab866be0199812ea7f5c7f50766)) +* Resource tags in dataset ([#2090](https://github.com/googleapis/python-bigquery/issues/2090)) ([3e13016](https://github.com/googleapis/python-bigquery/commit/3e130166f43dcc06704fe90edf9068dfd44842a6)) +* Support setting max_stream_count when fetching query result ([#2051](https://github.com/googleapis/python-bigquery/issues/2051)) ([d461297](https://github.com/googleapis/python-bigquery/commit/d4612979b812d2a835e47200f27a87a66bcb856a)) + + +### Bug Fixes + +* Allow geopandas 1.x ([#2065](https://github.com/googleapis/python-bigquery/issues/2065)) ([f2ab8cb](https://github.com/googleapis/python-bigquery/commit/f2ab8cbfe00d442ad3b40683ecfec320e53b4688)) + + +### Documentation + +* Render fields correctly for update calls ([#2055](https://github.com/googleapis/python-bigquery/issues/2055)) ([a4d9534](https://github.com/googleapis/python-bigquery/commit/a4d9534a900f13ae7355904cda05097d781f27e3)) + ## [3.27.0](https://github.com/googleapis/python-bigquery/compare/v3.26.0...v3.27.0) (2024-11-01) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 1eda80712..ea47af28d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -22,7 +22,7 @@ import re import os import warnings -from typing import Optional, Union +from typing import Optional, Union, Any, Tuple, Type from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -1004,3 +1004,33 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"): job_config=job_config, ) ) + + +def _isinstance_or_raise( + value: Any, + dtype: Union[Type, Tuple[Type, ...]], + none_allowed: Optional[bool] = False, +) -> Any: + """Determine whether a value type matches a given datatype or None. + Args: + value (Any): Value to be checked. + dtype (type): Expected data type or tuple of data types. + none_allowed Optional(bool): whether value is allowed to be None. Default + is False. 
+ Returns: + Any: Returns the input value if the type check is successful. + Raises: + TypeError: If the input value's type does not match the expected data type(s). + """ + if none_allowed and value is None: + return value + + if isinstance(value, dtype): + return value + + or_none = "" + if none_allowed: + or_none = " (or None)" + + msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}." + raise TypeError(msg) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 52c5084e3..03ded93b1 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -44,6 +44,8 @@ import uuid import warnings +import requests + from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -65,6 +67,7 @@ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.auth.credentials import Credentials from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _job_helpers from google.cloud.bigquery import _pandas_helpers @@ -126,6 +129,7 @@ _versions_helpers.PANDAS_VERSIONS.try_import() ) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this + ResumableTimeoutType = Union[ None, float, Tuple[float, float] ] # for resumable media methods @@ -133,8 +137,6 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] - import requests # required by api-core - _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 @@ -231,15 +233,23 @@ class Client(ClientWithProject): def __init__( self, - project=None, - credentials=None, - _http=None, - location=None, - default_query_job_config=None, - default_load_job_config=None, - client_info=None, - client_options=None, + project: Optional[str] = None, + credentials: Optional[Credentials] = None, + _http: Optional[requests.Session] = None, + location: Optional[str] = None, + default_query_job_config: Optional[QueryJobConfig] = None, + default_load_job_config: Optional[LoadJobConfig] = None, + client_info: Optional[google.api_core.client_info.ClientInfo] = None, + client_options: Optional[ + Union[google.api_core.client_options.ClientOptions, Dict[str, Any]] + ] = None, ) -> None: + if client_options is None: + client_options = {} + if isinstance(client_options, dict): + client_options = google.api_core.client_options.from_dict(client_options) + # assert isinstance(client_options, google.api_core.client_options.ClientOptions) + super(Client, self).__init__( project=project, credentials=credentials, @@ -247,14 +257,10 @@ def __init__( _http=_http, ) - kw_args = {"client_info": client_info} + kw_args: Dict[str, Any] = {"client_info": client_info} bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None client_universe = None - if client_options is None: - client_options = {} - if isinstance(client_options, dict): - client_options = google.api_core.client_options.from_dict(client_options) if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint @@ -1184,6 +1190,19 @@ def update_dataset( must be provided. If a field is listed in ``fields`` and is ``None`` in ``dataset``, it will be deleted. 
+ For example, to update the default expiration times, specify + both properties in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_dataset( + dataset, + [ + "default_partition_expiration_ms", + "default_table_expiration_ms", + ] + ) + If ``dataset.etag`` is not ``None``, the update will only succeed if the dataset on the server has the same ETag. Thus reading a dataset with ``get_dataset``, changing its fields, @@ -1198,19 +1217,6 @@ def update_dataset( The properties of ``dataset`` to change. These are strings corresponding to the properties of :class:`~google.cloud.bigquery.dataset.Dataset`. - - For example, to update the default expiration times, specify - both properties in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_dataset( - dataset, - [ - "default_partition_expiration_ms", - "default_table_expiration_ms", - ] - ) retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1254,6 +1260,15 @@ def update_model( must be provided. If a field is listed in ``fields`` and is ``None`` in ``model``, the field value will be deleted. + For example, to update the descriptive properties of the model, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_model( + model, ["description", "friendly_name"] + ) + If ``model.etag`` is not ``None``, the update will only succeed if the model on the server has the same ETag. Thus reading a model with ``get_model``, changing its fields, and then passing it to @@ -1266,15 +1281,6 @@ def update_model( The properties of ``model`` to change. These are strings corresponding to the properties of :class:`~google.cloud.bigquery.model.Model`. - - For example, to update the descriptive properties of the model, - specify them in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_model( - model, ["description", "friendly_name"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1318,6 +1324,15 @@ def update_routine( must be provided. If a field is listed in ``fields`` and is ``None`` in ``routine``, the field value will be deleted. + For example, to update the description property of the routine, + specify it in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_routine( + routine, ["description"] + ) + .. warning:: During beta, partial updates are not supported. You must provide all fields in the resource. @@ -1336,15 +1351,6 @@ def update_routine( fields (Sequence[str]): The fields of ``routine`` to change, spelled as the :class:`~google.cloud.bigquery.routine.Routine` properties. - - For example, to update the description property of the routine, - specify it in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_routine( - routine, ["description"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1392,6 +1398,16 @@ def update_table( must be provided. If a field is listed in ``fields`` and is ``None`` in ``table``, the field value will be deleted. + For example, to update the descriptive properties of the table, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_table( + table, + ["description", "friendly_name"] + ) + If ``table.etag`` is not ``None``, the update will only succeed if the table on the server has the same ETag. 
Thus reading a table with ``get_table``, changing its fields, and then passing it to @@ -1403,16 +1419,6 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. - - For example, to update the descriptive properties of the table, - specify them in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_table( - table, - ["description", "friendly_name"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c49a52faf..15a11fb40 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -27,6 +27,7 @@ from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import external_config from typing import Optional, List, Dict, Any, Union @@ -530,6 +531,8 @@ class Dataset(object): "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", + "resource_tags": "resourceTags", + "external_catalog_dataset_options": "externalCatalogDatasetOptions", } def __init__(self, dataset_ref) -> None: @@ -801,6 +804,28 @@ def labels(self, value): raise ValueError("Pass a dict") self._properties["labels"] = value + @property + def resource_tags(self): + """Dict[str, str]: Resource tags of the dataset. + + Optional. The tags attached to this dataset. Tag keys are globally + unique. Tag key is expected to be in the namespaced format, for + example "123456789012/environment" where 123456789012 is + the ID of the parent organization or project resource for this tag + key. Tag value is expected to be the short name, for example + "Production". + + Raises: + ValueError: for invalid value types. + """ + return self._properties.setdefault("resourceTags", {}) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("Pass a dict") + self._properties["resourceTags"] = value + @property def default_encryption_configuration(self): """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom @@ -875,6 +900,29 @@ def storage_billing_model(self, value): ) self._properties["storageBillingModel"] = value + @property + def external_catalog_dataset_options(self): + """Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = _helpers._get_sub_prop( + self._properties, ["externalCatalogDatasetOptions"] + ) + + if prop is not None: + prop = external_config.ExternalCatalogDatasetOptions.from_api_repr(prop) + return prop + + @external_catalog_dataset_options.setter + def external_catalog_dataset_options(self, value): + value = _helpers._isinstance_or_raise( + value, external_config.ExternalCatalogDatasetOptions, none_allowed=True + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] + ] = (value.to_api_repr() if value is not None else None) + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. 
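Taken together, the `dataset.py` changes above add two new writable properties, `resource_tags` and `external_catalog_dataset_options`. A minimal usage sketch follows — it is not part of the patch itself; the project ID, tag key, storage URI, and parameters are placeholders, and it assumes `update_dataset` accepts the new field names registered in `_PROPERTY_TO_API_FIELD`:

```python
from google.cloud import bigquery
from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions

client = bigquery.Client()
dataset = client.get_dataset("my-project.my_dataset")  # placeholder dataset ID

# Tag keys use the namespaced "<parent resource id>/<key>" form described above.
dataset.resource_tags = {"123456789012/environment": "production"}

# Metadata for an open-source-compatible (Hive-style) dataset.
dataset.external_catalog_dataset_options = ExternalCatalogDatasetOptions(
    default_storage_location_uri="gs://my-bucket/warehouse",  # placeholder URI
    parameters={"owner": "data-platform"},  # placeholder parameters
)

dataset = client.update_dataset(
    dataset, ["resource_tags", "external_catalog_dataset_options"]
)
```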
diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index a891bc232..7f2b58f2b 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -18,7 +18,7 @@ Job.configuration.query.tableDefinitions. """ -from __future__ import absolute_import +from __future__ import absolute_import, annotations import base64 import copy @@ -28,6 +28,7 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -1003,3 +1004,76 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": config = cls(resource["sourceFormat"]) config._properties = copy.deepcopy(resource) return config + + +class ExternalCatalogDatasetOptions: + """Options defining open source compatible datasets living in the BigQuery catalog. + Contains metadata of open source database, schema or namespace represented + by the current dataset. + + Args: + default_storage_location_uri (Optional[str]): The storage location URI for all + tables in the dataset. Equivalent to hive metastore's database + locationUri. Maximum length of 1024 characters. (str) + parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters + and properties of the open source schema. Maximum size of 2Mib. + """ + + def __init__( + self, + default_storage_location_uri: Optional[str] = None, + parameters: Optional[Dict[str, Any]] = None, + ): + self._properties: Dict[str, Any] = {} + self.default_storage_location_uri = default_storage_location_uri + self.parameters = parameters + + @property + def default_storage_location_uri(self) -> Optional[str]: + """Optional. The storage location URI for all tables in the dataset. + Equivalent to hive metastore's database locationUri. Maximum length of + 1024 characters.""" + + return self._properties.get("defaultStorageLocationUri") + + @default_storage_location_uri.setter + def default_storage_location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["defaultStorageLocationUri"] = value + + @property + def parameters(self) -> Optional[Dict[str, Any]]: + """Optional. A map of key value pairs defining the parameters and + properties of the open source schema. Maximum size of 2Mib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[Dict[str, Any]]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. 
+ """ + config = cls() + config._properties = api_repr + return config diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f5b03cbef..b278b686a 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -14,10 +14,13 @@ """Schemas for BigQuery tables / queries.""" +from __future__ import annotations import collections import enum -from typing import Any, Dict, Iterable, Optional, Union, cast +import typing +from typing import Any, cast, Dict, Iterable, Optional, Union +from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -203,15 +206,8 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() - - self._fields = tuple(fields) - - @staticmethod - def __get_int(api_repr, name): - v = api_repr.get(name, _DEFAULT_VALUE) - if v is not _DEFAULT_VALUE: - v = int(v) - return v + if fields: # Don't set the property if it's not set. + self._properties["fields"] = [field.to_api_repr() for field in fields] @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": @@ -225,43 +221,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ - field_type = api_repr["type"].upper() + placeholder = cls("this_will_be_replaced", "PLACEHOLDER") - # Handle optional properties with default values - mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description", _DEFAULT_VALUE) - fields = api_repr.get("fields", ()) - policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) + # Note: we don't make a copy of api_repr because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + placeholder._properties = api_repr - default_value_expression = api_repr.get("defaultValueExpression", None) - - if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: - policy_tags = PolicyTagList.from_api_repr(policy_tags) - - if api_repr.get("rangeElementType"): - range_element_type = cast(dict, api_repr.get("rangeElementType")) - element_type = range_element_type.get("type") - else: - element_type = None - - return cls( - field_type=field_type, - fields=[cls.from_api_repr(f) for f in fields], - mode=mode.upper(), - default_value_expression=default_value_expression, - description=description, - name=api_repr["name"], - policy_tags=policy_tags, - precision=cls.__get_int(api_repr, "precision"), - scale=cls.__get_int(api_repr, "scale"), - max_length=cls.__get_int(api_repr, "maxLength"), - range_element_type=element_type, - ) + return placeholder @property def name(self): """str: The name of the field.""" - return self._properties["name"] + return self._properties.get("name", "") @property def field_type(self): @@ -270,7 +242,10 @@ def field_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._properties["type"] + type_ = self._properties.get("type") + if type_ is None: # Shouldn't happen, but some unit tests do this. 
+ return None + return cast(str, type_).upper() @property def mode(self): @@ -279,7 +254,7 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._properties.get("mode") + return cast(str, self._properties.get("mode", "NULLABLE")).upper() @property def is_nullable(self): @@ -299,17 +274,17 @@ def description(self): @property def precision(self): """Optional[int]: Precision (number of digits) for the NUMERIC field.""" - return self._properties.get("precision") + return _helpers._int_or_none(self._properties.get("precision")) @property def scale(self): """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" - return self._properties.get("scale") + return _helpers._int_or_none(self._properties.get("scale")) @property def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" - return self._properties.get("maxLength") + return _helpers._int_or_none(self._properties.get("maxLength")) @property def range_element_type(self): @@ -329,7 +304,7 @@ def fields(self): Must be empty unset if ``field_type`` is not 'RECORD'. """ - return self._fields + return tuple(_to_schema_fields(self._properties.get("fields", []))) @property def policy_tags(self): @@ -345,15 +320,10 @@ def to_api_repr(self) -> dict: Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - answer = self._properties.copy() - - # If this is a RECORD type, then sub-fields are also included, - # add this to the serialized representation. - if self.field_type.upper() in _STRUCT_TYPES: - answer["fields"] = [f.to_api_repr() for f in self.fields] - - # Done; return the serialized dictionary. - return answer + # Note: we don't make a copy of _properties because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + return self._properties def _key(self): """A tuple key that uniquely describes this field. @@ -389,7 +359,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.default_value_expression, self.description, - self._fields, + self.fields, policy_tags, ) @@ -588,3 +558,267 @@ def to_api_repr(self) -> dict: """ answer = {"names": list(self.names)} return answer + + +class ForeignTypeInfo: + """Metadata about the foreign data type definition such as the system in which the + type is defined. + + Args: + type_system (str): Required. Specifies the system which defines the + foreign data type. + + TypeSystem enum currently includes: + * "TYPE_SYSTEM_UNSPECIFIED" + * "HIVE" + """ + + def __init__(self, type_system: Optional[str] = None): + self._properties: Dict[str, Any] = {} + self.type_system = type_system + + @property + def type_system(self) -> Optional[str]: + """Required. Specifies the system which defines the foreign data + type.""" + + return self._properties.get("typeSystem") + + @type_system.setter + def type_system(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["typeSystem"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. 
+ """ + + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignTypeInfo": + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. + """ + + config = cls() + config._properties = api_repr + return config + + +class SerDeInfo: + """Serializer and deserializer information. + + Args: + serialization_library (str): Required. Specifies a fully-qualified class + name of the serialization library that is responsible for the + translation of data between table representation and the underlying + low-level input and output format structures. The maximum length is + 256 characters. + name (Optional[str]): Name of the SerDe. The maximum length is 256 + characters. + parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization + parameters for the serialization library. Maximum size 10 Kib. + """ + + def __init__( + self, + serialization_library: str, + name: Optional[str] = None, + parameters: Optional[dict[str, str]] = None, + ): + self._properties: Dict[str, Any] = {} + self.serialization_library = serialization_library + self.name = name + self.parameters = parameters + + @property + def serialization_library(self) -> str: + """Required. Specifies a fully-qualified class name of the serialization + library that is responsible for the translation of data between table + representation and the underlying low-level input and output format + structures. The maximum length is 256 characters.""" + + return typing.cast(str, self._properties.get("serializationLibrary")) + + @serialization_library.setter + def serialization_library(self, value: str): + value = _helpers._isinstance_or_raise(value, str, none_allowed=False) + self._properties["serializationLibrary"] = value + + @property + def name(self) -> Optional[str]: + """Optional. Name of the SerDe. The maximum length is 256 characters.""" + + return self._properties.get("name") + + @name.setter + def name(self, value: Optional[str] = None): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["name"] = value + + @property + def parameters(self) -> Optional[dict[str, str]]: + """Optional. Key-value pairs that define the initialization parameters + for the serialization library. Maximum size 10 Kib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[dict[str, str]] = None): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> SerDeInfo: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. + """ + config = cls("PLACEHOLDER") + config._properties = api_repr + return config + + +class StorageDescriptor: + """Contains information about how a table's data is stored and accessed by open + source query engines. 
+ + Args: + input_format (Optional[str]): Specifies the fully qualified class name of + the InputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters. + location_uri (Optional[str]): The physical location of the table (e.g. + 'gs://spark-dataproc-data/pangea-data/case_sensitive/' or + 'gs://spark-dataproc-data/pangea-data/'). The maximum length is + 2056 bytes. + output_format (Optional[str]): Specifies the fully qualified class name + of the OutputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum + length is 128 characters. + serde_info (Union[SerDeInfo, dict, None]): Serializer and deserializer information. + """ + + def __init__( + self, + input_format: Optional[str] = None, + location_uri: Optional[str] = None, + output_format: Optional[str] = None, + serde_info: Union[SerDeInfo, dict, None] = None, + ): + self._properties: Dict[str, Any] = {} + self.input_format = input_format + self.location_uri = location_uri + self.output_format = output_format + # Using typing.cast() because mypy cannot wrap it's head around the fact that: + # the setter can accept Union[SerDeInfo, dict, None] + # but the getter will only ever return Optional[SerDeInfo]. + self.serde_info = typing.cast(Optional[SerDeInfo], serde_info) + + @property + def input_format(self) -> Optional[str]: + """Optional. Specifies the fully qualified class name of the InputFormat + (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters.""" + + return self._properties.get("inputFormat") + + @input_format.setter + def input_format(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["inputFormat"] = value + + @property + def location_uri(self) -> Optional[str]: + """Optional. The physical location of the table (e.g. 'gs://spark- + dataproc-data/pangea-data/case_sensitive/' or 'gs://spark-dataproc- + data/pangea-data/'). The maximum length is 2056 bytes.""" + + return self._properties.get("locationUri") + + @location_uri.setter + def location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["locationUri"] = value + + @property + def output_format(self) -> Optional[str]: + """Optional. Specifies the fully qualified class name of the + OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). + The maximum length is 128 characters.""" + + return self._properties.get("outputFormat") + + @output_format.setter + def output_format(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["outputFormat"] = value + + @property + def serde_info(self) -> Optional[SerDeInfo]: + """Optional. Serializer and deserializer information.""" + + prop = _helpers._get_sub_prop(self._properties, ["serDeInfo"]) + if prop is not None: + return typing.cast(SerDeInfo, SerDeInfo.from_api_repr(prop)) + return None + + @serde_info.setter + def serde_info(self, value: Union[SerDeInfo, dict, None]): + value = _helpers._isinstance_or_raise( + value, (SerDeInfo, dict), none_allowed=True + ) + + if isinstance(value, SerDeInfo): + self._properties["serDeInfo"] = value.to_api_repr() + else: + self._properties["serDeInfo"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. 
+ """ + return self._properties + + @classmethod + def from_api_repr(cls, resource: dict) -> StorageDescriptor: + """Factory: constructs an instance of the class (cls) + given its API representation. + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = resource + return config diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index faf827be4..80ab330ba 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -390,6 +390,7 @@ class Table(_TableBase): "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], "mview_query": "materializedView", "mview_refresh_interval": "materializedView", + "mview_allow_non_incremental_definition": "materializedView", "num_bytes": "numBytes", "num_rows": "numRows", "partition_expiration": "timePartitioning", @@ -406,6 +407,7 @@ class Table(_TableBase): "view_query": "view", "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", + "max_staleness": "maxStaleness", } def __init__(self, table_ref, schema=None) -> None: @@ -928,6 +930,28 @@ def mview_refresh_interval(self, value): refresh_interval_ms, ) + @property + def mview_allow_non_incremental_definition(self): + """Optional[bool]: This option declares the intention to construct a + materialized view that isn't refreshed incrementally. + The default value is :data:`False`. + """ + api_field = self._PROPERTY_TO_API_FIELD[ + "mview_allow_non_incremental_definition" + ] + return _helpers._get_sub_prop( + self._properties, [api_field, "allowNonIncrementalDefinition"] + ) + + @mview_allow_non_incremental_definition.setter + def mview_allow_non_incremental_definition(self, value): + api_field = self._PROPERTY_TO_API_FIELD[ + "mview_allow_non_incremental_definition" + ] + _helpers._set_sub_prop( + self._properties, [api_field, "allowNonIncrementalDefinition"], value + ) + @property def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's @@ -1092,6 +1116,40 @@ def __repr__(self): def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" + @property + def max_staleness(self): + """Union[str, None]: The maximum staleness of data that could be returned when the table is queried. + + Staleness encoded as a string encoding of sql IntervalValue type. + This property is optional and defaults to None. + + According to the BigQuery API documentation, maxStaleness specifies the maximum time + interval for which stale data can be returned when querying the table. + It helps control data freshness in scenarios like metadata-cached external tables. + + Returns: + Optional[str]: A string representing the maximum staleness interval + (e.g., '1h', '30m', '15s' for hours, minutes, seconds respectively). + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["max_staleness"]) + + @max_staleness.setter + def max_staleness(self, value): + """Set the maximum staleness for the table. + + Args: + value (Optional[str]): A string representing the maximum staleness interval. + Must be a valid time interval string. + Examples include '1h' (1 hour), '30m' (30 minutes), '15s' (15 seconds). + + Raises: + ValueError: If the value is not None and not a string. 
+ """ + if value is not None and not isinstance(value, str): + raise ValueError("max_staleness must be a string or None") + + self._properties[self._PROPERTY_TO_API_FIELD["max_staleness"]] = value + class TableListItem(_TableBase): """A read-only table resource from a list operation. @@ -1812,6 +1870,7 @@ def to_arrow_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + max_stream_count: Optional[int] = None, ) -> Iterator["pyarrow.RecordBatch"]: """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. @@ -1836,6 +1895,22 @@ def to_arrow_iterable( created by the server. If ``max_queue_size`` is :data:`None`, the queue size is infinite. + max_stream_count (Optional[int]): + The maximum number of parallel download streams when + using BigQuery Storage API. Ignored if + BigQuery Storage API is not used. + + This setting also has no effect if the query result + is deterministically ordered with ORDER BY, + in which case, the number of download stream is always 1. + + If set to 0 or None (the default), the number of download + streams is determined by BigQuery the server. However, this behaviour + can require a lot of memory to store temporary download result, + especially with very large queries. In that case, + setting this parameter value to a value > 0 can help + reduce system resource consumption. + Returns: pyarrow.RecordBatch: A generator of :class:`~pyarrow.RecordBatch`. @@ -1852,6 +1927,7 @@ def to_arrow_iterable( preserve_order=self._preserve_order, selected_fields=self._selected_fields, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) tabledata_list_download = functools.partial( _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema @@ -1978,6 +2054,7 @@ def to_dataframe_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + max_stream_count: Optional[int] = None, ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2008,6 +2085,22 @@ def to_dataframe_iterable( .. versionadded:: 2.14.0 + max_stream_count (Optional[int]): + The maximum number of parallel download streams when + using BigQuery Storage API. Ignored if + BigQuery Storage API is not used. + + This setting also has no effect if the query result + is deterministically ordered with ORDER BY, + in which case, the number of download stream is always 1. + + If set to 0 or None (the default), the number of download + streams is determined by BigQuery the server. However, this behaviour + can require a lot of memory to store temporary download result, + especially with very large queries. In that case, + setting this parameter value to a value > 0 can help + reduce system resource consumption. + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. 
@@ -2034,6 +2127,7 @@ def to_dataframe_iterable( preserve_order=self._preserve_order, selected_fields=self._selected_fields, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, @@ -2690,6 +2784,7 @@ def to_dataframe_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: Optional[int] = None, + max_stream_count: Optional[int] = None, ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2705,6 +2800,9 @@ def to_dataframe_iterable( max_queue_size: Ignored. Added for compatibility with RowIterator. + max_stream_count: + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pandas.DataFrame`. @@ -2719,6 +2817,7 @@ def to_arrow_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: Optional[int] = None, + max_stream_count: Optional[int] = None, ) -> Iterator["pyarrow.RecordBatch"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2731,6 +2830,9 @@ def to_arrow_iterable( max_queue_size: Ignored. Added for compatibility with RowIterator. + max_stream_count: + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. """ diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 8f4418777..7da2c534f 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.27.0" +__version__ = "3.28.0" diff --git a/noxfile.py b/noxfile.py index 2376309ff..e08956b11 100644 --- a/noxfile.py +++ b/noxfile.py @@ -219,6 +219,9 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) + # Resource Manager needed for test with a real Resource Tag. + session.install("google-cloud-resource-manager", "-c", constraints_path) + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: @@ -366,6 +369,7 @@ def prerelease_deps(session): session.install( "freezegun", "google-cloud-datacatalog", + "google-cloud-resource-manager", "google-cloud-storage", "google-cloud-testutils", "psutil", @@ -462,7 +466,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.9") +@nox.session(python="3.10") @_calculate_duration def docs(session): """Build the docs.""" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..ecf21d922 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,104 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "google-cloud-bigquery" +authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] +license = { text = "Apache 2.0" } +requires-python = ">=3.7" +description = "Google BigQuery API client library" +readme = "README.rst" +classifiers = [ + # Should be one of: + # "Development Status :: 3 - Alpha" + # "Development Status :: 4 - Beta" + # "Development Status :: 5 - Production/Stable" + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Internet", +] +dependencies = [ + "google-api-core[grpc] >= 2.11.1, < 3.0.0dev", + "google-auth >= 2.14.1, < 3.0.0dev", + "google-cloud-core >= 2.4.1, < 3.0.0dev", + "google-resumable-media >= 2.0.0, < 3.0dev", + "packaging >= 20.0.0", + "python-dateutil >= 2.7.3, < 3.0dev", + "requests >= 2.21.0, < 3.0.0dev", +] +dynamic = ["version"] + +[project.urls] +Repository = "https://github.com/googleapis/python-bigquery" + +[project.optional-dependencies] +# bqstorage had a period where it was a required dependency, and has been +# moved back to optional due to bloat. See +# https://github.com/googleapis/python-bigquery/issues/1196 for more background. +bqstorage = [ + "google-cloud-bigquery-storage >= 2.6.0, < 3.0.0dev", + # Due to an issue in pip's dependency resolver, the `grpc` extra is not + # installed, even though `google-cloud-bigquery-storage` specifies it + # as `google-api-core[grpc]`. We thus need to explicitly specify it here. + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. + # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "pyarrow >= 3.0.0", +] +pandas = [ + "pandas >= 1.1.0", + "pyarrow >= 3.0.0", + "db-dtypes >= 0.3.0, < 2.0.0dev", + "importlib_metadata >= 1.0.0; python_version < '3.8'", +] +ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] +geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +ipython = ["bigquery-magics >= 0.1.0"] +tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] +opentelemetry = [ + "opentelemetry-api >= 1.1.0", + "opentelemetry-sdk >= 1.1.0", + "opentelemetry-instrumentation >= 0.20b0", +] +bigquery_v2 = [ + "proto-plus >= 1.22.3, < 2.0.0dev", + "protobuf >= 3.20.2, < 6.0.0dev, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. +] +all = [ + "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", +] + +[tool.setuptools.dynamic] +version = { attr = "google.cloud.bigquery.version.__version__" } + +[tool.setuptools.packages.find] +# Only include packages under the 'google' namespace. Do not include tests, +# benchmarks, etc. 
+include = ["google*"] diff --git a/renovate.json b/renovate.json index 39b2a0ec9..c7875c469 100644 --- a/renovate.json +++ b/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] } diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 1640e1a95..827b02dcf 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 383829d7d..165800741 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.27.0 google-auth-oauthlib==1.2.1 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 1ccebd9cd..ef38acb4f 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1089dc195..71579867f 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,48 +1,51 @@ -attrs==24.2.0 -certifi==2024.8.30 +attrs===24.2.0; python_version == '3.7' +attrs==24.3.0; python_version >= '3.8' +certifi==2024.12.14 cffi===1.15.1; python_version == '3.7' cffi==1.17.1; python_version >= '3.8' -charset-normalizer==3.3.2 -click==8.1.7 +charset-normalizer==3.4.1 +click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.3.0 +db-dtypes==1.3.1 Fiona===1.9.6; python_version == '3.7' Fiona==1.10.1; python_version >= '3.8' -geojson==3.1.0 +geojson==3.2.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.20.0 -google-auth==2.35.0 -google-cloud-bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +google-api-core==2.24.0 +google-auth==2.37.0 +google-cloud-bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 google-crc32c===1.5.0; python_version < '3.9' google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 -googleapis-common-protos==1.65.0 +googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.1; python_version >= '3.8' +grpcio==1.69.0; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' -packaging==24.1; python_version >= '3.8' +packaging==24.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' -proto-plus==1.24.0 -pyarrow==12.0.1; python_version == '3.7' -pyarrow==17.0.0; python_version >= '3.8' +proto-plus==1.25.0 +pyarrow===12.0.1; 
python_version == '3.7' +pyarrow===17.0.0; python_version == '3.8' +pyarrow==18.1.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' -pyparsing==3.1.4 +pyparsing===3.1.4; python_version < '3.9' +pyparsing==3.2.1; python_version >= '3.9' python-dateutil==2.9.0.post0 pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' @@ -51,9 +54,10 @@ requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 Shapely==2.0.6 -six==1.16.0 +six==1.17.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.3; python_version >= '3.8' +urllib3===2.2.3; python_version == '3.8' +urllib3==2.3.0; python_version >= '3.9' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 1640e1a95..827b02dcf 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 6386fb6d2..87efa3dec 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,7 +1,7 @@ -bigquery_magics==0.4.0 -db-dtypes==1.3.0 -google.cloud.bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +bigquery_magics==0.5.0 +db-dtypes==1.3.1 +google.cloud.bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 1640e1a95..827b02dcf 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 7463e1afc..77103a338 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,13 +1,14 @@ -bigquery-magics==0.4.0 -db-dtypes==1.3.0 -google-cloud-bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +bigquery-magics==0.5.0 +db-dtypes==1.3.1 +google-cloud-bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.2; python_version >= '3.9' +matplotlib===3.9.2; python_version == '3.9' +matplotlib==3.10.0; python_version >= '3.10' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index bb0b2a6bf..077e465cf 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # 
samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 65ce0be9f..307ebac24 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.27.0 diff --git a/setup.py b/setup.py index 617685543..2ad29ecbf 100644 --- a/setup.py +++ b/setup.py @@ -12,131 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import io -import os +import setuptools # type: ignore -import setuptools - -# Package metadata. - -name = "google-cloud-bigquery" -description = "Google BigQuery API client library" - -# Should be one of: -# 'Development Status :: 3 - Alpha' -# 'Development Status :: 4 - Beta' -# 'Development Status :: 5 - Production/Stable' -release_status = "Development Status :: 5 - Production/Stable" -dependencies = [ - "google-api-core[grpc] >= 2.11.1, <3.0.0dev", - "google-auth >= 2.14.1, <3.0.0dev", - "google-cloud-core >= 2.4.1, <3.0.0dev", - "google-resumable-media >= 2.0.0, < 3.0dev", - "packaging >= 20.0.0", - "python-dateutil >= 2.7.3, <3.0dev", - "requests >= 2.21.0, < 3.0.0dev", -] -pyarrow_dependency = "pyarrow >= 3.0.0" -extras = { - # bqstorage had a period where it was a required dependency, and has been - # moved back to optional due to bloat. See - # https://github.com/googleapis/python-bigquery/issues/1196 for more background. - "bqstorage": [ - "google-cloud-bigquery-storage >= 2.6.0, <3.0.0dev", - # Due to an issue in pip's dependency resolver, the `grpc` extra is not - # installed, even though `google-cloud-bigquery-storage` specifies it - # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 The - # grpc.Channel.close() method isn't added until 1.32.0. - # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", - pyarrow_dependency, - ], - "pandas": [ - "pandas>=1.1.0", - pyarrow_dependency, - "db-dtypes>=0.3.0,<2.0.0dev", - "importlib_metadata>=1.0.0; python_version<'3.8'", - ], - "ipywidgets": [ - "ipywidgets>=7.7.0", - "ipykernel>=6.0.0", - ], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], - "ipython": [ - "bigquery-magics >= 0.1.0", - ], - "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], - "opentelemetry": [ - "opentelemetry-api >= 1.1.0", - "opentelemetry-sdk >= 1.1.0", - "opentelemetry-instrumentation >= 0.20b0", - ], - "bigquery_v2": [ - "proto-plus >= 1.22.3, <2.0.0dev", - "protobuf>=3.20.2,<6.0.0dev,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. - ], -} - -all_extras = [] - -for extra in extras: - all_extras.extend(extras[extra]) - -extras["all"] = all_extras - -# Setup boilerplate below this line. 
- -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, "README.rst") -with io.open(readme_filename, encoding="utf-8") as readme_file: - readme = readme_file.read() - -version = {} -with open(os.path.join(package_root, "google/cloud/bigquery/version.py")) as fp: - exec(fp.read(), version) -version = version["__version__"] - -# Only include packages under the 'google' namespace. Do not include tests, -# benchmarks, etc. -packages = [ - package - for package in setuptools.find_namespace_packages() - if package.startswith("google") -] - -setuptools.setup( - name=name, - version=version, - description=description, - long_description=readme, - author="Google LLC", - author_email="googleapis-packages@google.com", - license="Apache 2.0", - url="https://github.com/googleapis/python-bigquery", - classifiers=[ - release_status, - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Operating System :: OS Independent", - "Topic :: Internet", - ], - platforms="Posix; MacOS X; Windows", - packages=packages, - install_requires=dependencies, - extras_require=extras, - python_requires=">=3.7", - include_package_data=True, - zip_safe=False, -) +setuptools.setup() diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 95c679a14..c0dd83b12 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -25,6 +25,8 @@ import time import unittest import uuid +import random +import string from typing import Optional from google.api_core.exceptions import PreconditionFailed @@ -45,6 +47,8 @@ from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient +from google.cloud.resourcemanager_v3 import types as resourcemanager_types +from google.cloud.resourcemanager_v3 import TagKeysClient, TagValuesClient import psutil import pytest from test_utils.retry import RetryErrors @@ -156,9 +160,12 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): self.to_delete = [] + self.to_delete_tag_keys_values = [] def tearDown(self): policy_tag_client = PolicyTagManagerClient() + tag_keys_client = TagKeysClient() + tag_values_client = TagValuesClient() def _still_in_use(bad_request): return any( @@ -181,6 +188,18 @@ def _still_in_use(bad_request): else: doomed.delete() + # The TagKey cannot be deleted if it has any child TagValues. 
+        for key_values in self.to_delete_tag_keys_values:
+            tag_key = key_values.pop()
+
+            # Delete tag values first
+            [
+                tag_values_client.delete_tag_value(name=tag_value.name).result()
+                for tag_value in key_values
+            ]
+
+            tag_keys_client.delete_tag_key(name=tag_key.name).result()
+
     def test_get_service_account_email(self):
         client = Config.CLIENT
@@ -278,24 +297,74 @@ def test_create_dataset_with_default_rounding_mode(self):
         self.assertTrue(_dataset_exists(dataset))
         self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN")
+    def _create_resource_tag_key_and_values(self, key, values):
+        tag_key_client = TagKeysClient()
+        tag_value_client = TagValuesClient()
+
+        tag_key_parent = f"projects/{Config.CLIENT.project}"
+        new_tag_key = resourcemanager_types.TagKey(
+            short_name=key, parent=tag_key_parent
+        )
+        tag_key = tag_key_client.create_tag_key(tag_key=new_tag_key).result()
+        self.to_delete_tag_keys_values.insert(0, [tag_key])
+
+        for value in values:
+            new_tag_value = resourcemanager_types.TagValue(
+                short_name=value, parent=tag_key.name
+            )
+            tag_value = tag_value_client.create_tag_value(
+                tag_value=new_tag_value
+            ).result()
+            self.to_delete_tag_keys_values[0].insert(0, tag_value)
+
     def test_update_dataset(self):
         dataset = self.temp_dataset(_make_dataset_id("update_dataset"))
         self.assertTrue(_dataset_exists(dataset))
         self.assertIsNone(dataset.friendly_name)
         self.assertIsNone(dataset.description)
         self.assertEqual(dataset.labels, {})
+        self.assertEqual(dataset.resource_tags, {})
         self.assertIs(dataset.is_case_insensitive, False)
+        # This creates unique tag keys for each test run across different Python versions.
+        tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4))
+        tag_1 = f"env_{tag_postfix}"
+        tag_2 = f"component_{tag_postfix}"
+        tag_3 = f"project_{tag_postfix}"
+
+        # Tags need to be created before they can be used in a dataset.
+ self._create_resource_tag_key_and_values(tag_1, ["prod", "dev"]) + self._create_resource_tag_key_and_values(tag_2, ["batch"]) + self._create_resource_tag_key_and_values(tag_3, ["atlas"]) + dataset.friendly_name = "Friendly" dataset.description = "Description" dataset.labels = {"priority": "high", "color": "blue"} + dataset.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + } dataset.is_case_insensitive = True ds2 = Config.CLIENT.update_dataset( - dataset, ("friendly_name", "description", "labels", "is_case_insensitive") + dataset, + ( + "friendly_name", + "description", + "labels", + "resource_tags", + "is_case_insensitive", + ), ) self.assertEqual(ds2.friendly_name, "Friendly") self.assertEqual(ds2.description, "Description") self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + ds2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + }, + ) self.assertIs(ds2.is_case_insensitive, True) ds2.labels = { @@ -303,8 +372,25 @@ def test_update_dataset(self): "shape": "circle", # add "priority": None, # delete } - ds3 = Config.CLIENT.update_dataset(ds2, ["labels"]) + ds2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "dev", # change + f"{Config.CLIENT.project}/{tag_3}": "atlas", # add + f"{Config.CLIENT.project}/{tag_2}": None, # delete + } + ds3 = Config.CLIENT.update_dataset(ds2, ["labels", "resource_tags"]) self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + ds3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "dev", + f"{Config.CLIENT.project}/{tag_3}": "atlas", + }, + ) + + # Remove all tags + ds3.resource_tags = None + ds4 = Config.CLIENT.update_dataset(ds3, ["resource_tags"]) + self.assertEqual(ds4.resource_tags, {}) # If we try to update using d2 again, it will fail because the # previous update changed the ETag. diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index becf3e959..3a681c476 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import warnings import pytest @@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self): config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) - def test_schema_setter_invalid_mappings_list(self): + def test_schema_setter_allows_unknown_properties(self): config = self._get_target_class()() schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - config.schema = schema + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + config.schema = schema + + # _properties should include all fields, including unknown ones. 
+ assert config._properties["load"]["schema"]["fields"] == expected_schema def test_schema_setter_unsetting_schema(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0a307498f..adba6327c 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -24,6 +24,7 @@ from unittest import mock import google.api_core +from google.cloud.bigquery._helpers import _isinstance_or_raise @pytest.mark.skipif( @@ -1661,3 +1662,34 @@ def test_w_env_var(self): host = self._call_fut() self.assertEqual(host, HOST) + + +class Test__isinstance_or_raise: + @pytest.mark.parametrize( + "value,dtype,none_allowed,expected", + [ + (None, str, True, None), + ("hello world.uri", str, True, "hello world.uri"), + ("hello world.uri", str, False, "hello world.uri"), + (None, (str, float), True, None), + ("hello world.uri", (str, float), True, "hello world.uri"), + ("hello world.uri", (str, float), False, "hello world.uri"), + ], + ) + def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed) + assert result == expected + + @pytest.mark.parametrize( + "value,dtype,none_allowed,expected", + [ + (None, str, False, pytest.raises(TypeError)), + ({"key": "value"}, str, True, pytest.raises(TypeError)), + ({"key": "value"}, str, False, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), True, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), False, pytest.raises(TypeError)), + ], + ) + def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + with expected: + _isinstance_or_raise(value, dtype, none_allowed=none_allowed) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cd336b73f..14089b031 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2028,6 +2028,7 @@ def test_update_dataset(self): LABELS = {"priority": "high"} ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] EXP = 17 + RESOURCE_TAGS = {"123456789012/key": "value"} RESOURCE = { "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, "etag": "etag", @@ -2037,6 +2038,7 @@ def test_update_dataset(self): "defaultTableExpirationMs": EXP, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -2048,12 +2050,14 @@ def test_update_dataset(self): ds.default_table_expiration_ms = EXP ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] + ds.resource_tags = RESOURCE_TAGS fields = [ "description", "friendly_name", "location", "labels", "access_entries", + "resource_tags", ] with mock.patch( @@ -2077,6 +2081,7 @@ def test_update_dataset(self): "location": LOCATION, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, }, path="/" + PATH, timeout=7.5, @@ -2086,6 +2091,7 @@ def test_update_dataset(self): self.assertEqual(ds2.location, ds.location) self.assertEqual(ds2.labels, ds.labels) self.assertEqual(ds2.access_entries, ds.access_entries) + self.assertEqual(ds2.resource_tags, ds.resource_tags) # ETag becomes If-Match header. 
ds._properties["etag"] = "etag" diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index a2491a812..bd7c6a8f8 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -65,6 +65,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "tableId": "northern-hemisphere", } DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN" + RESOURCE_TAGS = {"123456789012/foo": "bar"} RESOURCE = { "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, "etag": "etag", @@ -76,6 +77,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "labels": LABELS, "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "defaultRoundingMode": DEFAULT_ROUNDING_MODE, + "resourceTags": RESOURCE_TAGS, } conn = client._connection = make_connection(RESOURCE) entries = [ @@ -91,6 +93,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + before.resource_tags = RESOURCE_TAGS before.default_rounding_mode = DEFAULT_ROUNDING_MODE after = client.create_dataset(before) assert after.dataset_id == DS_ID @@ -103,6 +106,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): assert after.default_table_expiration_ms == 3600 assert after.labels == LABELS assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE + assert after.resource_tags == RESOURCE_TAGS conn.api_request.assert_called_once_with( method="POST", @@ -119,6 +123,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): {"view": VIEW, "role": None}, ], "labels": LABELS, + "resourceTags": RESOURCE_TAGS, }, timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index c0164bc73..8ab8dffec 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase): DS_ID = "dataset-id" DS_REF = DatasetReference(PROJECT, DS_ID) KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + API_REPR = { + "datasetReference": {"projectId": "project", "datasetId": "dataset-id"}, + "labels": {}, + "externalCatalogDatasetOptions": { + "defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI, + "parameters": PARAMETERS, + }, + } @staticmethod def _get_target_class(): @@ -894,6 +904,28 @@ def test_location_setter(self): dataset.location = "LOCATION" self.assertEqual(dataset.location, "LOCATION") + def test_resource_tags_update_in_place(self): + dataset = self._make_one(self.DS_REF) + tags = dataset.resource_tags + tags["123456789012/foo"] = "bar" # update in place + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter(self): + dataset = self._make_one(self.DS_REF) + dataset.resource_tags = {"123456789012/foo": "bar"} + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter_bad_value(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError): + dataset.resource_tags = "invalid" + with self.assertRaises(ValueError): + dataset.resource_tags = 123 + + def test_resource_tags_getter_missing_value(self): + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.resource_tags, {}) + def test_labels_update_in_place(self): dataset = self._make_one(self.DS_REF) del dataset._properties["labels"] # don't start w/ existing dict @@ -1045,6 +1077,80 @@ def test___repr__(self): 
        expected = "Dataset(DatasetReference('project1', 'dataset1'))"
        self.assertEqual(repr(dataset), expected)

+    def test_external_catalog_dataset_options_setter(self):
+        # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS
+        # WHEN an ExternalCatalogDatasetOptions obj is created
+        # and added to a dataset.
+        # THEN the api representation of the dataset will match API_REPR
+
+        from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions
+
+        dataset = self._make_one(self.DS_REF)
+
+        ecdo_obj = ExternalCatalogDatasetOptions(
+            default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
+            parameters=self.PARAMETERS,
+        )
+        dataset.external_catalog_dataset_options = ecdo_obj
+
+        result = dataset.to_api_repr()
+        expected = self.API_REPR
+        assert result == expected
+
+    def test_external_catalog_dataset_options_getter_prop_exists(self):
+        # GIVEN default dataset PLUS an ExternalCatalogDatasetOptions
+        # THEN confirm that the api_repr of the ExternalCatalogDatasetOptions
+        # matches the api_repr of the external_catalog_dataset_options attribute.
+
+        from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions
+
+        dataset = self._make_one(self.DS_REF)
+        ecdo_obj = ExternalCatalogDatasetOptions(
+            default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
+            parameters=self.PARAMETERS,
+        )
+        dataset.external_catalog_dataset_options = ecdo_obj
+        result = dataset.external_catalog_dataset_options.to_api_repr()
+        expected = ecdo_obj.to_api_repr()
+        assert result == expected
+
+    def test_external_catalog_dataset_options_getter_prop_is_none(self):
+        # GIVEN only a default dataset
+        # THEN confirm that external_catalog_dataset_options is None
+
+        dataset = self._make_one(self.DS_REF)
+        expected = None
+        result = dataset.external_catalog_dataset_options
+        assert result == expected
+
+    def test_external_catalog_dataset_options_from_api_repr(self):
+        # GIVEN default dataset including an ExternalCatalogDatasetOptions
+        # THEN confirm that the api_repr of the ExternalCatalogDatasetOptions
+        # on a dataset object created via from_api_repr matches the api_repr
+        # of the "externalCatalogDatasetOptions" key.
+ + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + + def test_external_catalog_dataset_options_to_api_repr(self): + # GIVEN a dataset api_repr including an ExternalCatalogDatasetOptions key + # THEN confirm that the api_repr of that key from a dataset object created + # via the to_api_repr() method matches the value of the key + # used to create the dataset object + + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.to_api_repr()["externalCatalogDatasetOptions"] + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + class TestDatasetListItem(unittest.TestCase): @staticmethod diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 9fd16e699..0c27d8e56 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,6 +19,8 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +import pytest + class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] @@ -890,3 +892,90 @@ def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) return d + + +class TestExternalCatalogDatasetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + return ExternalCatalogDatasetOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS), # set all params + (DEFAULT_STORAGE_LOCATION_URI, None), # set only one argument at a time + (None, PARAMETERS), + (None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + default_storage_location_uri, + parameters, + ): + """Test ExternalCatalogDatasetOptions constructor with explicit values.""" + + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + assert instance.default_storage_location_uri == default_storage_location_uri + assert instance.parameters == parameters + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (123, None), # does not accept integers + (None, 123), + ], + ) + def test_ctor_invalid_input(self, default_storage_location_uri, parameters): + """Test ExternalCatalogDatasetOptions constructor with invalid input.""" + + with pytest.raises(TypeError) as e: + self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + """Test ExternalCatalogDatasetOptions.to_api_repr method.""" + + instance = self._make_one( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + resource = instance.to_api_repr() + assert ( + resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI + ) + assert resource["parameters"] == self.PARAMETERS + + def test_from_api_repr(self): + """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. 
api_repr) + WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr() + THEN it will have the representation in dict format as an ExternalCatalogDatasetOptions + object made directly (via _make_one()) and represented in dict format. + """ + + instance = self._make_one() + api_repr = { + "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI, + "parameters": self.PARAMETERS, + } + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogDatasetOptions) + assert result._properties == api_repr diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index b17cd0281..efbc5d26f 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,14 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud import bigquery -from google.cloud.bigquery.standard_sql import StandardSqlStructType -from google.cloud.bigquery.schema import PolicyTagList +import copy import unittest from unittest import mock import pytest +from google.cloud import bigquery +from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery import schema +from google.cloud.bigquery.schema import PolicyTagList + class TestSchemaField(unittest.TestCase): @staticmethod @@ -128,8 +131,6 @@ def test_constructor_range_str(self): self.assertEqual(field.range_element_type.element_type, "DATETIME") def test_to_api_repr(self): - from google.cloud.bigquery.schema import PolicyTagList - policy = PolicyTagList(names=("foo", "bar")) self.assertEqual( policy.to_api_repr(), @@ -821,13 +822,32 @@ def test_schema_fields_sequence(self): result = self._call_fut(schema) self.assertEqual(result, schema) - def test_invalid_mapping_representation(self): + def test_unknown_properties(self): schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "address", "typeooo": "STRING", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - self._call_fut(schema) + + # Make sure the setter doesn't mutate schema. 
+ expected_schema = copy.deepcopy(schema) + + result = self._call_fut(schema) + + for api_repr, field in zip(expected_schema, result): + assert field.to_api_repr() == api_repr def test_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField @@ -865,8 +885,6 @@ def test_valid_mapping_representation(self): class TestPolicyTags(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.schema import PolicyTagList - return PolicyTagList def _make_one(self, *args, **kw): @@ -1108,3 +1126,285 @@ def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField assert SchemaField(**field).to_api_repr() == api + + +class TestForeignTypeInfo: + """Tests for ForeignTypeInfo objects.""" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.schema import ForeignTypeInfo + + return ForeignTypeInfo + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @pytest.mark.parametrize( + "type_system,expected", + [ + (None, None), + ("TYPE_SYSTEM_UNSPECIFIED", "TYPE_SYSTEM_UNSPECIFIED"), + ("HIVE", "HIVE"), + ], + ) + def test_ctor_valid_input(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.type_system == expected + + def test_ctor_invalid_input(self): + with pytest.raises(TypeError) as e: + self._make_one(type_system=123) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + @pytest.mark.parametrize( + "type_system,expected", + [ + ("TYPE_SYSTEM_UNSPECIFIED", {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}), + ("HIVE", {"typeSystem": "HIVE"}), + (None, {"typeSystem": None}), + ], + ) + def test_to_api_repr(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.to_api_repr() == expected + + def test_from_api_repr(self): + """GIVEN an api representation of a ForeignTypeInfo object (i.e. api_repr) + WHEN converted into a ForeignTypeInfo object using from_api_repr() + THEN it will have the same representation in dict format as a ForeignTypeInfo + object made directly (via _make_one()) and represented in dict format. + """ + api_repr = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + + expected = self._make_one( + type_system="TYPE_SYSTEM_UNSPECIFIED", + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. 
+ assert result.to_api_repr() == expected.to_api_repr() + + +class TestSerDeInfo: + """Tests for the SerDeInfo class.""" + + @staticmethod + def _get_target_class(): + return schema.SerDeInfo + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + ("testpath.to.LazySimpleSerDe", None, None), + ("testpath.to.LazySimpleSerDe", "serde_name", None), + ("testpath.to.LazySimpleSerDe", None, {"key": "value"}), + ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}), + ], + ) + def test_ctor_valid_input(self, serialization_library, name, parameters): + serde_info = self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + assert serde_info.serialization_library == serialization_library + assert serde_info.name == name + assert serde_info.parameters == parameters + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + (123, None, None), + ("testpath.to.LazySimpleSerDe", 123, None), + ("testpath.to.LazySimpleSerDe", None, ["test", "list"]), + ("testpath.to.LazySimpleSerDe", None, 123), + ], + ) + def test_ctor_invalid_input(self, serialization_library, name, parameters): + with pytest.raises(TypeError) as e: + self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + serde_info = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + expected_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + assert serde_info.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a SerDeInfo object (i.e. api_repr) + WHEN converted into a SerDeInfo object using from_api_repr() + THEN it will have the same representation in dict format as a SerDeInfo + object made directly (via _make_one()) and represented in dict format. + """ + api_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + + expected = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. 
+ assert result.to_api_repr() == expected.to_api_repr() + + +class TestStorageDescriptor: + """Tests for the StorageDescriptor class.""" + + @staticmethod + def _get_target_class(): + return schema.StorageDescriptor + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + serdeinfo_resource = { + "serialization_library": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + } + + SERDEINFO = schema.SerDeInfo("PLACEHOLDER").from_api_repr(serdeinfo_resource) + + STORAGEDESCRIPTOR = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": SERDEINFO.to_api_repr(), + } + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (None, None, None, None), + ("testpath.to.OrcInputFormat", None, None, None), + (None, "gs://test/path/", None, None), + (None, None, "testpath.to.OrcOutputFormat", None), + (None, None, None, SERDEINFO), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + SERDEINFO, # uses SERDEINFO class format + ), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + serdeinfo_resource, # uses api resource format (dict) + ), + ], + ) + def test_ctor_valid_input( + self, input_format, location_uri, output_format, serde_info + ): + storage_descriptor = self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + assert storage_descriptor.input_format == input_format + assert storage_descriptor.location_uri == location_uri + assert storage_descriptor.output_format == output_format + if isinstance(serde_info, schema.SerDeInfo): + assert ( + storage_descriptor.serde_info.to_api_repr() == serde_info.to_api_repr() + ) + elif isinstance(serde_info, dict): + assert storage_descriptor.serde_info.to_api_repr() == serde_info + else: + assert storage_descriptor.serde_info is None + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (123, None, None, None), + (None, 123, None, None), + (None, None, 123, None), + (None, None, None, 123), + ], + ) + def test_ctor_invalid_input( + self, input_format, location_uri, output_format, serde_info + ): + with pytest.raises(TypeError) as e: + self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + storage_descriptor = self._make_one( + input_format="input_format", + location_uri="location_uri", + output_format="output_format", + serde_info=self.SERDEINFO, + ) + expected_repr = { + "inputFormat": "input_format", + "locationUri": "location_uri", + "outputFormat": "output_format", + "serDeInfo": self.SERDEINFO.to_api_repr(), + } + assert storage_descriptor.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a StorageDescriptor (i.e. STORAGEDESCRIPTOR) + WHEN converted into a StorageDescriptor using from_api_repr() and + displayed as a dict + THEN it will have the same representation a StorageDescriptor created + directly (via the _make_one() func) and displayed as a dict. 
+ """ + + # generate via STORAGEDESCRIPTOR + resource = self.STORAGEDESCRIPTOR + result = self._get_target_class().from_api_repr(resource) + # result = klass.from_api_repr(resource) + + expected = self._make_one( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=self.SERDEINFO, + ) + assert result.to_api_repr() == expected.to_api_repr() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index d6febcfb1..e9d461e9d 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import datetime import logging import re @@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self): table.schema = [full_name, age] self.assertEqual(table.schema, [full_name, age]) - def test_schema_setter_invalid_mapping_representation(self): + def test_schema_setter_allows_unknown_properties(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"} - invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"} - with self.assertRaises(Exception): - table.schema = [full_name, invalid_field] + schema = [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, + ] + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + table.schema = schema + + # _properties should include all fields, including unknown ones. 
+ assert table._properties["schema"]["fields"] == expected_schema def test_schema_setter_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField @@ -1050,6 +1072,16 @@ def test_mview_refresh_interval(self): table.mview_refresh_interval = None self.assertIsNone(table.mview_refresh_interval) + def test_mview_allow_non_incremental_definition(self): + table = self._make_one() + self.assertIsNone(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = True + self.assertTrue(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = False + self.assertFalse(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = None + self.assertIsNone(table.mview_allow_non_incremental_definition) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset.string_table") @@ -1465,6 +1497,49 @@ def test___str__(self): table1 = self._make_one(TableReference(dataset, "table1")) self.assertEqual(str(table1), "project1.dataset1.table1") + def test_max_staleness_getter(self): + """Test getting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Initially None + self.assertIsNone(table.max_staleness) + # Set max_staleness using setter + table.max_staleness = "1h" + self.assertEqual(table.max_staleness, "1h") + + def test_max_staleness_setter(self): + """Test setting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set valid max_staleness + table.max_staleness = "30m" + self.assertEqual(table.max_staleness, "30m") + # Set to None + table.max_staleness = None + self.assertIsNone(table.max_staleness) + + def test_max_staleness_setter_invalid_type(self): + """Test setting max_staleness with an invalid type raises ValueError.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Try setting invalid type + with self.assertRaises(ValueError): + table.max_staleness = 123 # Not a string + + def test_max_staleness_to_api_repr(self): + """Test max_staleness is correctly represented in API representation.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set max_staleness + table.max_staleness = "1h" + # Convert to API representation + resource = table.to_api_repr() + self.assertEqual(resource.get("maxStaleness"), "1h") + class Test_row_from_mapping(unittest.TestCase, _SchemaBase): PROJECT = "prahj-ekt" @@ -2011,7 +2086,7 @@ def _make_one( path=None, schema=None, table=None, - **kwargs + **kwargs, ): from google.cloud.bigquery.table import TableReference @@ -5822,3 +5897,73 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): for klass in (mut.TableReference, mut.Table, mut.TableListItem): got = klass.from_string(table_path).to_bqstorage() assert got == expected + + +@pytest.mark.parametrize("preserve_order", [True, False]) +def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud import 
bigquery_storage + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator._preserve_order = preserve_order + + max_stream_count = 132 + result_iterable = row_iterator.to_arrow_iterable( + bqstorage_client=bqstorage_client, max_stream_count=max_stream_count + ) + list(result_iterable) + bqstorage_client.create_read_session.assert_called_once_with( + parent=mock.ANY, + read_session=mock.ANY, + max_stream_count=max_stream_count if not preserve_order else 1, + ) + + +@pytest.mark.parametrize("preserve_order", [True, False]) +def test_to_dataframe_iterable_w_bqstorage_max_stream_count(preserve_order): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator._preserve_order = preserve_order + + max_stream_count = 132 + result_iterable = row_iterator.to_dataframe_iterable( + bqstorage_client=bqstorage_client, max_stream_count=max_stream_count + ) + list(result_iterable) + bqstorage_client.create_read_session.assert_called_once_with( + parent=mock.ANY, + read_session=mock.ANY, + max_stream_count=max_stream_count if not preserve_order else 1, + )
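
A minimal, illustrative sketch (not part of the patch) of the two client-facing surfaces exercised by the new tests above — dataset resource tags and table max_staleness. The project, dataset, table, and tag names are placeholders; the tag key/value pairs are assumed to already exist in Resource Manager, as the system test creates them before use.

# Illustrative usage only; IDs and tag names below are placeholders.
from google.cloud import bigquery

client = bigquery.Client()

# Attach resource tags to a dataset, keyed as "<project>/<tag_key_short_name>",
# then update just that field (mirrors test_update_dataset above).
dataset = client.get_dataset("my-project.my_dataset")
dataset.resource_tags = {"my-project/env": "prod"}
dataset = client.update_dataset(dataset, ["resource_tags"])

# Setting resource_tags to None clears all tags, as the system test asserts.
dataset.resource_tags = None
dataset = client.update_dataset(dataset, ["resource_tags"])

# Set a staleness bound on a table; the unit tests use interval strings such
# as "30m" and "1h", and it surfaces as "maxStaleness" in the API repr.
table = client.get_table("my-project.my_dataset.my_table")
table.max_staleness = "1h"
print(table.to_api_repr().get("maxStaleness"))  # -> "1h"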