diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index e2b39f946..cb06536da 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 + digest: sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 412b0b56a..4e1b1fb8b 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -40,6 +40,7 @@ RUN apt-get update \ libssl-dev \ libsqlite3-dev \ portaudio19-dev \ + python3-distutils \ redis-server \ software-properties-common \ ssh \ @@ -59,40 +60,8 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb - -COPY fetch_gpg_keys.sh /tmp -# Install the desired versions of Python. -RUN set -ex \ - && export GNUPGHOME="$(mktemp -d)" \ - && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ - && /tmp/fetch_gpg_keys.sh \ - && for PYTHON_VERSION in 3.7.8 3.8.5; do \ - wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ - && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ - && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ - && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ - && mkdir -p /usr/src/python-${PYTHON_VERSION} \ - && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ - && rm python-${PYTHON_VERSION}.tar.xz \ - && cd /usr/src/python-${PYTHON_VERSION} \ - && ./configure \ - --enable-shared \ - # This works only on Python 2.7 and throws a warning on every other - # version, but seems otherwise harmless. - --enable-unicode=ucs4 \ - --with-system-ffi \ - --without-ensurepip \ - && make -j$(nproc) \ - && make install \ - && ldconfig \ - ; done \ - && rm -rf "${GNUPGHOME}" \ - && rm -rf /usr/src/python* \ - && rm -rf ~/.cache/ - RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.7 /tmp/get-pip.py \ && python3.8 /tmp/get-pip.py \ && rm /tmp/get-pip.py -CMD ["python3.7"] +CMD ["python3.8"] diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index cf5de74c1..311a8d54b 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -20,9 +20,9 @@ set -eo pipefail # Enables `**` to include files nested inside sub-folders shopt -s globstar -# Exit early if samples directory doesn't exist -if [ ! -d "./samples" ]; then - echo "No tests run. `./samples` not found" +# Exit early if samples don't exist +if ! find samples -name 'requirements.txt' | grep -q .; then + echo "No tests run. 
'./samples/**/requirements.txt' not found" exit 0 fi diff --git a/CHANGELOG.md b/CHANGELOG.md index 7344542b4..2439d64b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history + +## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) + + +### Features + +* add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields ([#736](https://www.github.com/googleapis/python-bigquery/issues/736)) ([c45a738](https://www.github.com/googleapis/python-bigquery/commit/c45a7380871af3dfbd3c45524cb606c60e1a01d1)) +* add standard sql table type, update scalar type enums ([#777](https://www.github.com/googleapis/python-bigquery/issues/777)) ([b8b5433](https://www.github.com/googleapis/python-bigquery/commit/b8b5433898ec881f8da1303614780a660d94733a)) +* add support for more detailed DML stats ([#758](https://www.github.com/googleapis/python-bigquery/issues/758)) ([36fe86f](https://www.github.com/googleapis/python-bigquery/commit/36fe86f41c1a8f46167284f752a6d6bbf886a04b)) +* add support for user defined Table View Functions ([#724](https://www.github.com/googleapis/python-bigquery/issues/724)) ([8c7b839](https://www.github.com/googleapis/python-bigquery/commit/8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3)) + + +### Bug Fixes + +* avoid possible job already exists error ([#751](https://www.github.com/googleapis/python-bigquery/issues/751)) ([45b9308](https://www.github.com/googleapis/python-bigquery/commit/45b93089f5398740413104285cc8acfd5ebc9c08)) + + +### Dependencies + +* allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` ([#770](https://www.github.com/googleapis/python-bigquery/issues/770)) ([87a09fa](https://www.github.com/googleapis/python-bigquery/commit/87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65)) + + +### Documentation + +* add loading data from Firestore backup sample ([#737](https://www.github.com/googleapis/python-bigquery/issues/737)) ([22fd848](https://www.github.com/googleapis/python-bigquery/commit/22fd848cae4af1148040e1faa31dd15a4d674687)) + ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) diff --git a/docs/reference.rst b/docs/reference.rst index cb2faa5ec..8a5bff9a4 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -58,6 +58,7 @@ Job-Related Types job.Compression job.CreateDisposition job.DestinationFormat + job.DmlStats job.Encoding job.OperationType job.QueryPlanEntry @@ -117,6 +118,7 @@ Routine routine.Routine routine.RoutineArgument routine.RoutineReference + routine.RoutineType Schema ====== diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 65dde5d94..222aadcc9 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -56,6 +56,7 @@ from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import DestinationFormat +from google.cloud.bigquery.job import DmlStats from 
google.cloud.bigquery.job import Encoding from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import ExtractJobConfig @@ -84,6 +85,7 @@ from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning @@ -142,6 +144,7 @@ "BigtableOptions", "BigtableColumnFamily", "BigtableColumn", + "DmlStats", "CSVOptions", "GoogleSheetsOptions", "ParquetOptions", @@ -160,6 +163,7 @@ "KeyResultStatementKind", "OperationType", "QueryPriority", + "RoutineType", "SchemaUpdateOption", "SourceFormat", "SqlTypeNames", diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 77054542a..bf0f80e22 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -26,7 +26,7 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -import pkg_resources +import packaging.version from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -41,31 +41,65 @@ re.VERBOSE, ) -_MIN_BQ_STORAGE_VERSION = pkg_resources.parse_version("2.0.0") +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") +_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") -def _verify_bq_storage_version(): - """Verify that a recent enough version of BigQuery Storage extra is installed. +class BQStorageVersions: + """Version comparisons for google-cloud-bigqueyr-storage package.""" - The function assumes that google-cloud-bigquery-storage extra is installed, and - should thus be used in places where this assumption holds. + def __init__(self): + self._installed_version = None - Because `pip` can install an outdated version of this extra despite the constraints - in setup.py, the the calling code can use this helper to verify the version - compatibility at runtime. - """ - from google.cloud import bigquery_storage + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of google-cloud-bigquery-storage.""" + if self._installed_version is None: + from google.cloud import bigquery_storage - installed_version = pkg_resources.parse_version( - getattr(bigquery_storage, "__version__", "legacy") - ) + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(bigquery_storage, "__version__", "0.0.0") + ) - if installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {installed_version})." - ) - raise LegacyBigQueryStorageError(msg) + return self._installed_version + + @property + def is_read_session_optional(self) -> bool: + """True if read_session is optional to rows(). 
+ + See: https://github.com/googleapis/python-bigquery-storage/pull/228 + """ + return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + + def verify_version(self): + """Verify that a recent enough version of BigQuery Storage extra is + installed. + + The function assumes that google-cloud-bigquery-storage extra is + installed, and should thus be used in places where this assumption + holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Raises: + LegacyBigQueryStorageError: + If the google-cloud-bigquery-storage package is outdated. + """ + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= 2.0.0 (version found: {self.installed_version})." + ) + raise LegacyBigQueryStorageError(msg) + + +BQ_STORAGE_VERSIONS = BQStorageVersions() def _not_null(value, field): diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 285c0e83c..2ff96da4d 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -41,6 +41,7 @@ # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. _ARROW_COMPRESSION_SUPPORT = True +from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema @@ -590,7 +591,14 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - rowstream = bqstorage_client.read_rows(stream.name).rows(session) + reader = bqstorage_client.read_rows(stream.name) + + # Avoid deprecation warnings for passing in unnecessary read session. + # https://github.com/googleapis/python-bigquery-storage/issues/229 + if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + rowstream = reader.rows() + else: + rowstream = reader.rows(session) for page in rowstream.pages: if download_state.done: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2a02c7629..8572ba911 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import _verify_bq_storage_version +from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -508,7 +508,7 @@ def _ensure_bqstorage_client( return None try: - _verify_bq_storage_version() + BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return None @@ -3190,6 +3190,7 @@ def query( If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` class. 
""" + job_id_given = job_id is not None job_id = _make_job_id(job_id, job_id_prefix) if project is None: @@ -3221,9 +3222,30 @@ def query( job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) - query_job._begin(retry=retry, timeout=timeout) - return query_job + try: + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job + else: + return query_job def insert_rows( self, diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index ef35dffe0..0da01d665 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -191,9 +191,11 @@ class KeyResultStatementKind: "DATE", "TIME", "DATETIME", + "INTERVAL", "GEOGRAPHY", "NUMERIC", "BIGNUMERIC", + "JSON", ) ) diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 6bdfa09be..4c16d0e20 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -31,6 +31,7 @@ from google.cloud.bigquery.job.load import LoadJob from google.cloud.bigquery.job.load import LoadJobConfig from google.cloud.bigquery.job.query import _contains_order_by +from google.cloud.bigquery.job.query import DmlStats from google.cloud.bigquery.job.query import QueryJob from google.cloud.bigquery.job.query import QueryJobConfig from google.cloud.bigquery.job.query import QueryPlanEntry @@ -66,6 +67,7 @@ "LoadJob", "LoadJobConfig", "_contains_order_by", + "DmlStats", "QueryJob", "QueryJobConfig", "QueryPlanEntry", diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index bdee5cb6b..f1b045412 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -14,7 +14,7 @@ """Classes for load jobs.""" -from typing import FrozenSet, Iterable, Optional +from typing import FrozenSet, List, Iterable, Optional from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions @@ -25,7 +25,6 @@ from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference @@ -300,6 +299,27 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def projection_fields(self) -> Optional[List[str]]: + """Optional[List[str]]: If + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format` is set to + "DATASTORE_BACKUP", indicates which entity properties to load into + BigQuery from a Cloud Datastore backup. + + Property names are case sensitive and must be top-level properties. If + no properties are specified, BigQuery loads all properties. 
If any + named property isn't found in the Cloud Datastore backup, an invalid + error is returned in the job result. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.projection_fields + """ + return self._get_sub_prop("projectionFields") + + @projection_fields.setter + def projection_fields(self, value: Optional[List[str]]): + self._set_sub_prop("projectionFields", value) + @property def quote_character(self): """Optional[str]: Character used to quote data sections (CSV only). diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 6ff9f2647..2cb7ee28e 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -114,6 +114,35 @@ def _to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class DmlStats(typing.NamedTuple): + """Detailed statistics for DML statements. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/DmlStats + """ + + inserted_row_count: int = 0 + """Number of inserted rows. Populated by DML INSERT and MERGE statements.""" + + deleted_row_count: int = 0 + """Number of deleted rows. populated by DML DELETE, MERGE and TRUNCATE statements. + """ + + updated_row_count: int = 0 + """Number of updated rows. Populated by DML UPDATE and MERGE statements.""" + + @classmethod + def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": + # NOTE: The field order here must match the order of fields set at the + # class level. + api_fields = ("insertedRowCount", "deletedRowCount", "updatedRowCount") + + args = ( + int(stats.get(api_field, default_val)) + for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) + ) + return cls(*args) + + class ScriptOptions: """Options controlling the execution of scripts. @@ -1079,6 +1108,14 @@ def estimated_bytes_processed(self): result = int(result) return result + @property + def dml_stats(self) -> Optional[DmlStats]: + stats = self._job_statistics().get("dmlStats") + if stats is None: + return None + else: + return DmlStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout @@ -1349,12 +1386,12 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. - ..versionadded:: 2.21.0 + .. versionadded:: 2.21.0 Returns: pyarrow.Table @@ -1366,7 +1403,7 @@ def to_arrow( ValueError: If the :mod:`pyarrow` library cannot be imported. - ..versionadded:: 1.17.0 + .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_arrow( @@ -1415,7 +1452,7 @@ def to_dataframe( :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` for details. - ..versionadded:: 1.11.0 + .. versionadded:: 1.11.0 create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1424,18 +1461,18 @@ def to_dataframe( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 date_as_object (Optional[bool]): If ``True`` (default), cast dates to objects. If ``False``, convert to datetime64[ns] dtype. - ..versionadded:: 1.26.0 + .. 
versionadded:: 1.26.0 max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. - ..versionadded:: 2.21.0 + .. versionadded:: 2.21.0 Returns: A :class:`~pandas.DataFrame` populated with row data and column diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py index d1c79b05e..7353073c8 100644 --- a/google/cloud/bigquery/routine/__init__.py +++ b/google/cloud/bigquery/routine/__init__.py @@ -19,6 +19,7 @@ from google.cloud.bigquery.routine.routine import Routine from google.cloud.bigquery.routine.routine import RoutineArgument from google.cloud.bigquery.routine.routine import RoutineReference +from google.cloud.bigquery.routine.routine import RoutineType __all__ = ( @@ -26,4 +27,5 @@ "Routine", "RoutineArgument", "RoutineReference", + "RoutineType", ) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index bbc0a7693..a776212c3 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -21,6 +21,21 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers import google.cloud.bigquery_v2.types +from google.cloud.bigquery_v2.types import StandardSqlTableType + + +class RoutineType: + """The fine-grained type of the routine. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinetype + + .. versionadded:: 2.22.0 + """ + + ROUTINE_TYPE_UNSPECIFIED = "ROUTINE_TYPE_UNSPECIFIED" + SCALAR_FUNCTION = "SCALAR_FUNCTION" + PROCEDURE = "PROCEDURE" + TABLE_VALUED_FUNCTION = "TABLE_VALUED_FUNCTION" class Routine(object): @@ -48,6 +63,7 @@ class Routine(object): "modified": "lastModifiedTime", "reference": "routineReference", "return_type": "returnType", + "return_table_type": "returnTableType", "type_": "routineType", "description": "description", "determinism_level": "determinismLevel", @@ -204,6 +220,35 @@ def return_type(self, value): resource = None self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource + @property + def return_table_type(self) -> StandardSqlTableType: + """The return type of a Table Valued Function (TVF) routine. + + .. versionadded:: 2.22.0 + """ + resource = self._properties.get( + self._PROPERTY_TO_API_FIELD["return_table_type"] + ) + if not resource: + return resource + + output = google.cloud.bigquery_v2.types.StandardSqlTableType() + raw_protobuf = json_format.ParseDict( + resource, output._pb, ignore_unknown_fields=True + ) + return type(output).wrap(raw_protobuf) + + @return_table_type.setter + def return_table_type(self, value): + if not value: + resource = None + else: + resource = { + "columns": [json_format.MessageToDict(col._pb) for col in value.columns] + } + + self._properties[self._PROPERTY_TO_API_FIELD["return_table_type"]] = resource + @property def imported_libraries(self): """List[str]: The path of the imported JavaScript libraries. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 765110ae6..18d969a3f 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1565,7 +1565,7 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False try: - _helpers._verify_bq_storage_version() + _helpers.BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False @@ -1684,7 +1684,7 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. 
- ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 Returns: pyarrow.Table @@ -1695,7 +1695,7 @@ def to_arrow( Raises: ValueError: If the :mod:`pyarrow` library cannot be imported. - ..versionadded:: 1.17.0 + .. versionadded:: 1.17.0 """ if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) @@ -1775,7 +1775,7 @@ def to_dataframe_iterable( created by the server. If ``max_queue_size`` is :data:`None`, the queue size is infinite. - ..versionadded:: 2.14.0 + .. versionadded:: 2.14.0 Returns: pandas.DataFrame: @@ -1861,7 +1861,7 @@ def to_dataframe( Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - ..versionadded:: 1.11.0 + .. versionadded:: 1.11.0 create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1870,13 +1870,13 @@ def to_dataframe( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 date_as_object (Optional[bool]): If ``True`` (default), cast dates to objects. If ``False``, convert to datetime64[ns] dtype. - ..versionadded:: 1.26.0 + .. versionadded:: 1.26.0 Returns: pandas.DataFrame: @@ -2010,7 +2010,7 @@ def to_dataframe_iterable( ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. - ..versionadded:: 2.21.0 + .. versionadded:: 2.21.0 Args: bqstorage_client: diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 563b0e160..2db0ca518 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.21.0" +__version__ = "2.22.0" diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py index 476bd5747..f9957efa9 100644 --- a/google/cloud/bigquery_v2/__init__.py +++ b/google/cloud/bigquery_v2/__init__.py @@ -26,6 +26,7 @@ from .types.standard_sql import StandardSqlDataType from .types.standard_sql import StandardSqlField from .types.standard_sql import StandardSqlStructType +from .types.standard_sql import StandardSqlTableType from .types.table_reference import TableReference __all__ = ( @@ -40,5 +41,6 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "StandardSqlTableType", "TableReference", ) diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index 9c850dca1..83bbb3a54 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -27,6 +27,7 @@ StandardSqlDataType, StandardSqlField, StandardSqlStructType, + StandardSqlTableType, ) from .table_reference import TableReference @@ -42,5 +43,6 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "StandardSqlTableType", "TableReference", ) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index b2191a417..7a845fc48 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -18,7 +18,12 @@ __protobuf__ = proto.module( package="google.cloud.bigquery.v2", - manifest={"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType",}, + manifest={ + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", + }, ) @@ -54,9 +59,11 @@ class TypeKind(proto.Enum): 
DATE = 10 TIME = 20 DATETIME = 21 + INTERVAL = 26 GEOGRAPHY = 22 NUMERIC = 23 BIGNUMERIC = 24 + JSON = 25 ARRAY = 16 STRUCT = 17 @@ -97,4 +104,14 @@ class StandardSqlStructType(proto.Message): fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) +class StandardSqlTableType(proto.Message): + r"""A table type + Attributes: + columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): + The columns in this table type + """ + + columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + + __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/owlbot.py b/owlbot.py index 476c5ee5d..09845480a 100644 --- a/owlbot.py +++ b/owlbot.py @@ -70,6 +70,7 @@ library, excludes=[ "*.tar.gz", + ".coveragerc", "docs/index.rst", f"docs/bigquery_{library.name}/*_service.rst", f"docs/bigquery_{library.name}/services.rst", diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 30a59c15a..c7aa209ad 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.20.0 +google-cloud-bigquery==2.21.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index cb11eb68f..000e5f85c 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -12,38 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime -import random - from google.cloud import bigquery import pytest +import test_utils.prefixer -RESOURCE_PREFIX = "python_bigquery_samples_snippets" -RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S" -RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2 - - -def resource_prefix() -> str: - timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT) - random_string = hex(random.randrange(1000000))[2:] - return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" - - -def resource_name_to_date(resource_name: str): - start_date = len(RESOURCE_PREFIX) + 1 - date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] - return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT) +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets") @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) for dataset in bigquery_client.list_datasets(): - if ( - dataset.dataset_id.startswith(RESOURCE_PREFIX) - and resource_name_to_date(dataset.dataset_id) < yesterday - ): + if prefixer.should_cleanup(dataset.dataset_id): bigquery_client.delete_dataset( dataset, delete_contents=True, not_found_ok=True ) @@ -62,7 +42,7 @@ def project_id(bigquery_client): @pytest.fixture(scope="session") def dataset_id(bigquery_client: bigquery.Client, project_id: str): - dataset_id = resource_prefix() + dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" dataset = bigquery.Dataset(full_dataset_id) bigquery_client.create_dataset(dataset) @@ -70,6 +50,17 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str): bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture +def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + """Create a new table ID each time, so random_table_id can be used as + target for load jobs. 
+ """ + random_table_id = prefixer.create_prefix() + full_table_id = f"{project_id}.{dataset_id}.{random_table_id}" + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture def bigquery_client_patch(monkeypatch, bigquery_client): monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/snippets/load_table_uri_firestore.py b/samples/snippets/load_table_uri_firestore.py new file mode 100644 index 000000000..bf9d01349 --- /dev/null +++ b/samples/snippets/load_table_uri_firestore.py @@ -0,0 +1,55 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_firestore(table_id): + orig_table_id = table_id + # [START bigquery_load_table_gcs_firestore] + # TODO(developer): Set table_id to the ID of the table to create. + table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set uri to the path of the kind export metadata + uri = ( + "gs://cloud-samples-data/bigquery/us-states" + "/2021-07-02T16:04:48_70344/all_namespaces/kind_us-states" + "/all_namespaces_kind_us-states.export_metadata" + ) + + # TODO(developer): Set projection_fields to a list of document properties + # to import. Leave unset or set to `None` for all fields. + projection_fields = ["name", "post_abbr"] + + # [END bigquery_load_table_gcs_firestore] + table_id = orig_table_id + + # [START bigquery_load_table_gcs_firestore] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.DATASTORE_BACKUP, + projection_fields=projection_fields, + ) + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_firestore] diff --git a/samples/snippets/load_table_uri_firestore_test.py b/samples/snippets/load_table_uri_firestore_test.py new file mode 100644 index 000000000..ffa02cdf9 --- /dev/null +++ b/samples/snippets/load_table_uri_firestore_test.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import load_table_uri_firestore + + +def test_load_table_uri_firestore(capsys, random_table_id): + load_table_uri_firestore.load_table_uri_firestore(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index b0cf76724..9e9d4e40f 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,3 @@ +google-cloud-testutils==0.3.0 pytest==6.2.4 mock==4.0.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ce02ac7ed..b62c84c33 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.20.0 +google-cloud-bigquery==2.21.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py index 3cca7a649..912fd76e2 100644 --- a/samples/snippets/test_update_with_dml.py +++ b/samples/snippets/test_update_with_dml.py @@ -15,13 +15,13 @@ from google.cloud import bigquery import pytest -from conftest import resource_prefix +from conftest import prefixer import update_with_dml @pytest.fixture def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): - table_id = f"{resource_prefix()}_update_with_dml" + table_id = f"{prefixer.create_prefix()}_update_with_dml" yield table_id full_table_id = f"{project_id}.{dataset_id}.{table_id}" bigquery_client.delete_table(full_table_id, not_found_ok=True) diff --git a/setup.py b/setup.py index fcb1dd966..71958ccf9 100644 --- a/setup.py +++ b/setup.py @@ -30,10 +30,10 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 - "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.29.0, < 3.0.0dev", "proto-plus >= 1.10.0", - "google-cloud-core >= 1.4.1, < 2.0dev", - "google-resumable-media >= 0.6.0, < 2.0dev", + "google-cloud-core >= 1.4.1, < 3.0dev", + "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", "requests >= 2.18.0, < 3.0.0dev", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 7234333a2..ceb62b8cd 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1521,6 +1521,62 @@ def test_query_statistics(self): self.assertGreater(stages_with_inputs, 0) self.assertGreater(len(plan), stages_with_inputs) + def test_dml_statistics(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.test_dml_statistics".format(Config.CLIENT.project, dataset_id) + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. 
+ sql = f""" + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 4 + assert query_job.dml_stats.updated_row_count == 0 + assert query_job.dml_stats.deleted_row_count == 0 + + # Update some of the rows. + sql = f""" + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 0 + assert query_job.dml_stats.updated_row_count == 2 + assert query_job.dml_stats.deleted_row_count == 0 + + # Now delete a few rows and check the stats. + sql = f""" + DELETE FROM `{table_id}` + WHERE foo != "two"; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 0 + assert query_job.dml_stats.updated_row_count == 0 + assert query_job.dml_stats.deleted_row_count == 3 + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) @@ -2172,6 +2228,85 @@ def test_create_routine(self): assert len(rows) == 1 assert rows[0].max_value == 100.0 + def test_create_tvf_routine(self): + from google.cloud.bigquery import Routine, RoutineArgument, RoutineType + + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + INT64 = StandardSqlDataType.TypeKind.INT64 + STRING = StandardSqlDataType.TypeKind.STRING + + client = Config.CLIENT + + dataset = self.temp_dataset(_make_dataset_id("create_tvf_routine")) + routine_ref = dataset.routine("test_tvf_routine") + + routine_body = """ + SELECT int_col, str_col + FROM ( + UNNEST([1, 2, 3]) int_col + JOIN + (SELECT str_col FROM UNNEST(["one", "two", "three"]) str_col) + ON TRUE + ) + WHERE int_col > threshold + """ + + return_table_type = StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", type=StandardSqlDataType(type_kind=INT64), + ), + StandardSqlField( + name="str_col", type=StandardSqlDataType(type_kind=STRING), + ), + ] + ) + + routine_args = [ + RoutineArgument( + name="threshold", data_type=StandardSqlDataType(type_kind=INT64), + ) + ] + + routine_def = Routine( + routine_ref, + type_=RoutineType.TABLE_VALUED_FUNCTION, + arguments=routine_args, + return_table_type=return_table_type, + body=routine_body, + ) + + # Create TVF routine. + client.delete_routine(routine_ref, not_found_ok=True) + routine = client.create_routine(routine_def) + + assert routine.body == routine_body + assert routine.return_table_type == return_table_type + assert routine.arguments == routine_args + + # Execute the routine to see if it's working as expected. 
+ query_job = client.query( + f""" + SELECT int_col, str_col + FROM `{routine.reference}`(1) + ORDER BY int_col, str_col ASC + """ + ) + + result_rows = [tuple(row) for row in query_job.result()] + expected = [ + (2, "one"), + (2, "three"), + (2, "two"), + (3, "one"), + (3, "three"), + (3, "two"), + ] + assert result_rows == expected + def test_create_table_rows_fetch_nested_schema(self): table_name = "test_table" dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 190bd16dc..cbe087dac 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -424,6 +424,17 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_projection_fields_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.projection_fields) + + def test_projection_fields_hit(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config.projection_fields = fields + self.assertEqual(config._properties["load"]["projectionFields"], fields) + self.assertEqual(config.projection_fields, fields) + def test_quote_character_missing(self): config = self._get_target_class()() self.assertIsNone(config.quote_character) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 4665933ea..482f7f3af 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -110,6 +110,24 @@ def _verify_table_definitions(self, job, config): self.assertIsNotNone(expected_ec) self.assertEqual(found_ec.to_api_repr(), expected_ec) + def _verify_dml_stats_resource_properties(self, job, resource): + query_stats = resource.get("statistics", {}).get("query", {}) + + if "dmlStats" in query_stats: + resource_dml_stats = query_stats["dmlStats"] + job_dml_stats = job.dml_stats + assert str(job_dml_stats.inserted_row_count) == resource_dml_stats.get( + "insertedRowCount", "0" + ) + assert str(job_dml_stats.updated_row_count) == resource_dml_stats.get( + "updatedRowCount", "0" + ) + assert str(job_dml_stats.deleted_row_count) == resource_dml_stats.get( + "deletedRowCount", "0" + ) + else: + assert job.dml_stats is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -118,6 +136,7 @@ def _verify_configuration_properties(self, job, configuration): def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) + self._verify_dml_stats_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -130,16 +149,19 @@ def _verifyResourceProperties(self, job, resource): self._verify_table_definitions(job, query_config) self.assertEqual(job.query, query_config["query"]) + if "createDisposition" in query_config: self.assertEqual(job.create_disposition, query_config["createDisposition"]) else: self.assertIsNone(job.create_disposition) + if "defaultDataset" in query_config: ds_ref = job.default_dataset ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} self.assertEqual(ds_ref, query_config["defaultDataset"]) else: self.assertIsNone(job.default_dataset) + if "destinationTable" in query_config: table = job.destination tb_ref = { @@ -150,14 +172,17 @@ def _verifyResourceProperties(self, job, resource): 
self.assertEqual(tb_ref, query_config["destinationTable"]) else: self.assertIsNone(job.destination) + if "priority" in query_config: self.assertEqual(job.priority, query_config["priority"]) else: self.assertIsNone(job.priority) + if "writeDisposition" in query_config: self.assertEqual(job.write_disposition, query_config["writeDisposition"]) else: self.assertIsNone(job.write_disposition) + if "destinationEncryptionConfiguration" in query_config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -166,6 +191,7 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "schemaUpdateOptions" in query_config: self.assertEqual( job.schema_update_options, query_config["schemaUpdateOptions"] @@ -190,6 +216,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.create_disposition) self.assertIsNone(job.default_dataset) self.assertIsNone(job.destination) + self.assertIsNone(job.dml_stats) self.assertIsNone(job.flatten_results) self.assertIsNone(job.priority) self.assertIsNone(job.use_query_cache) @@ -278,6 +305,26 @@ def test_from_api_repr_with_encryption(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_dml_stats(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + "statistics": { + "query": { + "dmlStats": {"insertedRowCount": "15", "updatedRowCount": "2"}, + }, + }, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption @@ -815,6 +862,23 @@ def test_estimated_bytes_processed(self): query_stats["estimatedBytesProcessed"] = str(est_bytes) self.assertEqual(job.estimated_bytes_processed, est_bytes) + def test_dml_stats(self): + from google.cloud.bigquery.job.query import DmlStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.dml_stats is None + + statistics = job._properties["statistics"] = {} + assert job.dml_stats is None + + query_stats = statistics["query"] = {} + assert job.dml_stats is None + + query_stats["dmlStats"] = {"insertedRowCount": "35"} + assert isinstance(job.dml_stats, DmlStats) + assert job.dml_stats.inserted_row_count == 35 + def test_result(self): from google.cloud.bigquery.table import RowIterator diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index 09a0efc45..e70eb097c 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ -15,6 +15,43 @@ from .helpers import _Base +class TestDmlStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import DmlStats + + return DmlStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + dml_stats = self._make_one() + assert dml_stats.inserted_row_count == 0 + assert dml_stats.deleted_row_count == 0 + assert dml_stats.updated_row_count == 0 + + def test_from_api_repr_partial_stats(self): + klass = self._get_target_class() + result = klass.from_api_repr({"deletedRowCount": "12"}) 
+ + assert isinstance(result, klass) + assert result.inserted_row_count == 0 + assert result.deleted_row_count == 12 + assert result.updated_row_count == 0 + + def test_from_api_repr_full_stats(self): + klass = self._get_target_class() + result = klass.from_api_repr( + {"updatedRowCount": "4", "insertedRowCount": "7", "deletedRowCount": "25"} + ) + + assert isinstance(result, klass) + assert result.inserted_row_count == 7 + assert result.deleted_row_count == 25 + assert result.updated_row_count == 4 + + class TestQueryPlanEntryStep(_Base): KIND = "KIND" SUBSTEPS = ("SUB1", "SUB2") diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index 0a59e7c5f..fdaf13324 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -156,12 +156,86 @@ def test_from_api_repr(target_class): assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) + assert actual_routine.return_table_type is None assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == "someValue" assert actual_routine.description == "A routine description." assert actual_routine.determinism_level == "DETERMINISTIC" +def test_from_api_repr_tvf_function(target_class): + from google.cloud.bigquery.routine import RoutineArgument + from google.cloud.bigquery.routine import RoutineReference + from google.cloud.bigquery.routine import RoutineType + + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + creation_time = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + modified_time = datetime.datetime( + 2011, 10, 1, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "routineReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + "etag": "abcdefg", + "creationTime": str(google.cloud._helpers._millis(creation_time)), + "lastModifiedTime": str(google.cloud._helpers._millis(modified_time)), + "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a", + "arguments": [{"name": "a", "dataType": {"typeKind": "INT64"}}], + "language": "SQL", + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + }, + "routineType": "TABLE_VALUED_FUNCTION", + "someNewField": "someValue", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, + } + actual_routine = target_class.from_api_repr(resource) + + assert actual_routine.project == "my-project" + assert actual_routine.dataset_id == "my_dataset" + assert actual_routine.routine_id == "my_routine" + assert ( + actual_routine.path + == "/projects/my-project/datasets/my_dataset/routines/my_routine" + ) + assert actual_routine.reference == RoutineReference.from_string( + "my-project.my_dataset.my_routine" + ) + assert actual_routine.etag == "abcdefg" + assert actual_routine.created == creation_time + assert actual_routine.modified == modified_time + assert actual_routine.arguments == [ + RoutineArgument( + name="a", + data_type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ) + ] + assert actual_routine.body == "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a" + assert actual_routine.language == "SQL" + assert actual_routine.return_type is None + assert actual_routine.return_table_type == 
StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ) + ] + ) + assert actual_routine.type_ == RoutineType.TABLE_VALUED_FUNCTION + assert actual_routine._properties["someNewField"] == "someValue" + assert actual_routine.description == "A routine description." + assert actual_routine.determinism_level == "DETERMINISTIC" + + def test_from_api_repr_w_minimal_resource(target_class): from google.cloud.bigquery.routine import RoutineReference @@ -261,6 +335,24 @@ def test_from_api_repr_w_unknown_fields(target_class): ["return_type"], {"returnType": {"typeKind": "INT64"}}, ), + ( + { + "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > 1", + "language": "SQL", + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + }, + "routineType": "TABLE_VALUED_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + }, + ["return_table_type"], + { + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + } + }, + ), ( { "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], @@ -361,6 +453,41 @@ def test_set_return_type_w_none(object_under_test): assert object_under_test._properties["returnType"] is None +def test_set_return_table_type_w_none(object_under_test): + object_under_test.return_table_type = None + assert object_under_test.return_table_type is None + assert object_under_test._properties["returnTableType"] is None + + +def test_set_return_table_type_w_not_none(object_under_test): + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + table_type = StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ), + StandardSqlField( + name="str_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.STRING), + ), + ] + ) + + object_under_test.return_table_type = table_type + + assert object_under_test.return_table_type == table_type + assert object_under_test._properties["returnTableType"] == { + "columns": [ + {"name": "int_col", "type": {"typeKind": "INT64"}}, + {"name": "str_col", "type": {"typeKind": "STRING"}}, + ] + } + + def test_set_description_w_none(object_under_test): object_under_test.description = None assert object_under_test.description is None diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index c62947d37..af026ccbe 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -26,11 +26,17 @@ @unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") -class Test_verify_bq_storage_version(unittest.TestCase): +class TestBQStorageVersions(unittest.TestCase): + def _object_under_test(self): + from google.cloud.bigquery import _helpers + + return _helpers.BQStorageVersions() + def _call_fut(self): - from google.cloud.bigquery._helpers import _verify_bq_storage_version + from google.cloud.bigquery import _helpers - return _verify_bq_storage_version() + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + return _helpers.BQ_STORAGE_VERSIONS.verify_version() def test_raises_no_error_w_recent_bqstorage(self): from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -53,10 +59,35 @@ def 
test_raises_error_w_unknown_bqstorage_version(self): with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: del fake_module.__version__ - error_pattern = r"version found: legacy" + error_pattern = r"version found: 0.0.0" with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): self._call_fut() + def test_installed_version_returns_cached(self): + versions = self._object_under_test() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + def test_installed_version_returns_parsed_version(self): + versions = self._object_under_test() + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + def test_is_read_session_optional_true(self): + versions = self._object_under_test() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): + assert versions.is_read_session_optional + + def test_is_read_session_optional_false(self): + versions = self._object_under_test() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): + assert not versions.is_read_session_optional + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index aa87e28f5..0ba671cd9 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -40,11 +40,14 @@ import pytz from google import api_core +from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT try: from google.cloud import bigquery_storage + + _helpers.BQ_STORAGE_VERSIONS.verify_version() except ImportError: # pragma: NO COVER bigquery_storage = None @@ -1311,6 +1314,72 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__download_table_bqstorage_stream_includes_read_session( + monkeypatch, module_under_test +): + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + bqstorage_client.read_rows.return_value = reader + session = google.cloud.bigquery_storage_v1.types.ReadSession() + + module_under_test._download_table_bqstorage_stream( + module_under_test._DownloadState(), + bqstorage_client, + session, + google.cloud.bigquery_storage_v1.types.ReadStream(name="test"), + queue.Queue(), + mock.Mock(), + ) + + reader.rows.assert_called_once_with(session) + + +@pytest.mark.skipif( + bigquery_storage is None + or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", +) +def test__download_table_bqstorage_stream_omits_read_session( + monkeypatch, module_under_test +): + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", 
None) + monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0") + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + bqstorage_client.read_rows.return_value = reader + session = google.cloud.bigquery_storage_v1.types.ReadSession() + + module_under_test._download_table_bqstorage_stream( + module_under_test._DownloadState(), + bqstorage_client, + session, + google.cloud.bigquery_storage_v1.types.ReadStream(name="test"), + queue.Queue(), + mock.Mock(), + ) + + reader.rows.assert_called_once_with() + + @pytest.mark.parametrize( "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize", [ diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index dffe7bdba..6b62eb85b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -663,7 +663,7 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -700,7 +700,7 @@ def test_ensure_bqstorage_client_existing_client_check_fails(self): mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -4617,6 +4617,81 @@ def test_query_w_query_parameters(self): }, ) + def test_query_job_rpc_fail_w_random_error(self): + from google.api_core.exceptions import Unknown + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Unknown("Not sure what went wrong.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(Unknown, match="Not sure what went wrong."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_job_id_given(self): + from google.api_core.exceptions import Conflict + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", 
side_effect=DataLoss("we lost yor job, sorry") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails, the original exception should be raised. + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id=None) + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): + from google.api_core.exceptions import Conflict + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", return_value=mock.sentinel.query_job + ) + + with job_begin_patcher, get_job_patcher: + result = client.query("SELECT 1;", job_id=None) + + assert result is mock.sentinel.query_job + def test_insert_rows_w_timeout(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 5e9bf28a9..d030482cc 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -368,7 +368,7 @@ def test__make_bqstorage_client_true_obsolete_dependency(): ) patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b30f16fe0..37650cd27 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1889,7 +1889,7 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( - "google.cloud.bigquery.table._helpers._verify_bq_storage_version", + "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: