diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index e2b39f94..cb06536d 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 + digest: sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 412b0b56..4e1b1fb8 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -40,6 +40,7 @@ RUN apt-get update \ libssl-dev \ libsqlite3-dev \ portaudio19-dev \ + python3-distutils \ redis-server \ software-properties-common \ ssh \ @@ -59,40 +60,8 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb - -COPY fetch_gpg_keys.sh /tmp -# Install the desired versions of Python. -RUN set -ex \ - && export GNUPGHOME="$(mktemp -d)" \ - && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ - && /tmp/fetch_gpg_keys.sh \ - && for PYTHON_VERSION in 3.7.8 3.8.5; do \ - wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ - && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ - && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ - && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ - && mkdir -p /usr/src/python-${PYTHON_VERSION} \ - && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ - && rm python-${PYTHON_VERSION}.tar.xz \ - && cd /usr/src/python-${PYTHON_VERSION} \ - && ./configure \ - --enable-shared \ - # This works only on Python 2.7 and throws a warning on every other - # version, but seems otherwise harmless. - --enable-unicode=ucs4 \ - --with-system-ffi \ - --without-ensurepip \ - && make -j$(nproc) \ - && make install \ - && ldconfig \ - ; done \ - && rm -rf "${GNUPGHOME}" \ - && rm -rf /usr/src/python* \ - && rm -rf ~/.cache/ - RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.7 /tmp/get-pip.py \ && python3.8 /tmp/get-pip.py \ && rm /tmp/get-pip.py -CMD ["python3.7"] +CMD ["python3.8"] diff --git a/.kokoro/docker/docs/fetch_gpg_keys.sh b/.kokoro/docker/docs/fetch_gpg_keys.sh deleted file mode 100755 index d653dd86..00000000 --- a/.kokoro/docker/docs/fetch_gpg_keys.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A script to fetch gpg keys with retry. -# Avoid jinja parsing the file. -# - -function retry { - if [[ "${#}" -le 1 ]]; then - echo "Usage: ${0} retry_count commands.." - exit 1 - fi - local retries=${1} - local command="${@:2}" - until [[ "${retries}" -le 0 ]]; do - $command && return 0 - if [[ $? -ne 0 ]]; then - echo "command failed, retrying" - ((retries--)) - fi - done - return 1 -} - -# 3.6.9, 3.7.5 (Ned Deily) -retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ - 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D - -# 3.8.0 (Ɓukasz Langa) -retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ - E3FF2839C048B25C084DEBE9B26995E310250568 - -# diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index cf5de74c..311a8d54 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -20,9 +20,9 @@ set -eo pipefail # Enables `**` to include files nested inside sub-folders shopt -s globstar -# Exit early if samples directory doesn't exist -if [ ! -d "./samples" ]; then - echo "No tests run. `./samples` not found" +# Exit early if samples don't exist +if ! find samples -name 'requirements.txt' | grep -q .; then + echo "No tests run. './samples/**/requirements.txt' not found" exit 0 fi diff --git a/CHANGELOG.md b/CHANGELOG.md index 90b45ce6..35bd37da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery-storage/#history +### [2.6.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.6.0...v2.6.1) (2021-07-20) + + +### Bug Fixes + +* **deps:** pin 'google-{api,cloud}-core', 'google-auth' to allow 2.x versions ([#240](https://www.github.com/googleapis/python-bigquery-storage/issues/240)) ([8f848e1](https://www.github.com/googleapis/python-bigquery-storage/commit/8f848e18379085160492cdd2d12dc8de50a46c8e)) + + +### Documentation + +* pandas DataFrame samples are more standalone ([#224](https://www.github.com/googleapis/python-bigquery-storage/issues/224)) ([4026997](https://www.github.com/googleapis/python-bigquery-storage/commit/4026997d7a286b63ed2b969c0bd49de59635326d)) + ## [2.6.0](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.5.0...v2.6.0) (2021-07-09) diff --git a/samples/conftest.py b/samples/conftest.py new file mode 100644 index 00000000..92068ef5 --- /dev/null +++ b/samples/conftest.py @@ -0,0 +1,22 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest + + +@pytest.fixture(scope="session") +def project_id(): + return os.environ["GOOGLE_CLOUD_PROJECT"] diff --git a/samples/quickstart/quickstart_test.py b/samples/quickstart/quickstart_test.py index 23f3c350..8e1e0dfd 100644 --- a/samples/quickstart/quickstart_test.py +++ b/samples/quickstart/quickstart_test.py @@ -13,9 +13,6 @@ # limitations under the License. import datetime -import os - -import pytest from . import quickstart @@ -27,11 +24,6 @@ def now_millis(): ) -@pytest.fixture() -def project_id(): - return os.environ["GOOGLE_CLOUD_PROJECT"] - - def test_quickstart_wo_snapshot(capsys, project_id): quickstart.main(project_id) out, _ = capsys.readouterr() diff --git a/samples/quickstart/requirements.txt b/samples/quickstart/requirements.txt index 9eddd046..ca069335 100644 --- a/samples/quickstart/requirements.txt +++ b/samples/quickstart/requirements.txt @@ -1,2 +1,2 @@ fastavro -google-cloud-bigquery-storage==2.5.0 +google-cloud-bigquery-storage==2.6.0 diff --git a/samples/to_dataframe/read_query_results.py b/samples/to_dataframe/read_query_results.py new file mode 100644 index 00000000..45bae1ea --- /dev/null +++ b/samples/to_dataframe/read_query_results.py @@ -0,0 +1,47 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def read_query_results(): + # [START bigquerystorage_pandas_tutorial_read_query_results] + from google.cloud import bigquery + + bqclient = bigquery.Client() + + # Download query results. + query_string = """ + SELECT + CONCAT( + 'https://stackoverflow.com/questions/', + CAST(id as STRING)) as url, + view_count + FROM `bigquery-public-data.stackoverflow.posts_questions` + WHERE tags like '%google-bigquery%' + ORDER BY view_count DESC + """ + + dataframe = ( + bqclient.query(query_string) + .result() + .to_dataframe( + # Optionally, explicitly request to use the BigQuery Storage API. As of + # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage + # API is used by default. + create_bqstorage_client=True, + ) + ) + print(dataframe.head()) + # [END bigquerystorage_pandas_tutorial_read_query_results] + + return dataframe diff --git a/samples/to_dataframe/read_query_results_test.py b/samples/to_dataframe/read_query_results_test.py new file mode 100644 index 00000000..55b55a08 --- /dev/null +++ b/samples/to_dataframe/read_query_results_test.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import read_query_results + + +def test_read_query_results(capsys): + read_query_results.read_query_results() + out, _ = capsys.readouterr() + assert "stackoverflow" in out diff --git a/samples/to_dataframe/read_table_bigquery.py b/samples/to_dataframe/read_table_bigquery.py new file mode 100644 index 00000000..82d8879b --- /dev/null +++ b/samples/to_dataframe/read_table_bigquery.py @@ -0,0 +1,42 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def read_table(): + # [START bigquerystorage_pandas_tutorial_read_table] + from google.cloud import bigquery + + bqclient = bigquery.Client() + + # Download a table. + table = bigquery.TableReference.from_string( + "bigquery-public-data.utility_us.country_code_iso" + ) + rows = bqclient.list_rows( + table, + selected_fields=[ + bigquery.SchemaField("country_name", "STRING"), + bigquery.SchemaField("fips_code", "STRING"), + ], + ) + dataframe = rows.to_dataframe( + # Optionally, explicitly request to use the BigQuery Storage API. As of + # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage + # API is used by default. + create_bqstorage_client=True, + ) + print(dataframe.head()) + # [END bigquerystorage_pandas_tutorial_read_table] + + return dataframe diff --git a/samples/to_dataframe/read_table_bigquery_test.py b/samples/to_dataframe/read_table_bigquery_test.py new file mode 100644 index 00000000..c8301857 --- /dev/null +++ b/samples/to_dataframe/read_table_bigquery_test.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import read_table_bigquery + + +def test_read_table(capsys): + read_table_bigquery.read_table() + out, _ = capsys.readouterr() + assert "country_name" in out diff --git a/samples/to_dataframe/read_table_bqstorage.py b/samples/to_dataframe/read_table_bqstorage.py new file mode 100644 index 00000000..0a3ae777 --- /dev/null +++ b/samples/to_dataframe/read_table_bqstorage.py @@ -0,0 +1,69 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def read_table(your_project_id): + original_your_project_id = your_project_id + # [START bigquerystorage_pandas_tutorial_read_session] + your_project_id = "project-for-read-session" + # [END bigquerystorage_pandas_tutorial_read_session] + your_project_id = original_your_project_id + + # [START bigquerystorage_pandas_tutorial_read_session] + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage import types + import pandas + + bqstorageclient = bigquery_storage.BigQueryReadClient() + + project_id = "bigquery-public-data" + dataset_id = "new_york_trees" + table_id = "tree_species" + table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}" + + # Select columns to read with read options. If no read options are + # specified, the whole table is read. + read_options = types.ReadSession.TableReadOptions( + selected_fields=["species_common_name", "fall_color"] + ) + + parent = "projects/{}".format(your_project_id) + + requested_session = types.ReadSession( + table=table, + # Avro is also supported, but the Arrow data format is optimized to + # work well with column-oriented data structures such as pandas + # DataFrames. + data_format=types.DataFormat.ARROW, + read_options=read_options, + ) + read_session = bqstorageclient.create_read_session( + parent=parent, read_session=requested_session, max_stream_count=1, + ) + + # This example reads from only a single stream. Read from multiple streams + # to fetch data faster. Note that the session may not contain any streams + # if there are no rows to read. + stream = read_session.streams[0] + reader = bqstorageclient.read_rows(stream.name) + + # Parse all Arrow blocks and create a dataframe. + frames = [] + for message in reader.rows().pages: + frames.append(message.to_dataframe()) + dataframe = pandas.concat(frames) + print(dataframe.head()) + # [END bigquerystorage_pandas_tutorial_read_session] + + return dataframe diff --git a/samples/to_dataframe/read_table_bqstorage_test.py b/samples/to_dataframe/read_table_bqstorage_test.py new file mode 100644 index 00000000..cc093078 --- /dev/null +++ b/samples/to_dataframe/read_table_bqstorage_test.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import read_table_bqstorage + + +def test_read_table(capsys, project_id): + read_table_bqstorage.read_table(your_project_id=project_id) + out, _ = capsys.readouterr() + assert "species_common_name" in out diff --git a/samples/to_dataframe/requirements.txt b/samples/to_dataframe/requirements.txt index 45aeaa2f..142d4942 100644 --- a/samples/to_dataframe/requirements.txt +++ b/samples/to_dataframe/requirements.txt @@ -1,8 +1,8 @@ -google-auth==1.32.1 -google-cloud-bigquery-storage==2.5.0 -google-cloud-bigquery==2.20.0 +google-auth==1.33.0 +google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery==2.21.0 pyarrow==4.0.1 -ipython==7.10.2; python_version > '3.0' -ipython==5.9.0; python_version < '3.0' -pandas==0.25.3; python_version > '3.0' -pandas==0.24.2; python_version < '3.0' +ipython==7.24.0; python_version > '3.6' +ipython==7.16.1; python_version <= '3.6' +pandas==1.2.5; python_version > '3.6' +pandas==1.1.5; python_version <= '3.6' diff --git a/setup.py b/setup.py index 59c70ba2..b09e3233 100644 --- a/setup.py +++ b/setup.py @@ -21,10 +21,13 @@ name = "google-cloud-bigquery-storage" description = "BigQuery Storage API API client library" -version = "2.6.0" +version = "2.6.1" release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.26.0, < 2.0.0dev", + # NOTE: Maintainers, please do not require google-api-core>=2.x.x + # Until this issue is closed + # https://github.com/googleapis/google-cloud-python/issues/10566 + "google-api-core[grpc] >= 1.26.0, <3.0.0dev", "proto-plus >= 1.4.0", "packaging >= 14.3", "libcst >= 0.2.5",