diff --git a/.coveragerc b/.coveragerc index 742e899d4..c540edf34 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.flake8 b/.flake8 index 2e4387498..87f6e408c 100644 --- a/.flake8 +++ b/.flake8 @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 02a4dedce..a3da1b0d4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:240b5bcc2bafd450912d2da2be15e62bc6de2cf839823ae4bf94d4f392b451dc -# created: 2023-06-03T21:25:37.968717478Z + digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 +# created: 2023-08-02T10:53:29.114535628Z diff --git a/.github/auto-label.yaml b/.github/auto-label.yaml index 41bff0b53..b2016d119 100644 --- a/.github/auto-label.yaml +++ b/.github/auto-label.yaml @@ -1,4 +1,4 @@ -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/build.sh b/.kokoro/build.sh index ec58d54c1..4e816ecf6 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2018 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index f8137d0ae..8e39a2cc4 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/populate-secrets.sh b/.kokoro/populate-secrets.sh index f52514257..6f3972140 100755 --- a/.kokoro/populate-secrets.sh +++ b/.kokoro/populate-secrets.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC. +# Copyright 2023 Google LLC. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 1c4d62370..9eafe0be3 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 2b1f28ec0..e8e52653e 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index b83a57783..a11679f43 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -38,3 +38,12 @@ env_vars: { key: "SECRET_MANAGER_KEYS" value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" } + +# Store the packages we uploaded to PyPI. That way, we have a record of exactly +# what we published, which we can use to generate SBOMs and attestations. +action { + define_artifacts { + regex: "github/python-storage/**/*.tar.gz" + strip_prefix: "github/python-storage" + } +} diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index c7929db6d..029bd342d 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -20,9 +20,9 @@ cachetools==5.2.0 \ --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \ --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db # via google-auth -certifi==2022.12.7 \ - --hash=sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3 \ - --hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18 +certifi==2023.7.22 \ + --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ + --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 # via requests cffi==1.15.1 \ --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \ @@ -113,26 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.0 \ - --hash=sha256:0ddaee209d1cf1f180f1efa338a68c4621154de0afaef92b89486f5f96047c55 \ - --hash=sha256:14754bcdae909d66ff24b7b5f166d69340ccc6cb15731670435efd5719294895 \ - --hash=sha256:344c6de9f8bda3c425b3a41b319522ba3208551b70c2ae00099c205f0d9fd3be \ - --hash=sha256:34d405ea69a8b34566ba3dfb0521379b210ea5d560fafedf9f800a9a94a41928 \ - --hash=sha256:3680248309d340fda9611498a5319b0193a8dbdb73586a1acf8109d06f25b92d \ - --hash=sha256:3c5ef25d060c80d6d9f7f9892e1d41bb1c79b78ce74805b8cb4aa373cb7d5ec8 \ - --hash=sha256:4ab14d567f7bbe7f1cdff1c53d5324ed4d3fc8bd17c481b395db224fb405c237 \ - --hash=sha256:5c1f7293c31ebc72163a9a0df246f890d65f66b4a40d9ec80081969ba8c78cc9 \ - --hash=sha256:6b71f64beeea341c9b4f963b48ee3b62d62d57ba93eb120e1196b31dc1025e78 \ - --hash=sha256:7d92f0248d38faa411d17f4107fc0bce0c42cae0b0ba5415505df72d751bf62d \ - --hash=sha256:8362565b3835ceacf4dc8f3b56471a2289cf51ac80946f9087e66dc283a810e0 \ - --hash=sha256:84a165379cb9d411d58ed739e4af3396e544eac190805a54ba2e0322feb55c46 \ - --hash=sha256:88ff107f211ea696455ea8d911389f6d2b276aabf3231bf72c8853d22db755c5 \ - --hash=sha256:9f65e842cb02550fac96536edb1d17f24c0a338fd84eaf582be25926e993dde4 \ - --hash=sha256:a4fc68d1c5b951cfb72dfd54702afdbbf0fb7acdc9b7dc4301bbf2225a27714d \ - --hash=sha256:b7f2f5c525a642cecad24ee8670443ba27ac1fab81bba4cc24c7b6b41f2d0c75 \ - --hash=sha256:b846d59a8d5a9ba87e2c3d757ca019fa576793e8758174d3868aecb88d6fc8eb \ - --hash=sha256:bf8fc66012ca857d62f6a347007e166ed59c0bc150cefa49f28376ebe7d992a2 \ - --hash=sha256:f5d0bf9b252f30a31664b6f64432b4730bb7038339bd18b1fafe129cfc2be9be +cryptography==41.0.3 \ + --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ + --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ + 
--hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ + --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ + --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ + --hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ + --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ + --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ + --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ + --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ + --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ + --hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ + --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ + --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ + --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ + --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ + --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ + --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ + --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ + --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ + --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ + --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ + --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de # via # gcp-releasetool # secretstorage @@ -392,9 +396,9 @@ pycparser==2.21 \ --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \ --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206 # via cffi -pygments==2.13.0 \ - --hash=sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1 \ - --hash=sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42 +pygments==2.15.0 \ + --hash=sha256:77a3299119af881904cd5ecd1ac6a66214b6e9bed1f2db16993b54adede64094 \ + --hash=sha256:f7e36cffc4c517fbc252861b9a6e4644ca0e5abadf9a113c72d1358ad09b9500 # via # readme-renderer # rich diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh index ba3a707b0..63ac41dfa 100755 --- a/.kokoro/test-samples-against-head.sh +++ b/.kokoro/test-samples-against-head.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 2c6500cae..5a0f5fab6 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 11c042d34..50b35a48c 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh index f39236e94..d85b1f267 100755 --- a/.kokoro/trampoline.sh +++ b/.kokoro/trampoline.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright 2017 Google Inc. +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 4af6cdc26..59a7cf3a9 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5405cc8ff..19409cbd3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 3.9.2 + rev: 6.1.0 hooks: - id: flake8 diff --git a/.trampolinerc b/.trampolinerc index 0eee72ab6..a7dfeb42c 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Template for .trampolinerc - # Add required env vars here. 
required_envvars+=( ) diff --git a/CHANGELOG.md b/CHANGELOG.md index b04a3a05e..15c4c1f38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-storage/#history +## [2.11.0](https://github.com/googleapis/python-storage/compare/v2.10.0...v2.11.0) (2023-09-19) + + +### Features + +* Add gccl-gcs-cmd field to X-Goog-API-Client header for Transfer Manager calls ([#1119](https://github.com/googleapis/python-storage/issues/1119)) ([14a1909](https://github.com/googleapis/python-storage/commit/14a1909963cfa41208f4e25b82b7c84c5e02452f)) +* Add transfer_manager.upload_chunks_concurrently using the XML MPU API ([#1115](https://github.com/googleapis/python-storage/issues/1115)) ([56aeb87](https://github.com/googleapis/python-storage/commit/56aeb8778d25fe245ac2e1e96ef71f0dad1fec0f)) +* Support configurable retries in upload_chunks_concurrently ([#1120](https://github.com/googleapis/python-storage/issues/1120)) ([1271686](https://github.com/googleapis/python-storage/commit/1271686428c0faffd3dd1b4fd57bfe467d2817d4)) + + +### Bug Fixes + +* Split retention period tests due to caching change ([#1068](https://github.com/googleapis/python-storage/issues/1068)) ([cc191b0](https://github.com/googleapis/python-storage/commit/cc191b070c520e85030cd4cef6d7d9a7b1dd0bf4)) + + +### Documentation + +* Add Transfer Manager documentation in c.g.c ([#1109](https://github.com/googleapis/python-storage/issues/1109)) ([c1f8724](https://github.com/googleapis/python-storage/commit/c1f8724dc1c5dc180f36424324def74a5daec620)) + ## [2.10.0](https://github.com/googleapis/python-storage/compare/v2.9.0...v2.10.0) (2023-06-14) diff --git a/MANIFEST.in b/MANIFEST.in index e783f4c62..e0a667053 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/docs/conf.py b/docs/conf.py index 0e6ccdff0..bee939ca1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/docs/index.rst b/docs/index.rst index 07d236e25..1dd08278a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -32,6 +32,7 @@ API Reference storage/hmac_key storage/notification storage/retry + storage/transfer_manager More Examples diff --git a/docs/storage/transfer_manager.rst b/docs/storage/transfer_manager.rst new file mode 100644 index 000000000..24f3e4e31 --- /dev/null +++ b/docs/storage/transfer_manager.rst @@ -0,0 +1,6 @@ +Transfer Manager +~~~~~~~~~~~~~~~~ + +.. 
automodule:: google.cloud.storage.transfer_manager + :members: + :show-inheritance: \ No newline at end of file diff --git a/google/cloud/storage/_helpers.py b/google/cloud/storage/_helpers.py index 29968a9aa..77a9dffd0 100644 --- a/google/cloud/storage/_helpers.py +++ b/google/cloud/storage/_helpers.py @@ -599,19 +599,32 @@ def _get_default_headers( user_agent, content_type="application/json; charset=UTF-8", x_upload_content_type=None, + command=None, ): """Get the headers for a request. - Args: - user_agent (str): The user-agent for requests. - Returns: - Dict: The headers to be used for the request. + :type user_agent: str + :param user_agent: The user-agent for requests. + + :type command: str + :param command: + (Optional) Information about which interface for the operation was + used, to be included in the X-Goog-API-Client header. Please leave + as None unless otherwise directed. + + :rtype: dict + :returns: The headers to be used for the request. """ + x_goog_api_client = f"{user_agent} {_get_invocation_id()}" + + if command: + x_goog_api_client += f" gccl-gcs-cmd/{command}" + return { "Accept": "application/json", "Accept-Encoding": "gzip, deflate", "User-Agent": user_agent, - "X-Goog-API-Client": f"{user_agent} {_get_invocation_id()}", + "X-Goog-API-Client": x_goog_api_client, "content-type": content_type, "x-upload-content-type": x_upload_content_type or content_type, } diff --git a/google/cloud/storage/acl.py b/google/cloud/storage/acl.py index 4458966ce..1ca78f258 100644 --- a/google/cloud/storage/acl.py +++ b/google/cloud/storage/acl.py @@ -137,7 +137,6 @@ class ACL(object): # Subclasses must override to provide these attributes (typically, # as properties). - client = None reload_path = None save_path = None user_project = None diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 0d663e775..ece758dbc 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -904,7 +904,7 @@ def _do_download( ): """Perform a download without any error handling. - This is intended to be called by :meth:`download_to_file` so it can + This is intended to be called by :meth:`_prep_and_do_download` so it can be wrapped with error handling / remapping. :type transport: @@ -957,7 +957,7 @@ def _do_download( This private method does not accept ConditionalRetryPolicy values because the information necessary to evaluate the policy is instead - evaluated in client.download_blob_to_file(). + evaluated in blob._prep_and_do_download(). See the retry.py source code and docstrings in this package (google.cloud.storage.retry) for information on retry types and how @@ -1124,11 +1124,10 @@ def download_to_file( :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) - client.download_blob_to_file( - self, - file_obj=file_obj, + self._prep_and_do_download( + file_obj, + client=client, start=start, end=end, raw_download=raw_download, @@ -1143,6 +1142,33 @@ def download_to_file( retry=retry, ) + def _handle_filename_and_download(self, filename, *args, **kwargs): + """Download the contents of this blob into a named file. + + :type filename: str + :param filename: A filename to be passed to ``open``. + + For *args and **kwargs, refer to the documentation for download_to_filename() for more information. + """ + + try: + with open(filename, "wb") as file_obj: + self._prep_and_do_download( + file_obj, + *args, + **kwargs, + ) + + except resumable_media.DataCorruption: + # Delete the corrupt downloaded file. 
+ os.remove(filename) + raise + + updated = self.updated + if updated is not None: + mtime = updated.timestamp() + os.utime(file_obj.name, (mtime, mtime)) + def download_to_filename( self, filename, @@ -1250,34 +1276,23 @@ def download_to_filename( :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) - try: - with open(filename, "wb") as file_obj: - client.download_blob_to_file( - self, - file_obj, - start=start, - end=end, - raw_download=raw_download, - if_etag_match=if_etag_match, - if_etag_not_match=if_etag_not_match, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=timeout, - checksum=checksum, - retry=retry, - ) - except resumable_media.DataCorruption: - # Delete the corrupt downloaded file. - os.remove(filename) - raise - updated = self.updated - if updated is not None: - mtime = updated.timestamp() - os.utime(file_obj.name, (mtime, mtime)) + self._handle_filename_and_download( + filename, + client=client, + start=start, + end=end, + raw_download=raw_download, + if_etag_match=if_etag_match, + if_etag_not_match=if_etag_not_match, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, + ) def download_as_bytes( self, @@ -1382,11 +1397,12 @@ def download_as_bytes( :raises: :class:`google.cloud.exceptions.NotFound` """ - client = self._require_client(client) + string_buffer = BytesIO() - client.download_blob_to_file( - self, + + self._prep_and_do_download( string_buffer, + client=client, start=start, end=end, raw_download=raw_download, @@ -1697,7 +1713,7 @@ def _get_writable_metadata(self): return object_metadata - def _get_upload_arguments(self, client, content_type): + def _get_upload_arguments(self, client, content_type, filename=None, command=None): """Get required arguments for performing an upload. The content type returned will be determined in order of precedence: @@ -1709,6 +1725,12 @@ def _get_upload_arguments(self, client, content_type): :type content_type: str :param content_type: Type of content being uploaded (or :data:`None`). + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: tuple :returns: A triple of @@ -1716,9 +1738,11 @@ def _get_upload_arguments(self, client, content_type): * An object metadata dictionary * The ``content_type`` as a string (according to precedence) """ - content_type = self._get_content_type(content_type) + content_type = self._get_content_type(content_type, filename=filename) headers = { - **_get_default_headers(client._connection.user_agent, content_type), + **_get_default_headers( + client._connection.user_agent, content_type, command=command + ), **_get_encryption_headers(self._encryption_key), } object_metadata = self._get_writable_metadata() @@ -1739,6 +1763,7 @@ def _do_multipart_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=None, + command=None, ): """Perform a multipart upload. @@ -1822,6 +1847,12 @@ def _do_multipart_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. 
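[Editor's aside] The `command` argument threaded through these helpers ends up as a `gccl-gcs-cmd/` suffix on the X-Goog-API-Client header. A minimal sketch of the resulting value, using hypothetical user-agent and invocation-id strings (the real ones are generated per client and per request by the private `_get_default_headers` helper):

```python
# Hypothetical values, shown for illustration only.
user_agent = "gcloud-python/2.11.0"
invocation_id = "gccl-invocation-id/abc123"   # generated fresh for each request
command = "tm.upload_many"                    # passed by transfer_manager, not end users

x_goog_api_client = f"{user_agent} {invocation_id}"
if command:
    x_goog_api_client += f" gccl-gcs-cmd/{command}"
# -> "gcloud-python/2.11.0 gccl-invocation-id/abc123 gccl-gcs-cmd/tm.upload_many"
```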
+ :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: :class:`~requests.Response` :returns: The "200 OK" response object returned after the multipart upload request. @@ -1840,7 +1871,7 @@ def _do_multipart_upload( transport = self._get_transport(client) if "metadata" in self._properties and "metadata" not in self._changes: self._changes.add("metadata") - info = self._get_upload_arguments(client, content_type) + info = self._get_upload_arguments(client, content_type, command=command) headers, object_metadata, content_type = info hostname = _get_host_name(client._connection) @@ -1910,6 +1941,7 @@ def _initiate_resumable_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=None, + command=None, ): """Initiate a resumable upload. @@ -2008,6 +2040,12 @@ def _initiate_resumable_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: tuple :returns: Pair of @@ -2025,7 +2063,7 @@ def _initiate_resumable_upload( transport = self._get_transport(client) if "metadata" in self._properties and "metadata" not in self._changes: self._changes.add("metadata") - info = self._get_upload_arguments(client, content_type) + info = self._get_upload_arguments(client, content_type, command=command) headers, object_metadata, content_type = info if extra_headers is not None: headers.update(extra_headers) @@ -2103,6 +2141,7 @@ def _do_resumable_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=None, + command=None, ): """Perform a resumable upload. @@ -2191,6 +2230,12 @@ def _do_resumable_upload( (google.cloud.storage.retry) for information on retry types and how to configure them. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: :class:`~requests.Response` :returns: The "200 OK" response object returned after the final chunk is uploaded. @@ -2209,6 +2254,7 @@ def _do_resumable_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) while not upload.finished: try: @@ -2234,6 +2280,7 @@ def _do_upload( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=None, + command=None, ): """Determine an upload strategy and then perform the upload. @@ -2333,6 +2380,12 @@ def _do_upload( configuration changes for Retry objects such as delays and deadlines are respected. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :rtype: dict :returns: The parsed JSON from the "200 OK" response. 
This will be the **only** response in the multipart case and it will be the @@ -2366,6 +2419,7 @@ def _do_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) else: response = self._do_resumable_upload( @@ -2382,11 +2436,12 @@ def _do_upload( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) return response.json() - def upload_from_file( + def _prep_and_do_upload( self, file_obj, rewind=False, @@ -2402,6 +2457,7 @@ def upload_from_file( timeout=_DEFAULT_TIMEOUT, checksum=None, retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + command=None, ): """Upload the contents of this blob from a file-like object. @@ -2522,6 +2578,12 @@ def upload_from_file( configuration changes for Retry objects such as delays and deadlines are respected. + :type command: str + :param command: + (Optional) Information about which interface for upload was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` if the upload response returns an error status. """ @@ -2551,11 +2613,192 @@ def upload_from_file( timeout=timeout, checksum=checksum, retry=retry, + command=command, ) self._set_properties(created_json) except resumable_media.InvalidResponse as exc: _raise_from_invalid_response(exc) + def upload_from_file( + self, + file_obj, + rewind=False, + size=None, + content_type=None, + num_retries=None, + client=None, + predefined_acl=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + checksum=None, + retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED, + ): + """Upload the contents of this blob from a file-like object. + + The content type of the upload will be determined in order + of precedence: + + - The value passed in to this method (if not :data:`None`) + - The value stored on the current blob + - The default value ('application/octet-stream') + + .. note:: + The effect of uploading to an existing blob depends on the + "versioning" and "lifecycle" policies defined on the blob's + bucket. In the absence of those policies, upload will + overwrite any existing contents. + + See the [`object versioning`](https://cloud.google.com/storage/docs/object-versioning) + and [`lifecycle`](https://cloud.google.com/storage/docs/lifecycle) + API documents for details. + + If the size of the data to be uploaded exceeds 8 MB a resumable media + request will be used, otherwise the content and the metadata will be + uploaded in a single multipart upload request. + + For more fine-grained over the upload process, check out + [`google-resumable-media`](https://googleapis.dev/python/google-resumable-media/latest/index.html). + + If :attr:`user_project` is set on the bucket, bills the API request + to that project. + + :type file_obj: file + :param file_obj: A file handle opened in binary mode for reading. + + :type rewind: bool + :param rewind: + If True, seek to the beginning of the file handle before writing + the file to Cloud Storage. + + :type size: int + :param size: + The number of bytes to be uploaded (which will be read from + ``file_obj``). If not provided, the upload will be concluded once + ``file_obj`` is exhausted. + + :type content_type: str + :param content_type: (Optional) Type of content being uploaded. + + :type num_retries: int + :param num_retries: + Number of upload retries. 
By default, only uploads with + if_generation_match set will be retried, as uploads without the + argument are not guaranteed to be idempotent. Setting num_retries + will override this default behavior and guarantee retries even when + if_generation_match is not set. (Deprecated: This argument + will be removed in a future release.) + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: + (Optional) The client to use. If not passed, falls back to the + ``client`` stored on the blob's bucket. + + :type predefined_acl: str + :param predefined_acl: (Optional) Predefined access control list + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type checksum: str + :param checksum: + (Optional) The type of checksum to compute to verify + the integrity of the object. If the upload is completed in a single + request, the checksum will be entirely precomputed and the remote + server will handle verification and error handling. If the upload + is too large and must be transmitted in multiple requests, the + checksum will be incrementally computed and the client will handle + verification and error handling, raising + google.resumable_media.common.DataCorruption on a mismatch and + attempting to delete the corrupted file. Supported values are + "md5", "crc32c" and None. The default is None. + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. + + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_generation_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Media operations (downloads and uploads) do not support non-default + predicates in a Retry object. The default will always be used. Other + configuration changes for Retry objects such as delays and deadlines + are respected. + + :raises: :class:`~google.cloud.exceptions.GoogleCloudError` + if the upload response returns an error status. 
+ """ + self._prep_and_do_upload( + file_obj, + rewind=rewind, + size=size, + content_type=content_type, + num_retries=num_retries, + client=client, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, + ) + + def _handle_filename_and_upload(self, filename, content_type=None, *args, **kwargs): + """Upload this blob's contents from the content of a named file. + + :type filename: str + :param filename: The path to the file. + + :type content_type: str + :param content_type: (Optional) Type of content being uploaded. + + For *args and **kwargs, refer to the documentation for upload_from_filename() for more information. + """ + + content_type = self._get_content_type(content_type, filename=filename) + + with open(filename, "rb") as file_obj: + total_bytes = os.fstat(file_obj.fileno()).st_size + self._prep_and_do_upload( + file_obj, + content_type=content_type, + size=total_bytes, + *args, + **kwargs, + ) + def upload_from_filename( self, filename, @@ -2677,25 +2920,21 @@ def upload_from_filename( configuration changes for Retry objects such as delays and deadlines are respected. """ - content_type = self._get_content_type(content_type, filename=filename) - with open(filename, "rb") as file_obj: - total_bytes = os.fstat(file_obj.fileno()).st_size - self.upload_from_file( - file_obj, - content_type=content_type, - num_retries=num_retries, - client=client, - size=total_bytes, - predefined_acl=predefined_acl, - if_generation_match=if_generation_match, - if_generation_not_match=if_generation_not_match, - if_metageneration_match=if_metageneration_match, - if_metageneration_not_match=if_metageneration_not_match, - timeout=timeout, - checksum=checksum, - retry=retry, - ) + self._handle_filename_and_upload( + filename, + content_type=content_type, + num_retries=num_retries, + client=client, + predefined_acl=predefined_acl, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, + ) def upload_from_string( self, @@ -3936,6 +4175,168 @@ def open( :rtype: str or ``NoneType`` """ + def _prep_and_do_download( + self, + file_obj, + client=None, + start=None, + end=None, + raw_download=False, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=_DEFAULT_TIMEOUT, + checksum="md5", + retry=DEFAULT_RETRY, + command=None, + ): + """Download the contents of a blob object into a file-like object. + + See https://cloud.google.com/storage/docs/downloading-objects + + If :attr:`user_project` is set on the bucket, bills the API request + to that project. + + :type file_obj: file + :param file_obj: A file handle to which to write the blob's data. + + :type client: :class:`~google.cloud.storage.client.Client` + :param client: + (Optional) The client to use. If not passed, falls back to the + ``client`` stored on the blob's bucket. + + :type start: int + :param start: (Optional) The first byte in a range to be downloaded. + + :type end: int + :param end: (Optional) The last byte in a range to be downloaded. 
+ + :type raw_download: bool + :param raw_download: + (Optional) If true, download the object without any expansion. + + :type if_etag_match: Union[str, Set[str]] + :param if_etag_match: + (Optional) See :ref:`using-if-etag-match` + + :type if_etag_not_match: Union[str, Set[str]] + :param if_etag_not_match: + (Optional) See :ref:`using-if-etag-not-match` + + :type if_generation_match: long + :param if_generation_match: + (Optional) See :ref:`using-if-generation-match` + + :type if_generation_not_match: long + :param if_generation_not_match: + (Optional) See :ref:`using-if-generation-not-match` + + :type if_metageneration_match: long + :param if_metageneration_match: + (Optional) See :ref:`using-if-metageneration-match` + + :type if_metageneration_not_match: long + :param if_metageneration_not_match: + (Optional) See :ref:`using-if-metageneration-not-match` + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type checksum: str + :param checksum: + (Optional) The type of checksum to compute to verify the integrity + of the object. The response headers must contain a checksum of the + requested type. If the headers lack an appropriate checksum (for + instance in the case of transcoded or ranged downloads where the + remote service does not know the correct checksum, including + downloads where chunk_size is set) an INFO-level log will be + emitted. Supported values are "md5", "crc32c" and None. The default + is "md5". + + :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will define retriable response codes and errors and + configure backoff and timeout options. + + A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a + Retry object and activates it only if certain conditions are met. + This class exists to provide safe defaults for RPC calls that are + not technically safe to retry normally (due to potential data + duplication or other side-effects) but become safe to retry if a + condition such as if_metageneration_match is set. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + Media operations (downloads and uploads) do not support non-default + predicates in a Retry object. The default will always be used. Other + configuration changes for Retry objects such as delays and deadlines + are respected. + + :type command: str + :param command: + (Optional) Information about which interface for download was used, + to be included in the X-Goog-API-Client header. Please leave as None + unless otherwise directed. + """ + # Handle ConditionalRetryPolicy. + if isinstance(retry, ConditionalRetryPolicy): + # Conditional retries are designed for non-media calls, which change + # arguments into query_params dictionaries. Media operations work + # differently, so here we make a "fake" query_params to feed to the + # ConditionalRetryPolicy. 
+ query_params = { + "ifGenerationMatch": if_generation_match, + "ifMetagenerationMatch": if_metageneration_match, + } + retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) + + client = self._require_client(client) + + download_url = self._get_download_url( + client, + if_generation_match=if_generation_match, + if_generation_not_match=if_generation_not_match, + if_metageneration_match=if_metageneration_match, + if_metageneration_not_match=if_metageneration_not_match, + ) + headers = _get_encryption_headers(self._encryption_key) + headers["accept-encoding"] = "gzip" + _add_etag_match_headers( + headers, + if_etag_match=if_etag_match, + if_etag_not_match=if_etag_not_match, + ) + headers = { + **_get_default_headers(client._connection.user_agent, command=command), + **headers, + } + + transport = client._http + + try: + self._do_download( + transport, + file_obj, + download_url, + headers, + start, + end, + raw_download, + timeout=timeout, + checksum=checksum, + retry=retry, + ) + except resumable_media.InvalidResponse as exc: + _raise_from_invalid_response(exc) + @property def component_count(self): """Number of underlying components that make up this object. diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index 7df60c306..e6391f5fb 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -25,18 +25,16 @@ from google.auth.credentials import AnonymousCredentials -from google import resumable_media - from google.api_core import page_iterator from google.cloud._helpers import _LocalStack, _NOW from google.cloud.client import ClientWithProject from google.cloud.exceptions import NotFound -from google.cloud.storage._helpers import _get_default_headers + from google.cloud.storage._helpers import _get_environ_project from google.cloud.storage._helpers import _get_storage_host from google.cloud.storage._helpers import _DEFAULT_STORAGE_HOST from google.cloud.storage._helpers import _bucket_bound_hostname_url -from google.cloud.storage._helpers import _add_etag_match_headers + from google.cloud.storage._http import Connection from google.cloud.storage._signing import ( get_expiration_seconds_v4, @@ -46,17 +44,12 @@ ) from google.cloud.storage.batch import Batch from google.cloud.storage.bucket import Bucket, _item_to_blob, _blobs_page_start -from google.cloud.storage.blob import ( - Blob, - _get_encryption_headers, - _raise_from_invalid_response, -) +from google.cloud.storage.blob import Blob from google.cloud.storage.hmac_key import HMACKeyMetadata from google.cloud.storage.acl import BucketACL from google.cloud.storage.acl import DefaultObjectACL from google.cloud.storage.constants import _DEFAULT_TIMEOUT from google.cloud.storage.retry import DEFAULT_RETRY -from google.cloud.storage.retry import ConditionalRetryPolicy _marker = object() @@ -147,7 +140,7 @@ def __init__( kw_args["api_endpoint"] = storage_host if _is_emulator_set else None if client_options: - if type(client_options) == dict: + if isinstance(client_options, dict): client_options = google.api_core.client_options.from_dict( client_options ) @@ -1064,52 +1057,25 @@ def download_blob_to_file( are respected. """ - # Handle ConditionalRetryPolicy. - if isinstance(retry, ConditionalRetryPolicy): - # Conditional retries are designed for non-media calls, which change - # arguments into query_params dictionaries. Media operations work - # differently, so here we make a "fake" query_params to feed to the - # ConditionalRetryPolicy. 
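[Editor's aside] The block above, moved from `client.download_blob_to_file` into `blob._prep_and_do_download`, is what lets media operations honor conditional retry policies. A standalone sketch of the same logic using the public retry types:

```python
from google.cloud.storage.retry import ConditionalRetryPolicy
from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED


def resolve_media_retry(retry, if_generation_match=None, if_metageneration_match=None):
    """Media calls never build query_params, so a stand-in dict is used to let
    a ConditionalRetryPolicy decide whether retrying is safe."""
    if isinstance(retry, ConditionalRetryPolicy):
        query_params = {
            "ifGenerationMatch": if_generation_match,
            "ifMetagenerationMatch": if_metageneration_match,
        }
        retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
    return retry


# Without a precondition the conditional default resolves to None (no retries);
# with if_generation_match set it resolves to a concrete Retry object.
resolve_media_retry(DEFAULT_RETRY_IF_GENERATION_SPECIFIED)                          # -> None
resolve_media_retry(DEFAULT_RETRY_IF_GENERATION_SPECIFIED, if_generation_match=1)   # -> Retry
```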
- query_params = { - "ifGenerationMatch": if_generation_match, - "ifMetagenerationMatch": if_metageneration_match, - } - retry = retry.get_retry_policy_if_conditions_met(query_params=query_params) - if not isinstance(blob_or_uri, Blob): blob_or_uri = Blob.from_string(blob_or_uri) - download_url = blob_or_uri._get_download_url( - self, + + blob_or_uri._prep_and_do_download( + file_obj, + client=self, + start=start, + end=end, + raw_download=raw_download, + if_etag_match=if_etag_match, + if_etag_not_match=if_etag_not_match, if_generation_match=if_generation_match, if_generation_not_match=if_generation_not_match, if_metageneration_match=if_metageneration_match, if_metageneration_not_match=if_metageneration_not_match, + timeout=timeout, + checksum=checksum, + retry=retry, ) - headers = _get_encryption_headers(blob_or_uri._encryption_key) - headers["accept-encoding"] = "gzip" - _add_etag_match_headers( - headers, - if_etag_match=if_etag_match, - if_etag_not_match=if_etag_not_match, - ) - headers = {**_get_default_headers(self._connection.user_agent), **headers} - - transport = self._http - try: - blob_or_uri._do_download( - transport, - file_obj, - download_url, - headers, - start, - end, - raw_download, - timeout=timeout, - checksum=checksum, - retry=retry, - ) - except resumable_media.InvalidResponse as exc: - _raise_from_invalid_response(exc) def list_blobs( self, diff --git a/google/cloud/storage/transfer_manager.py b/google/cloud/storage/transfer_manager.py index 0b65702d4..3060528c9 100644 --- a/google/cloud/storage/transfer_manager.py +++ b/google/cloud/storage/transfer_manager.py @@ -26,6 +26,14 @@ from google.api_core import exceptions from google.cloud.storage import Client from google.cloud.storage import Blob +from google.cloud.storage.blob import _get_host_name +from google.cloud.storage.constants import _DEFAULT_TIMEOUT +from google.cloud.storage._helpers import _api_core_retry_to_resumable_media_retry +from google.cloud.storage.retry import DEFAULT_RETRY + +from google.resumable_media.requests.upload import XMLMPUContainer +from google.resumable_media.requests.upload import XMLMPUPart + warnings.warn( "The module `transfer_manager` is a preview feature. Functionality and API " @@ -35,7 +43,14 @@ TM_DEFAULT_CHUNK_SIZE = 32 * 1024 * 1024 DEFAULT_MAX_WORKERS = 8 - +METADATA_HEADER_TRANSLATION = { + "cacheControl": "Cache-Control", + "contentDisposition": "Content-Disposition", + "contentEncoding": "Content-Encoding", + "contentLanguage": "Content-Language", + "customTime": "x-goog-custom-time", + "storageClass": "x-goog-storage-class", +} # Constants to be passed in as `worker_type`. PROCESS = "process" @@ -89,8 +104,7 @@ def upload_many( :type file_blob_pairs: List(Tuple(IOBase or str, 'google.cloud.storage.blob.Blob')) :param file_blob_pairs: A list of tuples of a file or filename and a blob. Each file will be - uploaded to the corresponding blob by using blob.upload_from_file() or - blob.upload_from_filename() as appropriate. + uploaded to the corresponding blob by using APIs identical to blob.upload_from_file() or blob.upload_from_filename() as appropriate. File handlers are only supported if worker_type is set to THREAD. If worker_type is set to PROCESS, please use filenames only. @@ -107,8 +121,7 @@ def upload_many( :param upload_kwargs: A dictionary of keyword arguments to pass to the upload method. Refer to the documentation for blob.upload_from_file() or - blob.upload_from_filename() for more information. 
The dict is directly - passed into the upload methods and is not validated by this function. + blob.upload_from_filename() for more information. The dict is directly passed into the upload methods and is not validated by this function. :type threads: int :param threads: @@ -179,10 +192,13 @@ def upload_many( """ if upload_kwargs is None: upload_kwargs = {} + if skip_if_exists: upload_kwargs = upload_kwargs.copy() upload_kwargs["if_generation_match"] = 0 + upload_kwargs["command"] = "tm.upload_many" + pool_class, needs_pickling = _get_pool_class_and_requirements(worker_type) with pool_class(max_workers=max_workers) as executor: @@ -198,10 +214,10 @@ def upload_many( futures.append( executor.submit( _call_method_on_maybe_pickled_blob, - _pickle_blob(blob) if needs_pickling else blob, - "upload_from_filename" + _pickle_client(blob) if needs_pickling else blob, + "_handle_filename_and_upload" if isinstance(path_or_file, str) - else "upload_from_file", + else "_prep_and_do_upload", path_or_file, **upload_kwargs, ) @@ -243,12 +259,9 @@ def download_many( :type blob_file_pairs: List(Tuple('google.cloud.storage.blob.Blob', IOBase or str)) :param blob_file_pairs: - A list of tuples of blob and a file or filename. Each blob will be - downloaded to the corresponding blob by using blob.download_to_file() or - blob.download_to_filename() as appropriate. + A list of tuples of blob and a file or filename. Each blob will be downloaded to the corresponding blob by using APIs identical to blob.download_to_file() or blob.download_to_filename() as appropriate. - Note that blob.download_to_filename() does not delete the destination - file if the download fails. + Note that blob.download_to_filename() does not delete the destination file if the download fails. File handlers are only supported if worker_type is set to THREAD. If worker_type is set to PROCESS, please use filenames only. @@ -256,9 +269,7 @@ def download_many( :type download_kwargs: dict :param download_kwargs: A dictionary of keyword arguments to pass to the download method. Refer - to the documentation for blob.download_to_file() or - blob.download_to_filename() for more information. The dict is directly - passed into the download methods and is not validated by this function. + to the documentation for blob.download_to_file() or blob.download_to_filename() for more information. The dict is directly passed into the download methods and is not validated by this function. :type threads: int :param threads: @@ -328,6 +339,8 @@ def download_many( if download_kwargs is None: download_kwargs = {} + download_kwargs["command"] = "tm.download_many" + pool_class, needs_pickling = _get_pool_class_and_requirements(worker_type) with pool_class(max_workers=max_workers) as executor: @@ -343,10 +356,10 @@ def download_many( futures.append( executor.submit( _call_method_on_maybe_pickled_blob, - _pickle_blob(blob) if needs_pickling else blob, - "download_to_filename" + _pickle_client(blob) if needs_pickling else blob, + "_handle_filename_and_download" if isinstance(path_or_file, str) - else "download_to_file", + else "_prep_and_do_download", path_or_file, **download_kwargs, ) @@ -454,8 +467,7 @@ def upload_many_from_filenames( :param upload_kwargs: A dictionary of keyword arguments to pass to the upload method. Refer to the documentation for blob.upload_from_file() or - blob.upload_from_filename() for more information. The dict is directly - passed into the upload methods and is not validated by this function. + blob.upload_from_filename() for more information. 
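[Editor's aside] For reference, a hedged usage sketch of the two list-based helpers touched above, with hypothetical bucket and file names. With the default PROCESS worker type, pass filenames rather than open file handles, since file objects cannot be pickled across processes:

```python
from google.cloud import storage
from google.cloud.storage import transfer_manager

client = storage.Client()
bucket = client.bucket("my-bucket")  # hypothetical bucket

names = ["a.txt", "b.txt"]
upload_pairs = [(name, bucket.blob(name)) for name in names]
# With the default raise_exception=False, each entry in results is either the
# upload's return value or the exception raised for that pair.
results = transfer_manager.upload_many(upload_pairs, skip_if_exists=True, max_workers=4)

download_pairs = [(bucket.blob(name), name) for name in names]
transfer_manager.download_many(download_pairs, max_workers=4)
```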
The dict is directly passed into the upload methods and is not validated by this function. :type threads: int :param threads: @@ -733,7 +745,6 @@ def download_chunks_concurrently( Checksumming (md5 or crc32c) is not supported for chunked operations. Any `checksum` parameter passed in to download_kwargs will be ignored. - :type bucket: 'google.cloud.storage.bucket.Bucket' :param bucket: The bucket which contains the blobs to be downloaded @@ -745,12 +756,17 @@ def download_chunks_concurrently( :param filename: The destination filename or path. + :type chunk_size: int + :param chunk_size: + The size in bytes of each chunk to send. The optimal chunk size for + maximum throughput may vary depending on the exact network environment + and size of the blob. + :type download_kwargs: dict :param download_kwargs: A dictionary of keyword arguments to pass to the download method. Refer to the documentation for blob.download_to_file() or - blob.download_to_filename() for more information. The dict is directly - passed into the download methods and is not validated by this function. + blob.download_to_filename() for more information. The dict is directly passed into the download methods and is not validated by this function. Keyword arguments "start" and "end" which are not supported and will cause a ValueError if present. @@ -803,13 +819,15 @@ def download_chunks_concurrently( "Download arguments 'start' and 'end' are not supported by download_chunks_concurrently." ) + download_kwargs["command"] = "tm.download_sharded" + # We must know the size and the generation of the blob. if not blob.size or not blob.generation: blob.reload() pool_class, needs_pickling = _get_pool_class_and_requirements(worker_type) # Pickle the blob ahead of time (just once, not once per chunk) if needed. - maybe_pickled_blob = _pickle_blob(blob) if needs_pickling else blob + maybe_pickled_blob = _pickle_client(blob) if needs_pickling else blob futures = [] @@ -844,9 +862,269 @@ def download_chunks_concurrently( return None +def upload_chunks_concurrently( + filename, + blob, + content_type=None, + chunk_size=TM_DEFAULT_CHUNK_SIZE, + deadline=None, + worker_type=PROCESS, + max_workers=DEFAULT_MAX_WORKERS, + *, + checksum="md5", + timeout=_DEFAULT_TIMEOUT, + retry=DEFAULT_RETRY, +): + """Upload a single file in chunks, concurrently. + + This function uses the XML MPU API to initialize an upload and upload a + file in chunks, concurrently with a worker pool. + + The XML MPU API is significantly different from other uploads; please review + the documentation at https://cloud.google.com/storage/docs/multipart-uploads + before using this feature. + + The library will attempt to cancel uploads that fail due to an exception. + If the upload fails in a way that precludes cancellation, such as a + hardware failure, process termination, or power outage, then the incomplete + upload may persist indefinitely. To mitigate this, set the + `AbortIncompleteMultipartUpload` with a nonzero `Age` in bucket lifecycle + rules, or refer to the XML API documentation linked above to learn more + about how to list and delete individual downloads. + + Using this feature with multiple threads is unlikely to improve upload + performance under normal circumstances due to Python interpreter threading + behavior. The default is therefore to use processes instead of threads. + + ACL information cannot be sent with this function and should be set + separately with :class:`ObjectACL` methods. 
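[Editor's aside] A short usage sketch for the chunked download path documented above, with a hypothetical object name. Chunked operations skip checksumming, and the best `chunk_size` depends on the network and the object size:

```python
from google.cloud import storage
from google.cloud.storage import transfer_manager

client = storage.Client()
blob = client.bucket("my-bucket").blob("large-object.bin")  # hypothetical names

transfer_manager.download_chunks_concurrently(
    blob,
    "large-object.bin",            # destination path
    chunk_size=32 * 1024 * 1024,   # 32 MiB, the module default (TM_DEFAULT_CHUNK_SIZE)
    max_workers=8,
)
```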
+ + :type filename: str + :param filename: + The path to the file to upload. File-like objects are not supported. + + :type blob: `google.cloud.storage.Blob` + :param blob: + The blob to which to upload. + + :type content_type: str + :param content_type: (Optional) Type of content being uploaded. + + :type chunk_size: int + :param chunk_size: + The size in bytes of each chunk to send. The optimal chunk size for + maximum throughput may vary depending on the exact network environment + and size of the blob. The remote API has restrictions on the minimum + and maximum size allowable, see: https://cloud.google.com/storage/quotas#requests + + :type deadline: int + :param deadline: + The number of seconds to wait for all threads to resolve. If the + deadline is reached, all threads will be terminated regardless of their + progress and concurrent.futures.TimeoutError will be raised. This can be + left as the default of None (no deadline) for most use cases. + + :type worker_type: str + :param worker_type: + The worker type to use; one of google.cloud.storage.transfer_manager.PROCESS + or google.cloud.storage.transfer_manager.THREAD. + + Although the exact performance impact depends on the use case, in most + situations the PROCESS worker type will use more system resources (both + memory and CPU) and result in faster operations than THREAD workers. + + Because the subprocesses of the PROCESS worker type can't access memory + from the main process, Client objects have to be serialized and then + recreated in each subprocess. The serialization of the Client object + for use in subprocesses is an approximation and may not capture every + detail of the Client object, especially if the Client was modified after + its initial creation or if `Client._http` was modified in any way. + + THREAD worker types are observed to be relatively efficient for + operations with many small files, but not for operations with large + files. PROCESS workers are recommended for large file operations. + + :type max_workers: int + :param max_workers: + The maximum number of workers to create to handle the workload. + + With PROCESS workers, a larger number of workers will consume more + system resources (memory and CPU) at once. + + How many workers is optimal depends heavily on the specific use case, + and the default is a conservative number that should work okay in most + cases without consuming excessive resources. + + :type checksum: str + :param checksum: + (Optional) The checksum scheme to use: either 'md5', 'crc32c' or None. + Each individual part is checksummed. At present, the selected checksum + rule is only applied to parts and a separate checksum of the entire + resulting blob is not computed. Please compute and compare the checksum + of the file to the resulting blob separately if needed, using the + 'crc32c' algorithm as per the XML MPU documentation. + + :type timeout: float or tuple + :param timeout: + (Optional) The amount of time, in seconds, to wait + for the server response. See: :ref:`configuring_timeouts` + + :type retry: google.api_core.retry.Retry + :param retry: (Optional) How to retry the RPC. A None value will disable + retries. A google.api_core.retry.Retry value will enable retries, + and the object will configure backoff and timeout options. Custom + predicates (customizable error codes) are not supported for media + operations such as this one. 
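[Editor's aside] One of the headline changes in this release is that `retry` is configurable for this call. A sketch of tuning backoff and deadline; custom predicates are ignored for media operations, as the docstring above notes:

```python
from google.cloud.storage.retry import DEFAULT_RETRY

# Deadline and backoff settings are honored; the predicate is not customizable
# for media operations such as this one.
tuned_retry = DEFAULT_RETRY.with_deadline(300.0).with_delay(
    initial=1.0, multiplier=2.0, maximum=60.0
)
# Later: transfer_manager.upload_chunks_concurrently(..., retry=tuned_retry)
```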
+ + This function does not accept ConditionalRetryPolicy values because + preconditions are not supported by the underlying API call. + + See the retry.py source code and docstrings in this package + (google.cloud.storage.retry) for information on retry types and how + to configure them. + + :raises: :exc:`concurrent.futures.TimeoutError` if deadline is exceeded. + """ + + bucket = blob.bucket + client = blob.client + transport = blob._get_transport(client) + + hostname = _get_host_name(client._connection) + url = "{hostname}/{bucket}/{blob}".format( + hostname=hostname, bucket=bucket.name, blob=blob.name + ) + + base_headers, object_metadata, content_type = blob._get_upload_arguments( + client, content_type, filename=filename, command="tm.upload_sharded" + ) + headers = {**base_headers, **_headers_from_metadata(object_metadata)} + + if blob.user_project is not None: + headers["x-goog-user-project"] = blob.user_project + + # When a Customer Managed Encryption Key is used to encrypt Cloud Storage object + # at rest, object resource metadata will store the version of the Key Management + # Service cryptographic material. If a Blob instance with KMS Key metadata set is + # used to upload a new version of the object then the existing kmsKeyName version + # value can't be used in the upload request and the client instead ignores it. + if blob.kms_key_name is not None and "cryptoKeyVersions" not in blob.kms_key_name: + headers["x-goog-encryption-kms-key-name"] = blob.kms_key_name + + container = XMLMPUContainer(url, filename, headers=headers) + container._retry_strategy = _api_core_retry_to_resumable_media_retry(retry) + + container.initiate(transport=transport, content_type=content_type) + upload_id = container.upload_id + + size = os.path.getsize(filename) + num_of_parts = -(size // -chunk_size) # Ceiling division + + pool_class, needs_pickling = _get_pool_class_and_requirements(worker_type) + # Pickle the blob ahead of time (just once, not once per chunk) if needed. + maybe_pickled_client = _pickle_client(client) if needs_pickling else client + + futures = [] + + with pool_class(max_workers=max_workers) as executor: + + for part_number in range(1, num_of_parts + 1): + start = (part_number - 1) * chunk_size + end = min(part_number * chunk_size, size) + + futures.append( + executor.submit( + _upload_part, + maybe_pickled_client, + url, + upload_id, + filename, + start=start, + end=end, + part_number=part_number, + checksum=checksum, + headers=headers, + retry=retry, + ) + ) + + concurrent.futures.wait( + futures, timeout=deadline, return_when=concurrent.futures.ALL_COMPLETED + ) + + try: + # Harvest results and raise exceptions. + for future in futures: + part_number, etag = future.result() + container.register_part(part_number, etag) + + container.finalize(blob._get_transport(client)) + except Exception: + container.cancel(blob._get_transport(client)) + raise + + +def _upload_part( + maybe_pickled_client, + url, + upload_id, + filename, + start, + end, + part_number, + checksum, + headers, + retry, +): + """Helper function that runs inside a thread or subprocess to upload a part. 
+ + `maybe_pickled_client` is either a Client (for threads) or a specially + pickled Client (for processes) because the default pickling mangles Client + objects.""" + + if isinstance(maybe_pickled_client, Client): + client = maybe_pickled_client + else: + client = pickle.loads(maybe_pickled_client) + part = XMLMPUPart( + url, + upload_id, + filename, + start=start, + end=end, + part_number=part_number, + checksum=checksum, + headers=headers, + ) + part._retry_strategy = _api_core_retry_to_resumable_media_retry(retry) + part.upload(client._http) + return (part_number, part.etag) + + +def _headers_from_metadata(metadata): + """Helper function to translate object metadata into a header dictionary.""" + + headers = {} + # Handle standard writable metadata + for key, value in metadata.items(): + if key in METADATA_HEADER_TRANSLATION: + headers[METADATA_HEADER_TRANSLATION[key]] = value + # Handle custom metadata + if "metadata" in metadata: + for key, value in metadata["metadata"].items(): + headers["x-goog-meta-" + key] = value + return headers + + def _download_and_write_chunk_in_place( maybe_pickled_blob, filename, start, end, download_kwargs ): + """Helper function that runs inside a thread or subprocess. + + `maybe_pickled_blob` is either a Blob (for threads) or a specially pickled + Blob (for processes) because the default pickling mangles Client objects + which are attached to Blobs.""" + if isinstance(maybe_pickled_blob, Blob): blob = maybe_pickled_blob else: @@ -855,7 +1133,7 @@ def _download_and_write_chunk_in_place( filename, "rb+" ) as f: # Open in mixed read/write mode to avoid truncating or appending f.seek(start) - return blob.download_to_file(f, start=start, end=end, **download_kwargs) + return blob._prep_and_do_download(f, start=start, end=end, **download_kwargs) def _call_method_on_maybe_pickled_blob( @@ -863,9 +1141,9 @@ def _call_method_on_maybe_pickled_blob( ): """Helper function that runs inside a thread or subprocess. - `maybe_pickled_blob` is either a blob (for threads) or a specially pickled - blob (for processes) because the default pickling mangles clients which are - attached to blobs.""" + `maybe_pickled_blob` is either a Blob (for threads) or a specially pickled + Blob (for processes) because the default pickling mangles Client objects + which are attached to Blobs.""" if isinstance(maybe_pickled_blob, Blob): blob = maybe_pickled_blob @@ -894,8 +1172,8 @@ def _reduce_client(cl): ) -def _pickle_blob(blob): - """Pickle a Blob (and its Bucket and Client) and return a bytestring.""" +def _pickle_client(obj): + """Pickle a Client or an object that owns a Client (like a Blob)""" # We need a custom pickler to process Client objects, which are attached to # Buckets (and therefore to Blobs in turn). Unfortunately, the Python @@ -907,7 +1185,7 @@ def _pickle_blob(blob): p = pickle.Pickler(f) p.dispatch_table = copyreg.dispatch_table.copy() p.dispatch_table[Client] = _reduce_client - p.dump(blob) + p.dump(obj) return f.getvalue() diff --git a/google/cloud/storage/version.py b/google/cloud/storage/version.py index 13e710fcc..e6e357434 100644 --- a/google/cloud/storage/version.py +++ b/google/cloud/storage/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
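A minimal usage sketch of the new `upload_chunks_concurrently` entry point defined above may help orient readers; it is illustrative only, and the bucket name, object name, local path, and part size are assumptions rather than part of this change. It uploads a file through the XML multipart API and then, as the checksum note in the docstring recommends, compares a separately computed crc32c of the local file against the finished blob.

    import base64

    import google_crc32c
    from google.cloud import storage
    from google.cloud.storage import transfer_manager

    client = storage.Client()
    bucket = client.bucket("my-bucket")        # assumed bucket name
    blob = bucket.blob("large-file.bin")       # assumed object name

    # Upload in 32 MiB parts; PROCESS workers are the default and are
    # recommended for large files, while THREAD workers can be selected
    # with worker_type=transfer_manager.THREAD.
    transfer_manager.upload_chunks_concurrently(
        "large-file.bin",                      # assumed local path
        blob,
        chunk_size=32 * 1024 * 1024,
        max_workers=8,
    )

    # Per-part checksums do not cover the assembled object, so verify the
    # whole blob separately with crc32c.
    checksum = google_crc32c.Checksum()
    with open("large-file.bin", "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            checksum.update(chunk)

    blob.reload()
    assert base64.b64encode(checksum.digest()).decode("utf-8") == blob.crc32c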
-__version__ = "2.10.0" +__version__ = "2.11.0" diff --git a/noxfile.py b/noxfile.py index 7ee4a2796..1a72c9144 100644 --- a/noxfile.py +++ b/noxfile.py @@ -44,7 +44,9 @@ def lint(session): Returns a failure if the linters find linting errors or sufficiently serious code quality issues. """ - session.install("flake8", BLACK_VERSION) + # Pin flake8 to 6.0.0 + # See https://github.com/googleapis/python-storage/issues/1102 + session.install("flake8==6.0.0", BLACK_VERSION) session.run( "black", "--check", @@ -249,9 +251,7 @@ def docfx(session): session.install("-e", ".") session.install("grpcio") - session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" - ) + session.install("gcp-sphinx-docfx-yaml", "alabaster", "recommonmark") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index e389934ac..2883c5abc 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==7.3.2 -mock==5.0.2 +pytest==7.4.0 +mock==5.1.0 backoff==2.2.1 \ No newline at end of file diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 16b451910..f9b37be52 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-pubsub==2.17.1 -google-cloud-storage==2.9.0 +google-cloud-pubsub==2.18.3 +google-cloud-storage==2.10.0 pandas===1.3.5; python_version == '3.7' -pandas==2.0.2; python_version >= '3.8' +pandas==2.0.3; python_version >= '3.8' diff --git a/samples/snippets/snippets_test.py b/samples/snippets/snippets_test.py index 6e5879eeb..2da7bb94c 100644 --- a/samples/snippets/snippets_test.py +++ b/samples/snippets/snippets_test.py @@ -72,10 +72,11 @@ import storage_set_bucket_default_kms_key import storage_set_client_endpoint import storage_set_metadata -import storage_transfer_manager_download_all_blobs +import storage_transfer_manager_download_bucket import storage_transfer_manager_download_chunks_concurrently +import storage_transfer_manager_download_many import storage_transfer_manager_upload_directory -import storage_transfer_manager_upload_many_blobs +import storage_transfer_manager_upload_many import storage_upload_file import storage_upload_from_memory import storage_upload_from_stream @@ -689,7 +690,7 @@ def test_transfer_manager_snippets(test_bucket, capsys): with open(os.path.join(uploads, name), "w") as f: f.write(name) - storage_transfer_manager_upload_many_blobs.upload_many_blobs_with_transfer_manager( + storage_transfer_manager_upload_many.upload_many_blobs_with_transfer_manager( test_bucket.name, BLOB_NAMES, source_directory="{}/".format(uploads), @@ -702,10 +703,24 @@ def test_transfer_manager_snippets(test_bucket, capsys): with tempfile.TemporaryDirectory() as downloads: # Download the files. - storage_transfer_manager_download_all_blobs.download_all_blobs_with_transfer_manager( + storage_transfer_manager_download_bucket.download_bucket_with_transfer_manager( test_bucket.name, destination_directory=os.path.join(downloads, ""), processes=8, + max_results=10000, + ) + out, _ = capsys.readouterr() + + for name in BLOB_NAMES: + assert "Downloaded {}".format(name) in out + + with tempfile.TemporaryDirectory() as downloads: + # Download the files. 
+ storage_transfer_manager_download_many.download_many_blobs_with_transfer_manager( + test_bucket.name, + blob_names=BLOB_NAMES, + destination_directory=os.path.join(downloads, ""), + processes=8, ) out, _ = capsys.readouterr() diff --git a/samples/snippets/storage_transfer_manager_download_bucket.py b/samples/snippets/storage_transfer_manager_download_bucket.py new file mode 100644 index 000000000..4f21ee6e9 --- /dev/null +++ b/samples/snippets/storage_transfer_manager_download_bucket.py @@ -0,0 +1,74 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START storage_transfer_manager_download_bucket] +def download_bucket_with_transfer_manager( + bucket_name, destination_directory="", processes=8, max_results=1000 +): + """Download all of the blobs in a bucket, concurrently in a process pool. + + The filename of each blob once downloaded is derived from the blob name and + the `destination_directory `parameter. For complete control of the filename + of each blob, use transfer_manager.download_many() instead. + + Directories will be created automatically as needed, for instance to + accommodate blob names that include slashes. + """ + + # The ID of your GCS bucket + # bucket_name = "your-bucket-name" + + # The directory on your computer to which to download all of the files. This + # string is prepended (with os.path.join()) to the name of each blob to form + # the full path. Relative paths and absolute paths are both accepted. An + # empty string means "the current working directory". Note that this + # parameter allows accepts directory traversal ("../" etc.) and is not + # intended for unsanitized end user input. + # destination_directory = "" + + # The maximum number of processes to use for the operation. The performance + # impact of this value depends on the use case, but smaller files usually + # benefit from a higher number of processes. Each additional process occupies + # some CPU and memory resources until finished. + # processes=8 + + # The maximum number of results to fetch from bucket.list_blobs(). This + # sample code fetches all of the blobs up to max_results and queues them all + # for download at once. Though they will still be executed in batches up to + # the processes limit, queueing them all at once can be taxing on system + # memory if buckets are very large. Adjust max_results as needed for your + # system environment, or set it to None if you are sure the bucket is not + # too large to hold in memory easily. 
+ # max_results=1000 + + from google.cloud.storage import Client, transfer_manager + + storage_client = Client() + bucket = storage_client.bucket(bucket_name) + + blob_names = [blob.name for blob in bucket.list_blobs(max_results=max_results)] + + results = transfer_manager.download_many_to_path( + bucket, blob_names, destination_directory=destination_directory, max_workers=processes + ) + + for name, result in zip(blob_names, results): + # The results list is either `None` or an exception for each blob in + # the input list, in order. + + if isinstance(result, Exception): + print("Failed to download {} due to exception: {}".format(name, result)) + else: + print("Downloaded {} to {}.".format(name, destination_directory + name)) +# [END storage_transfer_manager_download_bucket] diff --git a/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py b/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py index 50541fb93..9ddec094e 100644 --- a/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py +++ b/samples/snippets/storage_transfer_manager_download_chunks_concurrently.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. - +# [START storage_transfer_manager_download_chunks_concurrently] def download_chunks_concurrently(bucket_name, blob_name, filename, processes=8): - """Download a single file in chunks, concurrently.""" + """Download a single file in chunks, concurrently in a process pool.""" # The ID of your GCS bucket # bucket_name = "your-bucket-name" @@ -40,3 +40,4 @@ def download_chunks_concurrently(bucket_name, blob_name, filename, processes=8): transfer_manager.download_chunks_concurrently(blob, filename, max_workers=processes) print("Downloaded {} to {}.".format(blob_name, filename)) +# [END storage_transfer_manager_download_chunks_concurrently] diff --git a/samples/snippets/storage_transfer_manager_download_all_blobs.py b/samples/snippets/storage_transfer_manager_download_many.py similarity index 75% rename from samples/snippets/storage_transfer_manager_download_all_blobs.py rename to samples/snippets/storage_transfer_manager_download_many.py index 2285f673f..500eea1ce 100644 --- a/samples/snippets/storage_transfer_manager_download_all_blobs.py +++ b/samples/snippets/storage_transfer_manager_download_many.py @@ -1,4 +1,4 @@ -# Copyright 2022 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the 'License'); # you may not use this file except in compliance with the License. @@ -12,23 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License. - -def download_all_blobs_with_transfer_manager( - bucket_name, destination_directory="", processes=8 +# [START storage_transfer_manager_download_many] +def download_many_blobs_with_transfer_manager( + bucket_name, blob_names, destination_directory="", processes=8 ): - """Download all of the blobs in a bucket, concurrently in a thread pool. + """Download blobs in a list by name, concurrently in a process pool. The filename of each blob once downloaded is derived from the blob name and the `destination_directory `parameter. For complete control of the filename of each blob, use transfer_manager.download_many() instead. - Directories will be created automatically as needed, for instance to - accommodate blob names that include slashes. 
+ Directories will be created automatically as needed to accommodate blob + names that include slashes. """ # The ID of your GCS bucket # bucket_name = "your-bucket-name" + # The list of blob names to download. The names of each blobs will also + # be the name of each destination file (use transfer_manager.download_many() + # instead to control each destination file name). If there is a "/" in the + # blob name, then corresponding directories will be created on download. + # blob_names = ["myblob", "myblob2"] + # The directory on your computer to which to download all of the files. This # string is prepended (with os.path.join()) to the name of each blob to form # the full path. Relative paths and absolute paths are both accepted. An @@ -48,8 +54,6 @@ def download_all_blobs_with_transfer_manager( storage_client = Client() bucket = storage_client.bucket(bucket_name) - blob_names = [blob.name for blob in bucket.list_blobs()] - results = transfer_manager.download_many_to_path( bucket, blob_names, destination_directory=destination_directory, max_workers=processes ) @@ -62,3 +66,4 @@ def download_all_blobs_with_transfer_manager( print("Failed to download {} due to exception: {}".format(name, result)) else: print("Downloaded {} to {}.".format(name, destination_directory + name)) +# [END storage_transfer_manager_download_many] diff --git a/samples/snippets/storage_transfer_manager_upload_directory.py b/samples/snippets/storage_transfer_manager_upload_directory.py index e4a369969..c0dbb9c9c 100644 --- a/samples/snippets/storage_transfer_manager_upload_directory.py +++ b/samples/snippets/storage_transfer_manager_upload_directory.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +# [START storage_transfer_manager_upload_directory] def upload_directory_with_transfer_manager(bucket_name, source_directory, processes=8): """Upload every file in a directory, including all files in subdirectories. @@ -76,3 +76,4 @@ def upload_directory_with_transfer_manager(bucket_name, source_directory, proces print("Failed to upload {} due to exception: {}".format(name, result)) else: print("Uploaded {} to {}.".format(name, bucket.name)) +# [END storage_transfer_manager_upload_directory] diff --git a/samples/snippets/storage_transfer_manager_upload_many_blobs.py b/samples/snippets/storage_transfer_manager_upload_many.py similarity index 96% rename from samples/snippets/storage_transfer_manager_upload_many_blobs.py rename to samples/snippets/storage_transfer_manager_upload_many.py index 600134bd6..2ed647650 100644 --- a/samples/snippets/storage_transfer_manager_upload_many_blobs.py +++ b/samples/snippets/storage_transfer_manager_upload_many.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. - +# [START storage_transfer_manager_upload_many] def upload_many_blobs_with_transfer_manager( bucket_name, filenames, source_directory="", processes=8 ): - """Upload every file in a list to a bucket, concurrently in a thread pool. + """Upload every file in a list to a bucket, concurrently in a process pool. Each blob name is derived from the filename, not including the `source_directory` parameter. 
For complete control of the blob name for each @@ -63,3 +63,4 @@ def upload_many_blobs_with_transfer_manager( print("Failed to upload {} due to exception: {}".format(name, result)) else: print("Uploaded {} to {}.".format(name, bucket.name)) +# [END storage_transfer_manager_upload_many] diff --git a/scripts/decrypt-secrets.sh b/scripts/decrypt-secrets.sh index 21f6d2a26..0018b421d 100755 --- a/scripts/decrypt-secrets.sh +++ b/scripts/decrypt-secrets.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright 2015 Google Inc. All rights reserved. +# Copyright 2023 Google LLC All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/readme-gen/readme_gen.py b/scripts/readme-gen/readme_gen.py index 91b59676b..1acc11983 100644 --- a/scripts/readme-gen/readme_gen.py +++ b/scripts/readme-gen/readme_gen.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2016 Google Inc +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -33,17 +33,17 @@ autoescape=True, ) -README_TMPL = jinja_env.get_template('README.tmpl.rst') +README_TMPL = jinja_env.get_template("README.tmpl.rst") def get_help(file): - return subprocess.check_output(['python', file, '--help']).decode() + return subprocess.check_output(["python", file, "--help"]).decode() def main(): parser = argparse.ArgumentParser() - parser.add_argument('source') - parser.add_argument('--destination', default='README.rst') + parser.add_argument("source") + parser.add_argument("--destination", default="README.rst") args = parser.parse_args() @@ -51,9 +51,9 @@ def main(): root = os.path.dirname(source) destination = os.path.join(root, args.destination) - jinja_env.globals['get_help'] = get_help + jinja_env.globals["get_help"] = get_help - with io.open(source, 'r') as f: + with io.open(source, "r") as f: config = yaml.load(f) # This allows get_help to execute in the right directory. @@ -61,9 +61,9 @@ def main(): output = README_TMPL.render(config) - with io.open(destination, 'w') as f: + with io.open(destination, "w") as f: f.write(output) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/setup.cfg b/setup.cfg index c3a2b39f6..052350089 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # -# Copyright 2020 Google LLC +# Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
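The renamed upload sample (storage_transfer_manager_upload_many.py) appears in this diff only through its docstring and result-handling tail. For orientation, a sketch of the call pattern it is built around follows; the bucket name, file list, and source directory are illustrative assumptions, and `upload_many_from_filenames` is the public transfer-manager helper the sketch relies on.

    from google.cloud.storage import Client, transfer_manager

    storage_client = Client()
    bucket = storage_client.bucket("my-bucket")       # assumed bucket name

    filenames = ["file_a.txt", "file_b.txt"]          # assumed local files

    # Each entry in `results` is either None (success) or the exception
    # raised for the corresponding filename, in order.
    results = transfer_manager.upload_many_from_filenames(
        bucket, filenames, source_directory="uploads/", max_workers=8
    )

    for name, result in zip(filenames, results):
        if isinstance(result, Exception):
            print("Failed to upload {} due to exception: {}".format(name, result))
        else:
            print("Uploaded {} to {}.".format(name, bucket.name))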
diff --git a/setup.py b/setup.py index e2b5cc7a4..a57f972ff 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "google-auth >= 1.25.0, < 3.0dev", "google-api-core >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", "google-cloud-core >= 2.3.0, < 3.0dev", - "google-resumable-media >= 2.3.2", + "google-resumable-media >= 2.6.0", "requests >= 2.18.0, < 3.0.0dev", ] extras = {"protobuf": ["protobuf<5.0.0dev"]} diff --git a/tests/system/_helpers.py b/tests/system/_helpers.py index 385ceaf5c..e298d7932 100644 --- a/tests/system/_helpers.py +++ b/tests/system/_helpers.py @@ -91,7 +91,11 @@ def empty_bucket(bucket): def delete_blob(blob): - errors = (exceptions.Conflict, exceptions.TooManyRequests) + errors = ( + exceptions.Conflict, + exceptions.TooManyRequests, + exceptions.ServiceUnavailable, + ) retry = RetryErrors(errors) try: retry(blob.delete)(timeout=120) # seconds @@ -105,7 +109,11 @@ def delete_blob(blob): def delete_bucket(bucket): - errors = (exceptions.Conflict, exceptions.TooManyRequests) + errors = ( + exceptions.Conflict, + exceptions.TooManyRequests, + exceptions.ServiceUnavailable, + ) retry = RetryErrors(errors, max_tries=15) retry(empty_bucket)(bucket) retry(bucket.delete)(force=True) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 26d5c785e..fe90ceb80 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -46,6 +46,21 @@ ebh_bucket_iteration = 0 +_key_name_format = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}" + +keyring_name = "gcs-test" +default_key_name = "gcs-test" +alt_key_name = "gcs-test-alternate" + + +def _kms_key_name(client, bucket, key_name): + return _key_name_format.format( + client.project, + bucket.location.lower(), + keyring_name, + key_name, + ) + @pytest.fixture(scope="session") def storage_client(): @@ -218,3 +233,27 @@ def file_data(): file_data["hash"] = _base64_md5hash(file_obj) return _file_data + + +@pytest.fixture(scope="session") +def kms_bucket_name(): + return _helpers.unique_name("gcp-systest-kms") + + +@pytest.fixture(scope="session") +def kms_bucket(storage_client, kms_bucket_name, no_mtls): + bucket = _helpers.retry_429_503(storage_client.create_bucket)(kms_bucket_name) + + yield bucket + + _helpers.delete_bucket(bucket) + + +@pytest.fixture(scope="session") +def kms_key_name(storage_client, kms_bucket): + return _kms_key_name(storage_client, kms_bucket, default_key_name) + + +@pytest.fixture(scope="session") +def alt_kms_key_name(storage_client, kms_bucket): + return _kms_key_name(storage_client, kms_bucket, alt_key_name) diff --git a/tests/system/test_bucket.py b/tests/system/test_bucket.py index 6a2698e29..ac949cf96 100644 --- a/tests/system/test_bucket.py +++ b/tests/system/test_bucket.py @@ -653,10 +653,9 @@ def test_bucket_list_blobs_w_match_glob( assert [blob.name for blob in blobs] == expected_names -def test_bucket_w_retention_period( +def test_bucket_update_retention_period( storage_client, buckets_to_delete, - blobs_to_delete, ): period_secs = 3 bucket_name = _helpers.unique_name("w-retention-period") @@ -676,23 +675,6 @@ def test_bucket_w_retention_period( assert not bucket.default_event_based_hold assert not bucket.retention_policy_locked - blob_name = "test-blob" - payload = b"DEADBEEF" - blob = bucket.blob(blob_name) - blob.upload_from_string(payload) - - blobs_to_delete.append(blob) - - other = bucket.get_blob(blob_name) - _helpers.retry_has_retention_expiration(other.reload)() - - assert not other.event_based_hold - assert not other.temporary_hold - assert 
isinstance(other.retention_expiration_time, datetime.datetime) - - with pytest.raises(exceptions.Forbidden): - other.delete() - bucket.retention_period = None bucket.patch() @@ -705,15 +687,41 @@ def test_bucket_w_retention_period( assert not bucket.default_event_based_hold assert not bucket.retention_policy_locked - _helpers.retry_no_retention_expiration(other.reload)() - assert not other.event_based_hold - assert not other.temporary_hold - assert other.retention_expiration_time is None +def test_delete_object_bucket_w_retention_period( + storage_client, + buckets_to_delete, + blobs_to_delete, +): + # Create a bucket with retention period. + period_secs = 12 + bucket = storage_client.bucket(_helpers.unique_name("w-retention-period")) + bucket.retention_period = period_secs + bucket.default_event_based_hold = False + bucket = _helpers.retry_429_503(storage_client.create_bucket)(bucket) + buckets_to_delete.append(bucket) + + _helpers.retry_has_retention_period(bucket.reload)() + assert bucket.retention_period == period_secs + assert isinstance(bucket.retention_policy_effective_time, datetime.datetime) + + payload = b"DEADBEEF" + blob = bucket.blob(_helpers.unique_name("w-retention")) + blob.upload_from_string(payload) + blobs_to_delete.append(blob) + + _helpers.retry_has_retention_expiration(blob.reload)() + assert isinstance(blob.retention_expiration_time, datetime.datetime) + assert not blob.event_based_hold + assert not blob.temporary_hold + + # Attempts to delete objects whose age is less than the retention period should fail. + with pytest.raises(exceptions.Forbidden): + blob.delete() # Object can be deleted once it reaches the age defined in the retention policy. _helpers.await_config_changes_propagate(sec=period_secs) - other.delete() + blob.delete() blobs_to_delete.pop() @@ -1051,7 +1059,7 @@ def test_new_bucket_with_autoclass( # Autoclass can be enabled/disabled via bucket patch bucket.autoclass_enabled = False - bucket.patch() + bucket.patch(if_metageneration_match=bucket.metageneration) assert bucket.autoclass_enabled is False assert bucket.autoclass_toggle_time != previous_toggle_time diff --git a/tests/system/test_transfer_manager.py b/tests/system/test_transfer_manager.py index bc7e0d31e..fc7bc2d51 100644 --- a/tests/system/test_transfer_manager.py +++ b/tests/system/test_transfer_manager.py @@ -16,6 +16,8 @@ import tempfile import os +import pytest + from google.cloud.storage import transfer_manager from google.cloud.storage._helpers import _base64_md5hash @@ -23,6 +25,16 @@ DEADLINE = 30 +encryption_key = "b23ff11bba187db8c37077e6af3b25b8" + + +def _check_blob_hash(blob, info): + md5_hash = blob.md5_hash + if not isinstance(md5_hash, bytes): + md5_hash = md5_hash.encode("utf-8") + + assert md5_hash == info["hash"] + def test_upload_many(shared_bucket, file_data, blobs_to_delete): FILE_BLOB_PAIRS = [ @@ -171,3 +183,208 @@ def test_download_chunks_concurrently(shared_bucket, file_data): ) with open(threaded_filename, "rb") as file_obj: assert _base64_md5hash(file_obj) == source_file["hash"] + + +def test_upload_chunks_concurrently(shared_bucket, file_data, blobs_to_delete): + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file" + upload_blob = shared_bucket.blob(blob_name) + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + blobs_to_delete.append(upload_blob) + + transfer_manager.upload_chunks_concurrently( + filename, upload_blob, 
chunk_size=chunk_size, deadline=DEADLINE + ) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.blob(blob_name) + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + # Also test threaded mode + blob_name = "mpu_threaded" + upload_blob = shared_bucket.blob(blob_name) + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + transfer_manager.upload_chunks_concurrently( + filename, + upload_blob, + chunk_size=chunk_size, + deadline=DEADLINE, + worker_type=transfer_manager.THREAD, + ) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.blob(blob_name) + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + +def test_upload_chunks_concurrently_with_metadata( + shared_bucket, file_data, blobs_to_delete +): + import datetime + from google.cloud._helpers import UTC + + now = datetime.datetime.utcnow().replace(tzinfo=UTC) + custom_metadata = {"key_a": "value_a", "key_b": "value_b"} + + METADATA = { + "cache_control": "private", + "content_disposition": "inline", + "content_language": "en-US", + "custom_time": now, + "metadata": custom_metadata, + "storage_class": "NEARLINE", + } + + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file_with_metadata" + upload_blob = shared_bucket.blob(blob_name) + + for key, value in METADATA.items(): + setattr(upload_blob, key, value) + + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + transfer_manager.upload_chunks_concurrently( + filename, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + blobs_to_delete.append(upload_blob) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.get_blob(blob_name) + + for key, value in METADATA.items(): + assert getattr(download_blob, key) == value + + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + +def test_upload_chunks_concurrently_with_content_encoding( + shared_bucket, file_data, blobs_to_delete +): + import gzip + + METADATA = { + "content_encoding": "gzip", + } + + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file_encoded" + upload_blob = shared_bucket.blob(blob_name) + + for key, value in METADATA.items(): + setattr(upload_blob, key, value) + + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + + with tempfile.NamedTemporaryFile() as tmp_gzip: + with open(filename, "rb") as f: + compressed_bytes = gzip.compress(f.read()) + + tmp_gzip.write(compressed_bytes) + tmp_gzip.seek(0) + transfer_manager.upload_chunks_concurrently( + tmp_gzip.name, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + blobs_to_delete.append(upload_blob) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.get_blob(blob_name) + + for key, value in METADATA.items(): + assert getattr(download_blob, key) == value + + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") 
as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + +def test_upload_chunks_concurrently_with_encryption_key( + shared_bucket, file_data, blobs_to_delete +): + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file_encrypted" + upload_blob = shared_bucket.blob(blob_name, encryption_key=encryption_key) + + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + transfer_manager.upload_chunks_concurrently( + filename, upload_blob, chunk_size=chunk_size, deadline=DEADLINE + ) + blobs_to_delete.append(upload_blob) + + with tempfile.NamedTemporaryFile() as tmp: + download_blob = shared_bucket.get_blob(blob_name, encryption_key=encryption_key) + + download_blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents + + with tempfile.NamedTemporaryFile() as tmp: + keyless_blob = shared_bucket.get_blob(blob_name) + + with pytest.raises(exceptions.BadRequest): + keyless_blob.download_to_file(tmp) + + +def test_upload_chunks_concurrently_with_kms( + kms_bucket, file_data, blobs_to_delete, kms_key_name +): + source_file = file_data["big"] + filename = source_file["path"] + blob_name = "mpu_file_kms" + blob = kms_bucket.blob(blob_name, kms_key_name=kms_key_name) + + chunk_size = 5 * 1024 * 1024 # Minimum supported by XML MPU API + assert os.path.getsize(filename) > chunk_size # Won't make a good test otherwise + + transfer_manager.upload_chunks_concurrently( + filename, blob, chunk_size=chunk_size, deadline=DEADLINE + ) + blobs_to_delete.append(blob) + blob.reload() + assert blob.kms_key_name.startswith(kms_key_name) + + with tempfile.NamedTemporaryFile() as tmp: + blob.download_to_file(tmp) + tmp.seek(0) + + with open(source_file["path"], "rb") as sf: + source_contents = sf.read() + temp_contents = tmp.read() + assert source_contents == temp_contents diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py index 9e7bf216b..e64ae0bab 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -18,7 +18,8 @@ import mock from google.cloud.storage import _helpers -from tests.unit.test__helpers import GCCL_INVOCATION_TEST_CONST + +GCCL_INVOCATION_TEST_CONST = "gccl-invocation-id/test-invocation-123" class TestConnection(unittest.TestCase): diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py index 638db9f4e..a8d024176 100644 --- a/tests/unit/test_blob.py +++ b/tests/unit/test_blob.py @@ -1411,33 +1411,35 @@ def test_download_to_file_with_failure(self): blob_name = "blob-name" client = self._make_client() - client.download_blob_to_file.side_effect = NotFound("testing") bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) file_obj = io.BytesIO() - with self.assertRaises(NotFound): - blob.download_to_file(file_obj) + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = NotFound("testing") - self.assertEqual(file_obj.tell(), 0) + with self.assertRaises(NotFound): + blob.download_to_file(file_obj) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - 
if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + self.assertEqual(file_obj.tell(), 0) + + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) def test_download_to_file_wo_media_link(self): blob_name = "blob-name" @@ -1446,28 +1448,29 @@ def test_download_to_file_wo_media_link(self): blob = self._make_one(blob_name, bucket=bucket) file_obj = io.BytesIO() - blob.download_to_file(file_obj) + with mock.patch.object(blob, "_prep_and_do_download"): + blob.download_to_file(file_obj) - # Make sure the media link is still unknown. - self.assertIsNone(blob.media_link) + # Make sure the media link is still unknown. + self.assertIsNone(blob.media_link) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) def test_download_to_file_w_etag_match(self): etag = "kittens" @@ -1475,25 +1478,26 @@ def test_download_to_file_w_etag_match(self): blob = self._make_one("blob-name", bucket=_Bucket(client)) file_obj = io.BytesIO() - blob.download_to_file(file_obj, if_etag_not_match=etag) + with mock.patch.object(blob, "_prep_and_do_download"): + blob.download_to_file(file_obj, if_etag_not_match=etag) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=etag, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=etag, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) def test_download_to_file_w_generation_match(self): generation_number = 6 @@ -1501,25 +1505,26 @@ def test_download_to_file_w_generation_match(self): blob = self._make_one("blob-name", bucket=_Bucket(client)) file_obj = io.BytesIO() - blob.download_to_file(file_obj, if_generation_not_match=generation_number) + with mock.patch.object(blob, 
"_prep_and_do_download"): + blob.download_to_file(file_obj, if_generation_not_match=generation_number) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=generation_number, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=generation_number, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) def _download_to_file_helper( self, use_chunks, raw_download, timeout=None, **extra_kwargs @@ -1544,28 +1549,30 @@ def _download_to_file_helper( extra_kwargs.update(timeout_kwarg) file_obj = io.BytesIO() - if raw_download: - blob.download_to_file(file_obj, raw_download=True, **extra_kwargs) - else: - blob.download_to_file(file_obj, **extra_kwargs) - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - client.download_blob_to_file.assert_called_once_with( - blob, - file_obj, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) + with mock.patch.object(blob, "_prep_and_do_download"): + if raw_download: + blob.download_to_file(file_obj, raw_download=True, **extra_kwargs) + else: + blob.download_to_file(file_obj, **extra_kwargs) + + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + blob._prep_and_do_download.assert_called_once_with( + file_obj, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=raw_download, + timeout=expected_timeout, + checksum="md5", + retry=expected_retry, + ) def test_download_to_file_wo_chunks_wo_raw(self): self._download_to_file_helper(use_chunks=False, raw_download=False) @@ -1602,48 +1609,51 @@ def _download_to_filename_helper( blob = self._make_one(blob_name, bucket=bucket, properties=properties) - with _NamedTemporaryFile() as temp: - if timeout is None: - blob.download_to_filename( - temp.name, raw_download=raw_download, **extra_kwargs - ) - else: - blob.download_to_filename( - temp.name, - raw_download=raw_download, - timeout=timeout, - **extra_kwargs, - ) - - if updated is None: - self.assertIsNone(blob.updated) - else: - mtime = os.path.getmtime(temp.name) - updated_time = blob.updated.timestamp() - self.assertEqual(mtime, updated_time) - - expected_timeout = self._get_default_timeout() if timeout is None else timeout + with mock.patch.object(blob, "_prep_and_do_download"): + with _NamedTemporaryFile() as temp: + if timeout is None: + blob.download_to_filename( + temp.name, raw_download=raw_download, **extra_kwargs + ) + else: + blob.download_to_filename( + temp.name, + raw_download=raw_download, + timeout=timeout, + **extra_kwargs, + ) + + if updated is None: + 
self.assertIsNone(blob.updated) + else: + mtime = os.path.getmtime(temp.name) + updated_time = blob.updated.timestamp() + self.assertEqual(mtime, updated_time) + + expected_timeout = ( + self._get_default_timeout() if timeout is None else timeout + ) - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + raw_download=raw_download, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=expected_timeout, + checksum="md5", + retry=expected_retry, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_w_updated_wo_raw(self): updated = "2014-12-06T13:13:50.690Z" @@ -1677,28 +1687,29 @@ def test_download_to_filename_w_etag_match(self): client = self._make_client() blob = self._make_one("blob-name", bucket=_Bucket(client)) - with _NamedTemporaryFile() as temp: - blob.download_to_filename(temp.name, if_etag_match=etag) + with mock.patch.object(blob, "_prep_and_do_download"): + with _NamedTemporaryFile() as temp: + blob.download_to_filename(temp.name, if_etag_match=etag) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=etag, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=etag, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_w_generation_match(self): from google.cloud._testing import _NamedTemporaryFile @@ -1707,28 +1718,31 @@ def test_download_to_filename_w_generation_match(self): client = self._make_client() blob = self._make_one("blob-name", bucket=_Bucket(client)) - with _NamedTemporaryFile() as temp: - blob.download_to_filename(temp.name, if_generation_match=generation_number) + with mock.patch.object(blob, "_prep_and_do_download"): + with _NamedTemporaryFile() as temp: + blob.download_to_filename( + temp.name, if_generation_match=generation_number + ) - expected_timeout = self._get_default_timeout() - 
client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=generation_number, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, temp.name) + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=generation_number, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, temp.name) def test_download_to_filename_corrupted(self): from google.resumable_media import DataCorruption @@ -1737,40 +1751,42 @@ def test_download_to_filename_corrupted(self): client = self._make_client() bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - client.download_blob_to_file.side_effect = DataCorruption("testing") - # Try to download into a temporary file (don't use - # `_NamedTemporaryFile` it will try to remove after the file is - # already removed) - filehandle, filename = tempfile.mkstemp() - os.close(filehandle) - self.assertTrue(os.path.exists(filename)) + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = DataCorruption("testing") - with self.assertRaises(DataCorruption): - blob.download_to_filename(filename) + # Try to download into a temporary file (don't use + # `_NamedTemporaryFile` it will try to remove after the file is + # already removed) + filehandle, filename = tempfile.mkstemp() + os.close(filehandle) + self.assertTrue(os.path.exists(filename)) - # Make sure the file was cleaned up. - self.assertFalse(os.path.exists(filename)) + with self.assertRaises(DataCorruption): + blob.download_to_filename(filename) - expected_timeout = self._get_default_timeout() - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=False, - timeout=expected_timeout, - checksum="md5", - retry=DEFAULT_RETRY, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertEqual(stream.name, filename) + # Make sure the file was cleaned up. 
+ self.assertFalse(os.path.exists(filename)) + + expected_timeout = self._get_default_timeout() + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + raw_download=False, + timeout=expected_timeout, + checksum="md5", + retry=DEFAULT_RETRY, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertEqual(stream.name, filename) def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): blob_name = "blob-name" @@ -1778,36 +1794,39 @@ def _download_as_bytes_helper(self, raw_download, timeout=None, **extra_kwargs): bucket = _Bucket(client) blob = self._make_one(blob_name, bucket=bucket) - if timeout is None: - expected_timeout = self._get_default_timeout() - fetched = blob.download_as_bytes(raw_download=raw_download, **extra_kwargs) - else: - expected_timeout = timeout - fetched = blob.download_as_bytes( - raw_download=raw_download, timeout=timeout, **extra_kwargs - ) - self.assertEqual(fetched, b"") + with mock.patch.object(blob, "_prep_and_do_download"): + if timeout is None: + expected_timeout = self._get_default_timeout() + fetched = blob.download_as_bytes( + raw_download=raw_download, **extra_kwargs + ) + else: + expected_timeout = timeout + fetched = blob.download_as_bytes( + raw_download=raw_download, timeout=timeout, **extra_kwargs + ) + self.assertEqual(fetched, b"") - expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) + expected_retry = extra_kwargs.get("retry", DEFAULT_RETRY) - client.download_blob_to_file.assert_called_once_with( - blob, - mock.ANY, - start=None, - end=None, - if_etag_match=None, - if_etag_not_match=None, - if_generation_match=None, - if_generation_not_match=None, - if_metageneration_match=None, - if_metageneration_not_match=None, - raw_download=raw_download, - timeout=expected_timeout, - checksum="md5", - retry=expected_retry, - ) - stream = client.download_blob_to_file.mock_calls[0].args[1] - self.assertIsInstance(stream, io.BytesIO) + blob._prep_and_do_download.assert_called_once_with( + mock.ANY, + client=None, + start=None, + end=None, + raw_download=raw_download, + if_etag_match=None, + if_etag_not_match=None, + if_generation_match=None, + if_generation_not_match=None, + if_metageneration_match=None, + if_metageneration_not_match=None, + timeout=expected_timeout, + checksum="md5", + retry=expected_retry, + ) + stream = blob._prep_and_do_download.mock_calls[0].args[0] + self.assertIsInstance(stream, io.BytesIO) def test_download_as_bytes_w_custom_timeout(self): self._download_as_bytes_helper(raw_download=False, timeout=9.58) @@ -1820,14 +1839,14 @@ def test_download_as_bytes_w_etag_match(self): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_bytes(if_etag_match=ETAG) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._prep_and_do_download.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -1850,14 +1869,14 @@ def test_download_as_bytes_w_generation_match(self): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._prep_and_do_download = 
mock.Mock() fetched = blob.download_as_bytes(if_generation_match=GENERATION_NUMBER) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._prep_and_do_download.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -2087,14 +2106,14 @@ def test_download_as_string(self, mock_warn): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_string() self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._prep_and_do_download.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -2125,14 +2144,14 @@ def test_download_as_string_no_retry(self, mock_warn): blob = self._make_one( "blob-name", bucket=_Bucket(client), properties={"mediaLink": MEDIA_LINK} ) - client.download_blob_to_file = mock.Mock() + blob._prep_and_do_download = mock.Mock() fetched = blob.download_as_string(retry=None) self.assertEqual(fetched, b"") - client.download_blob_to_file.assert_called_once_with( - blob, + blob._prep_and_do_download.assert_called_once_with( mock.ANY, + client=None, start=None, end=None, raw_download=False, @@ -2232,11 +2251,12 @@ def test__get_upload_arguments(self): blob = self._make_one(name, bucket=None, encryption_key=key) blob.content_disposition = "inline" + COMMAND = "tm.upload_many" content_type = "image/jpeg" with patch.object( _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST ): - info = blob._get_upload_arguments(client, content_type) + info = blob._get_upload_arguments(client, content_type, command=COMMAND) headers, object_metadata, new_content_type = info header_key_value = "W3BYd0AscEBAQWZCZnJSM3gtMmIyU0NIUiwuP1l3Uk8=" @@ -2245,11 +2265,17 @@ def test__get_upload_arguments(self): _helpers, "_get_invocation_id", return_value=GCCL_INVOCATION_TEST_CONST ): expected_headers = { - **_get_default_headers(client._connection.user_agent, content_type), + **_get_default_headers( + client._connection.user_agent, content_type, command=COMMAND + ), "X-Goog-Encryption-Algorithm": "AES256", "X-Goog-Encryption-Key": header_key_value, "X-Goog-Encryption-Key-Sha256": header_key_hash_value, } + self.assertEqual( + headers["X-Goog-API-Client"], + f"{client._connection.user_agent} {GCCL_INVOCATION_TEST_CONST} gccl-gcs-cmd/{COMMAND}", + ) self.assertEqual(headers, expected_headers) expected_metadata = { "contentDisposition": blob.content_disposition, @@ -3165,6 +3191,7 @@ def _do_upload_helper( timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) blob._do_resumable_upload.assert_not_called() else: @@ -3183,6 +3210,7 @@ def _do_upload_helper( timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) def test__do_upload_uses_multipart(self): @@ -3275,6 +3303,7 @@ def _upload_from_file_helper(self, side_effect=None, **kwargs): timeout=expected_timeout, checksum=None, retry=retry, + command=None, ) return stream @@ -3366,7 +3395,13 @@ def _do_upload_mock_call_helper( if not retry: retry = DEFAULT_RETRY_IF_GENERATION_SPECIFIED if not num_retries else None self.assertEqual( - kwargs, {"timeout": expected_timeout, "checksum": None, "retry": retry} + kwargs, + { + "timeout": expected_timeout, + "checksum": None, + "retry": retry, + "command": None, + }, ) return pos_args[1] diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py 
index 31f7e3988..277610696 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1639,9 +1639,16 @@ def test_create_bucket_w_name_only(self): _target_object=bucket, ) + @staticmethod + def _make_blob(*args, **kw): + from google.cloud.storage.blob import Blob + + blob = Blob(*args, **kw) + + return blob + def test_download_blob_to_file_with_failure(self): from google.resumable_media import InvalidResponse - from google.cloud.storage.blob import Blob from google.cloud.storage.constants import _DEFAULT_TIMEOUT project = "PROJECT" @@ -1652,7 +1659,7 @@ def test_download_blob_to_file_with_failure(self): grmp_response = InvalidResponse(raw_response) credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = mock.create_autospec(Blob) + blob = self._make_blob(name="blob_name", bucket=None) blob._encryption_key = None blob._get_download_url = mock.Mock() blob._do_download = mock.Mock() @@ -1689,7 +1696,7 @@ def test_download_blob_to_file_with_uri(self): project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(project=project, credentials=credentials) - blob = mock.Mock() + blob = self._make_blob(name="blob_name", bucket=None) file_obj = io.BytesIO() blob._encryption_key = None blob._get_download_url = mock.Mock() @@ -1787,13 +1794,12 @@ def test_download_blob_to_file_w_conditional_retry_fail(self): def _download_blob_to_file_helper( self, use_chunks, raw_download, expect_condition_fail=False, **extra_kwargs ): - from google.cloud.storage.blob import Blob from google.cloud.storage.constants import _DEFAULT_TIMEOUT project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = mock.create_autospec(Blob) + blob = self._make_blob(name="blob_name", bucket=None) blob._encryption_key = None blob._get_download_url = mock.Mock() if use_chunks: @@ -1863,14 +1869,13 @@ def test_download_blob_to_file_w_chunks_w_raw(self): self._download_blob_to_file_helper(use_chunks=True, raw_download=True) def test_download_blob_have_different_uuid(self): - from google.cloud.storage.blob import Blob - project = "PROJECT" credentials = _make_credentials(project=project) client = self._make_one(credentials=credentials) - blob = mock.create_autospec(Blob) + blob = self._make_blob(name="blob_name", bucket=None) blob._encryption_key = None blob._do_download = mock.Mock() + blob._get_download_url = mock.Mock() file_obj = io.BytesIO() client.download_blob_to_file(blob, file_obj) client.download_blob_to_file(blob, file_obj) diff --git a/tests/unit/test_transfer_manager.py b/tests/unit/test_transfer_manager.py index bdfd236b5..1f6d5b0dc 100644 --- a/tests/unit/test_transfer_manager.py +++ b/tests/unit/test_transfer_manager.py @@ -18,6 +18,7 @@ from google.cloud.storage import transfer_manager from google.cloud.storage import Blob +from google.cloud.storage import Client from google.api_core import exceptions @@ -33,6 +34,17 @@ FAKE_ENCODING = "fake_gzip" DOWNLOAD_KWARGS = {"accept-encoding": FAKE_ENCODING} CHUNK_SIZE = 8 +HOSTNAME = "https://example.com" +URL = "https://example.com/bucket/blob" +USER_AGENT = "agent" +EXPECTED_UPLOAD_KWARGS = { + "command": "tm.upload_many", + **UPLOAD_KWARGS, +} +EXPECTED_DOWNLOAD_KWARGS = { + "command": "tm.download_many", + **DOWNLOAD_KWARGS, +} # Used in subprocesses only, so excluded from coverage @@ -40,9 +52,9 @@ def _validate_blob_token_in_subprocess( maybe_pickled_blob, 
 ):  # pragma: NO COVER
     assert pickle.loads(maybe_pickled_blob) == BLOB_TOKEN_STRING
-    assert method_name.endswith("filename")
+    assert "filename" in method_name
     assert path_or_file.startswith("file")
-    assert kwargs == UPLOAD_KWARGS or kwargs == DOWNLOAD_KWARGS
+    assert kwargs == EXPECTED_UPLOAD_KWARGS or kwargs == EXPECTED_DOWNLOAD_KWARGS
     return FAKE_RESULT
 
 
@@ -51,10 +63,11 @@ def test_upload_many_with_filenames():
         ("file_a.txt", mock.Mock(spec=Blob)),
         ("file_b.txt", mock.Mock(spec=Blob)),
     ]
-    EXPECTED_UPLOAD_KWARGS = {"if_generation_match": 0, **UPLOAD_KWARGS}
+    expected_upload_kwargs = EXPECTED_UPLOAD_KWARGS.copy()
+    expected_upload_kwargs["if_generation_match"] = 0
 
     for _, blob_mock in FILE_BLOB_PAIRS:
-        blob_mock.upload_from_filename.return_value = FAKE_RESULT
+        blob_mock._handle_filename_and_upload.return_value = FAKE_RESULT
 
     results = transfer_manager.upload_many(
         FILE_BLOB_PAIRS,
@@ -63,8 +76,8 @@ def test_upload_many_with_filenames():
         worker_type=transfer_manager.THREAD,
     )
     for (filename, mock_blob) in FILE_BLOB_PAIRS:
-        mock_blob.upload_from_filename.assert_any_call(
-            filename, **EXPECTED_UPLOAD_KWARGS
+        mock_blob._handle_filename_and_upload.assert_any_call(
+            filename, **expected_upload_kwargs
         )
     for result in results:
         assert result == FAKE_RESULT
@@ -75,10 +88,11 @@ def test_upload_many_with_file_objs():
         (tempfile.TemporaryFile(), mock.Mock(spec=Blob)),
         (tempfile.TemporaryFile(), mock.Mock(spec=Blob)),
     ]
-    EXPECTED_UPLOAD_KWARGS = {"if_generation_match": 0, **UPLOAD_KWARGS}
+    expected_upload_kwargs = EXPECTED_UPLOAD_KWARGS.copy()
+    expected_upload_kwargs["if_generation_match"] = 0
 
     for _, blob_mock in FILE_BLOB_PAIRS:
-        blob_mock.upload_from_file.return_value = FAKE_RESULT
+        blob_mock._prep_and_do_upload.return_value = FAKE_RESULT
 
     results = transfer_manager.upload_many(
         FILE_BLOB_PAIRS,
@@ -87,7 +101,7 @@ def test_upload_many_with_file_objs():
         worker_type=transfer_manager.THREAD,
     )
     for (file, mock_blob) in FILE_BLOB_PAIRS:
-        mock_blob.upload_from_file.assert_any_call(file, **EXPECTED_UPLOAD_KWARGS)
+        mock_blob._prep_and_do_upload.assert_any_call(file, **expected_upload_kwargs)
     for result in results:
         assert result == FAKE_RESULT
 
@@ -153,7 +167,7 @@ def test_upload_many_suppresses_exceptions():
         ("file_b.txt", mock.Mock(spec=Blob)),
     ]
     for _, mock_blob in FILE_BLOB_PAIRS:
-        mock_blob.upload_from_filename.side_effect = ConnectionError()
+        mock_blob._handle_filename_and_upload.side_effect = ConnectionError()
 
     results = transfer_manager.upload_many(
         FILE_BLOB_PAIRS, worker_type=transfer_manager.THREAD
@@ -168,7 +182,7 @@ def test_upload_many_raises_exceptions():
         ("file_b.txt", mock.Mock(spec=Blob)),
     ]
     for _, mock_blob in FILE_BLOB_PAIRS:
-        mock_blob.upload_from_filename.side_effect = ConnectionError()
+        mock_blob._handle_filename_and_upload.side_effect = ConnectionError()
 
     with pytest.raises(ConnectionError):
         transfer_manager.upload_many(
@@ -182,8 +196,8 @@ def test_upload_many_suppresses_412_with_skip_if_exists():
         ("file_b.txt", mock.Mock(spec=Blob)),
     ]
     for _, mock_blob in FILE_BLOB_PAIRS:
-        mock_blob.upload_from_filename.side_effect = exceptions.PreconditionFailed(
-            "412"
+        mock_blob._handle_filename_and_upload.side_effect = (
+            exceptions.PreconditionFailed("412")
         )
     results = transfer_manager.upload_many(
         FILE_BLOB_PAIRS,
@@ -192,7 +206,7 @@ def test_upload_many_suppresses_412_with_skip_if_exists():
         worker_type=transfer_manager.THREAD,
    )
     for result in results:
-        assert type(result) == exceptions.PreconditionFailed
+        assert isinstance(result, exceptions.PreconditionFailed)
 
 
 def test_upload_many_with_processes():
@@ -242,7 +256,7 @@ def test_download_many_with_filenames():
     ]
 
     for blob_mock, _ in BLOB_FILE_PAIRS:
-        blob_mock.download_to_filename.return_value = FAKE_RESULT
+        blob_mock._handle_filename_and_download.return_value = FAKE_RESULT
 
     results = transfer_manager.download_many(
         BLOB_FILE_PAIRS,
@@ -250,7 +264,9 @@ def test_download_many_with_filenames():
         worker_type=transfer_manager.THREAD,
     )
     for (mock_blob, file) in BLOB_FILE_PAIRS:
-        mock_blob.download_to_filename.assert_any_call(file, **DOWNLOAD_KWARGS)
+        mock_blob._handle_filename_and_download.assert_any_call(
+            file, **EXPECTED_DOWNLOAD_KWARGS
+        )
     for result in results:
         assert result == FAKE_RESULT
 
@@ -262,7 +278,7 @@ def test_download_many_with_file_objs():
     ]
 
     for blob_mock, _ in BLOB_FILE_PAIRS:
-        blob_mock.download_to_file.return_value = FAKE_RESULT
+        blob_mock._prep_and_do_download.return_value = FAKE_RESULT
 
     results = transfer_manager.download_many(
         BLOB_FILE_PAIRS,
@@ -270,7 +286,7 @@ def test_download_many_with_file_objs():
         worker_type=transfer_manager.THREAD,
     )
     for (mock_blob, file) in BLOB_FILE_PAIRS:
-        mock_blob.download_to_file.assert_any_call(file, **DOWNLOAD_KWARGS)
+        mock_blob._prep_and_do_download.assert_any_call(file, **DOWNLOAD_KWARGS)
     for result in results:
         assert result == FAKE_RESULT
 
@@ -301,7 +317,7 @@ def test_download_many_suppresses_exceptions():
         (mock.Mock(spec=Blob), "file_b.txt"),
     ]
     for mock_blob, _ in BLOB_FILE_PAIRS:
-        mock_blob.download_to_filename.side_effect = ConnectionError()
+        mock_blob._handle_filename_and_download.side_effect = ConnectionError()
 
     results = transfer_manager.download_many(
         BLOB_FILE_PAIRS, worker_type=transfer_manager.THREAD
@@ -316,7 +332,7 @@ def test_download_many_raises_exceptions():
         (mock.Mock(spec=Blob), "file_b.txt"),
     ]
     for mock_blob, _ in BLOB_FILE_PAIRS:
-        mock_blob.download_to_filename.side_effect = ConnectionError()
+        mock_blob._handle_filename_and_download.side_effect = ConnectionError()
 
     with pytest.raises(ConnectionError):
         transfer_manager.download_many(
@@ -527,9 +543,12 @@ def test_download_chunks_concurrently():
     MULTIPLE = 4
     blob_mock.size = CHUNK_SIZE * MULTIPLE
 
-    blob_mock.download_to_filename.return_value = FAKE_RESULT
+    expected_download_kwargs = EXPECTED_DOWNLOAD_KWARGS.copy()
+    expected_download_kwargs["command"] = "tm.download_sharded"
+
+    blob_mock._handle_filename_and_download.return_value = FAKE_RESULT
 
-    with mock.patch("__main__.open", mock.mock_open()):
+    with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()):
         result = transfer_manager.download_chunks_concurrently(
             blob_mock,
             FILENAME,
@@ -538,13 +557,13 @@ def test_download_chunks_concurrently():
             worker_type=transfer_manager.THREAD,
         )
     for x in range(MULTIPLE):
-        blob_mock.download_to_file.assert_any_call(
+        blob_mock._prep_and_do_download.assert_any_call(
             mock.ANY,
-            **DOWNLOAD_KWARGS,
+            **expected_download_kwargs,
             start=x * CHUNK_SIZE,
-            end=((x + 1) * CHUNK_SIZE) - 1
+            end=((x + 1) * CHUNK_SIZE) - 1,
         )
-    assert blob_mock.download_to_file.call_count == 4
+    assert blob_mock._prep_and_do_download.call_count == 4
     assert result is None
 
 
@@ -554,7 +573,7 @@ def test_download_chunks_concurrently_raises_on_start_and_end():
     MULTIPLE = 4
     blob_mock.size = CHUNK_SIZE * MULTIPLE
 
-    with mock.patch("__main__.open", mock.mock_open()):
+    with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()):
         with pytest.raises(ValueError):
             transfer_manager.download_chunks_concurrently(
                 blob_mock,
@@ -587,7 +606,9 @@ def test_download_chunks_concurrently_passes_concurrency_options():
 
     with mock.patch("concurrent.futures.ThreadPoolExecutor") as pool_patch, mock.patch(
         "concurrent.futures.wait"
-    ) as wait_patch, mock.patch("__main__.open", mock.mock_open()):
+    ) as wait_patch, mock.patch(
+        "google.cloud.storage.transfer_manager.open", mock.mock_open()
+    ):
         transfer_manager.download_chunks_concurrently(
             blob_mock,
             FILENAME,
@@ -600,6 +621,190 @@ def test_download_chunks_concurrently_passes_concurrency_options():
     wait_patch.assert_called_with(mock.ANY, timeout=DEADLINE, return_when=mock.ANY)
 
 
+def test_upload_chunks_concurrently():
+    bucket = mock.Mock()
+    bucket.name = "bucket"
+    bucket.client = _PickleableMockClient(identify_as_client=True)
+    transport = bucket.client._http
+    bucket.user_project = None
+
+    blob = Blob("blob", bucket)
+    blob.content_type = FAKE_CONTENT_TYPE
+
+    FILENAME = "file_a.txt"
+    SIZE = 2048
+
+    container_mock = mock.Mock()
+    container_mock.upload_id = "abcd"
+    part_mock = mock.Mock()
+    ETAG = "efgh"
+    part_mock.etag = ETAG
+
+    with mock.patch("os.path.getsize", return_value=SIZE), mock.patch(
+        "google.cloud.storage.transfer_manager.XMLMPUContainer",
+        return_value=container_mock,
+    ), mock.patch(
+        "google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part_mock
+    ):
+        transfer_manager.upload_chunks_concurrently(
+            FILENAME,
+            blob,
+            chunk_size=SIZE // 2,
+            worker_type=transfer_manager.THREAD,
+        )
+    container_mock.initiate.assert_called_once_with(
+        transport=transport, content_type=blob.content_type
+    )
+    container_mock.register_part.assert_any_call(1, ETAG)
+    container_mock.register_part.assert_any_call(2, ETAG)
+    container_mock.finalize.assert_called_once_with(bucket.client._http)
+
+    assert container_mock._retry_strategy.max_sleep == 60.0
+    assert container_mock._retry_strategy.max_cumulative_retry == 120.0
+    assert container_mock._retry_strategy.max_retries is None
+
+    part_mock.upload.assert_called_with(transport)
+
+
+def test_upload_chunks_concurrently_passes_concurrency_options():
+    bucket = mock.Mock()
+    bucket.name = "bucket"
+    bucket.client = _PickleableMockClient(identify_as_client=True)
+    transport = bucket.client._http
+    bucket.user_project = None
+
+    blob = Blob("blob", bucket)
+
+    FILENAME = "file_a.txt"
+    SIZE = 2048
+
+    container_mock = mock.Mock()
+    container_mock.upload_id = "abcd"
+
+    MAX_WORKERS = 7
+    DEADLINE = 10
+
+    with mock.patch("os.path.getsize", return_value=SIZE), mock.patch(
+        "google.cloud.storage.transfer_manager.XMLMPUContainer",
+        return_value=container_mock,
+    ), mock.patch("concurrent.futures.ThreadPoolExecutor") as pool_patch, mock.patch(
+        "concurrent.futures.wait"
+    ) as wait_patch:
+        try:
+            transfer_manager.upload_chunks_concurrently(
+                FILENAME,
+                blob,
+                chunk_size=SIZE // 2,
+                worker_type=transfer_manager.THREAD,
+                max_workers=MAX_WORKERS,
+                deadline=DEADLINE,
+                retry=None,
+            )
+        except ValueError:
+            pass  # The futures don't actually work, so we expect this to abort.
+            # Conveniently, that gives us a chance to test the auto-delete
+            # exception handling feature.
+        container_mock.cancel.assert_called_once_with(transport)
+        assert container_mock._retry_strategy.max_retries == 0
+
+    pool_patch.assert_called_with(max_workers=MAX_WORKERS)
+    wait_patch.assert_called_with(mock.ANY, timeout=DEADLINE, return_when=mock.ANY)
+
+
+def test_upload_chunks_concurrently_with_metadata_and_encryption():
+    import datetime
+    from google.cloud._helpers import UTC
+    from google.cloud._helpers import _RFC3339_MICROS
+
+    now = datetime.datetime.utcnow().replace(tzinfo=UTC)
+    now_str = now.strftime(_RFC3339_MICROS)
+
+    custom_metadata = {"key_a": "value_a", "key_b": "value_b"}
+    encryption_key = "b23ff11bba187db8c37077e6af3b25b8"
+    kms_key_name = "sample_key_name"
+
+    METADATA = {
+        "cache_control": "private",
+        "content_disposition": "inline",
+        "content_language": "en-US",
+        "custom_time": now,
+        "metadata": custom_metadata,
+        "storage_class": "NEARLINE",
+    }
+
+    bucket = mock.Mock()
+    bucket.name = "bucket"
+    bucket.client = _PickleableMockClient(identify_as_client=True)
+    transport = bucket.client._http
+    user_project = "my_project"
+    bucket.user_project = user_project
+
+    blob = Blob("blob", bucket, kms_key_name=kms_key_name)
+    blob.content_type = FAKE_CONTENT_TYPE
+
+    for key, value in METADATA.items():
+        setattr(blob, key, value)
+    blob.metadata = {**custom_metadata}
+    blob.encryption_key = encryption_key
+
+    FILENAME = "file_a.txt"
+    SIZE = 2048
+
+    container_mock = mock.Mock()
+    container_mock.upload_id = "abcd"
+    part_mock = mock.Mock()
+    ETAG = "efgh"
+    part_mock.etag = ETAG
+    container_cls_mock = mock.Mock(return_value=container_mock)
+
+    invocation_id = "b9f8cbb0-6456-420c-819d-3f4ee3c0c455"
+
+    with mock.patch("os.path.getsize", return_value=SIZE), mock.patch(
+        "google.cloud.storage.transfer_manager.XMLMPUContainer", new=container_cls_mock
+    ), mock.patch(
+        "google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part_mock
+    ), mock.patch(
+        "google.cloud.storage._helpers._get_invocation_id",
+        return_value="gccl-invocation-id/" + invocation_id,
+    ):
+        transfer_manager.upload_chunks_concurrently(
+            FILENAME,
+            blob,
+            chunk_size=SIZE // 2,
+            worker_type=transfer_manager.THREAD,
+        )
+    expected_headers = {
+        "Accept": "application/json",
+        "Accept-Encoding": "gzip, deflate",
+        "User-Agent": "agent",
+        "X-Goog-API-Client": f"agent gccl-invocation-id/{invocation_id} gccl-gcs-cmd/tm.upload_sharded",
+        "content-type": FAKE_CONTENT_TYPE,
+        "x-upload-content-type": FAKE_CONTENT_TYPE,
+        "X-Goog-Encryption-Algorithm": "AES256",
+        "X-Goog-Encryption-Key": "YjIzZmYxMWJiYTE4N2RiOGMzNzA3N2U2YWYzYjI1Yjg=",
+        "X-Goog-Encryption-Key-Sha256": "B25Y4hgVlNXDliAklsNz9ykLk7qvgqDrSbdds5iu8r4=",
+        "Cache-Control": "private",
+        "Content-Disposition": "inline",
+        "Content-Language": "en-US",
+        "x-goog-storage-class": "NEARLINE",
+        "x-goog-custom-time": now_str,
+        "x-goog-meta-key_a": "value_a",
+        "x-goog-meta-key_b": "value_b",
+        "x-goog-user-project": "my_project",
+        "x-goog-encryption-kms-key-name": "sample_key_name",
+    }
+    container_cls_mock.assert_called_once_with(
+        URL, FILENAME, headers=expected_headers
+    )
+    container_mock.initiate.assert_called_once_with(
+        transport=transport, content_type=blob.content_type
+    )
+    container_mock.register_part.assert_any_call(1, ETAG)
+    container_mock.register_part.assert_any_call(2, ETAG)
+    container_mock.finalize.assert_called_once_with(transport)
+    part_mock.upload.assert_called_with(blob.client._http)
+
+
 class _PickleableMockBlob:
     def __init__(
         self,
@@ -619,10 +824,32 @@ def reload(self):
         self.size = self._size_after_reload
         self.generation = self._generation_after_reload
 
-    def download_to_file(self, *args, **kwargs):
+    def _prep_and_do_download(self, *args, **kwargs):
         return "SUCCESS"
 
 
+class _PickleableMockConnection:
+    @staticmethod
+    def get_api_base_url_for_mtls():
+        return HOSTNAME
+
+    user_agent = USER_AGENT
+
+
+class _PickleableMockClient:
+    def __init__(self, identify_as_client=False):
+        self._http = "my_transport"  # used as an identifier for "called_with"
+        self._connection = _PickleableMockConnection()
+        self.identify_as_client = identify_as_client
+
+    @property
+    def __class__(self):
+        if self.identify_as_client:
+            return Client
+        else:
+            return _PickleableMockClient
+
+
 # Used in subprocesses only, so excluded from coverage
 def _validate_blob_token_in_subprocess_for_chunk(
     maybe_pickled_blob, filename, **kwargs
@@ -642,7 +869,7 @@ def test_download_chunks_concurrently_with_processes():
     with mock.patch(
         "google.cloud.storage.transfer_manager._download_and_write_chunk_in_place",
         new=_validate_blob_token_in_subprocess_for_chunk,
-    ), mock.patch("__main__.open", mock.mock_open()):
+    ), mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()):
         result = transfer_manager.download_chunks_concurrently(
             blob,
             FILENAME,
@@ -665,26 +892,59 @@ def test__LazyClient():
     assert len(fake_cache) == 1
 
 
-def test__pickle_blob():
+def test__pickle_client():
     # This test nominally has coverage, but doesn't assert that the essential
-    # copyreg behavior in _pickle_blob works. Unfortunately there doesn't seem
+    # copyreg behavior in _pickle_client works. Unfortunately there doesn't seem
    # to be a good way to check that without actually creating a Client, which
     # will spin up HTTP connections undesirably. This is more fully checked in
-    # the system tests, though.
-    pkl = transfer_manager._pickle_blob(FAKE_RESULT)
+    # the system tests.
+    pkl = transfer_manager._pickle_client(FAKE_RESULT)
     assert pickle.loads(pkl) == FAKE_RESULT
 
 
 def test__download_and_write_chunk_in_place():
     pickled_mock = pickle.dumps(_PickleableMockBlob())
     FILENAME = "file_a.txt"
-    with mock.patch("__main__.open", mock.mock_open()):
+    with mock.patch("google.cloud.storage.transfer_manager.open", mock.mock_open()):
         result = transfer_manager._download_and_write_chunk_in_place(
             pickled_mock, FILENAME, 0, 8, {}
         )
     assert result == "SUCCESS"
 
 
+def test__upload_part():
+    from google.cloud.storage.retry import DEFAULT_RETRY
+
+    pickled_mock = pickle.dumps(_PickleableMockClient())
+    FILENAME = "file_a.txt"
+    UPLOAD_ID = "abcd"
+    ETAG = "efgh"
+
+    part = mock.Mock()
+    part.etag = ETAG
+    with mock.patch(
+        "google.cloud.storage.transfer_manager.XMLMPUPart", return_value=part
+    ):
+        result = transfer_manager._upload_part(
+            pickled_mock,
+            URL,
+            UPLOAD_ID,
+            FILENAME,
+            0,
+            256,
+            1,
+            None,
+            {"key", "value"},
+            retry=DEFAULT_RETRY,
+        )
+    part.upload.assert_called_once()
+    assert part._retry_strategy.max_sleep == 60.0
+    assert part._retry_strategy.max_cumulative_retry == 120.0
+    assert part._retry_strategy.max_retries is None
+
+    assert result == (1, ETAG)
+
+
 def test__get_pool_class_and_requirements_error():
     with pytest.raises(ValueError):
         transfer_manager._get_pool_class_and_requirements("garbage")
@@ -702,14 +962,14 @@ def test__reduce_client():
 
 def test__call_method_on_maybe_pickled_blob():
     blob = mock.Mock(spec=Blob)
-    blob.download_to_file.return_value = "SUCCESS"
+    blob._prep_and_do_download.return_value = "SUCCESS"
     result = transfer_manager._call_method_on_maybe_pickled_blob(
-        blob, "download_to_file"
+        blob, "_prep_and_do_download"
     )
     assert result == "SUCCESS"
 
     pickled_blob = pickle.dumps(_PickleableMockBlob())
     result = transfer_manager._call_method_on_maybe_pickled_blob(
-        pickled_blob, "download_to_file"
+        pickled_blob, "_prep_and_do_download"
     )
     assert result == "SUCCESS"
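Aside on the client pickling covered by test__pickle_client, test__reduce_client, and test__upload_part above: the tests suggest the transfer manager avoids pickling a live Client (whose HTTP transport is not serializable) by registering a custom reducer that replaces the client with lightweight constructor arguments and rebuilds it on the worker side. A rough sketch of that copyreg/dispatch_table idea with stand-in names; the library's real _pickle_client and _reduce_client differ in detail:

import copyreg
import io
import pickle


class FakeClient:
    # Stand-in for an object that is expensive or unsafe to pickle directly.
    def __init__(self, project="demo"):
        self.project = project
        self._http = object()  # a live transport would not survive pickling


def _reduce_fake_client(client):
    # Ship only the constructor arguments, never the transport.
    return FakeClient, (client.project,)


def pickle_with_custom_reducer(obj):
    # A private Pickler with its own dispatch table, so the override does not
    # leak into the global copyreg registry.
    buf = io.BytesIO()
    pickler = pickle.Pickler(buf)
    pickler.dispatch_table = copyreg.dispatch_table.copy()
    pickler.dispatch_table[FakeClient] = _reduce_fake_client
    pickler.dump(obj)
    return buf.getvalue()


restored = pickle.loads(pickle_with_custom_reducer(FakeClient("proj")))
assert restored.project == "proj"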