|
| 1 | +# Copyright 2022 Google LLC |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the 'License'); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | + |
def download_all_blobs_with_transfer_manager(
    bucket_name, destination_directory="", threads=4
):
    """Download all of the blobs in a bucket, concurrently in a thread pool.

    The filename of each blob once downloaded is derived from the blob name and
    the `destination_directory` parameter. For complete control of the filename
    of each blob, use transfer_manager.download_many() instead.

    Directories will be created automatically as needed, for instance to
    accommodate blob names that include slashes.

    One line is printed per blob, reporting either the local path it was
    downloaded to or the exception that prevented the download.
    """

    # The ID of your GCS bucket
    # bucket_name = "your-bucket-name"

    # The directory on your computer to which to download all of the files. This
    # string is prepended (with os.path.join()) to the name of each blob to form
    # the full path. Relative paths and absolute paths are both accepted. An
    # empty string means "the current working directory". Note that this
    # parameter accepts directory traversal ("../" etc.) and is not intended
    # for unsanitized end user input.
    # destination_directory = ""

    # The number of threads to use for the operation. The performance impact of
    # this value depends on the use case, but generally, smaller files benefit
    # from more threads and larger files don't benefit from more threads. Too
    # many threads can slow operations, especially with large files, due to
    # contention over the Python GIL.
    # threads=4

    import os

    from google.cloud.storage import Client, transfer_manager

    storage_client = Client()
    bucket = storage_client.bucket(bucket_name)

    # Materialize the names up front: the same list is passed to the transfer
    # manager and then paired with its results below.
    blob_names = [blob.name for blob in bucket.list_blobs()]

    results = transfer_manager.download_many_to_path(
        bucket, blob_names, destination_directory=destination_directory, threads=threads
    )

    for name, result in zip(blob_names, results):
        # The results list is either `None` or an exception for each blob in
        # the input list, in order.

        if isinstance(result, Exception):
            print("Failed to download {} due to exception: {}".format(name, result))
        else:
            # Report the path the way it is formed for the download
            # (os.path.join). Plain string concatenation would drop the path
            # separator whenever destination_directory has no trailing slash.
            local_path = os.path.join(destination_directory, name)
            print("Downloaded {} to {}.".format(name, local_path))
0 commit comments