[email protected] | 867e5b5 | 2013-03-13 21:43:51 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Download files from Google Storage based on SHA1 sums.""" |
| 7 | |
| 8 | |
| 9 | import hashlib |
| 10 | import optparse |
| 11 | import os |
| 12 | import Queue |
| 13 | import re |
[email protected] | 92cd7b0 | 2015-08-18 05:53:55 | [diff] [blame] | 14 | import shutil |
[email protected] | ba63bcb | 2013-10-28 19:55:48 | [diff] [blame] | 15 | import stat |
[email protected] | 867e5b5 | 2013-03-13 21:43:51 | [diff] [blame] | 16 | import sys |
[email protected] | 92cd7b0 | 2015-08-18 05:53:55 | [diff] [blame] | 17 | import tarfile |
[email protected] | 867e5b5 | 2013-03-13 21:43:51 | [diff] [blame] | 18 | import threading |
| 19 | import time |
| 20 | |
| 21 | import subprocess2 |
| 22 | |
| 23 | |
# Absolute path of the gsutil.py wrapper expected next to this script.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
# Maps sys.platform to what we actually want to call them.
PLATFORM_MAPPING = {
    'cygwin': 'win',
    'darwin': 'mac',
    'linux': 'linux',  # Python 3.3+ reports plain "linux".
    'linux2': 'linux',  # Python 2 on Linux.
    'win32': 'win',
}
[email protected] | 867e5b5 | 2013-03-13 21:43:51 | [diff] [blame] | 33 | |
| 34 | |
class FileNotFoundError(IOError):
  """Raised when gsutil or a requested .sha1 input file does not exist."""
| 37 | |
| 38 | |
class InvalidFileError(IOError):
  """Raised when a .sha1 file has no usable sha1 sum or platform path."""
| 41 | |
| 42 | |
class InvalidPlatformError(Exception):
  """Error type for an unsupported platform.

  NOTE(review): nothing in the code visible in this file raises it.
  """
| 45 | |
| 46 | |
def GetNormalizedPlatform():
  """Returns sys.platform, reporting cygwin as "win32".

  Every other platform string is passed through untouched.
  """
  return 'win32' if sys.platform == 'cygwin' else sys.platform
| 53 | |
[email protected] | 867e5b5 | 2013-03-13 21:43:51 | [diff] [blame] | 54 | # Common utilities |
class Gsutil(object):
  """Runs the gsutil.py script with a fixed path, boto config and version.

  None of the methods below modify the instance after construction.
  """

  # Retry policy used by check_call_with_retries().
  MAX_TRIES = 5
  RETRY_BASE_DELAY = 5.0
  RETRY_DELAY_MULTIPLE = 1.3

  def __init__(self, path, boto_path=None, timeout=None, version='4.15'):
    """Stores the settings used for every gsutil invocation.

    Args:
      path: Location of the gsutil script; it must exist.
      boto_path: Optional boto config file. os.devnull blanks out the
          credential variables instead of pointing them at a file.
      timeout: Passed straight through to subprocess2.
      version: Value handed to gsutil's --force-version flag.

    Raises:
      FileNotFoundError: if |path| does not exist.
    """
    if not os.path.exists(path):
      raise FileNotFoundError('GSUtil not found in %s' % path)
    self.path = path
    self.timeout = timeout
    self.boto_path = boto_path
    self.version = version

  def get_sub_env(self):
    """Returns a copy of os.environ adjusted for the configured boto file."""
    sub_env = os.environ.copy()
    if self.boto_path:
      # os.devnull means "no credentials": blank the variables rather than
      # pointing them at the null device.
      boto_value = '' if self.boto_path == os.devnull else self.boto_path
      sub_env['AWS_CREDENTIAL_FILE'] = boto_value
      sub_env['BOTO_CONFIG'] = boto_value
    return sub_env

  def _command(self, args):
    """Builds the full argv for running gsutil with |args|."""
    base = [sys.executable, self.path, '--force-version', self.version]
    return base + list(args)

  def call(self, *args):
    """Runs gsutil with |args| and returns what subprocess2.call returns."""
    return subprocess2.call(
        self._command(args), env=self.get_sub_env(), timeout=self.timeout)

  def check_call(self, *args):
    """Runs gsutil with |args|, capturing output.

    Returns:
      A (code, stdout, stderr) tuple. |code| is taken from a "status=NNN"
      marker in stderr when present, is 403 for the missing-credentials
      message, 404 for "matched no objects", and otherwise the process exit
      code.
    """
    (out, err), exit_code = subprocess2.communicate(
        self._command(args),
        stdout=subprocess2.PIPE,
        stderr=subprocess2.PIPE,
        env=self.get_sub_env(),
        timeout=self.timeout)

    # Parse output.
    status = re.search('status=([0-9]+)', err)
    if status:
      return (int(status.group(1)), out, err)
    no_credentials = ('You are attempting to access protected data with '
                      'no configured credentials.')
    if no_credentials in err:
      return (403, out, err)
    if 'matched no objects' in err:
      return (404, out, err)
    return (exit_code, out, err)

  def check_call_with_retries(self, *args):
    """Like check_call(), retrying while the returned code is non-zero.

    Makes at most MAX_TRIES attempts, sleeping between them with a delay
    that starts at RETRY_BASE_DELAY and is multiplied by
    RETRY_DELAY_MULTIPLE after each failure.

    Returns:
      The (code, stdout, stderr) tuple of the last attempt.
    """
    delay = self.RETRY_BASE_DELAY
    last_attempt = self.MAX_TRIES - 1
    for attempt in range(self.MAX_TRIES):
      code, out, err = self.check_call(*args)
      if not code or attempt == last_attempt:
        break

      time.sleep(delay)
      delay *= self.RETRY_DELAY_MULTIPLE

    return code, out, err
| 119 | |
[email protected] | 867e5b5 | 2013-03-13 21:43:51 | [diff] [blame] | 120 | |
def check_platform(target):
  """Returns the innermost path component named linux, mac or win.

  Components of the absolute path |target| are examined from the last one
  towards the root. Returns None when no component matches, or as soon as an
  empty component is produced (the root, or a trailing separator).
  """
  assert os.path.isabs(target)
  remaining = target
  while True:
    remaining, component = os.path.split(remaining)
    if not component:
      return None
    if component in ('linux', 'mac', 'win'):
      return component
| 130 | |
| 131 | |
def get_sha1(filename):
  """Returns the hexadecimal SHA-1 digest of the contents of |filename|."""
  digest = hashlib.sha1()
  with open(filename, 'rb') as stream:
    # Hash in 1mb chunks so the whole file never has to sit in memory.
    for chunk in iter(lambda: stream.read(1024 * 1024), b''):
      digest.update(chunk)
  return digest.hexdigest()
| 142 | |
| 143 | |
| 144 | # Download-specific code starts here |
| 145 | |
def _read_sha1_from_file(path):
  """Returns the 40-character sum stored in |path|, or None if there is none.

  Only the first 1024 bytes are read; after stripping trailing whitespace
  they must consist of exactly 40 alphanumeric characters.
  """
  with open(path, 'rb') as f:
    match = re.match(b'^([A-Za-z0-9]{40})$', f.read(1024).rstrip())
  if not match:
    return None
  sha1 = match.group(1)
  # The file is read as bytes; hand back a native string on Python 3 too.
  if not isinstance(sha1, str):
    sha1 = sha1.decode('ascii')
  return sha1


def enumerate_work_queue(input_filename, work_queue, directory,
                         recursive, ignore_errors, output, sha1_file,
                         auto_platform):
  """Fills |work_queue| with (sha1, output_path) download jobs.

  Args:
    input_filename: A sha1 sum, a .sha1 file, or a directory, depending on
        |sha1_file| and |directory|.
    work_queue: Object whose put() receives each (sha1, output_path) tuple.
    directory: If true, walk |input_filename| looking for *.sha1 files.
    recursive: With |directory|, descend into sub-directories (skipping
        .svn and .git); otherwise only the top level is scanned.
    ignore_errors: If true, problems are written to stderr instead of raised.
    output: Output path used for the single-target modes.
    sha1_file: If true, |input_filename| is a file holding the sha1 sum.
    auto_platform: With |directory|, skip .sha1 files whose path names a
        platform (linux/mac/win) other than the current one.

  Returns:
    The number of jobs that were queued.

  Raises:
    FileNotFoundError: the .sha1 input is missing and errors are not ignored.
    InvalidFileError: a .sha1 file holds no sha1 sum, or --auto_platform is
        set and the file's path names no platform, and errors are not
        ignored.
  """
  if sha1_file:
    if not os.path.exists(input_filename):
      if not ignore_errors:
        raise FileNotFoundError('%s not found.' % input_filename)
      sys.stderr.write('%s not found.\n' % input_filename)
      # There is nothing to read; falling through to open() would raise
      # even though the caller asked for errors to be ignored.
      return 0
    sha1 = _read_sha1_from_file(input_filename)
    if sha1:
      work_queue.put((sha1, output))
      return 1
    if not ignore_errors:
      raise InvalidFileError('No sha1 sum found in %s.' % input_filename)
    sys.stderr.write('No sha1 sum found in %s.\n' % input_filename)
    return 0

  if not directory:
    # The target itself is the sha1 sum.
    work_queue.put((input_filename, output))
    return 1

  work_queue_size = 0
  for root, dirs, files in os.walk(input_filename):
    if not recursive:
      # Emptying |dirs| in place stops os.walk from descending any further.
      del dirs[:]
    else:
      for exclude in ('.svn', '.git'):
        if exclude in dirs:
          dirs.remove(exclude)
    for filename in files:
      full_path = os.path.join(root, filename)
      if not full_path.endswith('.sha1'):
        continue
      if auto_platform:
        # Skip if the platform does not match.
        target_platform = check_platform(os.path.abspath(full_path))
        if not target_platform:
          err = ('--auto_platform passed in but no platform name found in '
                 'the path of %s' % full_path)
          if not ignore_errors:
            raise InvalidFileError(err)
          sys.stderr.write('%s\n' % err)
          continue
        current_platform = PLATFORM_MAPPING[sys.platform]
        if current_platform != target_platform:
          continue

      sha1 = _read_sha1_from_file(full_path)
      if sha1:
        # Strip only the trailing extension; a blanket replace() would also
        # mangle any directory whose name happens to contain ".sha1".
        work_queue.put((sha1, full_path[:-len('.sha1')]))
        work_queue_size += 1
      else:
        if not ignore_errors:
          raise InvalidFileError('No sha1 sum found in %s.' % filename)
        sys.stderr.write('No sha1 sum found in %s.\n' % filename)
  return work_queue_size
| 205 | |
| 206 | |
def _validate_tar_file(tar, prefix):
  """Returns True if every member of |tar| is safe to extract under |prefix|.

  A member is rejected when it is a symbolic or hard link, when its name
  contains "..", or when it is neither |prefix| itself nor located below the
  "<prefix>/" directory.

  Args:
    tar: Object whose getmembers() returns tarfile.TarInfo-like entries.
    prefix: Name of the single top-level directory the archive may populate.
  """
  def _validate(tarinfo):
    """Returns false if the tarinfo is something we explicitly forbid."""
    if tarinfo.issym() or tarinfo.islnk():
      return False
    name = tarinfo.name
    if '..' in name:
      return False
    # A bare startswith(prefix) would also accept siblings such as
    # "<prefix>-other/file"; require |prefix| to be a whole path component.
    return name == prefix or name.startswith(prefix + '/')
  return all(_validate(member) for member in tar.getmembers())
| 216 | |
def _downloader_worker_thread(thread_num, q, force, base_url,
                              gsutil, out_q, ret_codes, verbose, extract,
                              delete=True):
  """Worker loop: downloads (and optionally extracts) files named by |q|.

  Args:
    thread_num: Number used to prefix this worker's progress messages.
    q: Queue yielding (sha1, output_filename) jobs; a job whose sha1 is None
        makes the worker return.
    force: Download even if a local file with the right sha1 already exists.
    base_url: URL prefix; each object is fetched from "<base_url>/<sha1>".
    gsutil: Object providing check_call(*args) -> (code, out, err).
    out_q: Queue receiving human-readable progress lines.
    ret_codes: Queue receiving a (code, message) tuple for every failure.
    verbose: If true, also report files skipped because they are up to date.
    extract: If true, the download must be a .tar.gz and is unpacked next to
        itself after verification.
    delete: If true, remove an existing output file before downloading.
  """
  while True:
    input_sha1_sum, output_filename = q.get()
    if input_sha1_sum is None:
      return
    extract_dir = None
    if extract:
      if not output_filename.endswith('.tar.gz'):
        out_q.put('%d> Error: %s is not a tar.gz archive.' % (
            thread_num, output_filename))
        ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))
        continue
      extract_dir = output_filename[:-len('.tar.gz')]
    if os.path.exists(output_filename) and not force:
      # With --extract the unpacked directory must be present as well,
      # otherwise the archive is fetched and extracted again.
      if not extract or os.path.exists(extract_dir):
        if get_sha1(output_filename) == input_sha1_sum:
          if verbose:
            out_q.put(
                '%d> File %s exists and SHA1 matches. Skipping.' % (
                    thread_num, output_filename))
          continue
    # Check if file exists.
    file_url = '%s/%s' % (base_url, input_sha1_sum)
    (code, _, err) = gsutil.check_call('ls', file_url)
    if code != 0:
      if code == 404:
        out_q.put('%d> File %s for %s does not exist, skipping.' % (
            thread_num, file_url, output_filename))
        ret_codes.put((1, 'File %s for %s does not exist.' % (
            file_url, output_filename)))
      else:
        # Other error, probably auth related (bad ~/.boto, etc).
        out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % (
            thread_num, file_url, output_filename, err))
        ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % (
            file_url, output_filename, err)))
      continue
    # Fetch the file.
    out_q.put('%d> Downloading %s...' % (thread_num, output_filename))
    try:
      if delete:
        os.remove(output_filename)  # Delete the file if it exists already.
    except OSError:
      # A file that was never there is fine; only warn if it is still around.
      if os.path.exists(output_filename):
        out_q.put('%d> Warning: deleting %s failed.' % (
            thread_num, output_filename))
    code, _, err = gsutil.check_call('cp', file_url, output_filename)
    if code != 0:
      out_q.put('%d> %s' % (thread_num, err))
      ret_codes.put((code, err))
      continue

    # Verify what actually landed on disk.
    remote_sha1 = get_sha1(output_filename)
    if remote_sha1 != input_sha1_sum:
      msg = ('%d> ERROR remote sha1 (%s) does not match expected sha1 (%s).' %
             (thread_num, remote_sha1, input_sha1_sum))
      out_q.put(msg)
      ret_codes.put((20, msg))
      continue

    if extract:
      if not tarfile.is_tarfile(output_filename):
        out_q.put('%d> Error: %s is not a tar.gz archive.' % (
            thread_num, output_filename))
        ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))
        continue
      with tarfile.open(output_filename, 'r:gz') as tar:
        dirname = os.path.dirname(os.path.abspath(output_filename))
        if not _validate_tar_file(tar, os.path.basename(extract_dir)):
          out_q.put('%d> Error: %s contains files outside %s.' % (
              thread_num, output_filename, extract_dir))
          ret_codes.put((1, '%s contains invalid entries.' % (output_filename)))
          continue
        if os.path.exists(extract_dir):
          # Start from a clean directory so stale files do not survive.
          try:
            shutil.rmtree(extract_dir)
            out_q.put('%d> Removed %s...' % (thread_num, extract_dir))
          except OSError:
            out_q.put('%d> Warning: Can\'t delete: %s' % (
                thread_num, extract_dir))
            ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))
            continue
        out_q.put('%d> Extracting %d entries from %s to %s' %
                  (thread_num, len(tar.getmembers()), output_filename,
                   extract_dir))
        tar.extractall(path=dirname)
    # Set executable bit.
    if sys.platform == 'cygwin':
      # Under cygwin, mark all files as executable. The executable flag in
      # Google Storage will not be set when uploading from Windows, so if
      # this script is running under cygwin and we're downloading an
      # executable, it will be unrunnable from inside cygwin without this.
      st = os.stat(output_filename)
      os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
    elif sys.platform != 'win32':
      # On non-Windows platforms, key off of the custom header
      # "x-goog-meta-executable".
      # Capture stat's own stderr: reporting the variable left over from the
      # successful "cp" above would show the wrong text.
      code, out, err = gsutil.check_call('stat', file_url)
      if code != 0:
        out_q.put('%d> %s' % (thread_num, err))
        ret_codes.put((code, err))
      elif re.search(r'executable:\s*1', out):
        st = os.stat(output_filename)
        os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
[email protected] | 867e5b5 | 2013-03-13 21:43:51 | [diff] [blame] | 323 | |
def printer_worker(output_queue):
  """Prints every line taken from |output_queue| until None is received.

  Empty strings are printed too; only None ends the loop.
  """
  line = output_queue.get()
  while line is not None:
    print(line)
    line = output_queue.get()
| 331 | |
| 332 | |
def download_from_google_storage(
    input_filename, base_url, gsutil, num_threads, directory, recursive,
    force, output, ignore_errors, sha1_file, verbose, auto_platform, extract):
  """Downloads everything described by |input_filename| using worker threads.

  Starts |num_threads| downloader threads plus one printer thread, queues the
  jobs found by enumerate_work_queue(), waits for the workers to finish and
  then writes every collected failure message to stderr.

  Returns:
    The largest return code reported by any worker, or 0 if none failed.
  """
  download_start = time.time()
  stdout_queue = Queue.Queue()
  work_queue = Queue.Queue()
  ret_codes = Queue.Queue()
  # Seed with a success entry so the scan below always has something to read.
  ret_codes.put((0, None))

  # Start up all the worker threads.
  all_threads = []
  for thread_num in range(num_threads):
    worker = threading.Thread(
        target=_downloader_worker_thread,
        args=[thread_num, work_queue, force, base_url,
              gsutil, stdout_queue, ret_codes, verbose, extract])
    worker.daemon = True
    worker.start()
    all_threads.append(worker)
  printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])
  printer_thread.daemon = True
  printer_thread.start()

  # Enumerate our work queue.
  work_queue_size = enumerate_work_queue(
      input_filename, work_queue, directory, recursive,
      ignore_errors, output, sha1_file, auto_platform)
  # One stop marker per worker, queued behind all of the real jobs.
  for _ in all_threads:
    work_queue.put((None, None))

  # Wait for all downloads to finish, then let the printer drain and exit.
  for worker in all_threads:
    worker.join()
  stdout_queue.put(None)
  printer_thread.join()

  # See if we ran into any errors.
  max_ret_code = 0
  for ret_code, message in ret_codes.queue:
    max_ret_code = max(ret_code, max_ret_code)
    if message:
      sys.stderr.write('%s\n' % message)

  if verbose:
    if not max_ret_code:
      print('Success!')
    print('Downloading %d files took %1f second(s)' % (
        work_queue_size, time.time() - download_start))
  return max_ret_code
| 381 | |
| 382 | |
def main(args):
  """Parses the command line and runs the requested download.

  NOTE(review): |args| is accepted but not consulted; parse_args() is called
  without arguments, so optparse reads sys.argv[1:] itself. The only caller
  visible in this file passes sys.argv, so that behaviour is left unchanged.

  Returns:
    0 when --platform does not match the current platform, the result of
    gsutil.call('config') when --config is given, and otherwise the return
    code of download_from_google_storage().
  """
  usage = ('usage: %prog [options] target\n'
           'Target must be:\n'
           '  (default) a sha1 sum ([A-Za-z0-9]{40}).\n'
           '  (-s or --sha1_file) a .sha1 file, containing a sha1 sum on '
           'the first line.\n'
           '  (-d or --directory) A directory to scan for .sha1 files.')
  parser = optparse.OptionParser(usage)
  parser.add_option('-o', '--output',
                    help='Specify the output file name. Defaults to: '
                         '(a) Given a SHA1 hash, the name is the SHA1 hash. '
                         '(b) Given a .sha1 file or directory, the name will '
                         'match (.*).sha1.')
  parser.add_option('-b', '--bucket',
                    help='Google Storage bucket to fetch from.')
  parser.add_option('-e', '--boto',
                    help='Specify a custom boto file.')
  parser.add_option('-c', '--no_resume', action='store_true',
                    help='DEPRECATED: Resume download if file is '
                         'partially downloaded.')
  parser.add_option('-f', '--force', action='store_true',
                    help='Force download even if local file exists.')
  parser.add_option('-i', '--ignore_errors', action='store_true',
                    help='Don\'t throw error if we find an invalid .sha1 file.')
  parser.add_option('-r', '--recursive', action='store_true',
                    help='Scan folders recursively for .sha1 files. '
                         'Must be used with -d/--directory')
  parser.add_option('-t', '--num_threads', default=1, type='int',
                    help='Number of downloader threads to run.')
  parser.add_option('-d', '--directory', action='store_true',
                    help='The target is a directory.  '
                         'Cannot be used with -s/--sha1_file.')
  parser.add_option('-s', '--sha1_file', action='store_true',
                    help='The target is a file containing a sha1 sum.  '
                         'Cannot be used with -d/--directory.')
  parser.add_option('-g', '--config', action='store_true',
                    help='Alias for "gsutil config".  Run this if you want '
                         'to initialize your saved Google Storage '
                         'credentials.  This will create a read-only '
                         'credentials file in ~/.boto.depot_tools.')
  parser.add_option('-n', '--no_auth', action='store_true',
                    help='Skip auth checking.  Use if it\'s known that the '
                         'target bucket is a public bucket.')
  parser.add_option('-p', '--platform',
                    help='A regular expression that is compared against '
                         'Python\'s sys.platform. If this option is specified, '
                         'the download will happen only if there is a match.')
  parser.add_option('-a', '--auto_platform',
                    action='store_true',
                    help='Detects if any parent folder of the target matches '
                         '(linux|mac|win).  If so, the script will only '
                         'process files that are in the paths that '
                         'match the current platform.')
  parser.add_option('-u', '--extract',
                    action='store_true',
                    help='Extract a downloaded tar.gz file. '
                         'Leaves the tar.gz file around for sha1 '
                         'verification. '
                         'If a directory with the same name as the tar.gz '
                         'file already exists, is deleted (to get a '
                         'clean state in case of update.)')
  parser.add_option('-v', '--verbose', action='store_true', default=True,
                    help='DEPRECATED: Defaults to True.  Use -q/--quiet '
                         'to suppress.')
  parser.add_option('-q', '--quiet', action='store_false', dest='verbose',
                    help='Suppresses diagnostic and progress information.')

  (options, args) = parser.parse_args()

  # Make sure we should run at all based on platform matching.
  if options.platform:
    if options.auto_platform:
      parser.error('--platform can not be specified with --auto_platform')
    if not re.match(options.platform, GetNormalizedPlatform()):
      if options.verbose:
        print('The current platform doesn\'t match "%s", skipping.' %
              options.platform)
      return 0

  # Set the boto file to /dev/null if we don't need auth.
  if options.no_auth:
    if (set(('http_proxy', 'https_proxy')).intersection(
        env.lower() for env in os.environ) and
        'NO_AUTH_BOTO_CONFIG' not in os.environ):
      sys.stderr.write('NOTICE: You have PROXY values set in your '
                       'environment, but gsutil in depot_tools does not '
                       '(yet) obey them.\n')
      sys.stderr.write('Also, --no_auth prevents the normal BOTO_CONFIG '
                       'environment variable from being used.\n')
      sys.stderr.write('To use a proxy in this situation, please supply '
                       'those settings in a .boto file pointed to by '
                       'the NO_AUTH_BOTO_CONFIG environment var.\n')
    options.boto = os.environ.get('NO_AUTH_BOTO_CONFIG', os.devnull)

  # Make sure gsutil exists where we expect it to.
  if os.path.exists(GSUTIL_DEFAULT_PATH):
    gsutil = Gsutil(GSUTIL_DEFAULT_PATH,
                    boto_path=options.boto)
  else:
    parser.error('gsutil not found in %s, bad depot_tools checkout?' %
                 GSUTIL_DEFAULT_PATH)

  # Passing in -g/--config will run our copy of GSUtil, then quit.
  if options.config:
    print('===Note from depot_tools===')
    print('If you do not have a project ID, enter "0" when asked for one.')
    print('===End note from depot_tools===')
    print('')
    return gsutil.call('config')

  if not args:
    parser.error('Missing target.')
  if len(args) > 1:
    parser.error('Too many targets.')
  if not options.bucket:
    parser.error('Missing bucket.  Specify bucket with --bucket.')
  if options.sha1_file and options.directory:
    parser.error('Both --directory and --sha1_file are specified, '
                 'can only specify one.')
  if options.recursive and not options.directory:
    parser.error('--recursive specified but --directory not specified.')
  if options.output and options.directory:
    parser.error('--directory is specified, so --output has no effect.')
  if (not (options.sha1_file or options.directory)
      and options.auto_platform):
    parser.error('--auto_platform must be specified with either '
                 '--sha1_file or --directory')

  input_filename = args[0]

  # Set output filename if not specified.
  if not options.output and not options.directory:
    if not options.sha1_file:
      # Target is a sha1 sum, so output filename would also be the sha1 sum.
      options.output = input_filename
    else:
      # Target is a .sha1 file.
      if not input_filename.endswith('.sha1'):
        parser.error('--sha1_file is specified, but the input filename '
                     'does not end with .sha1, and no --output is specified. '
                     'Either make sure the input filename has a .sha1 '
                     'extension, or specify --output.')
      options.output = input_filename[:-5]

  base_url = 'gs://%s' % options.bucket

  return download_from_google_storage(
      input_filename, base_url, gsutil, options.num_threads, options.directory,
      options.recursive, options.force, options.output, options.ignore_errors,
      options.sha1_file, options.verbose, options.auto_platform,
      options.extract)
[email protected] | 867e5b5 | 2013-03-13 21:43:51 | [diff] [blame] | 535 | |
| 536 | |
# Script entry point: the value returned by main() becomes the exit status.
if __name__ == '__main__':
  exit_code = main(sys.argv)
  sys.exit(exit_code)