#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A utility script for downloading versioned Syzygy binaries."""

import errno
import hashlib
import json
import logging
import optparse
import os
import re
import shutil
import stat
import subprocess
import sys
import tempfile
import time
import zipfile


_LOGGER = logging.getLogger(os.path.basename(__file__))

# The relative path where official builds are archived in their GS bucket.
_SYZYGY_ARCHIVE_PATH = '/builds/official/%(revision)s'
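# Combined with the 'gs://syzygy-archive' prefix used by _Download(), a
# resource resolves to, e.g.:
#   gs://syzygy-archive/builds/official/<revision>/binaries.zip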

# A JSON file containing the state of the download directory. If this file and
# directory state do not agree, then the binaries will be downloaded and
# installed again.
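# The file holds a dictionary of the form:
#   { 'revision': <SVN revision or GIT hash>,
#     'contents': { <path relative to the output directory>: <MD5 digest> } }
# (see _StateIsValid for the exact validation rules).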
_STATE = '.state'

# This matches an integer (an SVN revision number) or a SHA1 value (a GIT hash).
# The archive exclusively uses lowercase GIT hashes.
_REVISION_RE = re.compile(r'^(?:\d+|[a-f0-9]{40})$')

# This matches an MD5 hash.
_MD5_RE = re.compile('^[a-f0-9]{32}$')

# List of resources to be downloaded and installed. These are tuples with the
# following format:
# (basename, logging name, relative installation path, extraction filter)
_RESOURCES = [
  ('benchmark.zip', 'benchmark', '', None),
  ('binaries.zip', 'binaries', 'exe', None),
  ('symbols.zip', 'symbols', 'exe',
   lambda x: x.filename.endswith('.dll.pdb'))]


# Name of the MS DIA dll that we need to copy to the binaries directory.
_DIA_DLL_NAME = "msdia140.dll"


def _LoadState(output_dir):
  """Loads the contents of the state file for a given |output_dir|, returning
  None if it doesn't exist.
  """
  path = os.path.join(output_dir, _STATE)
  if not os.path.exists(path):
    _LOGGER.debug('No state file found.')
    return None
  with open(path, 'rb') as f:
    _LOGGER.debug('Reading state file: %s', path)
    try:
      return json.load(f)
    except ValueError:
      _LOGGER.debug('Invalid state file.')
      return None


def _SaveState(output_dir, state, dry_run=False):
  """Saves the |state| dictionary to the given |output_dir| as a JSON file."""
  path = os.path.join(output_dir, _STATE)
  _LOGGER.debug('Writing state file: %s', path)
  if dry_run:
    return
  with open(path, 'wb') as f:
    f.write(json.dumps(state, sort_keys=True, indent=2))


def _Md5(path):
  """Returns the MD5 hash of the file at |path|, which must exist."""
  return hashlib.md5(open(path, 'rb').read()).hexdigest()


def _StateIsValid(state):
  """Returns true if the given state structure is valid."""
  if not isinstance(state, dict):
    _LOGGER.debug('State must be a dict.')
    return False
  r = state.get('revision', None)
  if not isinstance(r, basestring) or not _REVISION_RE.match(r):
    _LOGGER.debug('State contains an invalid revision.')
    return False
  c = state.get('contents', None)
  if not isinstance(c, dict):
    _LOGGER.debug('State must contain a contents dict.')
    return False
  for (relpath, md5) in c.iteritems():
    if not isinstance(relpath, basestring) or len(relpath) == 0:
      _LOGGER.debug('State contents dict contains an invalid path.')
      return False
    if not isinstance(md5, basestring) or not _MD5_RE.match(md5):
      _LOGGER.debug('State contents dict contains an invalid MD5 digest.')
      return False
  return True


def _BuildActualState(stored, revision, output_dir):
  """Builds the actual state using the provided |stored| state as a template.
  Only examines files listed in the stored state, causing the script to ignore
  files that have been added to the directories locally. |stored| must be a
  valid state dictionary.
  """
  contents = {}
  state = { 'revision': revision, 'contents': contents }
  for relpath, md5 in stored['contents'].iteritems():
    abspath = os.path.abspath(os.path.join(output_dir, relpath))
    if os.path.isfile(abspath):
      m = _Md5(abspath)
      contents[relpath] = m

  return state


def _StatesAreConsistent(stored, actual):
  """Validates whether two state dictionaries are consistent. Both must be
  valid state dictionaries. Additional entries in |actual| are ignored.
  """
  if stored['revision'] != actual['revision']:
    _LOGGER.debug('Mismatched revision number.')
    return False
  cont_stored = stored['contents']
  cont_actual = actual['contents']
  for relpath, md5 in cont_stored.iteritems():
    if relpath not in cont_actual:
      _LOGGER.debug('Missing content: %s', relpath)
      return False
    if md5 != cont_actual[relpath]:
      _LOGGER.debug('Modified content: %s', relpath)
      return False
  return True


def _GetCurrentState(revision, output_dir):
  """Loads the current state and checks to see if it is consistent. Returns
  a tuple (state, bool). The returned state will always be valid, even if an
  invalid state is present on disk.
  """
  stored = _LoadState(output_dir)
  if not _StateIsValid(stored):
    _LOGGER.debug('State is invalid.')
    # Return a valid but empty state.
    return ({'revision': '0', 'contents': {}}, False)
  actual = _BuildActualState(stored, revision, output_dir)
  # If the script has been modified, consider the state invalid.
  path = os.path.join(output_dir, _STATE)
  if os.path.getmtime(__file__) > os.path.getmtime(path):
    return (stored, False)
  # Otherwise, explicitly validate the state.
  if not _StatesAreConsistent(stored, actual):
    return (stored, False)
  return (stored, True)


def _DirIsEmpty(path):
  """Returns true if the given directory is empty, false otherwise."""
  for root, dirs, files in os.walk(path):
    return not dirs and not files


def _RmTreeHandleReadOnly(func, path, exc):
  """An error handling function for use with shutil.rmtree. This will
  detect failures to remove read-only files, and will change their properties
  prior to removing them. This is necessary on Windows as os.remove raises
  an access error for read-only files, and git repos contain read-only
  pack/index files.
  """
  excvalue = exc[1]
  if func in (os.rmdir, os.remove) and excvalue.errno == errno.EACCES:
    _LOGGER.debug('Removing read-only path: %s', path)
    os.chmod(path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
    func(path)
  else:
    raise


def _RmTree(path):
  """A wrapper of shutil.rmtree that handles read-only files."""
  shutil.rmtree(path, ignore_errors=False, onerror=_RmTreeHandleReadOnly)


def _CleanState(output_dir, state, dry_run=False):
  """Cleans up files/directories in |output_dir| that are referenced by
  the given |state|. Raises an error if there are local changes. Returns a
  dictionary of files that were deleted.
  """
  _LOGGER.debug('Deleting files from previous installation.')
  deleted = {}

  # Generate a list of files to delete, relative to |output_dir|.
  contents = state['contents']
  files = sorted(contents.keys())

  # Try to delete the files. Keep track of directories to delete as well.
  dirs = {}
  for relpath in files:
    fullpath = os.path.join(output_dir, relpath)
    fulldir = os.path.dirname(fullpath)
    dirs[fulldir] = True
    if os.path.exists(fullpath):
      # If somehow the file has become a directory, complain about it.
      if os.path.isdir(fullpath):
        raise Exception('Directory exists where file expected: %s' % fullpath)

      # Double check that the file doesn't have local changes. If it does,
      # then refuse to delete it.
      if relpath in contents:
        stored_md5 = contents[relpath]
        actual_md5 = _Md5(fullpath)
        if actual_md5 != stored_md5:
          raise Exception('File has local changes: %s' % fullpath)

      # The file is unchanged, so it can safely be deleted.
      _LOGGER.debug('Deleting file "%s".', fullpath)
      deleted[relpath] = True
      if not dry_run:
        os.unlink(fullpath)

  # Sort directories from longest name to shortest. This lets us remove empty
  # directories from the most nested paths first.
  dirs = sorted(dirs.keys(), key=lambda x: len(x), reverse=True)
  for p in dirs:
    if os.path.exists(p) and _DirIsEmpty(p):
      _LOGGER.debug('Deleting empty directory "%s".', p)
      if not dry_run:
        _RmTree(p)

  return deleted


def _FindGsUtil():
  """Looks for depot_tools and returns the absolute path to gsutil.py."""
  for path in os.environ['PATH'].split(os.pathsep):
    path = os.path.abspath(path)
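    # A directory containing both git_cl.py and gsutil.py is assumed to be a
    # depot_tools checkout.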
    git_cl = os.path.join(path, 'git_cl.py')
    gs_util = os.path.join(path, 'gsutil.py')
    if os.path.exists(git_cl) and os.path.exists(gs_util):
      return gs_util
  return None


def _GsUtil(*cmd):
  """Runs the given command in gsutil with exponential backoff and retries."""
  gs_util = _FindGsUtil()
  cmd = [sys.executable, gs_util] + list(cmd)

  retries = 3
  timeout = 4  # Seconds.
  while True:
    _LOGGER.debug('Running %s', cmd)
    prog = subprocess.Popen(cmd, shell=False)
    prog.communicate()

    # Stop retrying on success.
    if prog.returncode == 0:
      return

    # Raise a permanent failure if retries have been exhausted.
    if retries == 0:
      raise RuntimeError('Command "%s" returned %d.' % (cmd, prog.returncode))

    _LOGGER.debug('Sleeping %d seconds and trying again.', timeout)
    time.sleep(timeout)
    retries -= 1
    timeout *= 2


def _Download(resource):
  """Downloads the given GS resource to a temporary file, returning its path."""
  tmp = tempfile.mkstemp(suffix='syzygy_archive')
  os.close(tmp[0])
  url = 'gs://syzygy-archive' + resource
  _GsUtil('cp', url, tmp[1])
  return tmp[1]


def _MaybeCopyDIABinaries(options, contents):
  """Try to copy the DIA DLL to the binaries exe directory."""
  toolchain_data_file = os.path.join(os.path.dirname(__file__),
                                     'win_toolchain.json')
  if not os.path.exists(toolchain_data_file):
    _LOGGER.debug('Toolchain JSON data file doesn\'t exist, skipping.')
    return
  with open(toolchain_data_file) as temp_f:
    toolchain_data = json.load(temp_f)
  if not os.path.isdir(toolchain_data['path']):
    _LOGGER.error('The toolchain JSON file is invalid.')
    return
  dia_sdk_binaries_dir = os.path.join(toolchain_data['path'], 'DIA SDK', 'bin')
  dia_dll = os.path.join(dia_sdk_binaries_dir, _DIA_DLL_NAME)
  if not os.path.exists(dia_dll):
    _LOGGER.debug('%s is missing, skipping.', dia_dll)
    return
  dia_dll_dest = os.path.join(options.output_dir, 'exe', _DIA_DLL_NAME)
  _LOGGER.debug('Copying %s to %s.' % (dia_dll, dia_dll_dest))
  if not options.dry_run:
    shutil.copy(dia_dll, dia_dll_dest)
    contents[os.path.relpath(dia_dll_dest, options.output_dir)] = (
        _Md5(dia_dll_dest))


def _InstallBinaries(options, deleted={}):
  """Installs Syzygy binaries. This assumes that the output directory has
  already been cleaned, as it will refuse to overwrite existing files."""
  contents = {}
  state = { 'revision': options.revision, 'contents': contents }
  archive_path = _SYZYGY_ARCHIVE_PATH % { 'revision': options.revision }
  if options.resources:
    resources = [(resource, resource, '', None)
                 for resource in options.resources]
  else:
    resources = _RESOURCES
  for (base, name, subdir, filt) in resources:
    # Create the output directory if it doesn't exist.
    fulldir = os.path.join(options.output_dir, subdir)
    if os.path.isfile(fulldir):
      raise Exception('File exists where a directory needs to be created: %s' %
                      fulldir)
    if not os.path.exists(fulldir):
      _LOGGER.debug('Creating directory: %s', fulldir)
      if not options.dry_run:
        os.makedirs(fulldir)

    # Download and read the archive.
    resource = archive_path + '/' + base
    _LOGGER.debug('Retrieving %s archive at "%s".', name, resource)
    path = _Download(resource)

    _LOGGER.debug('Unzipping %s archive.', name)
    with open(path, 'rb') as data:
      archive = zipfile.ZipFile(data)
      for entry in archive.infolist():
        if not filt or filt(entry):
          fullpath = os.path.normpath(os.path.join(fulldir, entry.filename))
          relpath = os.path.relpath(fullpath, options.output_dir)
          if os.path.exists(fullpath):
            # If in a dry-run, take into account the fact that the file *would*
            # have been deleted.
            if options.dry_run and relpath in deleted:
              pass
            else:
              raise Exception('Path already exists: %s' % fullpath)

          # Extract the file and update the state dictionary.
          _LOGGER.debug('Extracting "%s".', fullpath)
          if not options.dry_run:
            archive.extract(entry.filename, fulldir)
            md5 = _Md5(fullpath)
            contents[relpath] = md5
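            # Mark extracted files as executable under cygwin, since
            # zipfile.extract() does not restore permission bits.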
            if sys.platform == 'cygwin':
              os.chmod(fullpath, os.stat(fullpath).st_mode | stat.S_IXUSR)

    _LOGGER.debug('Removing temporary file "%s".', path)
    os.remove(path)

  if options.copy_dia_binaries:
    # Try to copy the DIA binaries to the binaries directory.
    _MaybeCopyDIABinaries(options, contents)

  return state


def _ParseCommandLine():
  """Parses the command-line and returns an options structure."""
  option_parser = optparse.OptionParser()
  option_parser.add_option('--dry-run', action='store_true', default=False,
      help='If true then will simply list actions that would be performed.')
  option_parser.add_option('--force', action='store_true', default=False,
      help='Force an installation even if the binaries are up to date.')
  option_parser.add_option('--no-cleanup', action='store_true', default=False,
      help='Allow installation on non-Windows platforms, and skip the forced '
           'cleanup step.')
  option_parser.add_option('--output-dir', type='string',
      help='The path where the binaries will be replaced. Existing binaries '
           'will only be overwritten if not up to date.')
  option_parser.add_option('--overwrite', action='store_true', default=False,
      help='If specified then the installation will happily delete and rewrite '
           'the entire output directory, blasting any local changes.')
  option_parser.add_option('--revision', type='string',
      help='The SVN revision or GIT hash associated with the required version.')
  option_parser.add_option('--revision-file', type='string',
      help='A text file containing an SVN revision or GIT hash.')
  option_parser.add_option('--resource', type='string', action='append',
      dest='resources', help='A resource to be downloaded.')
  option_parser.add_option('--verbose', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.DEBUG,
      help='Enables verbose logging.')
  option_parser.add_option('--quiet', dest='log_level', action='store_const',
      default=logging.INFO, const=logging.ERROR,
      help='Disables all output except for errors.')
  option_parser.add_option('--copy-dia-binaries', action='store_true',
      default=False, help='If true then the DIA dll will get copied into the '
      'binaries directory if it\'s available.')
  options, args = option_parser.parse_args()
  if args:
    option_parser.error('Unexpected arguments: %s' % args)
  if not options.output_dir:
    option_parser.error('Must specify --output-dir.')
  if not options.revision and not options.revision_file:
    option_parser.error('Must specify one of --revision or --revision-file.')
  if options.revision and options.revision_file:
    option_parser.error('Must not specify both --revision and --revision-file.')

  # Configure logging.
  logging.basicConfig(level=options.log_level)

  # If a revision file has been specified then read it.
  if options.revision_file:
    options.revision = open(options.revision_file, 'rb').read().strip()
    _LOGGER.debug('Parsed revision "%s" from file "%s".',
                  options.revision, options.revision_file)

  # Ensure that the specified SVN revision or GIT hash is valid.
  if not _REVISION_RE.match(options.revision):
    option_parser.error('Must specify a valid SVN or GIT revision.')

  # This just makes output prettier to read.
  options.output_dir = os.path.normpath(options.output_dir)

  return options


def _RemoveOrphanedFiles(options):
  """This is run on non-Windows systems to remove orphaned files that may have
  been downloaded by a previous version of this script.
  """
  # Reconfigure logging to output info messages. This will allow inspection of
  # cleanup status on non-Windows buildbots.
  _LOGGER.setLevel(logging.INFO)

  output_dir = os.path.abspath(options.output_dir)

  # We only want to clean up the folder in 'src/third_party/syzygy', and we
  # expect to be called with that as an output directory. This is an attempt to
  # avoid deleting random things if the script is run from an alternate
  # location, or not called from the gclient hooks.
  expected_syzygy_dir = os.path.abspath(os.path.join(
      os.path.dirname(__file__), '..', 'third_party', 'syzygy'))
  expected_output_dir = os.path.join(expected_syzygy_dir, 'binaries')
  if expected_output_dir != output_dir:
    _LOGGER.info('Unexpected output directory, skipping cleanup.')
    return

  if not os.path.isdir(expected_syzygy_dir):
    _LOGGER.info('Output directory does not exist, skipping cleanup.')
    return

  def OnError(function, path, excinfo):
    """Logs errors encountered by shutil.rmtree."""
    _LOGGER.error('Error when running %s(%s)', function, path, exc_info=excinfo)

  _LOGGER.info('Removing orphaned files from %s', expected_syzygy_dir)
  if not options.dry_run:
    shutil.rmtree(expected_syzygy_dir, True, OnError)


def main():
  options = _ParseCommandLine()

  if options.dry_run:
    _LOGGER.debug('Performing a dry-run.')

  # We only care about Windows platforms, as the Syzygy binaries aren't used
  # elsewhere. However, there was a short period of time where this script
  # wasn't gated on OS types, and those OSes downloaded and installed binaries.
  # This will clean up orphaned files on those operating systems.
  if sys.platform not in ('win32', 'cygwin'):
    if options.no_cleanup:
      _LOGGER.debug('Skipping usual cleanup for non-Windows platforms.')
    else:
      return _RemoveOrphanedFiles(options)

  # Load the current installation state, and validate it against the
  # requested installation.
  state, is_consistent = _GetCurrentState(options.revision, options.output_dir)

  # Decide whether or not an install is necessary.
  if options.force:
    _LOGGER.debug('Forcing reinstall of binaries.')
  elif is_consistent:
    # Avoid doing any work if the contents of the directory are consistent.
    _LOGGER.debug('State unchanged, no reinstall necessary.')
    return

  # Under normal logging this is the only message that will be reported.
  _LOGGER.info('Installing revision %s Syzygy binaries.',
               options.revision[0:12])

  # Clean up the old state to begin with.
  deleted = []
  if options.overwrite:
    if os.path.exists(options.output_dir):
      # If overwrite was specified then take a heavy-handed approach.
      _LOGGER.debug('Deleting entire installation directory.')
      if not options.dry_run:
        _RmTree(options.output_dir)
  else:
    # Otherwise only delete things that the previous installation put in place,
    # and take care to preserve any local changes.
    deleted = _CleanState(options.output_dir, state, options.dry_run)

  # Install the new binaries. In a dry-run this will actually download the
  # archives, but it won't write anything to disk.
  state = _InstallBinaries(options, deleted)

  # Build and save the state for the directory.
  _SaveState(options.output_dir, state, options.dry_run)


if __name__ == '__main__':
  main()