blob: 0dc40e38cc8688d01ac2ecf2eb813569a8aa0e96 [file] [log] [blame]
[email protected]5a306a22014-02-24 22:13:591#!/usr/bin/env python
2# Copyright 2014 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A git command for managing a local cache of git repositories."""
7
[email protected]848fd492014-04-09 19:06:448from __future__ import print_function
Andrii Shyshkalov4f56f232017-11-23 10:19:259import contextlib
[email protected]5a306a22014-02-24 22:13:5910import errno
11import logging
12import optparse
13import os
[email protected]174766f2014-05-13 21:27:4614import re
[email protected]5a306a22014-02-24 22:13:5915import tempfile
[email protected]1132f5f2014-08-23 01:57:5916import threading
[email protected]f3726102014-04-17 17:24:1517import time
[email protected]5a306a22014-02-24 22:13:5918import subprocess
19import sys
20import urlparse
[email protected]776a2c32014-04-25 07:54:2521import zipfile
[email protected]5a306a22014-02-24 22:13:5922
[email protected]563559c2014-04-02 00:36:2423from download_from_google_storage import Gsutil
[email protected]5a306a22014-02-24 22:13:5924import gclient_utils
25import subcommand
26
# Analogous to gc.autopacklimit git config.
GC_AUTOPACKLIMIT = 50

# Printed (and logged) when a corrupt cache forces a clobber + re-bootstrap.
GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

# WindowsError only exists on Windows; alias it (or a stand-in Exception
# subclass) so that 'except WinErr' is valid on every platform.
try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    pass
[email protected]5a306a22014-02-24 22:13:5938
class LockError(Exception):
  """Raised when a Lockfile cannot be acquired, released, or removed."""
  pass
41
class ClobberNeeded(Exception):
  """Raised when the cached repo is corrupt and must be deleted and rebuilt."""
  pass
[email protected]5a306a22014-02-24 22:13:5944
dnj4625b5a2016-11-11 02:23:2645
def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string
        upon failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.
  """
  printerr = printerr or logging.warning
  # Fix: use 'range' instead of the Python-2-only 'xrange' so this helper
  # also runs under Python 3 (the file already imports print_function for
  # the 2-to-3 transition). |count| is small, so eagerness is harmless.
  for i in range(count):
    try:
      return fn()
    except excs as e:
      if (i + 1) >= count:
        # Out of attempts; let the final exception propagate to the caller.
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i + 1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2
79
80
class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile."""

  def __init__(self, path, timeout=0):
    # |path| is the resource being locked; the lockfile lives beside it.
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid."""
    # O_CREAT|O_EXCL makes creation atomic: exactly one process succeeds;
    # all others get OSError with errno EEXIST.
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    f = os.fdopen(fd, 'w')
    print(self.pid, file=f)
    f.close()

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      # Deletion on Windows is flaky; retry with backoff.
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.

    Raises:
      LockError: if the lock is still held by another process once the
          deadline has passed, or if the lockfile cannot be created at all.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if e.errno != errno.EEXIST:
          # Non-contention failures (e.g. permissions) cannot be fixed by
          # retrying; fail immediately instead of spinning until the
          # deadline as the old code did.
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))
        if elapsed < self.timeout:
          # Fix: the original computed max(10, min(3, remaining)), which is
          # always 10 and ignores the remaining deadline. Sleep at most 3
          # seconds and never past the deadline.
          sleep_time = min(3, self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        raise LockError("%s is already locked" % self.path)

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the lock is not held, or is held by another process.
    """
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking. YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns True if a lockfile was actually removed, False if there was none.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()
189
190
class Mirror(object):
  """One mirror (bare repo) in the local git cache.

  Maps a remote URL to a directory under the cache path, and knows how to
  configure, bootstrap (from a zip in Google Storage), fetch, and unlock that
  mirror.
  """

  # Windows needs the .bat shim to find git.
  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  # gsutil.py shipped alongside this script, used for Google Storage access.
  gsutil_exe = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  # Guards lazy initialization of the class-level 'cachepath' attribute.
  cachepath_lock = threading.Lock()

  # Sentinel: no cache path configured in git config or $GIT_CACHE_PATH.
  UNSET_CACHEPATH = object()

  # Used for tests
  _GIT_CONFIG_LOCATION = []

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      # Bare ref names are assumed to be branches.
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    """Args:
      url: remote URL this mirror caches.
      refs: optional list of extra fetch specs (see parse_fetch_spec).
      print_func: optional callable used for output instead of print().
    """
    self.url = url
    self.fetch_specs = set([self.parse_fetch_spec(ref) for ref in (refs or [])])
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      # Wrap so that callers may pass print-style kwargs (e.g. file=)
      # without breaking the custom print function.
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    # Drops print()-style kwargs (e.g. file=sys.stderr) before delegating.
    self.print_func(message)

  @contextlib.contextmanager
  def print_duration_of(self, what):
    """Context manager that reports how long the wrapped block took."""
    start = time.time()
    try:
      yield
    finally:
      self.print('%s took %.1f minutes' % (what, (time.time() - start) / 60.0))

  @property
  def bootstrap_bucket(self):
    """Google Storage bucket holding bootstrap zips, or None if unknown."""
    u = urlparse.urlparse(self.url)
    if u.netloc == 'chromium.googlesource.com':
      return 'chromium-git-cache'
    elif u.netloc == 'chrome-internal.googlesource.com':
      return 'chrome-git-cache'
    # Not recognized.
    return None

  @classmethod
  def FromPath(cls, path):
    """Build a Mirror from an existing cache directory path."""
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # '-' is escaped as '--' so that '/' -> '-' stays reversible.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    """Override the cache path for the whole process (thread-safe)."""
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Return the cache root, resolving it once from git config or
    $GIT_CACHE_PATH; raises RuntimeError if neither is set."""
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config'] +
              cls._GIT_CONFIG_LOCATION +
              ['cache.cachepath']).strip()
        except subprocess.CalledProcessError:
          # git config lookup failed; fall back to the environment, or the
          # UNSET sentinel if that is absent too.
          cachepath = os.environ.get('GIT_CACHE_PATH', cls.UNSET_CACHEPATH)
        setattr(cls, 'cachepath', cachepath)

      ret = getattr(cls, 'cachepath')
      if ret is cls.UNSET_CACHEPATH:
        raise RuntimeError('No cache.cachepath git configuration or '
                           '$GIT_CACHE_PATH is set.')
      return ret

  def Rename(self, src, dst):
    """os.rename with retries, because renaming is flaky on Windows."""
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess."""
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Make credential prompts fail fast instead of hanging.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None, reset_fetch_config=False):
    """Apply the cache's standard git configuration to |cwd|.

    Raises ClobberNeeded if even a simple 'git config' fails, which is
    treated as a corrupted repo.
    """
    if cwd is None:
      cwd = self.mirror_path

    if reset_fetch_config:
      try:
        self.RunGit(['config', '--unset-all', 'remote.origin.fetch'], cwd=cwd)
      except subprocess.CalledProcessError as e:
        # If exit code was 5, it means we attempted to unset a config that
        # didn't exist. Ignore it.
        if e.returncode != 5:
          raise

    # Don't run git-gc in a daemon. Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file. It's really slow for
    # repositories, and there's no way to track progress and make sure it's
    # not stuck.
    if self.supported_project():
      self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for cache-ing delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More apt-ly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().
    """
    if not self.bootstrap_bucket:
      return False
    # Fall back to the zipfile module when no suitable native unzipper
    # (7z on Windows, ZIP64-capable unzip elsewhere) is available.
    python_fallback = (
        (sys.platform.startswith('win') and
         not gclient_utils.FindExecutable('7z')) or
        (not gclient_utils.FindExecutable('unzip')) or
        ('ZIP64_SUPPORT' not in subprocess.check_output(["unzip", "-v"]))
    )

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, ls_err = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      self.print('No bootstrap file for %s found in %s, stderr:\n %s' %
                 (self.mirror_path, self.bootstrap_bucket,
                  ' '.join((ls_err or '').splitlines(True))))
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    try:
      tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
      self.print('Downloading %s' % latest_checkout)
      with self.print_duration_of('download'):
        code = gsutil.call('cp', latest_checkout, tempdir)
      if code:
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      with self.print_duration_of('unzip'):
        if not python_fallback:
          if sys.platform.startswith('win'):
            cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
          else:
            cmd = ['unzip', filename, '-d', directory]
          retcode = subprocess.call(cmd)
        else:
          try:
            with zipfile.ZipFile(filename, 'r') as f:
              f.printdir()
              f.extractall(directory)
          except Exception as e:
            self.print('Encountered error: %s' % str(e), file=sys.stderr)
            retcode = 1
          else:
            retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      #
      # This is somehow racy on Windows.
      # Catching OSError because WindowsError isn't portable and
      # pylint complains.
      exponential_backoff_retry(
          lambda: gclient_utils.rm_file_or_tree(tempdir),
          excs=(OSError,),
          name='rmtree [%s]' % (tempdir,),
          printerr=self.print)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def contains_revision(self, revision):
    """Return True if |revision| resolves to a commit in this mirror."""
    if not self.exists():
      return False

    if sys.platform.startswith('win'):
      # Windows .bat scripts use ^ as escape sequence, which means we have to
      # escape it with itself for every .bat invocation.
      needle = '%s^^^^{commit}' % revision
    else:
      needle = '%s^{commit}' % revision
    try:
      # cat-file exits with 0 on success, that is git object of given hash was
      # found.
      self.RunGit(['cat-file', '-e', needle])
      return True
    except subprocess.CalledProcessError:
      return False

  def exists(self):
    # A bare repo is considered present iff its 'config' file exists.
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def supported_project(self):
    """Returns true if this repo is known to have a bootstrap zip file."""
    u = urlparse.urlparse(self.url)
    return u.netloc in [
        'chromium.googlesource.com',
        'chrome-internal.googlesource.com']

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warn('Tried and failed to preserve remote.origin.fetch from the '
                   'existing cache directory. You may need to manually edit '
                   '%s and "git cache fetch" again.'
                   % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    """Bootstrap (or re-bootstrap) the mirror into a temp dir when needed.

    Returns the temp dir path if a fresh repo was set up there, else None
    (meaning: keep fetching into the existing mirror_path).
    """
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]
      self.print('%s has %d .pack files, re-bootstrapping if >%d' %
                 (self.mirror_path, len(pack_files), GC_AUTOPACKLIMIT))

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      # Shallow caches are never bootstrapped from the cloud zip.
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists() or not self.supported_project():
        # Bootstrap failed due to either
        # 1. No previous cache
        # 2. Project doesn't have a bootstrap zip file
        # Start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warn(
            'Git cache has a lot of pack files (%d). Tried to re-bootstrap '
            'but failed. Continuing with non-optimized repository.'
            % len(pack_files))
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warn(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir

  def _fetch(self, rundir, verbose, depth, reset_fetch_config):
    """Configure |rundir| and fetch every configured fetch spec into it.

    Raises ClobberNeeded when the main heads spec fails, which is treated
    as a corrupted cache.
    """
    self.config(rundir, reset_fetch_config)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        with self.print_duration_of('fetch %s' % spec):
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warn('Fetch of %s failed' % spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0,
               reset_fetch_config=False):
    """Ensure the mirror exists and is up to date.

    Locks the mirror (unless ignore_lock), bootstraps it if needed, fetches,
    and — on a ClobberNeeded corruption signal — wipes and force-bootstraps
    once. If work was done in a temp dir, it replaces mirror_path at the end.
    """
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      tempdir = self._ensure_bootstrapped(depth, bootstrap)
      rundir = tempdir or self.mirror_path
      self._fetch(rundir, verbose, depth, reset_fetch_config)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(rundir)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
      assert tempdir
      self._fetch(tempdir, verbose, depth, reset_fetch_config)
    finally:
      if tempdir:
        if os.path.exists(self.mirror_path):
          gclient_utils.rmtree(self.mirror_path)
        # Atomically (as far as the platform allows) swap in the new repo.
        self.Rename(tempdir, self.mirror_path)
      if not ignore_lock:
        lockfile.unlock()

  def update_bootstrap(self, prune=False):
    """Zip this mirror and upload it to the bootstrap bucket.

    With prune=True, delete all other zips for this repo from the bucket.
    """
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    # Run Garbage Collect to compress packfile.
    self.RunGit(['gc', '--prune=all'])
    # Creating a temp file and then deleting it ensures we can use this name.
    _, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

    # Remove all other files in the same directory.
    if prune:
      _, ls_out, _ = gsutil.check_call('ls', gs_folder)
      for filename in ls_out.splitlines():
        if filename == dest_name:
          continue
        gsutil.call('rm', filename)

  @staticmethod
  def DeleteTmpPackFiles(path):
    """Delete temporary pack files left behind by interrupted git processes."""
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warn('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warn('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    """Break our lockfile and stale git lock/pack files under |path|.

    Returns True if anything was unlocked.
    """
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    """Break locks on this mirror's own directory."""
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    """Break locks on every repo in the cache; returns the unlocked dirs."""
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
                     if os.path.isdir(os.path.join(cachepath, path))])
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
            os.path.isfile(os.path.join(cachepath, dirent))):
        # A stray .lock file implies a repo dir that may need unlocking.
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos
[email protected]848fd492014-04-09 19:06:44649
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(args[0])
  # Exit 0 (and print the path) when cached, 1 otherwise.
  if not mirror.exists():
    return 1
  print(mirror.mirror_path)
  return 0
662
663
@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and uploads a bootstrap tarball."""
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # First, we need to ensure the cache is populated.
  populate_args = args[:]
  populate_args.append('--no-bootstrap')
  CMDpopulate(parser, populate_args)

  # Then zip up the populated repo directory and upload it.
  options, args = parser.parse_args(args)
  mirror = Mirror(args[0])
  mirror.update_bootstrap(options.prune)
  return 0
[email protected]563559c2014-04-02 00:36:24686
687
@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help='Don\'t try to lock repository')
  parser.add_option('--reset-fetch-config', action='store_true', default=False,
                    help='Reset the fetch config before populating the cache.')

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  # Translate command-line options into Mirror.populate() keyword args.
  mirror = Mirror(args[0], refs=options.ref)
  kwargs = dict(
      verbose=options.verbose,
      shallow=options.shallow,
      bootstrap=not options.no_bootstrap,
      ignore_lock=options.ignore_locks,
      lock_timeout=options.timeout,
      reset_fetch_config=options.reset_fetch_config,
  )
  if options.depth:
    kwargs['depth'] = options.depth
  mirror.populate(**kwargs)
[email protected]5a306a22014-02-24 22:13:59723
724
[email protected]f3145112014-08-07 21:02:36725@subcommand.usage('Fetch new commits into cache and current checkout')
726def CMDfetch(parser, args):
727 """Update mirror, and fetch in cwd."""
728 parser.add_option('--all', action='store_true', help='Fetch all remotes')
[email protected]66c8b852015-09-22 23:19:07729 parser.add_option('--no_bootstrap', '--no-bootstrap',
730 action='store_true',
731 help='Don\'t (re)bootstrap from Google Storage')
[email protected]f3145112014-08-07 21:02:36732 options, args = parser.parse_args(args)
733
734 # Figure out which remotes to fetch. This mimics the behavior of regular
735 # 'git fetch'. Note that in the case of "stacked" or "pipelined" branches,
736 # this will NOT try to traverse up the branching structure to find the
737 # ultimate remote to update.
738 remotes = []
739 if options.all:
740 assert not args, 'fatal: fetch --all does not take a repository argument'
741 remotes = subprocess.check_output([Mirror.git_exe, 'remote']).splitlines()
742 elif args:
743 remotes = args
744 else:
745 current_branch = subprocess.check_output(
746 [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
747 if current_branch != 'HEAD':
748 upstream = subprocess.check_output(
749 [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch]
750 ).strip()
751 if upstream and upstream != '.':
752 remotes = [upstream]
753 if not remotes:
754 remotes = ['origin']
755
756 cachepath = Mirror.GetCachePath()
757 git_dir = os.path.abspath(subprocess.check_output(
758 [Mirror.git_exe, 'rev-parse', '--git-dir']))
759 git_dir = os.path.abspath(git_dir)
760 if git_dir.startswith(cachepath):
761 mirror = Mirror.FromPath(git_dir)
[email protected]dbb6f822016-02-02 22:59:30762 mirror.populate(
Vadim Shtayura08049e22017-10-11 00:14:52763 bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
[email protected]f3145112014-08-07 21:02:36764 return 0
765 for remote in remotes:
766 remote_url = subprocess.check_output(
767 [Mirror.git_exe, 'config', 'remote.%s.url' % remote]).strip()
768 if remote_url.startswith(cachepath):
769 mirror = Mirror.FromPath(remote_url)
770 mirror.print = lambda *args: None
771 print('Updating git cache...')
[email protected]dbb6f822016-02-02 22:59:30772 mirror.populate(
Vadim Shtayura08049e22017-10-11 00:14:52773 bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
[email protected]f3145112014-08-07 21:02:36774 subprocess.check_call([Mirror.git_exe, 'fetch', remote])
775 return 0
776
777
Vadim Shtayura08049e22017-10-11 00:14:52778@subcommand.usage('[url of repo to unlock, or -a|--all]')
779def CMDunlock(parser, args):
780 """Unlock one or all repos if their lock files are still around."""
781 parser.add_option('--force', '-f', action='store_true',
782 help='Actually perform the action')
783 parser.add_option('--all', '-a', action='store_true',
784 help='Unlock all repository caches')
785 options, args = parser.parse_args(args)
786 if len(args) > 1 or (len(args) == 0 and not options.all):
787 parser.error('git cache unlock takes exactly one repo url, or --all')
788
789 if not options.force:
790 cachepath = Mirror.GetCachePath()
791 lockfiles = [os.path.join(cachepath, path)
792 for path in os.listdir(cachepath)
793 if path.endswith('.lock') and os.path.isfile(path)]
794 parser.error('git cache unlock requires -f|--force to do anything. '
795 'Refusing to unlock the following repo caches: '
796 ', '.join(lockfiles))
797
798 unlocked_repos = []
799 if options.all:
800 unlocked_repos.extend(Mirror.UnlockAll())
801 else:
802 m = Mirror(args[0])
803 if m.unlock():
804 unlocked_repos.append(m.mirror_path)
805
806 if unlocked_repos:
807 logging.info('Broke locks on these caches:\n %s' % '\n '.join(
808 unlocked_repos))
809
810
[email protected]5a306a22014-02-24 22:13:59811class OptionParser(optparse.OptionParser):
812 """Wrapper class for OptionParser to handle global options."""
813
814 def __init__(self, *args, **kwargs):
815 optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
816 self.add_option('-c', '--cache-dir',
Robert Iannuccia19649b2018-06-29 16:31:45817 help=(
818 'Path to the directory containing the caches. Normally '
819 'deduced from git config cache.cachepath or '
820 '$GIT_CACHE_PATH.'))
[email protected]2c391af2014-05-23 09:07:15821 self.add_option('-v', '--verbose', action='count', default=1,
[email protected]5a306a22014-02-24 22:13:59822 help='Increase verbosity (can be passed multiple times)')
[email protected]2c391af2014-05-23 09:07:15823 self.add_option('-q', '--quiet', action='store_true',
824 help='Suppress all extraneous output')
Vadim Shtayura08049e22017-10-11 00:14:52825 self.add_option('--timeout', type='int', default=0,
826 help='Timeout for acquiring cache lock, in seconds')
[email protected]5a306a22014-02-24 22:13:59827
828 def parse_args(self, args=None, values=None):
829 options, args = optparse.OptionParser.parse_args(self, args, values)
[email protected]2c391af2014-05-23 09:07:15830 if options.quiet:
831 options.verbose = 0
832
833 levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
834 logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])
[email protected]5a306a22014-02-24 22:13:59835
836 try:
[email protected]848fd492014-04-09 19:06:44837 global_cache_dir = Mirror.GetCachePath()
838 except RuntimeError:
839 global_cache_dir = None
840 if options.cache_dir:
841 if global_cache_dir and (
842 os.path.abspath(options.cache_dir) !=
843 os.path.abspath(global_cache_dir)):
844 logging.warn('Overriding globally-configured cache directory.')
845 Mirror.SetCachePath(options.cache_dir)
[email protected]5a306a22014-02-24 22:13:59846
[email protected]5a306a22014-02-24 22:13:59847 return options, args
848
849
def main(argv):
  """Entry point: route argv to the matching 'git cache' subcommand."""
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)
853
854
if __name__ == '__main__':
  # Translate Ctrl-C into a quiet non-zero exit instead of a traceback.
  try:
    exit_code = main(sys.argv[1:])
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    exit_code = 1
  sys.exit(exit_code)