blob: 7fa0398af775abfafa33caa1e803ffe508fcbf23 [file] [log] [blame]
[email protected]aa74cf62013-11-19 20:00:491#!/usr/bin/env python
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Usage: %prog [options] [<commitref>]*
7
8If no <commitref>'s are supplied, it defaults to HEAD.
9
10Calculates the generation number for one or more commits in a git repo.
11
12Generation number of a commit C with parents P is defined as:
13 generation_number(C, []) = 0
14 generation_number(C, P) = max(map(generation_number, P)) + 1
15
16This number can be used to order commits relative to each other, as long as for
17any pair of the commits, one is an ancestor of the other.
18
19Since calculating the generation number of a commit requires walking that
20commit's entire history, this script caches all calculated data inside the git
21repo that it operates on in the ref 'refs/number/commits'.
22"""
23
24import binascii
25import collections
26import logging
27import optparse
28import os
29import struct
30import sys
31import tempfile
32
33import git_common as git
34import subprocess2
35
36CHUNK_FMT = '!20sL'
37CHUNK_SIZE = struct.calcsize(CHUNK_FMT)
38DIRTY_TREES = collections.defaultdict(int)
39REF = 'refs/number/commits'
[email protected]ee740702014-04-03 01:43:3240AUTHOR_NAME = 'git-number'
41AUTHOR_EMAIL = '[email protected]'
[email protected]aa74cf62013-11-19 20:00:4942
43# Number of bytes to use for the prefix on our internal number structure.
44# 0 is slow to deserialize. 2 creates way too much bookeeping overhead (would
45# need to reimplement cache data structures to be a bit more sophisticated than
46# dicts. 1 seems to be just right.
47PREFIX_LEN = 1
48
49# Set this to 'threads' to gather coverage data while testing.
50POOL_KIND = 'procs'
51
52
53def pathlify(hash_prefix):
54 """Converts a binary object hash prefix into a posix path, one folder per
55 byte.
56
57 >>> pathlify('\xDE\xAD')
58 'de/ad'
59 """
60 return '/'.join('%02x' % ord(b) for b in hash_prefix)
61
62
63@git.memoize_one(threadsafe=False)
64def get_number_tree(prefix_bytes):
65 """Returns a dictionary of the git-number registry specified by
66 |prefix_bytes|.
67
68 This is in the form of {<full binary ref>: <gen num> ...}
69
70 >>> get_number_tree('\x83\xb4')
71 {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169, ...}
72 """
73 ref = '%s:%s' % (REF, pathlify(prefix_bytes))
74
75 try:
76 raw = buffer(git.run('cat-file', 'blob', ref, autostrip=False))
77 return dict(struct.unpack_from(CHUNK_FMT, raw, i * CHUNK_SIZE)
78 for i in xrange(len(raw) / CHUNK_SIZE))
79 except subprocess2.CalledProcessError:
80 return {}
81
82
83@git.memoize_one(threadsafe=False)
84def get_num(commit_hash):
85 """Returns the generation number for a commit.
86
87 Returns None if the generation number for this commit hasn't been calculated
88 yet (see load_generation_numbers()).
89 """
90 return get_number_tree(commit_hash[:PREFIX_LEN]).get(commit_hash)
91
92
93def clear_caches(on_disk=False):
94 """Clears in-process caches for e.g. unit testing."""
95 get_number_tree.clear()
96 get_num.clear()
97 if on_disk:
98 git.run('update-ref', '-d', REF)
99
100
101def intern_number_tree(tree):
102 """Transforms a number tree (in the form returned by |get_number_tree|) into
103 a git blob.
104
105 Returns the git blob id as hex-encoded string.
106
107 >>> d = {'\x83\xb4\xe3\xe4W\xf9J*\x8f/c\x16\xecD\xd1\x04\x8b\xa9qz': 169}
108 >>> intern_number_tree(d)
109 'c552317aa95ca8c3f6aae3357a4be299fbcb25ce'
110 """
111 with tempfile.TemporaryFile() as f:
112 for k, v in sorted(tree.iteritems()):
113 f.write(struct.pack(CHUNK_FMT, k, v))
114 f.seek(0)
115 return git.intern_f(f)
116
117
118def leaf_map_fn((pre, tree)):
119 """Converts a prefix and number tree into a git index line."""
120 return '100644 blob %s\t%s\0' % (intern_number_tree(tree), pathlify(pre))
121
122
123def finalize(targets):
124 """Saves all cache data to the git repository.
125
126 After calculating the generation number for |targets|, call finalize() to
127 save all the work to the git repository.
128
129 This in particular saves the trees referred to by DIRTY_TREES.
130 """
131 if not DIRTY_TREES:
132 return
133
134 msg = 'git-number Added %s numbers' % sum(DIRTY_TREES.itervalues())
135
136 idx = os.path.join(git.run('rev-parse', '--git-dir'), 'number.idx')
137 env = os.environ.copy()
138 env['GIT_INDEX_FILE'] = idx
139
140 progress_message = 'Finalizing: (%%(count)d/%d)' % len(DIRTY_TREES)
141 with git.ProgressPrinter(progress_message) as inc:
142 git.run('read-tree', REF, env=env)
143
144 prefixes_trees = ((p, get_number_tree(p)) for p in sorted(DIRTY_TREES))
145 updater = subprocess2.Popen(['git', 'update-index', '-z', '--index-info'],
146 stdin=subprocess2.PIPE, env=env)
147
148 with git.ScopedPool(kind=POOL_KIND) as leaf_pool:
149 for item in leaf_pool.imap(leaf_map_fn, prefixes_trees):
150 updater.stdin.write(item)
151 inc()
152
153 updater.stdin.close()
154 updater.wait()
155 assert updater.returncode == 0
156
157 tree_id = git.run('write-tree', env=env)
[email protected]ee740702014-04-03 01:43:32158 commit_cmd = [
159 # Git user.name and/or user.email may not be configured, so specifying
160 # them explicitly. They are not used, but requried by Git.
161 '-c', 'user.name=%s' % AUTHOR_NAME,
162 '-c', 'user.email=%s' % AUTHOR_EMAIL,
163 'commit-tree',
164 '-m', msg,
165 '-p'] + git.hash_multi(REF)
[email protected]aa74cf62013-11-19 20:00:49166 for t in targets:
167 commit_cmd.extend(['-p', binascii.hexlify(t)])
168 commit_cmd.append(tree_id)
169 commit_hash = git.run(*commit_cmd)
170 git.run('update-ref', REF, commit_hash)
171 DIRTY_TREES.clear()
172
173
174def preload_tree(prefix):
175 """Returns the prefix and parsed tree object for the specified prefix."""
176 return prefix, get_number_tree(prefix)
177
178
179def all_prefixes(depth=PREFIX_LEN):
180 for x in (chr(i) for i in xrange(255)):
181 # This isn't covered because PREFIX_LEN currently == 1
182 if depth > 1: # pragma: no cover
183 for r in all_prefixes(depth - 1):
184 yield x + r
185 else:
186 yield x
187
188
189def load_generation_numbers(targets):
190 """Populates the caches of get_num and get_number_tree so they contain
191 the results for |targets|.
192
193 Loads cached numbers from disk, and calculates missing numbers if one or
194 more of |targets| is newer than the cached calculations.
195
196 Args:
197 targets - An iterable of binary-encoded full git commit hashes.
198 """
199 # In case they pass us a generator, listify targets.
200 targets = list(targets)
201
202 if all(get_num(t) is not None for t in targets):
203 return
204
205 if git.tree(REF) is None:
206 empty = git.mktree({})
207 commit_hash = git.run('commit-tree', '-m', 'Initial commit from git-number',
208 empty)
209 git.run('update-ref', REF, commit_hash)
210
211 with git.ScopedPool(kind=POOL_KIND) as pool:
212 preload_iter = pool.imap_unordered(preload_tree, all_prefixes())
213
214 rev_list = []
215
216 with git.ProgressPrinter('Loading commits: %(count)d') as inc:
217 # Curiously, buffering the list into memory seems to be the fastest
218 # approach in python (as opposed to iterating over the lines in the
219 # stdout as they're produced). GIL strikes again :/
220 cmd = [
221 'rev-list', '--topo-order', '--parents', '--reverse', '^' + REF,
222 ] + map(binascii.hexlify, targets)
223 for line in git.run(*cmd).splitlines():
224 tokens = map(binascii.unhexlify, line.split())
225 rev_list.append((tokens[0], tokens[1:]))
226 inc()
227
228 get_number_tree.update(preload_iter)
229
230 with git.ProgressPrinter('Counting: %%(count)d/%d' % len(rev_list)) as inc:
231 for commit_hash, pars in rev_list:
232 num = max(map(get_num, pars)) + 1 if pars else 0
233
234 prefix = commit_hash[:PREFIX_LEN]
235 get_number_tree(prefix)[commit_hash] = num
236 DIRTY_TREES[prefix] += 1
237 get_num.set(commit_hash, num)
238
239 inc()
240
241
242def main(): # pragma: no cover
243 parser = optparse.OptionParser(usage=sys.modules[__name__].__doc__)
244 parser.add_option('--no-cache', action='store_true',
245 help='Do not actually cache anything we calculate.')
246 parser.add_option('--reset', action='store_true',
247 help='Reset the generation number cache and quit.')
248 parser.add_option('-v', '--verbose', action='count', default=0,
249 help='Be verbose. Use more times for more verbosity.')
250 opts, args = parser.parse_args()
251
252 levels = [logging.ERROR, logging.INFO, logging.DEBUG]
253 logging.basicConfig(level=levels[min(opts.verbose, len(levels) - 1)])
254
255 try:
256 if opts.reset:
257 clear_caches(on_disk=True)
258 return
259
260 try:
261 targets = git.parse_commitrefs(*(args or ['HEAD']))
262 except git.BadCommitRefException as e:
263 parser.error(e)
264
265 load_generation_numbers(targets)
266 if not opts.no_cache:
267 finalize(targets)
268
269 print '\n'.join(map(str, map(get_num, targets)))
270 return 0
271 except KeyboardInterrupt:
272 return 1
273
274
275if __name__ == '__main__': # pragma: no cover
276 sys.exit(main())