blob: ad793eed7e8b7060687232e2ab1099278961ce75 [file] [log] [blame]
Ned Nguyenaa1db20f2018-03-15 01:01:571#!/usr/bin/env vpython
Ashley Enstad6a67ad72018-06-01 21:35:022# Copyright 2018 The Chromium Authors. All rights reserved.
Stephen Martinis79e98da2017-05-05 19:24:103# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
Raul Tambre0b068f92019-09-25 14:36:386from __future__ import print_function
7
nednguyendf8d7a02018-08-27 20:58:428import argparse
Caleb Rouleau43e632b42019-04-26 23:26:139import collections
Caleb Rouleau360b04d2019-12-18 18:41:2010import filecmp
nednguyendf8d7a02018-08-27 20:58:4211import json
nednguyenc2ceac42018-08-31 11:12:4912import multiprocessing
Caleb Rouleau43e632b42019-04-26 23:26:1313import os
Caleb Rouleau360b04d2019-12-18 18:41:2014import shutil
Stephen Martinis79e98da2017-05-05 19:24:1015import sys
Caleb Rouleau360b04d2019-12-18 18:41:2016import tempfile
Ned Nguyen8e27d0d2018-11-28 00:20:1717import textwrap
Stephen Martinis79e98da2017-05-05 19:24:1018
Juan Antonio Navarro Perez68944cab2019-08-20 09:43:4119from core import path_util
20path_util.AddTelemetryToPath()
21
nednguyenc2ceac42018-08-31 11:12:4922from core import bot_platforms
nednguyenc2ceac42018-08-31 11:12:4923from core import retrieve_story_timing
Leonard Ge3e1d472b2018-09-10 17:51:5324from core import sharding_map_generator
Stephen Martinis79e98da2017-05-05 19:24:1025
# Long-form description passed to argparse as the parser description (shown at
# the top of `--help` output). Rendered verbatim because GetParser() uses
# argparse.RawTextHelpFormatter.
_SCRIPT_USAGE = """
Generate sharding maps for Telemetry benchmarks.

Every performance benchmark should be run on a same machine as long as possible
to preserve high fidelity of data monitoring. Hence in order to shard the
Telemetry benchmarks on multiple machines, we generate a JSON map that
specifies how benchmarks should be distributed on machines. There is one
sharding JSON map for every builder in the perf & perf.fyi waterfalls which are
specified by PerfPlatform classes in //tools/perf/core/bot_platforms.py.

Generating these JSON maps depends on how many Telemetry benchmarks
actually exist at the time. Because of this, CLs to generate the JSON maps
should never be automatically reverted, since the reverted state of the JSON map
files may not match with the true state of world.

"""
42
def GetParser():
  """Build the command-line parser for this script.

  Four sub-commands are registered: `update`, `update-timing`, `deschedule`
  and `validate`. Each sub-command stores its handler function under `func`
  in the parsed namespace so that the caller can dispatch on it.

  Returns:
    The configured argparse.ArgumentParser.
  """
  root = argparse.ArgumentParser(
      formatter_class=argparse.RawTextHelpFormatter,
      description=_SCRIPT_USAGE)
  commands = root.add_subparsers()

  # `update`: regenerate the shard maps.
  update_cmd = commands.add_parser('update', help='Update the shard maps.')
  update_cmd.add_argument(
      '--use-existing-timing-data', '-o',
      action='store_true',
      default=False,
      help=('Whether to reuse existing builder timing data (stored in '
            '//tools/perf/core/shard_maps/timing_data/) and skip the step of '
            'fetching the most recent timing data from test results server. '
            'This flag is default to False. One typically uses this option '
            'when they need to fix the timing data to debug sharding '
            'generation.'))
  _AddBuilderPlatformSelectionArgs(update_cmd)
  # Note: --debug is registered on the top-level parser, not on `update`.
  root.add_argument(
      '--debug',
      action='store_true',
      default=False,
      help=('Whether to include detailed debug info of the sharding map in the '
            'shard maps.'))
  update_cmd.set_defaults(func=_UpdateShardsForBuilders)

  # `update-timing`: refresh and/or filter the timing data only.
  timing_cmd = commands.add_parser(
      'update-timing',
      help="Update the timing data that is used to create the shard maps, "
           "but don't update the shard maps themselves.")
  _AddBuilderPlatformSelectionArgs(timing_cmd)
  timing_cmd.add_argument(
      '--filter-only',
      action='store_true',
      help='Do not grab new data from bigquery but instead simply filter '
           'the existing data to reflect some change in the benchmark (for example '
           'if the benchmark was switched to abridged mode on some platform or if '
           'a story was removed from the benchmark.)')
  timing_cmd.set_defaults(func=_UpdateTimingDataCommand)

  # `deschedule`: drop removed benchmarks from the existing shard maps.
  deschedule_cmd = commands.add_parser(
      'deschedule',
      help=('After you deschedule one or more '
            'benchmarks by deleting from tools/perf/benchmarks or by editing '
            'bot_platforms.py, use this script to deschedule the '
            'benchmark(s) without impacting the sharding for other benchmarks.'))
  deschedule_cmd.set_defaults(func=_DescheduleBenchmark)

  # `validate`: consistency check of shard maps against bot_platforms.py.
  validate_cmd = commands.add_parser(
      'validate',
      help=('Validate that the shard maps match up with the benchmarks and '
            'bot_platforms.py.'))
  validate_cmd.set_defaults(func=_ValidateShardMaps)

  return root
96
97
def _AddBuilderPlatformSelectionArgs(parser):
  """Add the mutually exclusive --builders / --waterfall options to |parser|.

  Args:
    parser: The argparse parser (or sub-parser) to extend.
  """
  selection = parser.add_mutually_exclusive_group()
  # May be repeated; every value must be a known platform name.
  selection.add_argument(
      '--builders', '-b',
      action='append',
      default=[],
      choices=bot_platforms.ALL_PLATFORM_NAMES,
      help=('The builder names to use.'))
  selection.add_argument(
      '--waterfall', '-w',
      default=None,
      choices=['perf', 'perf-fyi', 'all'],
      help=('The name of waterfall whose builders should be used. If not '
            'specified, use all perf builders by default'))
108
109
Caleb Rouleau43e632b42019-04-26 23:26:13110def _DumpJson(data, output_path):
111 with open(output_path, 'w') as output_file:
112 json.dump(data, output_file, indent=4, separators=(',', ': '))
113
114
def _LoadTimingData(args):
  """Fetch story timing data for one builder and write it to its timing file.

  The single tuple argument lets this function be used directly as a
  multiprocessing pool worker.

  Args:
    args: A (builder, timing_file_path) tuple. |builder| must expose `name`
      and `executables`; the combined timing data is written as JSON to
      |timing_file_path|.
  """
  # `unicode` only exists on Python 2. Resolve the text type once so that the
  # generated entries keep their Python 2 type there while the function also
  # runs on Python 3.
  try:
    text_type = unicode  # pylint: disable=undefined-variable
  except NameError:
    text_type = str

  builder, timing_file_path = args
  data = retrieve_story_timing.FetchAverageStoryTimingData(
      configurations=[builder.name], num_last_days=5)
  # Executables have no fetched timings; append one synthetic entry per
  # executable built from its estimated runtime.
  for executable in builder.executables:
    data.append({
        text_type('duration'): text_type(float(executable.estimated_runtime)),
        text_type('name'): text_type(
            executable.name + '/' + bot_platforms.GTEST_STORY_NAME),
    })
  _DumpJson(data, timing_file_path)
  print('Finished retrieving story timing data for %s' % repr(builder.name))
nednguyenc2ceac42018-08-31 11:12:49126
127
def _source_filepath(posix_path):
  """Join a '/'-separated |posix_path| onto the Chromium src directory.

  The path is split on '/' and re-joined with os.path.join so the result uses
  the host OS path separator.
  """
  src_dir = path_util.GetChromiumSrcDir()
  components = posix_path.split('/')
  return os.path.join(src_dir, *components)
130
131
def _GenerateShardMap(
    builder, num_of_shards, output_path, debug):
  """Generate the sharding map for |builder| and write it to |output_path|.

  Args:
    builder: Platform object providing `timing_file_path`,
      `benchmark_configs` and `executables`.
    num_of_shards: Number of shards, forwarded as `num_shards`.
    output_path: Destination path of the JSON sharding map.
    debug: Forwarded to the sharding map generator as `debug`.
  """
  if builder:
    with open(builder.timing_file_path) as timing_file:
      timing_data = json.load(timing_file)
  else:
    timing_data = []
  # Benchmarks first, then executables.
  benchmarks_to_shard = list(builder.benchmark_configs)
  benchmarks_to_shard += list(builder.executables)
  sharding_map = sharding_map_generator.generate_sharding_map(
      benchmarks_to_shard,
      timing_data,
      num_shards=num_of_shards,
      debug=debug)
  _DumpJson(sharding_map, output_path)
nednguyenc2ceac42018-08-31 11:12:49145
nednguyenc2ceac42018-08-31 11:12:49146
def _PromptWarning():
  """Warn about the impact of re-sharding and ask the user to confirm.

  Prints the warning, then reads one line from stdin. Anything other than
  exactly 'y' aborts the script via sys.exit(0).
  """
  # NOTE(review): the address in the message below reads '[email protected]',
  # which looks like an e-mail address that was redacted somewhere along the
  # way -- confirm and restore the intended address.
  message = ('This will regenerate the sharding maps for perf benchmarks. '
             'Note that this will shuffle all the benchmarks on the shards, '
             'which can cause false regressions. In general this operation '
             'should only be done when the shards are too unbalanced or when '
             'benchmarks are added/removed. '
             'In addition, this is a tricky operation and should '
             'always be reviewed by Benchmarking '
             'team members. Upon landing the CL to update the shard maps, '
             'please notify Chromium perf sheriffs in '
             '[email protected] and put a warning about expected '
             'false regressions in your CL '
             'description')
  print(textwrap.fill(message, 70), '\n')
  # raw_input() only exists on Python 2; Python 3 renamed it to input().
  try:
    read_line = raw_input  # pylint: disable=undefined-variable
  except NameError:
    read_line = input
  answer = read_line("Enter 'y' to continue: ")
  if answer != 'y':
    print('Abort updating shard maps for benchmarks on perf waterfall')
    sys.exit(0)
165
166
def _UpdateTimingDataCommand(args):
  """Handler for the `update-timing` sub-command.

  Fetches fresh timing data unless --filter-only was given, then filters the
  timing file of every selected builder.

  Args:
    args: Parsed command-line namespace; reads `builders`, `waterfall` and
      `filter_only`.
  """
  selected = _GetBuilderPlatforms(args.builders, args.waterfall)
  fetch_new_data = not args.filter_only
  if fetch_new_data:
    _UpdateTimingData(selected)
  # Filtering always runs, whether or not new data was fetched.
  for platform in selected:
    _FilterTimingData(platform)
Caleb Rouleau360b04d2019-12-18 18:41:20173
174
def _FilterTimingData(builder, output_path=None):
  """Drop timing entries that no longer correspond to a story or executable.

  Args:
    builder: Platform object providing `timing_file_path`,
      `benchmark_configs` and `executables`.
    output_path: Where to write the filtered data. Defaults to the builder's
      own timing file, i.e. the file is filtered in place.
  """
  destination = output_path or builder.timing_file_path
  with open(builder.timing_file_path) as timing_file:
    all_points = json.load(timing_file)

  # When benchmarks are abridged or stories are removed, we want that
  # to be reflected in the timing data right away.
  known_names = {
      '/'.join([config.name, story])
      for config in builder.benchmark_configs
      for story in config.stories
  }
  known_names.update(
      executable.name + '/' + bot_platforms.GTEST_STORY_NAME
      for executable in builder.executables)

  kept_points = []
  for point in all_points:
    if str(point['name']) in known_names:
      kept_points.append(point)
  _DumpJson(kept_points, destination)
Caleb Rouleau2e48cb82019-10-30 00:17:45191
192
193def _UpdateTimingData(builders):
194 print('Updating shards timing data. May take a while...')
195 load_timing_args = []
196 for b in builders:
Caleb Rouleaubf96ab02019-12-28 03:59:55197 load_timing_args.append((b, b.timing_file_path))
Caleb Rouleau2e48cb82019-10-30 00:17:45198 p = multiprocessing.Pool(len(load_timing_args))
Caleb Rouleaua7e83e02019-11-19 01:34:24199 # Use map_async to work around python bug. See crbug.com/1026004.
200 p.map_async(_LoadTimingData, load_timing_args).get(12*60*60)
Caleb Rouleau2e48cb82019-10-30 00:17:45201
202
def _GetBuilderPlatforms(builders, waterfall):
  """Get the platform objects for the given builders or waterfall.

  Args:
    builders: Collection of builder names. When non-empty it takes
      precedence over |waterfall|.
    waterfall: 'perf', 'perf-fyi', or anything else (e.g. 'all' or None).

  Returns:
    The platforms whose name is in |builders| if any were given; otherwise
    the official platforms for 'perf', the FYI platforms for 'perf-fyi', and
    all platforms for any other value.
  """
  if builders:
    return {platform for platform in bot_platforms.ALL_PLATFORMS
            if platform.name in builders}
  if waterfall == 'perf':
    return bot_platforms.OFFICIAL_PLATFORMS
  if waterfall == 'perf-fyi':
    return bot_platforms.FYI_PLATFORMS
  return bot_platforms.ALL_PLATFORMS
Ned Nguyen0d2d04f2018-11-13 14:26:31217
nednguyenc2ceac42018-08-31 11:12:49218
def _UpdateShardsForBuilders(args):
  """Handler for the `update` sub-command: regenerate the shard maps.

  Asks for confirmation first, refreshes the timing data unless
  --use-existing-timing-data was given, then rewrites the shard map of every
  selected builder.

  Args:
    args: Parsed command-line namespace; reads `builders`, `waterfall`,
      `use_existing_timing_data` and `debug`.
  """
  _PromptWarning()
  platforms = _GetBuilderPlatforms(args.builders, args.waterfall)
  reuse_timing_data = args.use_existing_timing_data
  if not reuse_timing_data:
    _UpdateTimingData(platforms)
  for platform in platforms:
    _GenerateShardMap(
        platform,
        platform.num_shards,
        platform.shards_map_file_path,
        args.debug)
    print('Updated sharding map for %s' % repr(platform.name))
nednguyenc2ceac42018-08-31 11:12:49228
229
def _DescheduleBenchmark(args):
  """Remove benchmarks from the shard maps without re-sharding.

  For every platform that has a shard map file, drops the benchmark and
  executable entries that the platform no longer runs, and rewrites the file
  with the remaining entries in their original order.

  Args:
    args: Parsed command-line namespace (unused).
  """
  del args
  builders = bot_platforms.ALL_PLATFORMS
  for b in builders:
    benchmarks_to_keep = set(
        benchmark.Name() for benchmark in b.benchmarks_to_run)
    executables_to_keep = set(executable.name for executable in b.executables)
    # Check for the file *before* opening it: open() raises on a missing
    # file, so a check made inside the `with` block could never trigger.
    if not os.path.exists(b.shards_map_file_path):
      continue
    with open(b.shards_map_file_path, 'r') as f:
      # OrderedDict keeps the shard order stable when the map is re-dumped.
      shards_map = json.load(f, object_pairs_hook=collections.OrderedDict)
    for shard, shard_map in shards_map.items():
      # 'extra_infos' is not a shard; skip it but keep processing any
      # entries that follow it.
      if shard == 'extra_infos':
        continue
      benchmarks = shard_map.get('benchmarks', dict())
      # Iterate over a snapshot of the keys: deleting from a dict while
      # iterating over it raises RuntimeError on Python 3.
      for benchmark in list(benchmarks):
        if benchmark not in benchmarks_to_keep:
          del benchmarks[benchmark]
      executables = shard_map.get('executables', dict())
      for executable in list(executables):
        if executable not in executables_to_keep:
          del executables[executable]
    os.remove(b.shards_map_file_path)
    _DumpJson(shards_map, b.shards_map_file_path)
  print('done.')
Caleb Rouleau43e632b42019-04-26 23:26:13256
257
Caleb Rouleau0d3593c72019-05-17 00:47:24258def _ParseBenchmarks(shard_map_path):
259 if not os.path.exists(shard_map_path):
260 raise RuntimeError(
261 'Platform does not have a shard map at %s.' % shard_map_path)
262 all_benchmarks = set()
263 with open(shard_map_path) as f:
264 shard_map = json.load(f)
265 for shard, benchmarks_in_shard in shard_map.iteritems():
266 if "extra_infos" in shard:
267 continue
Caleb Rouleaud1c32eb2020-01-03 22:37:04268 if benchmarks_in_shard.get('benchmarks'):
269 all_benchmarks |= set(benchmarks_in_shard['benchmarks'].keys())
270 if benchmarks_in_shard.get('executables'):
271 all_benchmarks |= set(benchmarks_in_shard['executables'].keys())
Caleb Rouleau0d3593c72019-05-17 00:47:24272 return frozenset(all_benchmarks)
273
274
def _ValidateShardMaps(args):
  """Validate that the shard maps, timing data, etc. are consistent.

  Three checks are run, and every failure is collected before reporting:
    1. Each builder's timing file is unchanged by re-filtering it.
    2. Each platform's shard map lists exactly the benchmarks and
       executables that bot_platforms.py configures for it.
    3. Every official benchmark is scheduled on at least one shard map.

  Args:
    args: Parsed command-line namespace (unused).

  Returns:
    1 if any error was found (errors are printed to stderr), 0 otherwise.
  """
  del args
  errors = []

  # Check 1: filter each timing file into a scratch directory and compare it
  # with the checked-in file.
  scratch_dir = tempfile.mkdtemp()
  try:
    for builder in _GetBuilderPlatforms(builders=None, waterfall='all'):
      timing_path = builder.timing_file_path
      filtered_path = os.path.join(scratch_dir, os.path.basename(timing_path))
      _FilterTimingData(builder, filtered_path)
      if filecmp.cmp(builder.timing_file_path, filtered_path):
        continue
      errors.append(
          '{timing_data} is not up to date. Please run '
          '`./generate_perf_sharding.py update-timing --filter-only` '
          'to regenerate it.'.format(timing_data=builder.timing_file_path))
  finally:
    shutil.rmtree(scratch_dir)

  # Check 2: bot_platforms.py must match the actual shard maps.
  not_scheduled_template = (
      'Benchmark {benchmark} is supposed to be scheduled on platform '
      '{platform} according to '
      'bot_platforms.py, but it is not yet scheduled. If this is a new '
      'benchmark, please rename it to UNSCHEDULED_{benchmark}, and then '
      'contact '
      'Telemetry and Chrome Client Infra team to schedule the benchmark. '
      'You can email chrome-benchmarking-request@ to get started.')
  unexpected_template = (
      'Benchmark {benchmark} is scheduled on shard map {path}, but '
      'bot_platforms.py '
      'says that it should not be on that shard map. This could be because '
      'the benchmark was deleted. If that is the case, you can use '
      '`generate_perf_sharding deschedule` to deschedule the benchmark '
      'from the shard map.')
  for platform in bot_platforms.ALL_PLATFORMS:
    configured_names = set(b.name for b in platform.benchmark_configs)
    configured_names |= set(e.name for e in platform.executables)
    mapped_names = _ParseBenchmarks(platform.shards_map_file_path)
    for benchmark in configured_names - mapped_names:
      errors.append(not_scheduled_template.format(
          benchmark=benchmark, platform=platform.name))
    for benchmark in mapped_names - configured_names:
      errors.append(unexpected_template.format(
          benchmark=benchmark, path=platform.shards_map_file_path))

  # Check 3: every official benchmark is scheduled on some shard map.
  # TODO(crbug.com/963614): Note that this check can be deleted if we
  # find some way other than naming the benchmark with prefix "UNSCHEDULED_"
  # to make it clear that a benchmark is not running.
  scheduled_benchmarks = set()
  for platform in bot_platforms.ALL_PLATFORMS:
    scheduled_benchmarks |= _ParseBenchmarks(platform.shards_map_file_path)
  unscheduled = bot_platforms.OFFICIAL_BENCHMARK_NAMES - scheduled_benchmarks
  for benchmark in unscheduled:
    errors.append(
        'Benchmark {benchmark} is an official benchmark, but it is not '
        'scheduled to run anywhere. please rename it to '
        'UNSCHEDULED_{benchmark}'.format(benchmark=benchmark))

  for error in errors:
    print('*', textwrap.fill(error, 70), '\n', file=sys.stderr)
  return 1 if errors else 0
341
342
def main():
  """Parse the command line and run the selected sub-command.

  Returns:
    Whatever the sub-command handler returns; the script passes it to
    sys.exit().
  """
  options = GetParser().parse_args()
  handler = options.func
  return handler(options)


if __name__ == '__main__':
  sys.exit(main())