# Copyright (c) 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import copy
import re


# These labels should match the ones output by gtest's JSON.
TEST_UNKNOWN_LABEL = 'UNKNOWN'
TEST_SUCCESS_LABEL = 'SUCCESS'
TEST_FAILURE_LABEL = 'FAILURE'
TEST_TIMEOUT_LABEL = 'TIMEOUT'
TEST_WARNING_LABEL = 'WARNING'


class GTestResult(object):
| 19 | """A result of gtest. |
| 20 | |
| 21 | Properties: |
| 22 | command: The command argv. |
| 23 | crashed: Whether or not the test crashed. |
| 24 | crashed_test: The name of the test during which execution crashed, or |
| 25 | None if a particular test didn't crash. |
| 26 | failed_tests: A dict mapping the names of failed tests to a list of |
| 27 | lines of output from those tests. |
| 28 | flaked_tests: A dict mapping the names of failed flaky tests to a list |
| 29 | of lines of output from those tests. |
| 30 | passed_tests: A list of passed tests. |
| 31 | perf_links: A dict mapping the names of perf data points collected |
| 32 | to links to view those graphs. |
| 33 | return_code: The return code of the command. |
| 34 | success: Whether or not this run of the command was considered a |
| 35 | successful GTest execution. |
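
  Example (an illustrative sketch; the command and test name are invented):
    result = GTestResult(['./base_unittests', '--gtest_filter=Foo.*'])
    result.failed_tests['Foo.Bar'] = ['Did not complete.']
    result.finalize(return_code=1, success=False)
    result.crashed       # True
    result.crashed_test  # 'Foo.Bar'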
| 36 | """ |
| 37 | @property |
| 38 | def crashed(self): |
| 39 | return self._crashed |
| 40 | |
| 41 | @property |
| 42 | def crashed_test(self): |
| 43 | return self._crashed_test |
| 44 | |
| 45 | @property |
| 46 | def command(self): |
| 47 | return self._command |
| 48 | |
| 49 | @property |
| 50 | def failed_tests(self): |
| 51 | if self.__finalized: |
| 52 | return copy.deepcopy(self._failed_tests) |
| 53 | return self._failed_tests |
| 54 | |
| 55 | @property |
| 56 | def flaked_tests(self): |
| 57 | if self.__finalized: |
| 58 | return copy.deepcopy(self._flaked_tests) |
| 59 | return self._flaked_tests |
| 60 | |
| 61 | @property |
| 62 | def passed_tests(self): |
| 63 | if self.__finalized: |
| 64 | return copy.deepcopy(self._passed_tests) |
| 65 | return self._passed_tests |
| 66 | |
| 67 | @property |
| 68 | def perf_links(self): |
| 69 | if self.__finalized: |
| 70 | return copy.deepcopy(self._perf_links) |
| 71 | return self._perf_links |
| 72 | |
| 73 | @property |
| 74 | def return_code(self): |
| 75 | return self._return_code |
| 76 | |
| 77 | @property |
| 78 | def success(self): |
| 79 | return self._success |
| 80 | |
| 81 | def __init__(self, command): |
| 82 | if not isinstance(command, collections.Iterable): |
| 83 | raise ValueError('Expected an iterable of command arguments.', command) |
| 84 | |
| 85 | if not command: |
| 86 | raise ValueError('Expected a non-empty command.', command) |
| 87 | |
| 88 | self._command = tuple(command) |
| 89 | self._crashed = False |
| 90 | self._crashed_test = None |
| 91 | self._failed_tests = collections.OrderedDict() |
| 92 | self._flaked_tests = collections.OrderedDict() |
| 93 | self._passed_tests = [] |
| 94 | self._perf_links = collections.OrderedDict() |
| 95 | self._return_code = None |
| 96 | self._success = None |
| 97 | self.__finalized = False |
| 98 | |
| 99 | def finalize(self, return_code, success): |
| 100 | self._return_code = return_code |
| 101 | self._success = success |
| 102 | |
| 103 | # If the test was not considered to be a GTest success, but had no |
| 104 | # failing tests, conclude that it must have crashed. |
| 105 | if not self._success and not self._failed_tests and not self._flaked_tests: |
| 106 | self._crashed = True |
| 107 | |
| 108 | # At most one test can crash the entire app in a given parsing. |
| 109 | for test, log_lines in self._failed_tests.iteritems(): |
      # A test with no output must have crashed: the GTestLogParser replaces
      # missing output with a sentence indicating non-completion.
      if 'Did not complete.' in log_lines:
        self._crashed = True
        self._crashed_test = test

    # A test marked as flaky may also have crashed the app.
    for test, log_lines in self._flaked_tests.iteritems():
      if 'Did not complete.' in log_lines:
        self._crashed = True
        self._crashed_test = test

    self.__finalized = True


class GTestLogParser(object):
| 126 | """This helper class process GTest test output.""" |

  def __init__(self):
    # State tracking for log parsing.
    self.completed = False
    self._current_test = ''
    self._failure_description = []
    self._parsing_failures = False

    # Line number currently being processed.
    self._line_number = 0

    # List of parsing errors, as human-readable strings.
    self._internal_error_lines = []

    # Tests are stored here as 'test.name': (status, [description]).
    # The status should be one of ('started', 'OK', 'failed', 'timeout',
    # 'warning'). Warning indicates that a test did not pass when run in
    # parallel with other tests but passed when run alone. The description is
    # a list of lines detailing the test's error, as reported in the log.
    self._test_status = {}

    # This may be either text or a number. It will be used in the phrase
    # '%s disabled' or '%s flaky' on the waterfall display.
    self._disabled_tests = 0
    self._flaky_tests = 0

    # Regular expressions for parsing GTest logs. Test names look like
    # "x.y", with 0 or more "w/" prefixes and 0 or more "/z" suffixes.
    # e.g.:
    #   SomeName/SomeTestCase.SomeTest/1
    #   SomeName/SomeTestCase/1.SomeTest
    #   SomeName/SomeTestCase/1.SomeTest/SomeModifier
    test_name_regexp = r'((\w+/)*\w+\.\w+(/\w+)*)'

    self._master_name_re = re.compile(r'\[Running for master: "([^"]*)"')
    self.master_name = ''

    self._test_name = re.compile(test_name_regexp)
    self._test_start = re.compile(r'\[\s+RUN\s+\] ' + test_name_regexp)
    self._test_ok = re.compile(r'\[\s+OK\s+\] ' + test_name_regexp)
    self._test_fail = re.compile(r'\[\s+FAILED\s+\] ' + test_name_regexp)
    self._test_passed = re.compile(r'\[\s+PASSED\s+\] \d+ tests?\.')
    self._run_test_cases_line = re.compile(
        r'\[\s*\d+\/\d+\]\s+[0-9\.]+s ' + test_name_regexp + ' .+')
    self._test_timeout = re.compile(
        r'Test timeout \([0-9]+ ms\) exceeded for ' + test_name_regexp)
    self._disabled = re.compile(r'\s*YOU HAVE (\d+) DISABLED TEST')
    self._flaky = re.compile(r'\s*YOU HAVE (\d+) FLAKY TEST')

    self._retry_message = re.compile('RETRYING FAILED TESTS:')
    self.retrying_failed = False

    self.TEST_STATUS_MAP = {
        'OK': TEST_SUCCESS_LABEL,
        'failed': TEST_FAILURE_LABEL,
        'timeout': TEST_TIMEOUT_LABEL,
        'warning': TEST_WARNING_LABEL
    }

  def GetCurrentTest(self):
    return self._current_test

  def _StatusOfTest(self, test):
    """Returns the status code for the given test, or 'not known'."""
    test_status = self._test_status.get(test, ('not known', []))
    return test_status[0]

  def _TestsByStatus(self, status, include_fails, include_flaky):
    """Returns list of tests with the given status.

    Args:
      include_fails: If False, tests containing 'FAILS_' anywhere in their
        names will be excluded from the list.
      include_flaky: If False, tests containing 'FLAKY_' anywhere in their
        names will be excluded from the list.
    """
    test_list = [x[0] for x in self._test_status.items()
                 if self._StatusOfTest(x[0]) == status]

    if not include_fails:
      test_list = [x for x in test_list if x.find('FAILS_') == -1]
    if not include_flaky:
      test_list = [x for x in test_list if x.find('FLAKY_') == -1]

    return test_list

  def _RecordError(self, line, reason):
    """Record a log line that produced a parsing error.

    Args:
      line: text of the line at which the error occurred
      reason: a string describing the error
    """
    self._internal_error_lines.append('%s: %s [%s]' %
                                      (self._line_number, line.strip(), reason))

  def RunningTests(self):
    """Returns list of tests that appear to be currently running."""
    return self._TestsByStatus('started', True, True)

  def ParsingErrors(self):
    """Returns a list of lines that have caused parsing errors."""
    return self._internal_error_lines

  def ClearParsingErrors(self):
    """Clears the currently stored parsing errors."""
    self._internal_error_lines = ['Cleared.']

  def PassedTests(self, include_fails=False, include_flaky=False):
    """Returns list of tests that passed."""
    return self._TestsByStatus('OK', include_fails, include_flaky)

  def FailedTests(self, include_fails=False, include_flaky=False):
    """Returns list of tests that failed, timed out, or didn't finish
    (crashed).

    This list will be incorrect until the complete log has been processed,
    because it will show currently running tests as having failed.

    Args:
      include_fails: If true, all failing tests with FAILS_ in their names
        will be included. Otherwise, they will only be included if they
        crashed or timed out.
      include_flaky: If true, all failing tests with FLAKY_ in their names
        will be included. Otherwise, they will only be included if they
        crashed or timed out.
    """
    return (self._TestsByStatus('failed', include_fails, include_flaky) +
            self._TestsByStatus('timeout', True, True) +
            self._TestsByStatus('warning', include_fails, include_flaky) +
            self.RunningTests())

  def TriesForTest(self, test):
| 261 | """Returns a list containing the state for all tries of the given test. |
| 262 | This parser doesn't support retries so a single result is returned.""" |
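    # e.g. [TEST_SUCCESS_LABEL] for a test that passed, or
    # [TEST_UNKNOWN_LABEL] for a test this parser never saw.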
    return [self.TEST_STATUS_MAP.get(self._StatusOfTest(test),
                                     TEST_UNKNOWN_LABEL)]

  def DisabledTests(self):
    """Returns the number of disabled tests, or the string 'some' if the
    exact count could not be determined.
    """
    return self._disabled_tests

  def FlakyTests(self):
    """Returns the number of flaky tests, or the string 'some' if the
    exact count could not be determined.
    """
    return self._flaky_tests

  def FailureDescription(self, test):
    """Returns a list containing the failure description for the given test.

    If the test didn't fail or time out, this is just a line with the test
    name.
    """
    test_status = self._test_status.get(test, ('', []))
    return ['%s: ' % test] + test_status[1]

  def CompletedWithoutFailure(self):
    """Returns True if all tests completed and no tests failed unexpectedly."""
    return self.completed and not self.FailedTests()

  def ProcessLine(self, line):
    """This is called once with each line of the test log."""

    # Track line number for error messages.
    self._line_number += 1

    # Some tests (net_unittests in particular) run subprocesses which can
    # write to the shared stdout buffer. Sometimes such output appears
    # between a newline and a gtest directive ('[ RUN      ]', etc.), which
    # breaks the parser. The code below tries to detect such cases and
    # recognize a mixed line as two separate lines.

    # List of regexps that the parser expects to find at the start of a line
    # but which can be somewhere in the middle.
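    # For example, a mixed line such as
    #   Some subprocess output[ RUN      ] Foo.Bar
    # is re-processed by the code below as two separate lines,
    # 'Some subprocess output' and '[ RUN      ] Foo.Bar'.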
    gtest_regexps = [
        self._test_start,
        self._test_ok,
        self._test_fail,
        self._test_passed,
    ]

    for regexp in gtest_regexps:
      match = regexp.search(line)
      if match:
        break

    if not match or match.start() == 0:
      self._ProcessLine(line)
    else:
      self._ProcessLine(line[:match.start()])
      self._ProcessLine(line[match.start():])

  def _ProcessLine(self, line):
    """Parses the line and changes the state of parsed tests accordingly.

    Will recognize newly started tests, OK or FAILED statuses, timeouts, etc.
    """

    # Note: When sharding, the number of disabled and flaky tests will be read
    # multiple times, so this will only show the most recent values (but they
    # should all be the same anyway).

    # Is it a line listing the master name?
    if not self.master_name:
      results = self._master_name_re.match(line)
      if results:
        self.master_name = results.group(1)

    results = self._run_test_cases_line.match(line)
    if results:
      # A run_test_cases.py output.
      if self._current_test:
        if self._test_status[self._current_test][0] == 'started':
          self._test_status[self._current_test] = (
              'timeout', self._failure_description)
      self._current_test = ''
      self._failure_description = []
      return

    # Is it a line declaring all tests passed?
    results = self._test_passed.match(line)
    if results:
      self.completed = True
      self._current_test = ''
      return

    # Is it a line reporting disabled tests?
    results = self._disabled.match(line)
    if results:
      try:
        disabled = int(results.group(1))
      except ValueError:
        disabled = 0
      if disabled > 0 and isinstance(self._disabled_tests, int):
        self._disabled_tests = disabled
      else:
        # If we can't parse the line, at least give a heads-up. This is a
        # safety net for a case that shouldn't happen but isn't a fatal error.
        self._disabled_tests = 'some'
      return

    # Is it a line reporting flaky tests?
    results = self._flaky.match(line)
    if results:
      try:
        flaky = int(results.group(1))
      except ValueError:
        flaky = 0
      if flaky > 0 and isinstance(self._flaky_tests, int):
        self._flaky_tests = flaky
      else:
        # If we can't parse the line, at least give a heads-up. This is a
        # safety net for a case that shouldn't happen but isn't a fatal error.
        self._flaky_tests = 'some'
      return

    # Is it the start of a test?
    results = self._test_start.match(line)
    if results:
      if self._current_test:
        if self._test_status[self._current_test][0] == 'started':
          self._test_status[self._current_test] = (
              'timeout', self._failure_description)
      test_name = results.group(1)
      self._test_status[test_name] = ('started', ['Did not complete.'])
      self._current_test = test_name
      if self.retrying_failed:
        self._failure_description = self._test_status[test_name][1]
        self._failure_description.extend(['', 'RETRY OUTPUT:', ''])
      else:
        self._failure_description = []
      return

    # Is it a test success line?
    results = self._test_ok.match(line)
    if results:
      test_name = results.group(1)
      status = self._StatusOfTest(test_name)
      if status != 'started':
        self._RecordError(line, 'success while in status %s' % status)
      if self.retrying_failed:
        self._test_status[test_name] = ('warning', self._failure_description)
      else:
        self._test_status[test_name] = ('OK', [])
      self._failure_description = []
      self._current_test = ''
      return

    # Is it a test failure line?
    results = self._test_fail.match(line)
    if results:
      test_name = results.group(1)
      status = self._StatusOfTest(test_name)
      if status not in ('started', 'failed', 'timeout'):
        self._RecordError(line, 'failure while in status %s' % status)
      # Don't overwrite the failure description when a failing test is listed
      # a second time in the summary, or if it was already recorded as timing
      # out.
      if status not in ('failed', 'timeout'):
        self._test_status[test_name] = ('failed', self._failure_description)
      self._failure_description = []
      self._current_test = ''
      return

    # Is it a test timeout line?
    results = self._test_timeout.search(line)
    if results:
      test_name = results.group(1)
      status = self._StatusOfTest(test_name)
      if status not in ('started', 'failed'):
        self._RecordError(line, 'timeout while in status %s' % status)
      self._test_status[test_name] = (
          'timeout', self._failure_description + ['Killed (timed out).'])
      self._failure_description = []
      self._current_test = ''
      return

    # Is it the start of the retry tests?
    results = self._retry_message.match(line)
    if results:
      self.retrying_failed = True
      return

    # Random line: if we're in a test, collect it for the failure description.
    # Tests may run simultaneously, so this might be off, but it's worth a try.
    # This also won't work if a test times out before it begins running.
    if self._current_test:
      self._failure_description.append(line)

    # Parse the "Failing tests:" list at the end of the output, and add any
    # additional failed tests to the list. For example, this includes tests
    # that crash after the OK line.
    if self._parsing_failures:
      results = self._test_name.match(line)
      if results:
        test_name = results.group(1)
        status = self._StatusOfTest(test_name)
        if status in ('not known', 'OK'):
          self._test_status[test_name] = (
              'failed', ['Unknown error, see stdio log.'])
      else:
        self._parsing_failures = False
    elif line.startswith('Failing tests:'):
      self._parsing_failures = True