Port iOS test runner from chromium/tools/build

BUG=608537

Review-Url: https://codereview.chromium.org/1949123004
Cr-Commit-Position: refs/heads/master@{#392215}
diff --git a/ios/build/bots/scripts/gtest_utils.py b/ios/build/bots/scripts/gtest_utils.py
new file mode 100755
index 0000000..891939d
--- /dev/null
+++ b/ios/build/bots/scripts/gtest_utils.py
@@ -0,0 +1,474 @@
+#!/usr/bin/env python
+# Copyright (c) 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import collections
+import copy
+import re
+
+
+# These labels should match the ones output by gtest's JSON.
+TEST_UNKNOWN_LABEL = 'UNKNOWN'
+TEST_SUCCESS_LABEL = 'SUCCESS'
+TEST_FAILURE_LABEL = 'FAILURE'
+TEST_TIMEOUT_LABEL = 'TIMEOUT'
+TEST_WARNING_LABEL = 'WARNING'
+
+
+class GTestResult(object):
+  """A result of gtest.
+
+  Properties:
+    command: The command argv.
+    crashed: Whether or not the test crashed.
+    crashed_test: The name of the test that was running when the app
+      crashed, or None if the crash did not occur during a particular test.
+    failed_tests: A dict mapping the names of failed tests to a list of
+      lines of output from those tests.
+    flaked_tests: A dict mapping the names of failed flaky tests to a list
+      of lines of output from those tests.
+    passed_tests: A list of passed tests.
+    perf_links: A dict mapping the names of perf data points collected
+      to links to view those graphs.
+    return_code: The return code of the command.
+    success: Whether or not this run of the command was considered a
+      successful GTest execution.
+  """
+  @property
+  def crashed(self):
+    return self._crashed
+
+  @property
+  def crashed_test(self):
+    return self._crashed_test
+
+  @property
+  def command(self):
+    return self._command
+
+  @property
+  def failed_tests(self):
+    if self.__finalized:
+      return copy.deepcopy(self._failed_tests)
+    return self._failed_tests
+
+  @property
+  def flaked_tests(self):
+    if self.__finalized:
+      return copy.deepcopy(self._flaked_tests)
+    return self._flaked_tests
+
+  @property
+  def passed_tests(self):
+    if self.__finalized:
+      return copy.deepcopy(self._passed_tests)
+    return self._passed_tests
+
+  @property
+  def perf_links(self):
+    if self.__finalized:
+      return copy.deepcopy(self._perf_links)
+    return self._perf_links
+
+  @property
+  def return_code(self):
+    return self._return_code
+
+  @property
+  def success(self):
+    return self._success
+
+  def __init__(self, command):
+    if not isinstance(command, collections.Iterable):
+      raise ValueError('Expected an iterable of command arguments.', command)
+
+    if not command:
+      raise ValueError('Expected a non-empty command.', command)
+
+    self._command = tuple(command)
+    self._crashed = False
+    self._crashed_test = None
+    self._failed_tests = collections.OrderedDict()
+    self._flaked_tests = collections.OrderedDict()
+    self._passed_tests = []
+    self._perf_links = collections.OrderedDict()
+    self._return_code = None
+    self._success = None
+    self.__finalized = False
+
+  def finalize(self, return_code, success):
+    self._return_code = return_code
+    self._success = success
+
+    # If the test was not considered to be a GTest success, but had no
+    # failing tests, conclude that it must have crashed.
+    if not self._success and not self._failed_tests and not self._flaked_tests:
+      self._crashed = True
+
+    # At most one test can crash the entire app in a given parsing.
+    for test, log_lines in self._failed_tests.iteritems():
+      # A test with no output must have crashed. The GTestLogParser replaces
+      # missing output with a sentence indicating non-completion.
+      if 'Did not complete.' in log_lines:
+        self._crashed = True
+        self._crashed_test = test
+
+    # A test marked as flaky may also have crashed the app.
+    for test, log_lines in self._flaked_tests.iteritems():
+      if 'Did not complete.' in log_lines:
+        self._crashed = True
+        self._crashed_test = test
+
+    self.__finalized = True
+
+
+class GTestLogParser(object):
+  """This helper class process GTest test output."""
+
+  def __init__(self):
+    # State tracking for log parsing
+    self.completed = False
+    self._current_test = ''
+    self._failure_description = []
+    self._parsing_failures = False
+
+    # Line number currently being processed.
+    self._line_number = 0
+
+    # List of parsing errors, as human-readable strings.
+    self._internal_error_lines = []
+
+    # Tests are stored here as 'test.name': (status, [description]).
+    # The status should be one of ('started', 'OK', 'failed', 'timeout',
+    # 'warning'). Warning indicates that a test did not pass when run in
+    # parallel with other tests but passed when run alone. The description is
+    # a list of lines detailing the test's error, as reported in the log.
+    self._test_status = {}
+
+    # This may be either text or a number. It will be used in the phrase
+    # '%s disabled' or '%s flaky' on the waterfall display.
+    self._disabled_tests = 0
+    self._flaky_tests = 0
+
+    # Regular expressions for parsing GTest logs. Test names look like
+    # "x.y", with 0 or more "w/" prefixes and 0 or more "/z" suffixes.
+    # e.g.:
+    #   SomeName/SomeTestCase.SomeTest/1
+    #   SomeName/SomeTestCase/1.SomeTest
+    #   SomeName/SomeTestCase/1.SomeTest/SomeModifier
+    test_name_regexp = r'((\w+/)*\w+\.\w+(/\w+)*)'
+
+    self._master_name_re = re.compile(r'\[Running for master: "([^"]*)"')
+    self.master_name = ''
+
+    self._test_name = re.compile(test_name_regexp)
+    self._test_start = re.compile(r'\[\s+RUN\s+\] ' + test_name_regexp)
+    self._test_ok = re.compile(r'\[\s+OK\s+\] ' + test_name_regexp)
+    self._test_fail = re.compile(r'\[\s+FAILED\s+\] ' + test_name_regexp)
+    self._test_passed = re.compile(r'\[\s+PASSED\s+\] \d+ tests?.')
+    self._run_test_cases_line = re.compile(
+        r'\[\s*\d+\/\d+\]\s+[0-9\.]+s ' + test_name_regexp + ' .+')
+    self._test_timeout = re.compile(
+        r'Test timeout \([0-9]+ ms\) exceeded for ' + test_name_regexp)
+    self._disabled = re.compile(r'\s*YOU HAVE (\d+) DISABLED TEST')
+    self._flaky = re.compile(r'\s*YOU HAVE (\d+) FLAKY TEST')
+
+    self._retry_message = re.compile('RETRYING FAILED TESTS:')
+    self.retrying_failed = False
+
+    self.TEST_STATUS_MAP = {
+      'OK': TEST_SUCCESS_LABEL,
+      'failed': TEST_FAILURE_LABEL,
+      'timeout': TEST_TIMEOUT_LABEL,
+      'warning': TEST_WARNING_LABEL
+    }
+
+  def GetCurrentTest(self):
+    return self._current_test
+
+  def _StatusOfTest(self, test):
+    """Returns the status code for the given test, or 'not known'."""
+    test_status = self._test_status.get(test, ('not known', []))
+    return test_status[0]
+
+  def _TestsByStatus(self, status, include_fails, include_flaky):
+    """Returns list of tests with the given status.
+
+    Args:
+      include_fails: If False, tests containing 'FAILS_' anywhere in their
+          names will be excluded from the list.
+      include_flaky: If False, tests containing 'FLAKY_' anywhere in their
+          names will be excluded from the list.
+    """
+    test_list = [x[0] for x in self._test_status.items()
+                 if self._StatusOfTest(x[0]) == status]
+
+    if not include_fails:
+      test_list = [x for x in test_list if x.find('FAILS_') == -1]
+    if not include_flaky:
+      test_list = [x for x in test_list if x.find('FLAKY_') == -1]
+
+    return test_list
+
+  def _RecordError(self, line, reason):
+    """Record a log line that produced a parsing error.
+
+    Args:
+      line: text of the line at which the error occurred
+      reason: a string describing the error
+    """
+    self._internal_error_lines.append('%s: %s [%s]' %
+                                      (self._line_number, line.strip(), reason))
+
+  def RunningTests(self):
+    """Returns list of tests that appear to be currently running."""
+    return self._TestsByStatus('started', True, True)
+
+  def ParsingErrors(self):
+    """Returns a list of lines that have caused parsing errors."""
+    return self._internal_error_lines
+
+  def ClearParsingErrors(self):
+    """Clears the currently stored parsing errors."""
+    self._internal_error_lines = ['Cleared.']
+
+  def PassedTests(self, include_fails=False, include_flaky=False):
+    """Returns list of tests that passed."""
+    return self._TestsByStatus('OK', include_fails, include_flaky)
+
+  def FailedTests(self, include_fails=False, include_flaky=False):
+    """Returns list of tests that failed, timed out, or didn't finish
+    (crashed).
+
+    This list will be incorrect until the complete log has been processed,
+    because it will show currently running tests as having failed.
+
+    Args:
+      include_fails: If true, all failing tests with FAILS_ in their names will
+          be included. Otherwise, they will only be included if they crashed or
+          timed out.
+      include_flaky: If true, all failing tests with FLAKY_ in their names will
+          be included. Otherwise, they will only be included if they crashed or
+          timed out.
+
+    """
+    return (self._TestsByStatus('failed', include_fails, include_flaky) +
+            self._TestsByStatus('timeout', True, True) +
+            self._TestsByStatus('warning', include_fails, include_flaky) +
+            self.RunningTests())
+
+  def TriesForTest(self, test):
+    """Returns a list containing the state for all tries of the given test.
+    This parser doesn't support retries so a single result is returned."""
+    return [self.TEST_STATUS_MAP.get(self._StatusOfTest(test),
+                                    TEST_UNKNOWN_LABEL)]
+
+  def DisabledTests(self):
+    """Returns the name of the disabled test (if there is only 1) or the number
+    of disabled tests.
+    """
+    return self._disabled_tests
+
+  def FlakyTests(self):
+    """Returns the name of the flaky test (if there is only 1) or the number
+    of flaky tests.
+    """
+    return self._flaky_tests
+
+  def FailureDescription(self, test):
+    """Returns a list containing the failure description for the given test.
+
+    If the test didn't fail or time out, only the test name prefix is returned.
+    """
+    test_status = self._test_status.get(test, ('', []))
+    return ['%s: ' % test] + test_status[1]
+
+  def CompletedWithoutFailure(self):
+    """Returns True if all tests completed and no tests failed unexpectedly."""
+    return self.completed and not self.FailedTests()
+
+  def ProcessLine(self, line):
+    """This is called once with each line of the test log."""
+
+    # Track line number for error messages.
+    self._line_number += 1
+
+    # Some tests (net_unittests in particular) run subprocesses which can
+    # write to the shared stdout buffer. Sometimes such output appears on the
+    # same line, just before a gtest directive ('[  RUN  ]', etc.), which
+    # breaks the parser. The code below tries to detect such cases and treat
+    # a mixed line as two separate lines.
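+    # For example, "subprocess noise[  RUN      ] Foo.Bar" (illustrative) is
+    # handled as two lines: "subprocess noise" and "[  RUN      ] Foo.Bar".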
+
+    # List of regexps that the parser expects to find at the start of a line
+    # but which can appear somewhere in the middle.
+    gtest_regexps = [
+      self._test_start,
+      self._test_ok,
+      self._test_fail,
+      self._test_passed,
+    ]
+
+    for regexp in gtest_regexps:
+      match = regexp.search(line)
+      if match:
+        break
+
+    if not match or match.start() == 0:
+      self._ProcessLine(line)
+    else:
+      self._ProcessLine(line[:match.start()])
+      self._ProcessLine(line[match.start():])
+
+  def _ProcessLine(self, line):
+    """Parses the line and changes the state of parsed tests accordingly.
+
+    Will recognize newly started tests, OK or FAILED statuses, timeouts, etc.
+    """
+
+    # Note: When sharding, the number of disabled and flaky tests will be read
+    # multiple times, so this will only show the most recent values (but they
+    # should all be the same anyway).
+
+    # Is it a line listing the master name?
+    if not self.master_name:
+      results = self._master_name_re.match(line)
+      if results:
+        self.master_name = results.group(1)
+
+    results = self._run_test_cases_line.match(line)
+    if results:
+      # A run_test_cases.py output.
+      if self._current_test:
+        if self._test_status[self._current_test][0] == 'started':
+          self._test_status[self._current_test] = (
+              'timeout', self._failure_description)
+      self._current_test = ''
+      self._failure_description = []
+      return
+
+    # Is it a line declaring all tests passed?
+    results = self._test_passed.match(line)
+    if results:
+      self.completed = True
+      self._current_test = ''
+      return
+
+    # Is it a line reporting disabled tests?
+    results = self._disabled.match(line)
+    if results:
+      try:
+        disabled = int(results.group(1))
+      except ValueError:
+        disabled = 0
+      if disabled > 0 and isinstance(self._disabled_tests, int):
+        self._disabled_tests = disabled
+      else:
+        # If we can't parse the line, at least give a heads-up. This is a
+        # safety net for a case that shouldn't happen but isn't a fatal error.
+        self._disabled_tests = 'some'
+      return
+
+    # Is it a line reporting flaky tests?
+    results = self._flaky.match(line)
+    if results:
+      try:
+        flaky = int(results.group(1))
+      except ValueError:
+        flaky = 0
+      if flaky > 0 and isinstance(self._flaky_tests, int):
+        self._flaky_tests = flaky
+      else:
+        # If we can't parse the line, at least give a heads-up. This is a
+        # safety net for a case that shouldn't happen but isn't a fatal error.
+        self._flaky_tests = 'some'
+      return
+
+    # Is it the start of a test?
+    results = self._test_start.match(line)
+    if results:
+      if self._current_test:
+        if self._test_status[self._current_test][0] == 'started':
+          self._test_status[self._current_test] = (
+              'timeout', self._failure_description)
+      test_name = results.group(1)
+      self._test_status[test_name] = ('started', ['Did not complete.'])
+      self._current_test = test_name
+      if self.retrying_failed:
+        self._failure_description = self._test_status[test_name][1]
+        self._failure_description.extend(['', 'RETRY OUTPUT:', ''])
+      else:
+        self._failure_description = []
+      return
+
+    # Is it a test success line?
+    results = self._test_ok.match(line)
+    if results:
+      test_name = results.group(1)
+      status = self._StatusOfTest(test_name)
+      if status != 'started':
+        self._RecordError(line, 'success while in status %s' % status)
+      if self.retrying_failed:
+        self._test_status[test_name] = ('warning', self._failure_description)
+      else:
+        self._test_status[test_name] = ('OK', [])
+      self._failure_description = []
+      self._current_test = ''
+      return
+
+    # Is it a test failure line?
+    results = self._test_fail.match(line)
+    if results:
+      test_name = results.group(1)
+      status = self._StatusOfTest(test_name)
+      if status not in ('started', 'failed', 'timeout'):
+        self._RecordError(line, 'failure while in status %s' % status)
+      # Don't overwrite the failure description when a failing test is listed a
+      # second time in the summary, or if it was already recorded as timing
+      # out.
+      if status not in ('failed', 'timeout'):
+        self._test_status[test_name] = ('failed', self._failure_description)
+      self._failure_description = []
+      self._current_test = ''
+      return
+
+    # Is it a test timeout line?
+    results = self._test_timeout.search(line)
+    if results:
+      test_name = results.group(1)
+      status = self._StatusOfTest(test_name)
+      if status not in ('started', 'failed'):
+        self._RecordError(line, 'timeout while in status %s' % status)
+      self._test_status[test_name] = (
+          'timeout', self._failure_description + ['Killed (timed out).'])
+      self._failure_description = []
+      self._current_test = ''
+      return
+
+    # Is it the start of the retry tests?
+    results = self._retry_message.match(line)
+    if results:
+      self.retrying_failed = True
+      return
+
+    # Random line: if we're in a test, collect it for the failure description.
+    # Tests may run simultaneously, so this might be off, but it's worth a try.
+    # This also won't work if a test times out before it begins running.
+    if self._current_test:
+      self._failure_description.append(line)
+
+    # Parse the "Failing tests:" list at the end of the output, and add any
+    # additional failed tests to the list. For example, this includes tests
+    # that crash after the OK line.
+    if self._parsing_failures:
+      results = self._test_name.match(line)
+      if results:
+        test_name = results.group(1)
+        status = self._StatusOfTest(test_name)
+        if status in ('not known', 'OK'):
+          self._test_status[test_name] = (
+              'failed', ['Unknown error, see stdio log.'])
+      else:
+        self._parsing_failures = False
+    elif line.startswith('Failing tests:'):
+      self._parsing_failures = True
diff --git a/ios/build/bots/scripts/run.py b/ios/build/bots/scripts/run.py
new file mode 100755
index 0000000..929f3f81
--- /dev/null
+++ b/ios/build/bots/scripts/run.py
@@ -0,0 +1,110 @@
+#!/usr/bin/python
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Run a test.
+
+Sample usage:
+  ./run.py \
+  -a src/xcodebuild/Release-iphoneos/base_unittests.app \
+  -o /tmp/out \
+  -p "iPhone 5s" \
+  -v 9.3
+
+  Installs base_unittests.app in an iPhone 5s simulator running iOS 9.3,
+  runs it, and captures all test data in /tmp/out.
+"""
+
+import argparse
+import json
+import os
+import sys
+import traceback
+
+import test_runner
+
+
+def main(args, test_args):
+  summary = {}
+  tr = None
+
+  if not os.path.exists(args.out_dir):
+    os.makedirs(args.out_dir)
+
+  try:
+    tr = test_runner.SimulatorTestRunner(
+      args.app,
+      args.iossim,
+      args.platform,
+      args.version,
+      args.xcode_version,
+      args.out_dir,
+      test_args=test_args,
+    )
+
+    return 0 if tr.launch() else 1
+  except test_runner.TestRunnerError as e:
+    sys.stderr.write(traceback.format_exc())
+    summary['step_text'] = '%s%s' % (
+      e.__class__.__name__, ': %s' % e.args[0] if e.args else '')
+
+    # A successful run returns 0 and a test failure returns 1, so return 2
+    # on exception to distinguish a failure to launch the test at all from
+    # a test failure.
+    return 2
+  finally:
+    if tr:
+      summary['logs'] = tr.logs
+
+    with open(os.path.join(args.out_dir, 'summary.json'), 'w') as f:
+      json.dump(summary, f)
+
+
+if __name__ == '__main__':
+  parser = argparse.ArgumentParser()
+
+  parser.add_argument(
+    '-a',
+    '--app',
+    help='Compiled .app to run.',
+    metavar='app',
+    required=True,
+  )
+  parser.add_argument(
+    '-i',
+    '--iossim',
+    help='Compiled iossim to run the app on.',
+    metavar='iossim',
+    required=True,
+  )
+  parser.add_argument(
+    '-o',
+    '--out-dir',
+    help='Directory to store all test data in.',
+    metavar='dir',
+    required=True,
+  )
+  parser.add_argument(
+    '-p',
+    '--platform',
+    help='Platform to simulate.',
+    metavar='sim',
+    required=True,
+  )
+  parser.add_argument(
+    '-v',
+    '--version',
+    help='Version of iOS the simulator should run.',
+    metavar='ver',
+    required=True,
+  )
+  parser.add_argument(
+    '-x',
+    '--xcode-version',
+    help='Version of Xcode to use.',
+    metavar='ver',
+    required=True,
+  )
+
+  sys.exit(main(*parser.parse_known_args()))
diff --git a/ios/build/bots/scripts/test_runner.py b/ios/build/bots/scripts/test_runner.py
new file mode 100644
index 0000000..01c0dc2
--- /dev/null
+++ b/ios/build/bots/scripts/test_runner.py
@@ -0,0 +1,425 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Test runners for iOS."""
+
+import argparse
+import collections
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import time
+
+import find_xcode
+import gtest_utils
+
+
+class Error(Exception):
+  """Base class for errors."""
+  pass
+
+
+class TestRunnerError(Error):
+  """Base class for TestRunner-related errors."""
+  pass
+
+
+class AppLaunchError(TestRunnerError):
+  """The app failed to launch."""
+  pass
+
+
+class AppNotFoundError(TestRunnerError):
+  """The requested app was not found."""
+  def __init__(self, app_path):
+    super(AppNotFoundError, self).__init__(
+        'App does not exist: %s' % app_path)
+
+
+class SimulatorNotFoundError(TestRunnerError):
+  """The given simulator binary was not found."""
+  def __init__(self, iossim_path):
+    super(SimulatorNotFoundError, self).__init__(
+        'Simulator does not exist: %s' % iossim_path)
+
+
+class XcodeVersionNotFoundError(TestRunnerError):
+  """The requested version of Xcode was not found."""
+  def __init__(self, xcode_version):
+    super(XcodeVersionNotFoundError, self).__init__(
+        'Xcode version not found: %s' % xcode_version)
+
+
+def get_kif_test_filter(tests, invert=False):
+  """Returns the KIF test filter to filter the given test cases.
+
+  Args:
+    tests: List of test cases to filter.
+    invert: Whether to invert the filter or not. Inverted, the filter will match
+      everything except the given test cases.
+
+  Returns:
+    A string which can be supplied to GKIF_SCENARIO_FILTER.
+  """
+  # A pipe-separated list of test cases with the "KIF." prefix omitted.
+  # e.g. NAME:a|b|c matches KIF.a, KIF.b, KIF.c.
+  # e.g. -NAME:a|b|c matches everything except KIF.a, KIF.b, KIF.c.
+  test_filter = '|'.join(test.split('KIF.', 1)[-1] for test in tests)
+  if invert:
+    return '-NAME:%s' % test_filter
+  return 'NAME:%s' % test_filter
+
+
+def get_gtest_filter(tests, invert=False):
+  """Returns the GTest filter to filter the given test cases.
+
+  Args:
+    tests: List of test cases to filter.
+    invert: Whether to invert the filter or not. Inverted, the filter will match
+      everything except the given test cases.
+
+  Returns:
+    A string which can be supplied to --gtest_filter.
+  """
+  # A colon-separated list of test cases.
+  # e.g. a:b:c matches a, b, c.
+  # e.g. -a:b:c matches everything except a, b, c.
+  test_filter = ':'.join(test for test in tests)
+  if invert:
+    return '-%s' % test_filter
+  return test_filter
+
+
+class TestRunner(object):
+  """Base class containing common functionality."""
+
+  def __init__(self, app_path, xcode_version, out_dir, test_args=None):
+    """Initializes a new instance of this class.
+
+    Args:
+      app_path: Path to the compiled .app to run.
+      xcode_version: Version of Xcode to use when running the test.
+      out_dir: Directory to emit test data into.
+      test_args: List of strings to pass as arguments to the test when
+        launching.
+
+    Raises:
+      AppNotFoundError: If the given app does not exist.
+      XcodeVersionNotFoundError: If the given Xcode version does not exist.
+    """
+    if not os.path.exists(app_path):
+      raise AppNotFoundError(app_path)
+
+    if not find_xcode.find_xcode(xcode_version)['found']:
+      raise XcodeVersionNotFoundError(xcode_version)
+
+    if not os.path.exists(out_dir):
+      os.makedirs(out_dir)
+
+    self.app_name = os.path.splitext(os.path.split(app_path)[-1])[0]
+    self.app_path = app_path
+    self.cfbundleid = subprocess.check_output([
+        '/usr/libexec/PlistBuddy',
+        '-c', 'Print:CFBundleIdentifier',
+        os.path.join(app_path, 'Info.plist'),
+    ]).rstrip()
+    self.logs = collections.OrderedDict()
+    self.out_dir = out_dir
+    self.test_args = test_args or []
+
+  def get_launch_command(self, test_filter=None, invert=False):
+    """Returns the command that can be used to launch the test app.
+
+    Args:
+      test_filter: List of test cases to filter.
+      invert: Whether to invert the filter or not. Inverted, the filter will
+        match everything except the given test cases.
+
+    Returns:
+      A list of strings forming the command to launch the test.
+    """
+    raise NotImplementedError
+
+  def set_up(self):
+    """Performs setup actions which must occur prior to every test launch."""
+    raise NotImplementedError
+
+  def tear_down(self):
+    """Performs cleanup actions which must occur after every test launch."""
+    raise NotImplementedError
+
+  def screenshot_desktop(self):
+    """Saves a screenshot of the desktop in the output directory."""
+    subprocess.check_call([
+        'screencapture',
+        os.path.join(self.out_dir, 'desktop_%s.png' % time.time()),
+    ])
+
+  @staticmethod
+  def _run(cmd):
+    """Runs the specified command, parsing GTest output.
+
+    Args:
+      cmd: List of strings forming the command to run.
+
+    Returns:
+      GTestResult instance.
+    """
+    print ' '.join(cmd)
+    print
+
+    parser = gtest_utils.GTestLogParser()
+    result = gtest_utils.GTestResult(cmd)
+
+    proc = subprocess.Popen(
+        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+    while True:
+      line = proc.stdout.readline()
+      if not line:
+        break
+      line = line.rstrip()
+      parser.ProcessLine(line)
+      print line
+
+    proc.wait()
+
+    for test in parser.FailedTests(include_flaky=True):
+      # Test cases are named as <test group>.<test case>. If the test case
+      # is prefixed with "FLAKY_", it should be reported as flaked, not failed.
+      if '.' in test and test.split('.', 1)[1].startswith('FLAKY_'):
+        result.flaked_tests[test] = parser.FailureDescription(test)
+      else:
+        result.failed_tests[test] = parser.FailureDescription(test)
+
+    result.passed_tests.extend(parser.PassedTests(include_flaky=True))
+
+    print '%s returned %s' % (cmd[0], proc.returncode)
+    print
+
+    # iossim can return 5 if it exits noncleanly even if all tests passed.
+    # Therefore we cannot rely on process exit code to determine success.
+    result.finalize(proc.returncode, parser.CompletedWithoutFailure())
+    return result
+
+  def launch(self):
+    """Launches the test app."""
+    self.set_up()
+    cmd = self.get_launch_command()
+    try:
+      result = self._run(cmd)
+      if result.crashed and not result.crashed_test:
+        # If the app crashed but not during any particular test case, assume
+        # it crashed on startup. Try one more time.
+        print 'Crashed on startup, retrying...'
+        print
+        result = self._run(cmd)
+
+      if result.crashed and not result.crashed_test:
+        raise AppLaunchError
+
+      passed = result.passed_tests
+      failed = result.failed_tests
+      flaked = result.flaked_tests
+
+      try:
+        while result.crashed and result.crashed_test:
+          # If the app crashes during a specific test case, then resume at the
+          # next test case. This is achieved by filtering out every test case
+          # which has already run.
+          print 'Crashed during %s, resuming...' % result.crashed_test
+          print
+          result = self._run(self.get_launch_command(
+              test_filter=passed + failed.keys() + flaked.keys(), invert=True,
+          ))
+          passed.extend(result.passed_tests)
+          failed.update(result.failed_tests)
+          flaked.update(result.flaked_tests)
+      except OSError as e:
+        if e.errno == errno.E2BIG:
+          print 'Too many test cases to resume.'
+          print
+        else:
+          raise
+
+      self.logs['passed tests'] = passed
+      for test, log_lines in failed.iteritems():
+        self.logs[test] = log_lines
+      for test, log_lines in flaked.iteritems():
+        self.logs[test] = log_lines
+
+      return not failed
+    finally:
+      self.tear_down()
+
+
+class SimulatorTestRunner(TestRunner):
+  """Class for running tests on iossim."""
+
+  def __init__(
+      self,
+      app_path,
+      iossim_path,
+      platform,
+      version,
+      xcode_version,
+      out_dir,
+      test_args=None,
+  ):
+    """Initializes a new instance of this class.
+
+    Args:
+      app_path: Path to the compiled .app or .ipa to run.
+      iossim_path: Path to the compiled iossim binary to use.
+      platform: Name of the platform to simulate. Supported values can be found
+        by running "iossim -l". e.g. "iPhone 5s", "iPad Retina".
+      version: Version of iOS the platform should be running. Supported values
+        can be found by running "iossim -l". e.g. "9.3", "8.2", "7.1".
+      xcode_version: Version of Xcode to use when running the test.
+      out_dir: Directory to emit test data into.
+      test_args: List of strings to pass as arguments to the test when
+        launching.
+
+    Raises:
+      AppNotFoundError: If the given app does not exist.
+      XcodeVersionNotFoundError: If the given Xcode version does not exist.
+    """
+    super(SimulatorTestRunner, self).__init__(
+        app_path, xcode_version, out_dir, test_args=test_args)
+
+    if not os.path.exists(iossim_path):
+      raise SimulatorNotFoundError(iossim_path)
+
+    self.homedir = ''
+    self.iossim_path = iossim_path
+    self.platform = platform
+    self.start_time = None
+    self.version = version
+
+  @staticmethod
+  def kill_simulators():
+    """Kills all running simulators."""
+    try:
+      subprocess.check_call([
+          'pkill',
+          '-9',
+          '-x',
+          # The simulator's name varies by Xcode version.
+          'iPhone Simulator', # Xcode 5
+          'iOS Simulator', # Xcode 6
+          'Simulator', # Xcode 7
+      ])
+    except subprocess.CalledProcessError as e:
+      if e.returncode != 1:
+        # Ignore a 1 exit code (which means there were no simulators to kill).
+        raise
+
+  def set_up(self):
+    """Performs setup actions which must occur prior to every test launch."""
+    self.kill_simulators()
+    self.homedir = tempfile.mkdtemp()
+    # Crash reports have a timestamp in their file name, formatted as
+    # YYYY-MM-DD-HHMMSS. Save the current time in the same format so
+    # we can compare and fetch crash reports from this run later on.
+    self.start_time = time.strftime('%Y-%m-%d-%H%M%S', time.localtime())
+
+  def extract_test_data(self):
+    """Extracts data emitted by the test."""
+    # Find the directory named after the unique device ID of the simulator we
+    # started. We expect only one because we use a new homedir each time.
+    udid_dir = os.path.join(
+        self.homedir, 'Library', 'Developer', 'CoreSimulator', 'Devices')
+    if not os.path.exists(udid_dir):
+      return
+    udids = os.listdir(udid_dir)
+    if len(udids) != 1:
+      return
+
+    # Find the Documents directory of the test app. The app directory names
+    # don't correspond with any known information, so we have to examine them
+    # all until we find one with a matching CFBundleIdentifier.
+    apps_dir = os.path.join(
+        udid_dir, udids[0], 'data', 'Containers', 'Data', 'Application')
+    if os.path.exists(apps_dir):
+      for appid_dir in os.listdir(apps_dir):
+        docs_dir = os.path.join(apps_dir, appid_dir, 'Documents')
+        metadata_plist = os.path.join(
+            apps_dir,
+            appid_dir,
+            '.com.apple.mobile_container_manager.metadata.plist',
+        )
+        if os.path.exists(docs_dir) and os.path.exists(metadata_plist):
+          cfbundleid = subprocess.check_output([
+              '/usr/libexec/PlistBuddy',
+              '-c', 'Print:MCMMetadataIdentifier',
+              metadata_plist,
+          ]).rstrip()
+          if cfbundleid == self.cfbundleid:
+            shutil.copytree(docs_dir, os.path.join(self.out_dir, 'Documents'))
+            return
+
+  def retrieve_crash_reports(self):
+    """Retrieves crash reports produced by the test."""
+    # A crash report's naming scheme is [app]_[timestamp]_[hostname].crash.
+    # e.g. net_unittests_2014-05-13-15-0900_vm1-a1.crash.
+    crash_reports_dir = os.path.expanduser(os.path.join(
+        '~', 'Library', 'Logs', 'DiagnosticReports'))
+
+    if not os.path.exists(crash_reports_dir):
+      return
+
+    for crash_report in os.listdir(crash_reports_dir):
+      report_name, ext = os.path.splitext(crash_report)
+      if report_name.startswith(self.app_name) and ext == '.crash':
+        report_time = report_name[len(self.app_name) + 1:].split('_')[0]
+
+        # The timestamp format in a crash report is big-endian and therefore
+        # a straight string comparison works.
+        if report_time > self.start_time:
+          with open(os.path.join(crash_reports_dir, crash_report)) as f:
+            self.logs['crash report (%s)' % report_time] = (
+                f.read().splitlines())
+
+  def tear_down(self):
+    """Performs cleanup actions which must occur after every test launch."""
+    self.extract_test_data()
+    self.retrieve_crash_reports()
+    self.screenshot_desktop()
+    self.kill_simulators()
+    if os.path.exists(self.homedir):
+      shutil.rmtree(self.homedir, ignore_errors=True)
+      self.homedir = ''
+
+  def get_launch_command(self, test_filter=None, invert=False):
+    """Returns the command that can be used to launch the test app.
+
+    Args:
+      test_filter: List of test cases to filter.
+      invert: Whether to invert the filter or not. Inverted, the filter will
+        match everything except the given test cases.
+
+    Returns:
+      A list of strings forming the command to launch the test.
+    """
+    cmd = [
+        self.iossim_path,
+        '-d', self.platform,
+        '-s', self.version,
+        '-t', '120',
+        '-u', self.homedir,
+    ]
+    args = []
+
+    if test_filter:
+      kif_filter = get_kif_test_filter(test_filter, invert=invert)
+      gtest_filter = get_gtest_filter(test_filter, invert=invert)
+      cmd.extend(['-e', 'GKIF_SCENARIO_FILTER=%s' % kif_filter])
+      args.append('--gtest_filter=%s' % gtest_filter)
+
+    cmd.append(self.app_path)
+    cmd.extend(self.test_args)
+    cmd.extend(args)
+    return cmd
diff --git a/ios/build/bots/scripts/test_runner_test.py b/ios/build/bots/scripts/test_runner_test.py
new file mode 100755
index 0000000..0d7c8b1
--- /dev/null
+++ b/ios/build/bots/scripts/test_runner_test.py
@@ -0,0 +1,289 @@
+#!/usr/bin/python
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unittests for test_runner.py."""
+
+import collections
+import json
+import os
+import sys
+import unittest
+
+import test_runner
+
+
+class TestCase(unittest.TestCase):
+  """Test case which supports installing mocks. Uninstalls on tear down."""
+
+  def __init__(self, *args, **kwargs):
+    """Initializes a new instance of this class."""
+    super(TestCase, self).__init__(*args, **kwargs)
+
+    # Maps object to a dict which maps names of mocked members to their
+    # original values.
+    self._mocks = collections.OrderedDict()
+
+  def mock(self, obj, member, mock):
+    """Installs mock in place of the named member of the given obj.
+
+    Args:
+      obj: Any object.
+      member: String naming the attribute of the object to mock.
+      mock: The mock to install.
+    """
+    self._mocks.setdefault(obj, collections.OrderedDict()).setdefault(
+        member, getattr(obj, member))
+    setattr(obj, member, mock)
+
+  def tearDown(self, *args, **kwargs):
+    """Uninstalls mocks."""
+    super(TestCase, self).tearDown(*args, **kwargs)
+
+    for obj in self._mocks:
+      for member, original_value in self._mocks[obj].iteritems():
+        setattr(obj, member, original_value)
+
+
+class GetKIFTestFilterTest(TestCase):
+  """Tests for test_runner.get_kif_test_filter."""
+
+  def test_correct(self):
+    """Ensures correctness of filter."""
+    tests = [
+      'KIF.test1',
+      'KIF.test2',
+    ]
+    expected = 'NAME:test1|test2'
+
+    self.assertEqual(test_runner.get_kif_test_filter(tests), expected)
+
+  def test_correct_inverted(self):
+    """Ensures correctness of inverted filter."""
+    tests = [
+      'KIF.test1',
+      'KIF.test2',
+    ]
+    expected = '-NAME:test1|test2'
+
+    self.assertEqual(
+        test_runner.get_kif_test_filter(tests, invert=True), expected)
+
+
+class GetGTestFilterTest(TestCase):
+  """Tests for test_runner.get_gtest_filter."""
+
+  def test_correct(self):
+    """Ensures correctness of filter."""
+    tests = [
+      'test.1',
+      'test.2',
+    ]
+    expected = 'test.1:test.2'
+
+    self.assertEqual(test_runner.get_gtest_filter(tests), expected)
+
+  def test_correct_inverted(self):
+    """Ensures correctness of inverted filter."""
+    tests = [
+      'test.1',
+      'test.2',
+    ]
+    expected = '-test.1:test.2'
+
+    self.assertEqual(
+        test_runner.get_gtest_filter(tests, invert=True), expected)
+
+
+class SimulatorTestRunnerTest(TestCase):
+  """Tests for test_runner.SimulatorTestRunner."""
+
+  def test_app_not_found(self):
+    """Ensures AppNotFoundError is raised."""
+    def exists(path):
+      if path == 'fake-app':
+        return False
+      return True
+
+    def find_xcode(version):
+      return {'found': True}
+
+    def check_output(command):
+      return 'fake-bundle-id'
+
+    self.mock(test_runner.os.path, 'exists', exists)
+    self.mock(test_runner.find_xcode, 'find_xcode', find_xcode)
+    self.mock(test_runner.subprocess, 'check_output', check_output)
+
+    self.assertRaises(
+        test_runner.AppNotFoundError,
+        test_runner.SimulatorTestRunner,
+        'fake-app',
+        'fake-iossim',
+        'platform',
+        'os',
+        'xcode-version',
+        'out-dir',
+    )
+
+  def test_iossim_not_found(self):
+    """Ensures SimulatorNotFoundError is raised."""
+    def exists(path):
+      if path == 'fake-iossim':
+        return False
+      return True
+
+    def find_xcode(version):
+      return {'found': True}
+
+    def check_output(command):
+      return 'fake-bundle-id'
+
+    self.mock(test_runner.os.path, 'exists', exists)
+    self.mock(test_runner.find_xcode, 'find_xcode', find_xcode)
+    self.mock(test_runner.subprocess, 'check_output', check_output)
+
+    self.assertRaises(
+        test_runner.SimulatorNotFoundError,
+        test_runner.SimulatorTestRunner,
+        'fake-app',
+        'fake-iossim',
+        'platform',
+        'os',
+        'xcode-version',
+        'out-dir',
+    )
+
+  def test_init(self):
+    """Ensures instance is created."""
+    def exists(path):
+      return True
+
+    def find_xcode(version):
+      return {'found': True}
+
+    def check_output(command):
+      return 'fake-bundle-id'
+
+    self.mock(test_runner.os.path, 'exists', exists)
+    self.mock(test_runner.find_xcode, 'find_xcode', find_xcode)
+    self.mock(test_runner.subprocess, 'check_output', check_output)
+
+    tr = test_runner.SimulatorTestRunner(
+        'fake-app',
+        'fake-iossim',
+        'platform',
+        'os',
+        'xcode-version',
+        'out-dir',
+    )
+
+    self.failUnless(tr)
+
+  def test_startup_crash(self):
+    """Ensures test is relaunched once on startup crash."""
+    def exists(path):
+      return True
+
+    def find_xcode(version):
+      return {'found': True}
+
+    def check_output(command):
+      return 'fake-bundle-id'
+
+    def set_up(self):
+      return
+
+    @staticmethod
+    def _run(command):
+      return collections.namedtuple('result', ['crashed', 'crashed_test'])(
+          crashed=True, crashed_test=None)
+
+    def tear_down(self):
+      return
+
+    self.mock(test_runner.os.path, 'exists', exists)
+    self.mock(test_runner.find_xcode, 'find_xcode', find_xcode)
+    self.mock(test_runner.subprocess, 'check_output', check_output)
+    self.mock(test_runner.SimulatorTestRunner, 'set_up', set_up)
+    self.mock(test_runner.TestRunner, '_run', _run)
+    self.mock(test_runner.SimulatorTestRunner, 'tear_down', tear_down)
+
+    tr = test_runner.SimulatorTestRunner(
+        'fake-app',
+        'fake-iossim',
+        'platform',
+        'os',
+        'xcode-version',
+        'out-dir',
+    )
+    self.assertRaises(test_runner.AppLaunchError, tr.launch)
+
+  def test_relaunch(self):
+    """Ensures test is relaunched on test crash until tests complete."""
+    def exists(path):
+      return True
+
+    def find_xcode(version):
+      return {'found': True}
+
+    def check_output(command):
+      return 'fake-bundle-id'
+
+    def set_up(self):
+      return
+
+    @staticmethod
+    def _run(command):
+      result = collections.namedtuple(
+          'result', [
+              'crashed',
+              'crashed_test',
+              'failed_tests',
+              'flaked_tests',
+              'passed_tests',
+          ],
+      )
+      if '-e' not in command:
+        # First run, has no test filter supplied. Mock a crash.
+        return result(
+            crashed=True,
+            crashed_test='c',
+            failed_tests={'b': ['b-out'], 'c': ['Did not complete.']},
+            flaked_tests={'d': ['d-out']},
+            passed_tests=['a'],
+        )
+      else:
+        return result(
+            crashed=False,
+            crashed_test=None,
+            failed_tests={},
+            flaked_tests={},
+            passed_tests=[],
+        )
+
+    def tear_down(self):
+      return
+
+    self.mock(test_runner.os.path, 'exists', exists)
+    self.mock(test_runner.find_xcode, 'find_xcode', find_xcode)
+    self.mock(test_runner.subprocess, 'check_output', check_output)
+    self.mock(test_runner.SimulatorTestRunner, 'set_up', set_up)
+    self.mock(test_runner.TestRunner, '_run', _run)
+    self.mock(test_runner.SimulatorTestRunner, 'tear_down', tear_down)
+
+    tr = test_runner.SimulatorTestRunner(
+        'fake-app',
+        'fake-iossim',
+        'platform',
+        'os',
+        'xcode-version',
+        'out-dir',
+    )
+    tr.launch()
+    self.failUnless(tr.logs)
+
+
+if __name__ == '__main__':
+  unittest.main()