Convert test_runner.py to use Python logging, and add more logs. The commit https://chromium-review.googlesource.com/c/chromium/src/+/1448684 didn't seem to solve the issue where passing tests sometimes time out: EG tests that passed seem to hang before even reaching the "Waiting for test process to terminate." line, and GTest tests that get SIGTERM'd terminate properly but don't seem to return before the hard timeout. This changeset migrates to Python logging so that log messages can be timestamped, and adds additional logging. Combined, these changes should help provide a clearer picture of what's going on with this issue. Bug: 898549 Change-Id: Ibc5963f820541a23c939eff5562b672d6200c0b1 Reviewed-on: https://chromium-review.googlesource.com/c/1459322 Commit-Queue: ericale <[email protected]> Reviewed-by: Sergey Berezin <[email protected]> Cr-Commit-Position: refs/heads/master@{#630240}

commit: ce66b9fa9f0e1428c0f05f6c7091d4185c681937 [log] [tgz]
author: Eric Aleshire <[email protected]> Fri Feb 08 08:00:31 2019
committer: Commit Bot <[email protected]> Fri Feb 08 08:00:31 2019
tree: 86a422540b8d6900d2b2df6b559b93f76096f552
parent: a68807e1cbb03365f55c9d88eaa8924814974a17 [diff]
diff --git a/ios/build/bots/scripts/run.py b/ios/build/bots/scripts/run.py
index 3954f02..142b9946 100755
--- a/ios/build/bots/scripts/run.py
+++ b/ios/build/bots/scripts/run.py

@@ -19,6 +19,7 @@
 
 import argparse
 import json
+import logging
 import os
 import sys
 import traceback
@@ -28,6 +29,9 @@
 
 
 def main():
+  logging.basicConfig(format='[%(asctime)s:%(levelname)s] %(message)s',
+    level=logging.DEBUG, datefmt='%I:%M:%S')
+
   args, test_args = parse_args()
 
   summary = {}

diff --git a/ios/build/bots/scripts/test_runner.py b/ios/build/bots/scripts/test_runner.py
index 88307b1f..51f3ffa8 100644
--- a/ios/build/bots/scripts/test_runner.py
+++ b/ios/build/bots/scripts/test_runner.py

@@ -8,6 +8,7 @@
 import errno
 import glob
 import json
+import logging
 import os
 import plistlib
 import re
@@ -24,9 +25,9 @@
 import xctest_utils
 
 
+LOGGER = logging.getLogger(__name__)
 DERIVED_DATA = os.path.expanduser('~/Library/Developer/Xcode/DerivedData')
 
-
 class Error(Exception):
   """Base class for errors."""
   pass
@@ -345,8 +346,10 @@
       raise XcodeVersionNotFoundError(xcode_build_version)
 
     xcode_info = get_current_xcode_info()
-    print 'Using Xcode version %s build %s at %s' % (
-      xcode_info['version'], xcode_info['build'], xcode_info['path'])
+    LOGGER.info('Using Xcode version %s build %s at %s',
+                 xcode_info['version'],
+                 xcode_info['build'],
+                 xcode_info['path'])
 
     if not os.path.exists(out_dir):
       os.makedirs(out_dir)
@@ -468,6 +471,7 @@
     Returns:
       The previous SIGTERM handler for the test runner.
     """
+    LOGGER.debug('Setting sigterm handler.')
     return signal.signal(signal.SIGTERM, handler)
 
   def handle_sigterm(self, proc):
@@ -479,7 +483,7 @@
     Args:
       proc: The currently executing test process.
     """
-    print "Sigterm caught during test run. Killing test process."
+    LOGGER.warning('Sigterm caught during test run. Killing test process.')
     proc.kill()
 
   def _run(self, cmd, shards=1):
@@ -507,9 +511,9 @@
       thread_pool = pool.ThreadPool(processes=shards)
       for out, name, ret in thread_pool.imap_unordered(
         self.run_tests, test_shards):
-        print "Simulator %s" % name
+        LOGGER.info('Simulator %s', name)
         for line in out:
-          print line
+          LOGGER.info(line)
           parser.ProcessLine(line)
         returncode = ret if ret else 0
       thread_pool.close()
@@ -532,20 +536,22 @@
           break
         line = line.rstrip()
         parser.ProcessLine(line)
-        print line
+        LOGGER.info(line)
         sys.stdout.flush()
 
-      print "Waiting for test process to terminate."
+      LOGGER.info('Waiting for test process to terminate.')
       proc.wait()
-      print "Test process terminated."
+      LOGGER.info('Test process terminated.')
       self.set_sigterm_handler(old_handler)
       sys.stdout.flush()
+      LOGGER.debug('Stdout flushed after test process.')
 
       returncode = proc.returncode
 
     if self.xctest_path and parser.SystemAlertPresent():
       raise SystemAlertPresentError()
 
+    LOGGER.debug('Processing test results.')
     for test in parser.FailedTests(include_flaky=True):
       # Test cases are named as <test group>.<test case>. If the test case
       # is prefixed with "FLAKY_", it should be reported as flaked not failed.
@@ -556,8 +562,7 @@
 
     result.passed_tests.extend(parser.PassedTests(include_flaky=True))
 
-    print '%s returned %s' % (cmd[0], returncode)
-    print
+    LOGGER.info('%s returned %s\n', cmd[0], returncode)
 
     # iossim can return 5 if it exits noncleanly even if all tests passed.
     # Therefore we cannot rely on process exit code to determine success.
@@ -574,8 +579,7 @@
         # If the app crashed but not during any particular test case, assume
         # it crashed on startup. Try one more time.
         self.shutdown_and_restart()
-        print 'Crashed on startup, retrying...'
-        print
+        LOGGER.warning('Crashed on startup, retrying...\n')
         result = self._run(cmd)
 
       if result.crashed and not result.crashed_test:
@@ -591,8 +595,8 @@
           # If the app crashes during a specific test case, then resume at the
           # next test case. This is achieved by filtering out every test case
           # which has already run.
-          print 'Crashed during %s, resuming...' % result.crashed_test
-          print
+          LOGGER.warning('Crashed during %s, resuming...\n',
+                         result.crashed_test)
           result = self._run(self.get_launch_command(
               test_filter=passed + failed.keys() + flaked.keys(), invert=True,
           ))
@@ -601,20 +605,17 @@
           flaked.update(result.flaked_tests)
       except OSError as e:
         if e.errno == errno.E2BIG:
-          print 'Too many test cases to resume.'
-          print
+          LOGGER.error('Too many test cases to resume.')
         else:
           raise
 
       # Retry failed test cases.
       retry_results = {}
       if self.retries and failed:
-        print '%s tests failed and will be retried.' % len(failed)
-        print
+        LOGGER.warning('%s tests failed and will be retried.\n', len(failed))
         for i in xrange(self.retries):
           for test in failed.keys():
-            print 'Retry #%s for %s.' % (i + 1, test)
-            print
+            LOGGER.info('Retry #%s for %s.\n', i + 1, test)
             retry_result = self._run(self.get_launch_command(
                 test_filter=[test]
             ))
@@ -908,10 +909,10 @@
         runtime_id = runtime['identifier']
 
     name = '%s test' % self.platform
-    print 'creating simulator %s' % name
+    LOGGER.info('creating simulator %s', name)
     udid = subprocess.check_output([
       'xcrun', 'simctl', 'create', name, device_type_id, runtime_id]).rstrip()
-    print udid
+    LOGGER.info(udid)
 
     if self.use_trusted_cert:
       if not os.path.exists(self.wpr_tools_path):
@@ -925,7 +926,7 @@
   def deleteSimulator(self, udid=None):
     """Removes dynamically created simulator devices."""
     if udid:
-      print 'deleting simulator %s' % udid
+      LOGGER.info('deleting simulator %s', udid)
       subprocess.call(['xcrun', 'simctl', 'delete', udid])
 
   def get_launch_command(self, test_filter=None, invert=False, test_shard=None):
@@ -1002,7 +1003,7 @@
         '{}/Library/Developer/CoreSimulator/Devices/*/data/Library'.
         format(os.path.expanduser('~')))
     for trustStore in trustStores:
-      print 'Copying TrustStore to {}'.format(trustStore)
+      LOGGER.info('Copying TrustStore to %s', trustStore)
       if not os.path.exists(trustStore + "/Keychains/"):
         os.makedirs(trustStore + "/Keychains/")
       shutil.copy(cert_path, trustStore + "/Keychains/TrustStore.sqlite3")
@@ -1115,7 +1116,7 @@
         the output from the test
     '''
 
-    print 'Running test for recipe {}'.format(recipe_path)
+    LOGGER.info('Running test for recipe %s', recipe_path)
     self.wprgo_start(replay_path)
 
     # TODO(crbug.com/881096): Consider reusing get_launch_command
@@ -1165,7 +1166,7 @@
         break
       line = line.rstrip()
       parser.ProcessLine(line)
-      print line
+      LOGGER.info(line)
       sys.stdout.flush()
 
     proc.wait()
@@ -1191,8 +1192,8 @@
     # If the matching replay for the recipe doesn't exist, don't run it
     replay_path = '{}/{}'.format(self.replay_path, recipe_name)
     if not os.path.isfile(replay_path):
-      print 'No matching replay file for recipe {}'.format(
-        recipe_path)
+      LOGGER.error('No matching replay file for recipe %s',
+                   recipe_name)
       return False
 
     # if there is no filter, then run tests
@@ -1278,8 +1279,7 @@
             total_returncode = returncode
           if parser.CompletedWithoutFailure() == False:
             completed_without_failure = False
-          print '%s test returned %s' % (recipe_path, returncode)
-          print
+          LOGGER.info('%s test returned %s\n', recipe_path, returncode)
 
       self.deleteSimulator(udid)
 
@@ -1506,7 +1506,7 @@
     # in a few hours unexpectedly, which is assumed as an ios beta issue. Should
     # remove this method once the bug is fixed.
     if self.restart:
-      print 'Restarting device, wait for two minutes.'
+      LOGGER.info('Restarting device, wait for two minutes.')
       try:
         subprocess.check_call(
           ['idevicediagnostics', 'restart', '--udid', self.udid])
@@ -1527,7 +1527,7 @@
       ])
     except subprocess.CalledProcessError:
       # TODO(crbug.com/828951): Raise the exception when the bug is fixed.
-      print 'Warning: Failed to retrieve crash reports from the device.'
+      LOGGER.warning('Failed to retrieve crash reports from device.')
 
   def tear_down(self):
     """Performs cleanup actions which must occur after every test launch."""

diff --git a/ios/build/bots/scripts/test_runner_test.py b/ios/build/bots/scripts/test_runner_test.py
index 1da6292..dce3dc8 100755
--- a/ios/build/bots/scripts/test_runner_test.py
+++ b/ios/build/bots/scripts/test_runner_test.py

@@ -7,6 +7,7 @@
 
 import collections
 import glob
+import logging
 import os
 import subprocess
 import unittest
@@ -605,4 +606,6 @@
 
 
 if __name__ == '__main__':
+  logging.basicConfig(format='[%(asctime)s:%(levelname)s] %(message)s',
+    level=logging.DEBUG, datefmt='%I:%M:%S')
   unittest.main()
commit	ce66b9fa9f0e1428c0f05f6c7091d4185c681937	[log] [tgz]
author	Eric Aleshire <[email protected]>	Fri Feb 08 08:00:31 2019
committer	Commit Bot <[email protected]>	Fri Feb 08 08:00:31 2019
tree	86a422540b8d6900d2b2df6b559b93f76096f552
parent	a68807e1cbb03365f55c9d88eaa8924814974a17 [diff]