xixuan | ebdb0a8 | 2017-04-28 18:25:02 | [diff] [blame] | 1 | #!/usr/bin/env python2 |
Paul Hobbs | ef4e070 | 2016-06-28 00:01:42 | [diff] [blame] | 2 | |
| 3 | # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | """Script to upload metrics from apache logs to Monarch. |
| 8 | |
| 9 | We are interested in static file bandwidth, so it parses out GET requests to |
| 10 | /static and uploads the sizes to a cumulative metric. |
| 11 | """ |
| 12 | from __future__ import print_function |
| 13 | |
| 14 | import argparse |
Paul Hobbs | 84acd9d | 2017-09-20 22:10:53 | [diff] [blame] | 15 | from logging import handlers |
Paul Hobbs | ef4e070 | 2016-06-28 00:01:42 | [diff] [blame] | 16 | import re |
| 17 | import sys |
| 18 | |
xixuan | ebdb0a8 | 2017-04-28 18:25:02 | [diff] [blame] | 19 | # TODO(ayatane): Fix cros lint pylint to work with virtualenv imports |
| 20 | # pylint: disable=import-error |
Paul Hobbs | ef4e070 | 2016-06-28 00:01:42 | [diff] [blame] | 21 | |
xixuan | ebdb0a8 | 2017-04-28 18:25:02 | [diff] [blame] | 22 | # only import setup_chromite before chromite import. |
| 23 | import setup_chromite # pylint: disable=unused-import |
Paul Hobbs | ef4e070 | 2016-06-28 00:01:42 | [diff] [blame] | 24 | from chromite.lib import ts_mon_config |
| 25 | from chromite.lib import metrics |
Paul Hobbs | 338baee | 2016-07-13 20:42:34 | [diff] [blame] | 26 | from chromite.lib import cros_logging as logging |
Paul Hobbs | fe0b1c6 | 2017-08-18 19:56:14 | [diff] [blame] | 27 | |
| 28 | |
| 29 | # Log rotation parameters. Keep about two weeks of old logs. |
| 30 | # |
| 31 | # For more, see the documentation in standard python library for |
| 32 | # logging.handlers.TimedRotatingFileHandler |
| 33 | _LOG_ROTATION_TIME = 'H' |
| 34 | _LOG_ROTATION_INTERVAL = 24 # hours |
| 35 | _LOG_ROTATION_BACKUP = 14 # backup counts |
Paul Hobbs | ef4e070 | 2016-06-28 00:01:42 | [diff] [blame] | 36 | |
| 37 | |
# Name of the cumulative metric that receives the response sizes.
STATIC_GET_METRIC_NAME = 'chromeos/devserver/apache/static_response_size'

# Matches an access-log line describing a GET below /static/ that was answered
# with status 200.
STATIC_GET_MATCHER = re.compile(
    # The line starts with the client's dotted-quad IPv4 address.
    r'^(?P<ip_addr>\d+\.\d+\.\d+\.\d+) '
    # The request: the path after /static/ (up to the first whitespace), then
    # whatever remains of the quoted request string.
    r'.*GET /static/(?P<endpoint>\S*)[^"]*" '
    # Status code 200 followed by the size field.
    r'200 (?P<size>\S+) .*')
| 44 | |
| 45 | |
# (base address, prefix length) pairs. InLab() hands each pair to
# MatchesSubnet() as its |base| and |mask| arguments.
LAB_SUBNETS = (
    ('172.17.40.0', 22),
    ('100.107.160.0', 19),
    ('100.115.128.0', 17),
    ('100.115.254.126', 25),
    ('100.107.141.128', 25),
    ('172.27.212.0', 22),
    ('100.107.156.192', 26),
    ('172.22.29.0', 25),
    ('172.22.38.0', 23),
    ('100.107.224.0', 23),
    ('100.107.226.0', 25),
    ('100.107.126.0', 25),
)
| 60 | |
def IPToNum(ip):
  """Returns the integer represented by an IPv4 string.

  The dot-separated components are folded most-significant first, eight bits
  at a time, so '1.2.3.4' becomes 0x01020304.

  Args:
    ip: An IPv4-formatted string, e.g. '172.17.40.1'.

  Returns:
    The address as an integer.

  Raises:
    ValueError: If a dot-separated component is not a decimal integer.
  """
  # A plain loop is used instead of reduce(): reduce is only a builtin on
  # Python 2, while this form gives the same result on Python 2 and 3.
  value = 0
  for octet in ip.split('.'):
    value = value * 2**8 + int(octet)
  return value
Paul Hobbs | ef4e070 | 2016-06-28 00:01:42 | [diff] [blame] | 70 | |
| 71 | |
def MatchesSubnet(ip, base, mask):
  """Tells whether the address |ip| belongs to the subnet |base|/|mask|.

  Args:
    ip: An IPv4 string to test.
    base: An IPv4 string identifying the subnet; only its leading |mask| bits
        take part in the comparison.
    mask: The number of leading bits which are not wildcards in the subnet.

  Returns:
    True if the leading |mask| bits of |ip| and |base| are equal.
  """
  addr_bits, subnet_bits = IPToNum(ip), IPToNum(base)
  # |mask| one-bits shifted up to the top of a 32-bit word.
  netmask = (2**mask - 1) << (32 - mask)
  # The addresses agree on the masked bits iff their XOR has none of them set.
  return not ((addr_bits ^ subnet_bits) & netmask)
| 84 | |
| 85 | |
def InLab(ip):
  """Tells whether |ip| falls inside one of the LAB_SUBNETS entries.

  Args:
    ip: An IPv4 address string to be tested.

  Returns:
    True as soon as one subnet in LAB_SUBNETS matches, False if none does.
  """
  for subnet_base, prefix_len in LAB_SUBNETS:
    if MatchesSubnet(ip, subnet_base, prefix_len):
      return True
  return False
| 94 | |
| 95 | |
# A milestone looks like 'R52'; applied with match(), so it is anchored at the
# start of the candidate string only.
MILESTONE_PATTERN = re.compile(r'R\d+')

# File names that are reported verbatim as the metric's filename field.
FILENAME_CONSTANTS = [
    'stateful.tgz',
    'client-autotest.tar.bz2',
    'chromiumos_test_image.bin',
    'autotest_server_package.tar.bz2',
]

# (compiled regex, reported label) pairs, tried in order by MatchAny().
#
# The constant names go through re.escape() so that their '.' characters only
# match a literal dot rather than any character.
FILENAME_PATTERNS = [
    (re.compile(re.escape(s)), s) for s in FILENAME_CONSTANTS
] + [
    (re.compile(r'dep-.*\.bz2'), 'dep-*.bz2'),
    (re.compile(r'chromeos_.*_delta_test\.bin-.*'),
     'chromeos_*_delta_test.bin-*'),
    (re.compile(r'chromeos_.*_full_test\.bin-.*'),
     'chromeos_*_full_test.bin-*'),
    (re.compile(r'test-.*\.bz2'), 'test-*.bz2'),
]
| 114 | |
| 115 | |
def MatchAny(needle, patterns, default=''):
  """Returns the value paired with the first pattern that matches |needle|.

  Args:
    needle: The string to test.
    patterns: An iterable of (compiled regex, value) pairs, tried in order
        using the regex's match() method.
    default: What to return when no pattern matches.

  Returns:
    The value of the first matching pair, or |default| if there is none.
  """
  hits = (label for regex, label in patterns if regex.match(needle))
  return next(hits, default)
| 121 | |
| 122 | |
def ParseStaticEndpoint(endpoint):
  """Parses a /static/.* URL path into build_config, milestone, and filename.

  Static endpoints are expected to be of the form
      /static/$BUILD_CONFIG/$MILESTONE-$VERSION/$FILENAME

  This function expects the '/static/' prefix to already be stripped off.

  Args:
    endpoint: A string which is the matched URL path after /static/

  Returns:
    A (build_config, milestone, filename) tuple of strings. A component that
    is absent from the path or cannot be recognized is the empty string:
    milestone is '' unless the text before the first '-' of the second path
    component matches MILESTONE_PATTERN, and filename is '' unless the last
    path component matches one of FILENAME_PATTERNS.
  """
  build_config, milestone, filename = [''] * 3

  # str.split() always yields at least one element, so parts[0] is safe.
  parts = endpoint.split('/')
  build_config = parts[0]

  if len(parts) >= 2:
    version = parts[1]
    # partition() is used instead of index(): a version without a '-' must
    # not raise, because an escaping exception would abort the processing of
    # every later log line.
    candidate, dash, _ = version.partition('-')
    if not dash:
      logging.debug('%s failed to parse. No "-" in version "%s"',
                    endpoint, version)
    elif MILESTONE_PATTERN.match(candidate):
      milestone = candidate

  if len(parts) >= 3:
    filename = MatchAny(parts[-1], FILENAME_PATTERNS)

  return build_config, milestone, filename
| 150 | |
| 151 | |
def EmitStaticRequestMetric(m):
  """Emits a Counter metric for successful GETs to /static endpoints.

  The counter named STATIC_GET_METRIC_NAME is incremented by the response
  size, with fields describing the requested file and the client.

  Args:
    m: A regex match object providing the 'endpoint', 'size' and 'ip_addr'
        groups.
  """
  build_config, milestone, filename = ParseStaticEndpoint(m.group('endpoint'))

  raw_size = m.group('size')
  try:
    response_bytes = int(raw_size)
  except ValueError:  # Zero is represented by "-"
    response_bytes = 0

  counter = metrics.Counter(STATIC_GET_METRIC_NAME)
  metric_fields = {
      'build_config': build_config,
      'milestone': milestone,
      'in_lab': InLab(m.group('ip_addr')),
      'endpoint': filename,
  }
  counter.increment_by(response_bytes, fields=metric_fields)
Paul Hobbs | ef4e070 | 2016-06-28 00:01:42 | [diff] [blame] | 171 | |
| 172 | |
def RunMatchers(stream, matchers):
  """Parses lines of |stream| using patterns and emitters from |matchers|

  Every line is tried against every matcher, in order; each matcher that
  matches has its emitter called with the resulting match object.

  Args:
    stream: A file object to read from. It is read line by line until
        readline() returns the empty string.
    matchers: A list of pairs of (matcher, emitter), where matcher is a regex
        and emitter is a function called when the regex matches.
  """
  # NOTE(review): presumably readline() is used instead of iterating over the
  # file so that lines are handled as they arrive rather than after a
  # read-ahead buffer fills (Python 2 file iteration reads ahead) -- confirm.
  for line in iter(stream.readline, ''):
    for matcher, emitter in matchers:
      m = matcher.match(line)
      if not m:
        continue
      # Log only once the regex has matched, so that the message is true:
      # an "Emitting" entry always corresponds to an emitter call.
      logging.debug('Emitting %s for input "%s"',
                    emitter.__name__, line.strip())
      emitter(m)
Paul Hobbs | ef4e070 | 2016-06-28 00:01:42 | [diff] [blame] | 188 | |
| 189 | |
# The (regex, emitter) pairs that main() passes to RunMatchers().
# TODO(phobbs) add a matcher for all requests, not just static files.
MATCHERS = [(STATIC_GET_MATCHER, EmitStaticRequestMetric)]
| 194 | |
| 195 | |
def ParseArgs():
  """Parses command line arguments.

  Returns:
    The argparse namespace; its |logfile| attribute holds the value of the
    required --logfile flag.
  """
  parser = argparse.ArgumentParser(
      description='Parses apache logs and emits metrics to Monarch')
  parser.add_argument('--logfile', required=True)
  parsed = parser.parse_args()
  return parsed
| 202 | |
| 203 | |
def main():
  """Sets up logging and runs matchers against stdin.

  The root logger gets a time-rotated file handler writing to the --logfile
  path and is set to DEBUG; stdin is then processed with MATCHERS inside the
  ts_mon global state context.
  """
  args = ParseArgs()

  root_logger = logging.getLogger()
  rotating_handler = handlers.TimedRotatingFileHandler(
      args.logfile,
      when=_LOG_ROTATION_TIME,
      interval=_LOG_ROTATION_INTERVAL,
      backupCount=_LOG_ROTATION_BACKUP)
  root_logger.addHandler(rotating_handler)
  root_logger.setLevel(logging.DEBUG)

  with ts_mon_config.SetupTsMonGlobalState(
      'devserver_apache_log_metrics', indirect=True):
    RunMatchers(sys.stdin, MATCHERS)


if __name__ == '__main__':
  main()