blob: 1f816c7677656424e62b7c1f3fb5f20b636db5da [file] [log] [blame]
Congbin Guo3afae6c2019-08-13 23:29:421# -*- coding: utf-8 -*-
2# Copyright 2019 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""A cherrypy application to check devserver health status."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
11import json
12import os
13import subprocess
14import threading
15import time
16
Amin Hassanid4e35392019-10-03 18:02:4417import cherrypy # pylint: disable=import-error
Vaibhav Rustagid10bd8d2019-11-20 19:32:2518
19try:
20 import psutil
21except ImportError:
  # Tolerate a missing psutil. lakitu doesn't have psutil installed, and for
  # the auto-update test lakitu copies the devserver code and uses that to
  # run the devserver. Without this fallback the failed import would make
  # the devserver fail and, with it, the auto-update test.
26 psutil = None
Congbin Guo3afae6c2019-08-13 23:29:4227
Achuith Bhandarkar662fb722019-10-31 23:12:4928import setup_chromite # pylint: disable=unused-import
Amin Hassanie427e212019-10-28 18:04:2729from chromite.lib import cros_update_progress
Achuith Bhandarkar662fb722019-10-31 23:12:4930from chromite.lib.xbuddy import cherrypy_log_util
Congbin Guo3afae6c2019-08-13 23:29:4231
Congbin Guo3afae6c2019-08-13 23:29:4232
def _Log(message, *args):
  """Log a message under this module's 'HEALTHCHECKER' tag.

  Args:
    message: The message to log.
    *args: Additional positional arguments, passed through to
      cherrypy_log_util.LogWithTag unchanged.

  Returns:
    Whatever cherrypy_log_util.LogWithTag returns.
  """
  tag = 'HEALTHCHECKER'
  return cherrypy_log_util.LogWithTag(tag, message, *args)
36
# Interval, in seconds, between successive samples of the disk and network IO
# counters.
STATS_INTERVAL = 10.0
# Number of bytes in one (decimal) gigabyte.
_1G = 10 ** 9
40
41
def require_psutil():
  """Build a decorator for functions that require psutil to run.

  Returns:
    A decorator. The function it produces calls through to the decorated
    function when the module-level `psutil` is truthy; otherwise it logs a
    message and returns None without calling the decorated function.
  """
  # Imported here so this helper brings its own dependency into scope.
  import functools

  def deco_require_psutil(func):
    """Wrap `func` so it only runs when psutil is available.

    Args:
      func: function to be called.

    Returns:
      The wrapping function.
    """
    # functools.wraps copies func's __name__ and __doc__ onto the wrapper so
    # that logs and introspection show the real function, not the wrapper.
    @functools.wraps(func)
    def func_require_psutil(*args, **kwargs):
      """Call the wrapped function only if psutil is installed.

      If psutil is not installed, skip calling the function.

      Args:
        *args: arguments for function to be called.
        **kwargs: keyword arguments for function to be called.

      Returns:
        The wrapped function's return value, or None when it is skipped.
      """
      if psutil:
        return func(*args, **kwargs)

      _Log('Python module psutil is not installed. Function call %s is '
           'skipped.' % func)
      return None

    return func_require_psutil

  return deco_require_psutil
66
67
def _get_process_count(process_cmd_pattern):
  """Get the count of processes that match the given command pattern.

  Runs `pgrep -fc <pattern>` and parses the number it prints on stdout.

  Args:
    process_cmd_pattern: The regex pattern of process command to match.

  Returns:
    The count of processes that match the given command pattern, or 0 when
    pgrep could not be executed or did not print a number.
  """
  try:
    # Use Popen instead of check_output since the latter cannot run with old
    # python version (less than 2.7)
    proc = subprocess.Popen(
        ['pgrep', '-fc', process_cmd_pattern],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    cmd_output, cmd_error = proc.communicate()
  except OSError as e:
    # Popen signals a command that cannot be executed (e.g. pgrep is not
    # installed) with OSError; it never raises CalledProcessError.
    _Log('Failed to run pgrep to get process count: %s' % e)
    return 0

  if cmd_error:
    _Log('Error happened when getting process count: %s' % cmd_error)

  try:
    return int(cmd_output)
  except ValueError:
    # pgrep printed something that is not a count (for example nothing at
    # all after reporting an error on stderr).
    _Log('Unexpected pgrep output when getting process count: %r' %
         cmd_output)
    return 0
92
93
def get_config():
  """Build the cherrypy config dictionary for this application.

  Returns:
    A dict mapping the root path '/' to its cherrypy settings.
  """
  root_settings = {
      # Switch cherrypy's trailing_slash tool off for this application.
      # NOTE(review): presumably so that /check_health is served as-is
      # instead of being redirected to /check_health/ -- confirm.
      'tools.trailing_slash.on': False,
  }
  return {'/': root_settings}
103
104
class Root(object):
  """Cherrypy Root class of the application.

  Serves a JSON health report from `index` and, when psutil is available,
  runs a daemon thread that keeps cached disk/network IO rates up to date.
  """

  def __init__(self, devserver, static_dir):
    """Store the collaborators and start the IO stats thread.

    Args:
      devserver: Object whose `staging_thread_count` attribute is reported in
        the health data.
      static_dir: Path handed to os.statvfs to measure free disk space.
    """
    self._static_dir = static_dir
    self._devserver = devserver

    # Cache of disk IO stats; a thread refreshes the stats every
    # STATS_INTERVAL seconds. No lock is used for these variables as the only
    # thread that writes to them is _refresh_io_stats.
    self.disk_read_bytes_per_sec = 0
    self.disk_write_bytes_per_sec = 0
    # Cache of network IO stats.
    self.network_sent_bytes_per_sec = 0
    self.network_recv_bytes_per_sec = 0
    self._start_io_stat_thread()

  @require_psutil()
  def _get_io_stats(self):
    """Get the IO stats as a dictionary.

    Returns:
      A dictionary with the cached disk and network rates (read/write/total
      and sent/recv/total, in bytes per second) plus the current value of
      psutil.cpu_percent().
    """
    return {'disk_read_bytes_per_second': self.disk_read_bytes_per_sec,
            'disk_write_bytes_per_second': self.disk_write_bytes_per_sec,
            'disk_total_bytes_per_second': (self.disk_read_bytes_per_sec +
                                            self.disk_write_bytes_per_sec),
            'network_sent_bytes_per_second': self.network_sent_bytes_per_sec,
            'network_recv_bytes_per_second': self.network_recv_bytes_per_sec,
            'network_total_bytes_per_second': (
                self.network_sent_bytes_per_sec +
                self.network_recv_bytes_per_sec),
            'cpu_percent': psutil.cpu_percent(), }

  @require_psutil()
  def _refresh_io_stats(self):
    """A call running in a thread to update IO stats periodically.

    Loops forever: sleeps STATS_INTERVAL seconds, reads the psutil disk and
    network counters, and stores the per-second deltas on the instance.
    """
    prev_disk_io_counters = psutil.disk_io_counters()
    prev_network_io_counters = psutil.net_io_counters()
    prev_read_time = time.time()
    while True:
      time.sleep(STATS_INTERVAL)
      now = time.time()
      interval = now - prev_read_time
      prev_read_time = now
      # Disk IO is for all disks.
      disk_io_counters = psutil.disk_io_counters()
      network_io_counters = psutil.net_io_counters()

      # time.time() follows the wall clock, so a clock adjustment can make
      # the interval zero or negative. Keep the previously published rates
      # for such a sample rather than dividing by zero (which would end this
      # thread) or publishing negative rates.
      if interval > 0:
        self.disk_read_bytes_per_sec = (
            disk_io_counters.read_bytes -
            prev_disk_io_counters.read_bytes) / interval
        self.disk_write_bytes_per_sec = (
            disk_io_counters.write_bytes -
            prev_disk_io_counters.write_bytes) / interval

        self.network_sent_bytes_per_sec = (
            network_io_counters.bytes_sent -
            prev_network_io_counters.bytes_sent) / interval
        self.network_recv_bytes_per_sec = (
            network_io_counters.bytes_recv -
            prev_network_io_counters.bytes_recv) / interval

      # Always advance the baseline so the next delta covers one interval.
      prev_disk_io_counters = disk_io_counters
      prev_network_io_counters = network_io_counters

  @require_psutil()
  def _start_io_stat_thread(self):
    """Start the thread to collect IO stats."""
    thread = threading.Thread(target=self._refresh_io_stats)
    # Daemon thread: the endless refresh loop must not keep the process alive.
    thread.daemon = True
    thread.start()

  @cherrypy.expose
  def index(self):
    """Collect the health status of devserver to see if it's ready for staging.

    Returns:
      A JSON dictionary containing all or some of the following fields:
      free_disk (float): free disk space in GB (10**9 bytes).
      staging_thread_count (int): number of devserver threads currently staging
                                  an image
      apache_client_count (int): count of Apache processes.
      telemetry_test_count (int): count of telemetry tests.
      gsutil_count (int): count of gsutil processes.
      au_process_count (int): number of entries returned by
                              cros_update_progress.GetAllRunningAUProcess().
      When psutil is installed, the keys produced by _get_io_stats
      (disk_*/network_* bytes per second and cpu_percent) are included too.
    """
    # Get free disk space. f_bavail is counted in units of f_frsize (the
    # fragment size), which is not guaranteed to equal f_bsize.
    stat = os.statvfs(self._static_dir)
    free_disk = stat.f_frsize * stat.f_bavail / _1G
    apache_client_count = _get_process_count('bin/apache2? -k start')
    telemetry_test_count = _get_process_count('python.*telemetry')
    gsutil_count = _get_process_count('gsutil')
    au_process_count = len(cros_update_progress.GetAllRunningAUProcess())

    health_data = {
        'free_disk': free_disk,
        'staging_thread_count': self._devserver.staging_thread_count,
        'apache_client_count': apache_client_count,
        'telemetry_test_count': telemetry_test_count,
        'gsutil_count': gsutil_count,
        'au_process_count': au_process_count,
    }
    # _get_io_stats returns None when psutil is unavailable.
    health_data.update(self._get_io_stats() or {})

    return json.dumps(health_data)