blob: 4252580ac987dc71f8cf11643c03887016ea40d8 [file] [log] [blame]
[email protected]3617ea92011-02-23 07:27:021// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
[email protected]8fd11832011-07-14 20:01:135#include "chrome/browser/metrics/thread_watcher.h"
6
[email protected]f8614c32011-06-19 23:21:107#include <math.h> // ceil
8
[email protected]28e76d82011-09-30 23:14:189#include "base/bind.h"
[email protected]8fd11832011-07-14 20:01:1310#include "base/debug/alias.h"
[email protected]f8614c32011-06-19 23:21:1011#include "base/string_tokenizer.h"
[email protected]3617ea92011-02-23 07:27:0212#include "base/threading/thread_restrictions.h"
[email protected]a55edc42011-02-24 20:17:2813#include "build/build_config.h"
[email protected]3617ea92011-02-23 07:27:0214#include "chrome/browser/metrics/metrics_service.h"
[email protected]f8614c32011-06-19 23:21:1015#include "chrome/common/chrome_switches.h"
[email protected]6a084f02011-07-26 21:34:3616#include "chrome/common/chrome_version_info.h"
[email protected]b69941522011-10-08 03:17:3717#include "chrome/common/logging_chrome.h"
[email protected]3617ea92011-02-23 07:27:0218
[email protected]0b565182011-03-02 18:11:1519#if defined(OS_WIN)
[email protected]85339942011-08-29 21:03:4320#include "base/win/windows_version.h"
[email protected]0b565182011-03-02 18:11:1521#endif
22
[email protected]631bb742011-11-02 11:29:3923using content::BrowserThread;
24
[email protected]3617ea92011-02-23 07:27:0225// ThreadWatcher methods and members.
[email protected]28e76d82011-09-30 23:14:1826ThreadWatcher::ThreadWatcher(const WatchingParams& params)
27 : thread_id_(params.thread_id),
28 thread_name_(params.thread_name),
29 watched_loop_(
30 BrowserThread::GetMessageLoopProxyForThread(params.thread_id)),
31 sleep_time_(params.sleep_time),
32 unresponsive_time_(params.unresponsive_time),
[email protected]3617ea92011-02-23 07:27:0233 ping_time_(base::TimeTicks::Now()),
[email protected]9a4386342011-04-23 22:41:2634 pong_time_(ping_time_),
[email protected]3617ea92011-02-23 07:27:0235 ping_sequence_number_(0),
36 active_(false),
[email protected]28e76d82011-09-30 23:14:1837 ping_count_(params.unresponsive_threshold),
[email protected]9a4386342011-04-23 22:41:2638 response_time_histogram_(NULL),
39 unresponsive_time_histogram_(NULL),
40 unresponsive_count_(0),
[email protected]8125a9a2011-05-17 15:06:2141 hung_processing_complete_(false),
[email protected]28e76d82011-09-30 23:14:1842 unresponsive_threshold_(params.unresponsive_threshold),
43 crash_on_hang_(params.crash_on_hang),
44 live_threads_threshold_(params.live_threads_threshold),
45 ALLOW_THIS_IN_INITIALIZER_LIST(weak_ptr_factory_(this)) {
[email protected]f8614c32011-06-19 23:21:1046 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]3617ea92011-02-23 07:27:0247 Initialize();
48}
49
[email protected]1d28d692011-02-23 22:05:3850ThreadWatcher::~ThreadWatcher() {}
51
[email protected]3617ea92011-02-23 07:27:0252// static
[email protected]28e76d82011-09-30 23:14:1853void ThreadWatcher::StartWatching(const WatchingParams& params) {
54 DCHECK_GE(params.sleep_time.InMilliseconds(), 0);
55 DCHECK_GE(params.unresponsive_time.InMilliseconds(),
56 params.sleep_time.InMilliseconds());
[email protected]3617ea92011-02-23 07:27:0257
[email protected]0b565182011-03-02 18:11:1558 // If we are not on WatchDogThread, then post a task to call StartWatching on
59 // WatchDogThread.
60 if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
61 WatchDogThread::PostTask(
[email protected]db5bdf32011-02-28 07:57:4062 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:1863 base::Bind(&ThreadWatcher::StartWatching, params));
[email protected]3617ea92011-02-23 07:27:0264 return;
65 }
66
[email protected]0b565182011-03-02 18:11:1567 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]3617ea92011-02-23 07:27:0268
69 // Create a new thread watcher object for the given thread and activate it.
[email protected]28e76d82011-09-30 23:14:1870 ThreadWatcher* watcher = new ThreadWatcher(params);
71
[email protected]3617ea92011-02-23 07:27:0272 DCHECK(watcher);
[email protected]dedfabae2011-03-04 04:00:4073 // If we couldn't register the thread watcher object, we are shutting down,
74 // then don't activate thread watching.
[email protected]28e76d82011-09-30 23:14:1875 if (!ThreadWatcherList::IsRegistered(params.thread_id))
[email protected]dedfabae2011-03-04 04:00:4076 return;
[email protected]3617ea92011-02-23 07:27:0277 watcher->ActivateThreadWatching();
78}
79
80void ThreadWatcher::ActivateThreadWatching() {
[email protected]0b565182011-03-02 18:11:1581 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]3617ea92011-02-23 07:27:0282 if (active_) return;
83 active_ = true;
[email protected]26ecd8a2e2011-09-02 00:51:5484 ping_count_ = unresponsive_threshold_;
[email protected]3628ecaf2011-05-27 16:10:5285 ResetHangCounters();
[email protected]3617ea92011-02-23 07:27:0286 MessageLoop::current()->PostTask(
87 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:1888 base::Bind(&ThreadWatcher::PostPingMessage,
89 weak_ptr_factory_.GetWeakPtr()));
[email protected]3617ea92011-02-23 07:27:0290}
91
92void ThreadWatcher::DeActivateThreadWatching() {
[email protected]0b565182011-03-02 18:11:1593 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]3617ea92011-02-23 07:27:0294 active_ = false;
95 ping_count_ = 0;
[email protected]28e76d82011-09-30 23:14:1896 weak_ptr_factory_.InvalidateWeakPtrs();
[email protected]3617ea92011-02-23 07:27:0297}
98
99void ThreadWatcher::WakeUp() {
[email protected]0b565182011-03-02 18:11:15100 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]3617ea92011-02-23 07:27:02101 // There is some user activity, PostPingMessage task of thread watcher if
102 // needed.
103 if (!active_) return;
104
[email protected]01a605f2011-09-06 00:14:42105 // Throw away the previous |unresponsive_count_| and start over again. Just
106 // before going to sleep, |unresponsive_count_| could be very close to
107 // |unresponsive_threshold_| and when user becomes active,
108 // |unresponsive_count_| can go over |unresponsive_threshold_| if there was no
109 // response for ping messages. Reset |unresponsive_count_| to start measuring
110 // the unresponsiveness of the threads when system becomes active.
111 unresponsive_count_ = 0;
112
[email protected]3617ea92011-02-23 07:27:02113 if (ping_count_ <= 0) {
[email protected]26ecd8a2e2011-09-02 00:51:54114 ping_count_ = unresponsive_threshold_;
[email protected]3628ecaf2011-05-27 16:10:52115 ResetHangCounters();
[email protected]3617ea92011-02-23 07:27:02116 PostPingMessage();
117 } else {
[email protected]26ecd8a2e2011-09-02 00:51:54118 ping_count_ = unresponsive_threshold_;
[email protected]3617ea92011-02-23 07:27:02119 }
120}
121
122void ThreadWatcher::PostPingMessage() {
[email protected]0b565182011-03-02 18:11:15123 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]3617ea92011-02-23 07:27:02124 // If we have stopped watching or if the user is idle, then stop sending
125 // ping messages.
126 if (!active_ || ping_count_ <= 0)
127 return;
128
129 // Save the current time when we have sent ping message.
130 ping_time_ = base::TimeTicks::Now();
131
[email protected]28e76d82011-09-30 23:14:18132 // Send a ping message to the watched thread. Callback will be called on
133 // the WatchDogThread.
134 base::Closure callback(
135 base::Bind(&ThreadWatcher::OnPongMessage, weak_ptr_factory_.GetWeakPtr(),
136 ping_sequence_number_));
[email protected]8fd11832011-07-14 20:01:13137 if (watched_loop_->PostTask(
[email protected]89fb232c22011-02-24 01:45:10138 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:18139 base::Bind(&ThreadWatcher::OnPingMessage, thread_id_,
140 callback))) {
[email protected]89fb232c22011-02-24 01:45:10141 // Post a task to check the responsiveness of watched thread.
142 MessageLoop::current()->PostDelayedTask(
143 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:18144 base::Bind(&ThreadWatcher::OnCheckResponsiveness,
145 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
[email protected]89fb232c22011-02-24 01:45:10146 unresponsive_time_.InMilliseconds());
147 } else {
148 // Watched thread might have gone away, stop watching it.
[email protected]89fb232c22011-02-24 01:45:10149 DeActivateThreadWatching();
150 }
[email protected]3617ea92011-02-23 07:27:02151}
152
153void ThreadWatcher::OnPongMessage(uint64 ping_sequence_number) {
[email protected]0b565182011-03-02 18:11:15154 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]9a4386342011-04-23 22:41:26155
[email protected]3617ea92011-02-23 07:27:02156 // Record watched thread's response time.
[email protected]9a4386342011-04-23 22:41:26157 base::TimeTicks now = base::TimeTicks::Now();
158 base::TimeDelta response_time = now - ping_time_;
159 response_time_histogram_->AddTime(response_time);
160
161 // Save the current time when we have got pong message.
162 pong_time_ = now;
[email protected]3617ea92011-02-23 07:27:02163
164 // Check if there are any extra pings in flight.
165 DCHECK_EQ(ping_sequence_number_, ping_sequence_number);
166 if (ping_sequence_number_ != ping_sequence_number)
167 return;
168
169 // Increment sequence number for the next ping message to indicate watched
170 // thread is responsive.
171 ++ping_sequence_number_;
172
173 // If we have stopped watching or if the user is idle, then stop sending
174 // ping messages.
175 if (!active_ || --ping_count_ <= 0)
176 return;
177
178 MessageLoop::current()->PostDelayedTask(
179 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:18180 base::Bind(&ThreadWatcher::PostPingMessage,
181 weak_ptr_factory_.GetWeakPtr()),
[email protected]3617ea92011-02-23 07:27:02182 sleep_time_.InMilliseconds());
183}
184
[email protected]28e76d82011-09-30 23:14:18185void ThreadWatcher::OnCheckResponsiveness(uint64 ping_sequence_number) {
[email protected]0b565182011-03-02 18:11:15186 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]3617ea92011-02-23 07:27:02187 // If we have stopped watching then consider thread as responding.
[email protected]28e76d82011-09-30 23:14:18188 if (!active_) {
189 responsive_ = true;
190 return;
191 }
[email protected]3617ea92011-02-23 07:27:02192 // If the latest ping_sequence_number_ is not same as the ping_sequence_number
193 // that is passed in, then we can assume OnPongMessage was called.
194 // OnPongMessage increments ping_sequence_number_.
[email protected]9a4386342011-04-23 22:41:26195 if (ping_sequence_number_ != ping_sequence_number) {
196 // Reset unresponsive_count_ to zero because we got a response from the
197 // watched thread.
[email protected]3628ecaf2011-05-27 16:10:52198 ResetHangCounters();
[email protected]28e76d82011-09-30 23:14:18199
200 responsive_ = true;
201 return;
[email protected]9a4386342011-04-23 22:41:26202 }
203 // Record that we got no response from watched thread.
204 GotNoResponse();
205
206 // Post a task to check the responsiveness of watched thread.
207 MessageLoop::current()->PostDelayedTask(
208 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:18209 base::Bind(&ThreadWatcher::OnCheckResponsiveness,
210 weak_ptr_factory_.GetWeakPtr(), ping_sequence_number_),
[email protected]9a4386342011-04-23 22:41:26211 unresponsive_time_.InMilliseconds());
[email protected]28e76d82011-09-30 23:14:18212 responsive_ = false;
[email protected]3617ea92011-02-23 07:27:02213}
214
215void ThreadWatcher::Initialize() {
[email protected]f8614c32011-06-19 23:21:10216 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]3617ea92011-02-23 07:27:02217 ThreadWatcherList::Register(this);
[email protected]9a4386342011-04-23 22:41:26218
219 const std::string response_time_histogram_name =
[email protected]f6179ec2011-03-17 00:25:46220 "ThreadWatcher.ResponseTime." + thread_name_;
[email protected]9a4386342011-04-23 22:41:26221 response_time_histogram_ = base::Histogram::FactoryTimeGet(
222 response_time_histogram_name,
223 base::TimeDelta::FromMilliseconds(1),
224 base::TimeDelta::FromSeconds(100), 50,
225 base::Histogram::kUmaTargetedHistogramFlag);
226
227 const std::string unresponsive_time_histogram_name =
228 "ThreadWatcher.Unresponsive." + thread_name_;
229 unresponsive_time_histogram_ = base::Histogram::FactoryTimeGet(
230 unresponsive_time_histogram_name,
[email protected]3617ea92011-02-23 07:27:02231 base::TimeDelta::FromMilliseconds(1),
232 base::TimeDelta::FromSeconds(100), 50,
233 base::Histogram::kUmaTargetedHistogramFlag);
[email protected]42499b82011-04-28 22:47:39234
235 const std::string responsive_count_histogram_name =
236 "ThreadWatcher.ResponsiveThreads." + thread_name_;
237 responsive_count_histogram_ = base::LinearHistogram::FactoryGet(
238 responsive_count_histogram_name, 1, 10, 11,
239 base::Histogram::kUmaTargetedHistogramFlag);
240
241 const std::string unresponsive_count_histogram_name =
242 "ThreadWatcher.UnresponsiveThreads." + thread_name_;
243 unresponsive_count_histogram_ = base::LinearHistogram::FactoryGet(
244 unresponsive_count_histogram_name, 1, 10, 11,
245 base::Histogram::kUmaTargetedHistogramFlag);
[email protected]3617ea92011-02-23 07:27:02246}
247
248// static
[email protected]f6179ec2011-03-17 00:25:46249void ThreadWatcher::OnPingMessage(const BrowserThread::ID& thread_id,
[email protected]28e76d82011-09-30 23:14:18250 const base::Closure& callback_task) {
[email protected]3617ea92011-02-23 07:27:02251 // This method is called on watched thread.
252 DCHECK(BrowserThread::CurrentlyOn(thread_id));
[email protected]0b565182011-03-02 18:11:15253 WatchDogThread::PostTask(FROM_HERE, callback_task);
[email protected]3617ea92011-02-23 07:27:02254}
255
[email protected]3628ecaf2011-05-27 16:10:52256void ThreadWatcher::ResetHangCounters() {
[email protected]9a4386342011-04-23 22:41:26257 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
258 unresponsive_count_ = 0;
[email protected]8125a9a2011-05-17 15:06:21259 hung_processing_complete_ = false;
[email protected]9a4386342011-04-23 22:41:26260}
261
262void ThreadWatcher::GotNoResponse() {
263 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]519b3ec72011-05-14 02:44:25264
[email protected]f8614c32011-06-19 23:21:10265 ++unresponsive_count_;
266 if (!IsVeryUnresponsive())
[email protected]519b3ec72011-05-14 02:44:25267 return;
268
[email protected]42499b82011-04-28 22:47:39269 // Record total unresponsive_time since last pong message.
270 base::TimeDelta unresponse_time = base::TimeTicks::Now() - pong_time_;
271 unresponsive_time_histogram_->AddTime(unresponse_time);
272
[email protected]8125a9a2011-05-17 15:06:21273 // We have already collected stats for the non-responding watched thread.
274 if (hung_processing_complete_)
275 return;
276
[email protected]f8614c32011-06-19 23:21:10277 // Record how other threads are responding.
278 uint32 responding_thread_count = 0;
279 uint32 unresponding_thread_count = 0;
280 ThreadWatcherList::GetStatusOfThreads(&responding_thread_count,
281 &unresponding_thread_count);
[email protected]42499b82011-04-28 22:47:39282
283 // Record how many watched threads are responding.
[email protected]f8614c32011-06-19 23:21:10284 responsive_count_histogram_->Add(responding_thread_count);
[email protected]42499b82011-04-28 22:47:39285
286 // Record how many watched threads are not responding.
[email protected]f8614c32011-06-19 23:21:10287 unresponsive_count_histogram_->Add(unresponding_thread_count);
[email protected]8125a9a2011-05-17 15:06:21288
[email protected]f8614c32011-06-19 23:21:10289 // Crash the browser if the watched thread is to be crashed on hang and if the
290 // number of other threads responding is equal to live_threads_threshold_.
[email protected]8fd11832011-07-14 20:01:13291 int thread_id = thread_id_;
292 base::debug::Alias(&thread_id);
[email protected]d31058ba2011-10-20 23:14:49293 if (crash_on_hang_ && responding_thread_count == live_threads_threshold_) {
294 static bool crashed_once = false;
295 if (!crashed_once) {
296 crashed_once = true;
297 CHECK(false);
298 }
299 }
[email protected]8125a9a2011-05-17 15:06:21300
301 hung_processing_complete_ = true;
[email protected]9a4386342011-04-23 22:41:26302}
303
[email protected]f8614c32011-06-19 23:21:10304bool ThreadWatcher::IsVeryUnresponsive() {
305 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
306 return unresponsive_count_ >= unresponsive_threshold_;
307}
308
[email protected]3617ea92011-02-23 07:27:02309// ThreadWatcherList methods and members.
[email protected]f6179ec2011-03-17 00:25:46310//
[email protected]3617ea92011-02-23 07:27:02311// static
[email protected]f8614c32011-06-19 23:21:10312ThreadWatcherList* ThreadWatcherList::g_thread_watcher_list_ = NULL;
[email protected]9a4386342011-04-23 22:41:26313// static
[email protected]3628ecaf2011-05-27 16:10:52314const int ThreadWatcherList::kSleepSeconds = 1;
[email protected]9a4386342011-04-23 22:41:26315// static
[email protected]3628ecaf2011-05-27 16:10:52316const int ThreadWatcherList::kUnresponsiveSeconds = 2;
[email protected]f8614c32011-06-19 23:21:10317// static
[email protected]26ecd8a2e2011-09-02 00:51:54318const int ThreadWatcherList::kUnresponsiveCount = 9;
[email protected]f8614c32011-06-19 23:21:10319// static
320const int ThreadWatcherList::kLiveThreadsThreshold = 1;
[email protected]3617ea92011-02-23 07:27:02321
322// static
[email protected]f8614c32011-06-19 23:21:10323void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
[email protected]f8614c32011-06-19 23:21:10324 uint32 unresponsive_threshold;
325 std::set<std::string> crash_on_hang_thread_names;
326 uint32 live_threads_threshold;
327 ParseCommandLine(command_line,
328 &unresponsive_threshold,
329 &crash_on_hang_thread_names,
330 &live_threads_threshold);
[email protected]dedfabae2011-03-04 04:00:40331
[email protected]26ecd8a2e2011-09-02 00:51:54332 ThreadWatcherObserver::SetupNotifications(
333 base::TimeDelta::FromSeconds(kSleepSeconds * unresponsive_threshold));
334
[email protected]f8614c32011-06-19 23:21:10335 WatchDogThread::PostDelayedTask(
336 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:18337 base::Bind(&ThreadWatcherList::InitializeAndStartWatching,
338 unresponsive_threshold,
339 crash_on_hang_thread_names,
340 live_threads_threshold),
[email protected]f8614c32011-06-19 23:21:10341 base::TimeDelta::FromSeconds(120).InMilliseconds());
[email protected]0b565182011-03-02 18:11:15342}
343
344// static
[email protected]3617ea92011-02-23 07:27:02345void ThreadWatcherList::StopWatchingAll() {
[email protected]f8614c32011-06-19 23:21:10346 ThreadWatcherObserver::RemoveNotifications();
347 DeleteAll();
[email protected]3617ea92011-02-23 07:27:02348}
349
350// static
[email protected]f8614c32011-06-19 23:21:10351void ThreadWatcherList::Register(ThreadWatcher* watcher) {
[email protected]9a4386342011-04-23 22:41:26352 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]f8614c32011-06-19 23:21:10353 if (!g_thread_watcher_list_)
354 return;
355 DCHECK(!g_thread_watcher_list_->Find(watcher->thread_id()));
356 g_thread_watcher_list_->registered_[watcher->thread_id()] = watcher;
357}
358
359// static
360bool ThreadWatcherList::IsRegistered(const BrowserThread::ID thread_id) {
361 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
362 return NULL != ThreadWatcherList::Find(thread_id);
363}
364
365// static
366void ThreadWatcherList::GetStatusOfThreads(uint32* responding_thread_count,
367 uint32* unresponding_thread_count) {
368 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
369 *responding_thread_count = 0;
370 *unresponding_thread_count = 0;
371 if (!g_thread_watcher_list_)
[email protected]42499b82011-04-28 22:47:39372 return;
[email protected]9a4386342011-04-23 22:41:26373
[email protected]f8614c32011-06-19 23:21:10374 for (RegistrationList::iterator it =
375 g_thread_watcher_list_->registered_.begin();
376 g_thread_watcher_list_->registered_.end() != it;
[email protected]9a4386342011-04-23 22:41:26377 ++it) {
[email protected]f8614c32011-06-19 23:21:10378 if (it->second->IsVeryUnresponsive())
379 ++(*unresponding_thread_count);
[email protected]ae53f112011-05-19 23:29:08380 else
[email protected]f8614c32011-06-19 23:21:10381 ++(*responding_thread_count);
[email protected]9a4386342011-04-23 22:41:26382 }
[email protected]9a4386342011-04-23 22:41:26383}
384
[email protected]f8614c32011-06-19 23:21:10385// static
[email protected]3617ea92011-02-23 07:27:02386void ThreadWatcherList::WakeUpAll() {
[email protected]0b565182011-03-02 18:11:15387 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
[email protected]f8614c32011-06-19 23:21:10388 if (!g_thread_watcher_list_)
[email protected]dedfabae2011-03-04 04:00:40389 return;
[email protected]f8614c32011-06-19 23:21:10390
391 for (RegistrationList::iterator it =
392 g_thread_watcher_list_->registered_.begin();
393 g_thread_watcher_list_->registered_.end() != it;
[email protected]3617ea92011-02-23 07:27:02394 ++it)
395 it->second->WakeUp();
396}
397
[email protected]f8614c32011-06-19 23:21:10398ThreadWatcherList::ThreadWatcherList() {
399 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
400 CHECK(!g_thread_watcher_list_);
401 g_thread_watcher_list_ = this;
[email protected]3617ea92011-02-23 07:27:02402}
403
[email protected]f8614c32011-06-19 23:21:10404ThreadWatcherList::~ThreadWatcherList() {
405 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
406 DCHECK(this == g_thread_watcher_list_);
407 g_thread_watcher_list_ = NULL;
408}
409
410// static
411void ThreadWatcherList::ParseCommandLine(
412 const CommandLine& command_line,
413 uint32* unresponsive_threshold,
414 std::set<std::string>* crash_on_hang_thread_names,
415 uint32* live_threads_threshold) {
416 // Determine |unresponsive_threshold| based on switches::kCrashOnHangSeconds.
417 *unresponsive_threshold = kUnresponsiveCount;
[email protected]85339942011-08-29 21:03:43418
[email protected]d8c65a82011-09-10 23:17:09419 // Increase the unresponsive_threshold on the Stable and Beta channels to
420 // reduce the number of crashes due to ThreadWatcher.
421 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
422 if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
423 *unresponsive_threshold *= 4;
424 } else if (channel == chrome::VersionInfo::CHANNEL_BETA) {
[email protected]85339942011-08-29 21:03:43425 *unresponsive_threshold *= 2;
[email protected]85339942011-08-29 21:03:43426 }
427
[email protected]d8c65a82011-09-10 23:17:09428#if defined(OS_WIN)
429 // For Windows XP (old systems), double the unresponsive_threshold to give
430 // the OS a chance to schedule UI/IO threads a time slice to respond with a
431 // pong message (to get around limitations with the OS).
432 if (base::win::GetVersion() <= base::win::VERSION_XP)
433 *unresponsive_threshold *= 2;
434#endif
435
[email protected]f8614c32011-06-19 23:21:10436 std::string crash_on_hang_seconds =
437 command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds);
438 if (!crash_on_hang_seconds.empty()) {
439 int crash_seconds = atoi(crash_on_hang_seconds.c_str());
440 if (crash_seconds > 0) {
441 *unresponsive_threshold = static_cast<uint32>(
442 ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
443 }
444 }
445
[email protected]6a084f02011-07-26 21:34:36446 std::string crash_on_hang_threads;
447
448 // Default to crashing the browser if UI or IO threads are not responsive
449 // except in stable channel.
[email protected]d8c65a82011-09-10 23:17:09450 if (channel == chrome::VersionInfo::CHANNEL_STABLE)
[email protected]6a084f02011-07-26 21:34:36451 crash_on_hang_threads = "";
452 else
453 crash_on_hang_threads = "UI,IO";
454
[email protected]f8614c32011-06-19 23:21:10455 if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
456 crash_on_hang_threads =
457 command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
458 }
459 StringTokenizer tokens(crash_on_hang_threads, ",");
460 while (tokens.GetNext())
461 crash_on_hang_thread_names->insert(tokens.token());
462
463 // Determine |live_threads_threshold| based on switches::kCrashOnLive.
464 *live_threads_threshold = kLiveThreadsThreshold;
465 if (command_line.HasSwitch(switches::kCrashOnLive)) {
466 std::string live_threads =
467 command_line.GetSwitchValueASCII(switches::kCrashOnLive);
468 *live_threads_threshold = static_cast<uint32>(atoi(live_threads.c_str()));
469 }
470}
471
472// static
473void ThreadWatcherList::InitializeAndStartWatching(
474 uint32 unresponsive_threshold,
475 const std::set<std::string>& crash_on_hang_thread_names,
476 uint32 live_threads_threshold) {
477 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
478
479 ThreadWatcherList* thread_watcher_list = new ThreadWatcherList();
480 CHECK(thread_watcher_list);
481
482 const base::TimeDelta kSleepTime =
483 base::TimeDelta::FromSeconds(kSleepSeconds);
484 const base::TimeDelta kUnresponsiveTime =
485 base::TimeDelta::FromSeconds(kUnresponsiveSeconds);
486
487 StartWatching(BrowserThread::UI, "UI", kSleepTime, kUnresponsiveTime,
488 unresponsive_threshold, crash_on_hang_thread_names,
489 live_threads_threshold);
490 StartWatching(BrowserThread::IO, "IO", kSleepTime, kUnresponsiveTime,
491 unresponsive_threshold, crash_on_hang_thread_names,
492 live_threads_threshold);
493 StartWatching(BrowserThread::DB, "DB", kSleepTime, kUnresponsiveTime,
494 unresponsive_threshold, crash_on_hang_thread_names,
495 live_threads_threshold);
496 StartWatching(BrowserThread::FILE, "FILE", kSleepTime, kUnresponsiveTime,
497 unresponsive_threshold, crash_on_hang_thread_names,
498 live_threads_threshold);
499 StartWatching(BrowserThread::CACHE, "CACHE", kSleepTime, kUnresponsiveTime,
500 unresponsive_threshold, crash_on_hang_thread_names,
501 live_threads_threshold);
[email protected]b69941522011-10-08 03:17:37502
503 BrowserThread::PostTask(
504 BrowserThread::UI,
505 FROM_HERE,
506 NewRunnableFunction(StartupTimeBomb::Disarm));
[email protected]f8614c32011-06-19 23:21:10507}
508
509// static
510void ThreadWatcherList::StartWatching(
511 const BrowserThread::ID& thread_id,
512 const std::string& thread_name,
513 const base::TimeDelta& sleep_time,
514 const base::TimeDelta& unresponsive_time,
515 uint32 unresponsive_threshold,
516 const std::set<std::string>& crash_on_hang_thread_names,
517 uint32 live_threads_threshold) {
518 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
519
[email protected]f8614c32011-06-19 23:21:10520 std::set<std::string>::const_iterator it =
521 crash_on_hang_thread_names.find(thread_name);
522 bool crash_on_hang = (it != crash_on_hang_thread_names.end());
523
[email protected]28e76d82011-09-30 23:14:18524 ThreadWatcher::StartWatching(
525 ThreadWatcher::WatchingParams(thread_id,
526 thread_name,
527 sleep_time,
528 unresponsive_time,
529 unresponsive_threshold,
530 crash_on_hang,
531 live_threads_threshold));
[email protected]f8614c32011-06-19 23:21:10532}
533
534// static
535void ThreadWatcherList::DeleteAll() {
536 if (!WatchDogThread::CurrentlyOnWatchDogThread()) {
537 WatchDogThread::PostTask(
538 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:18539 base::Bind(&ThreadWatcherList::DeleteAll));
[email protected]f8614c32011-06-19 23:21:10540 return;
541 }
542
543 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
544 if (!g_thread_watcher_list_)
545 return;
546
547 // Delete all thread watcher objects.
548 while (!g_thread_watcher_list_->registered_.empty()) {
549 RegistrationList::iterator it = g_thread_watcher_list_->registered_.begin();
550 delete it->second;
551 g_thread_watcher_list_->registered_.erase(it);
552 }
553
554 delete g_thread_watcher_list_;
555}
556
557// static
558ThreadWatcher* ThreadWatcherList::Find(const BrowserThread::ID& thread_id) {
559 DCHECK(WatchDogThread::CurrentlyOnWatchDogThread());
560 if (!g_thread_watcher_list_)
561 return NULL;
562 RegistrationList::iterator it =
563 g_thread_watcher_list_->registered_.find(thread_id);
564 if (g_thread_watcher_list_->registered_.end() == it)
[email protected]3617ea92011-02-23 07:27:02565 return NULL;
566 return it->second;
567}
568
[email protected]f8614c32011-06-19 23:21:10569// ThreadWatcherObserver methods and members.
570//
571// static
572ThreadWatcherObserver* ThreadWatcherObserver::g_thread_watcher_observer_ = NULL;
573
574ThreadWatcherObserver::ThreadWatcherObserver(
575 const base::TimeDelta& wakeup_interval)
576 : last_wakeup_time_(base::TimeTicks::Now()),
577 wakeup_interval_(wakeup_interval) {
578 CHECK(!g_thread_watcher_observer_);
579 g_thread_watcher_observer_ = this;
580}
581
582ThreadWatcherObserver::~ThreadWatcherObserver() {
583 DCHECK(this == g_thread_watcher_observer_);
584 g_thread_watcher_observer_ = NULL;
585}
586
587// static
588void ThreadWatcherObserver::SetupNotifications(
589 const base::TimeDelta& wakeup_interval) {
590 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
591 ThreadWatcherObserver* observer = new ThreadWatcherObserver(wakeup_interval);
592 MetricsService::SetUpNotifications(&observer->registrar_, observer);
593}
594
595// static
596void ThreadWatcherObserver::RemoveNotifications() {
597 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
598 if (!g_thread_watcher_observer_)
599 return;
600 g_thread_watcher_observer_->registrar_.RemoveAll();
601 delete g_thread_watcher_observer_;
602}
603
[email protected]6c2381d2011-10-19 02:52:53604void ThreadWatcherObserver::Observe(
605 int type,
606 const content::NotificationSource& source,
607 const content::NotificationDetails& details) {
[email protected]f8614c32011-06-19 23:21:10608 // There is some user activity, see if thread watchers are to be awakened.
609 base::TimeTicks now = base::TimeTicks::Now();
610 if ((now - last_wakeup_time_) < wakeup_interval_)
611 return;
612 last_wakeup_time_ = now;
613 WatchDogThread::PostTask(
614 FROM_HERE,
[email protected]28e76d82011-09-30 23:14:18615 base::Bind(&ThreadWatcherList::WakeUpAll));
[email protected]f8614c32011-06-19 23:21:10616}
617
[email protected]3617ea92011-02-23 07:27:02618// WatchDogThread methods and members.
[email protected]f6179ec2011-03-17 00:25:46619//
[email protected]0b565182011-03-02 18:11:15620// static
621base::Lock WatchDogThread::lock_;
622// static
623WatchDogThread* WatchDogThread::watchdog_thread_ = NULL;
624
[email protected]3617ea92011-02-23 07:27:02625// The WatchDogThread object must outlive any tasks posted to the IO thread
626// before the Quit task.
627DISABLE_RUNNABLE_METHOD_REFCOUNT(WatchDogThread);
628
[email protected]9c68d422011-10-18 21:11:47629WatchDogThread::WatchDogThread() : Thread("BrowserWatchdog") {
[email protected]3617ea92011-02-23 07:27:02630}
631
632WatchDogThread::~WatchDogThread() {
[email protected]3617ea92011-02-23 07:27:02633 // We cannot rely on our base class to stop the thread since we want our
634 // CleanUp function to run.
635 Stop();
636}
637
[email protected]0b565182011-03-02 18:11:15638// static
639bool WatchDogThread::CurrentlyOnWatchDogThread() {
640 base::AutoLock lock(lock_);
641 return watchdog_thread_ &&
642 watchdog_thread_->message_loop() == MessageLoop::current();
643}
644
645// static
646bool WatchDogThread::PostTask(const tracked_objects::Location& from_here,
[email protected]28e76d82011-09-30 23:14:18647 const base::Closure& task) {
[email protected]0b565182011-03-02 18:11:15648 return PostTaskHelper(from_here, task, 0);
649}
650
651// static
652bool WatchDogThread::PostDelayedTask(const tracked_objects::Location& from_here,
[email protected]28e76d82011-09-30 23:14:18653 const base::Closure& task,
[email protected]0b565182011-03-02 18:11:15654 int64 delay_ms) {
655 return PostTaskHelper(from_here, task, delay_ms);
656}
657
658// static
659bool WatchDogThread::PostTaskHelper(
660 const tracked_objects::Location& from_here,
[email protected]28e76d82011-09-30 23:14:18661 const base::Closure& task,
[email protected]0b565182011-03-02 18:11:15662 int64 delay_ms) {
663 {
664 base::AutoLock lock(lock_);
665
666 MessageLoop* message_loop = watchdog_thread_ ?
667 watchdog_thread_->message_loop() : NULL;
668 if (message_loop) {
669 message_loop->PostDelayedTask(from_here, task, delay_ms);
670 return true;
671 }
672 }
[email protected]0b565182011-03-02 18:11:15673
674 return false;
675}
676
[email protected]3617ea92011-02-23 07:27:02677void WatchDogThread::Init() {
678 // This thread shouldn't be allowed to perform any blocking disk I/O.
679 base::ThreadRestrictions::SetIOAllowed(false);
680
[email protected]0b565182011-03-02 18:11:15681 base::AutoLock lock(lock_);
682 CHECK(!watchdog_thread_);
683 watchdog_thread_ = this;
[email protected]5315ff72011-03-02 00:11:35684}
[email protected]ed590632011-03-02 00:17:37685
[email protected]0b565182011-03-02 18:11:15686void WatchDogThread::CleanUp() {
687 base::AutoLock lock(lock_);
688 watchdog_thread_ = NULL;
689}
[email protected]6d823b42011-09-05 02:54:02690
691namespace {
692
[email protected]b69941522011-10-08 03:17:37693// StartupWatchDogThread methods and members.
694//
695// Class for detecting hangs during startup.
696class StartupWatchDogThread : public base::Watchdog {
697 public:
698 // Constructor specifies how long the StartupWatchDogThread will wait before
699 // alarming.
700 explicit StartupWatchDogThread(const base::TimeDelta& duration)
701 : base::Watchdog(duration, "Startup watchdog thread", true) {
702 }
703
704 // Alarm is called if the time expires after an Arm() without someone calling
705 // Disarm(). When Alarm goes off, in release mode we get the crash dump
706 // without crashing and in debug mode we break into the debugger.
707 virtual void Alarm() {
708#ifndef NDEBUG
709 DCHECK(false);
710#else
711 logging::DumpWithoutCrashing();
712#endif
713 }
714
715 DISALLOW_COPY_AND_ASSIGN(StartupWatchDogThread);
716};
717
[email protected]6d823b42011-09-05 02:54:02718// ShutdownWatchDogThread methods and members.
719//
[email protected]b69941522011-10-08 03:17:37720// Class for detecting hangs during shutdown.
[email protected]6d823b42011-09-05 02:54:02721class ShutdownWatchDogThread : public base::Watchdog {
722 public:
723 // Constructor specifies how long the ShutdownWatchDogThread will wait before
724 // alarming.
725 explicit ShutdownWatchDogThread(const base::TimeDelta& duration)
726 : base::Watchdog(duration, "Shutdown watchdog thread", true) {
727 }
728
729 // Alarm is called if the time expires after an Arm() without someone calling
730 // Disarm(). We crash the browser if this method is called.
731 virtual void Alarm() {
732 CHECK(false);
733 }
734
735 DISALLOW_COPY_AND_ASSIGN(ShutdownWatchDogThread);
736};
737} // namespace
738
[email protected]b69941522011-10-08 03:17:37739// StartupTimeBomb methods and members.
740//
741// static
742base::Watchdog* StartupTimeBomb::startup_watchdog_ = NULL;
743
744// static
745void StartupTimeBomb::Arm(const base::TimeDelta& duration) {
746 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
747 DCHECK(!startup_watchdog_);
748 startup_watchdog_ = new StartupWatchDogThread(duration);
749 startup_watchdog_->Arm();
750}
751
752// static
753void StartupTimeBomb::Disarm() {
754 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
755 if (startup_watchdog_) {
756 startup_watchdog_->Disarm();
757 // Allow the watchdog thread to shutdown on UI. Watchdog thread shutdowns
758 // very fast.
759 base::ThreadRestrictions::SetIOAllowed(true);
760 delete startup_watchdog_;
761 startup_watchdog_ = NULL;
762 }
763}
764
[email protected]6d823b42011-09-05 02:54:02765// ShutdownWatcherHelper methods and members.
766//
[email protected]b69941522011-10-08 03:17:37767// ShutdownWatcherHelper is a wrapper class for detecting hangs during
[email protected]6d823b42011-09-05 02:54:02768// shutdown.
769ShutdownWatcherHelper::ShutdownWatcherHelper() : shutdown_watchdog_(NULL) {
770}
771
772ShutdownWatcherHelper::~ShutdownWatcherHelper() {
773 if (shutdown_watchdog_) {
774 shutdown_watchdog_->Disarm();
775 delete shutdown_watchdog_;
776 shutdown_watchdog_ = NULL;
777 }
778}
779
780void ShutdownWatcherHelper::Arm(const base::TimeDelta& duration) {
781 DCHECK(!shutdown_watchdog_);
[email protected]6e2c54e2011-09-06 20:35:39782 base::TimeDelta actual_duration = duration;
[email protected]d8c65a82011-09-10 23:17:09783
[email protected]6e2c54e2011-09-06 20:35:39784 chrome::VersionInfo::Channel channel = chrome::VersionInfo::GetChannel();
785 if (channel == chrome::VersionInfo::CHANNEL_STABLE) {
786 actual_duration *= 50;
787 } else if (channel == chrome::VersionInfo::CHANNEL_BETA ||
788 channel == chrome::VersionInfo::CHANNEL_DEV) {
789 actual_duration *= 25;
790 }
[email protected]d8c65a82011-09-10 23:17:09791
792#if defined(OS_WIN)
793 // On Windows XP, give twice the time for shutdown.
794 if (base::win::GetVersion() <= base::win::VERSION_XP)
795 actual_duration *= 2;
796#endif
797
[email protected]6e2c54e2011-09-06 20:35:39798 shutdown_watchdog_ = new ShutdownWatchDogThread(actual_duration);
[email protected]6d823b42011-09-05 02:54:02799 shutdown_watchdog_->Arm();
800}