blob: 4e2f7fb92ef59fbc762113b8987c26d1fe898a44 [file] [log] [blame]
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
#include "content/gpu/gpu_watchdog_thread.h"

#include "base/compiler_specific.h"
#include "base/process_util.h"
#include "base/process.h"
#include "build/build_config.h"
#include "content/common/result_codes.h"

// OS_WIN comes from build/build_config.h, so this guard has to be evaluated
// after that header has been included.
#if defined(OS_WIN)
#include <windows.h>
#endif
[email protected]e09cee42010-11-09 01:50:0816
17namespace {
18const int64 kCheckPeriod = 2000;
[email protected]49eab482010-11-24 00:07:4319
20void DoNothing() {
21}
[email protected]e09cee42010-11-09 01:50:0822}
23
[email protected]981c1c52010-12-01 20:09:2424GpuWatchdogThread::GpuWatchdogThread(int timeout)
[email protected]e09cee42010-11-09 01:50:0825 : base::Thread("Watchdog"),
[email protected]981c1c52010-12-01 20:09:2426 watched_message_loop_(MessageLoop::current()),
[email protected]49eab482010-11-24 00:07:4327 timeout_(timeout),
28 armed_(false),
[email protected]981c1c52010-12-01 20:09:2429#if defined(OS_WIN)
30 watched_thread_handle_(0),
[email protected]995a7f12011-02-11 23:07:1731 arm_cpu_time_(0),
[email protected]981c1c52010-12-01 20:09:2432#endif
[email protected]49eab482010-11-24 00:07:4333 ALLOW_THIS_IN_INITIALIZER_LIST(task_observer_(this)) {
[email protected]e09cee42010-11-09 01:50:0834 DCHECK(timeout >= 0);
[email protected]49eab482010-11-24 00:07:4335
[email protected]981c1c52010-12-01 20:09:2436#if defined(OS_WIN)
37 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
38 // to identify another. DuplicateHandle creates a "real" handle that can be
39 // used for this purpose.
40 BOOL result = DuplicateHandle(GetCurrentProcess(),
41 GetCurrentThread(),
42 GetCurrentProcess(),
43 &watched_thread_handle_,
44 THREAD_QUERY_INFORMATION,
45 FALSE,
46 0);
47 DCHECK(result);
48#endif
49
[email protected]49eab482010-11-24 00:07:4350 watched_message_loop_->AddTaskObserver(&task_observer_);
[email protected]e09cee42010-11-09 01:50:0851}
52
53GpuWatchdogThread::~GpuWatchdogThread() {
54 // Verify that the thread was explicitly stopped. If the thread is stopped
55 // implicitly by the destructor, CleanUp() will not be called.
56 DCHECK(!method_factory_.get());
[email protected]49eab482010-11-24 00:07:4357
[email protected]981c1c52010-12-01 20:09:2458#if defined(OS_WIN)
59 CloseHandle(watched_thread_handle_);
60#endif
61
[email protected]49eab482010-11-24 00:07:4362 watched_message_loop_->RemoveTaskObserver(&task_observer_);
63}
64
65void GpuWatchdogThread::PostAcknowledge() {
66 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
67 // the method factory. Rely on reference counting instead.
68 message_loop()->PostTask(
69 FROM_HERE,
70 NewRunnableMethod(this, &GpuWatchdogThread::OnAcknowledge));
[email protected]e09cee42010-11-09 01:50:0871}
72
73void GpuWatchdogThread::Init() {
74 // The method factory must be created on the watchdog thread.
75 method_factory_.reset(new MethodFactory(this));
76
77 // Schedule the first check.
78 OnCheck();
79}
80
81void GpuWatchdogThread::CleanUp() {
82 // The method factory must be destroyed on the watchdog thread.
83 method_factory_->RevokeAll();
84 method_factory_.reset();
[email protected]e09cee42010-11-09 01:50:0885}
86
[email protected]49eab482010-11-24 00:07:4387GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
88 GpuWatchdogThread* watchdog)
[email protected]b224f792011-04-20 16:02:2389 : watchdog_(watchdog) {
[email protected]49eab482010-11-24 00:07:4390}
91
92GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
93}
94
95void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
[email protected]b224f792011-04-20 16:02:2396 base::TimeTicks time_posted) {
[email protected]808f7fe72011-03-23 03:49:0297 watchdog_->CheckArmed();
[email protected]49eab482010-11-24 00:07:4398}
99
100void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
[email protected]b224f792011-04-20 16:02:23101 base::TimeTicks time_posted) {
[email protected]808f7fe72011-03-23 03:49:02102 watchdog_->CheckArmed();
[email protected]49eab482010-11-24 00:07:43103}
104
[email protected]b224f792011-04-20 16:02:23105void GpuWatchdogThread::CheckArmed() {
[email protected]49eab482010-11-24 00:07:43106 // Acknowledge the watchdog if it has armed itself. The watchdog will not
107 // change its armed state until it is acknowledged.
[email protected]808f7fe72011-03-23 03:49:02108 if (armed()) {
109 PostAcknowledge();
[email protected]49eab482010-11-24 00:07:43110 }
111}
112
[email protected]e09cee42010-11-09 01:50:08113void GpuWatchdogThread::OnAcknowledge() {
[email protected]49eab482010-11-24 00:07:43114 // The check has already been acknowledged and another has already been
115 // scheduled by a previous call to OnAcknowledge. It is normal for a
116 // watched thread to see armed_ being true multiple times before
117 // the OnAcknowledge task is run on the watchdog thread.
118 if (!armed_)
119 return;
120
[email protected]cff2ac8e2011-02-25 22:08:49121 // Revoke any pending hang termination.
[email protected]e09cee42010-11-09 01:50:08122 method_factory_->RevokeAll();
[email protected]49eab482010-11-24 00:07:43123 armed_ = false;
[email protected]e09cee42010-11-09 01:50:08124
125 // The monitored thread has responded. Post a task to check it again.
[email protected]981c1c52010-12-01 20:09:24126 message_loop()->PostDelayedTask(
127 FROM_HERE,
128 method_factory_->NewRunnableMethod(&GpuWatchdogThread::OnCheck),
129 kCheckPeriod);
[email protected]e09cee42010-11-09 01:50:08130}
131
#if defined(OS_WIN)
// Returns the combined kernel-mode and user-mode CPU time that the watched
// thread has consumed so far, converted to milliseconds. If GetThreadTimes
// fails, the result is 0.
int64 GpuWatchdogThread::GetWatchedThreadTime() {
  // Zero-initialize the out-structures. The success of GetThreadTimes is only
  // DCHECKed, so without this a failed call in a release build would feed
  // indeterminate stack contents into the returned value.
  FILETIME creation_time = {0, 0};
  FILETIME exit_time = {0, 0};
  FILETIME user_time = {0, 0};
  FILETIME kernel_time = {0, 0};
  BOOL result = GetThreadTimes(watched_thread_handle_,
                               &creation_time,
                               &exit_time,
                               &kernel_time,
                               &user_time);
  DCHECK(result);

  ULARGE_INTEGER user_time64;
  user_time64.HighPart = user_time.dwHighDateTime;
  user_time64.LowPart = user_time.dwLowDateTime;

  ULARGE_INTEGER kernel_time64;
  kernel_time64.HighPart = kernel_time.dwHighDateTime;
  kernel_time64.LowPart = kernel_time.dwLowDateTime;

  // Time is reported in units of 100 nanoseconds, hence the division by 10000
  // to get milliseconds. Kernel and user time are summed to deal with two
  // kinds of hangs. One is where the GPU process is stuck in user level, never
  // calling into the kernel, so kernel time is not increasing. The other is
  // where either the kernel hangs and never returns to user level, or where
  // user level code calls into kernel level repeatedly, giving up its quanta
  // before it is tracked, for example a loop that repeatedly Sleeps.
  return static_cast<int64>(
      (user_time64.QuadPart + kernel_time64.QuadPart) / 10000);
}
#endif
164
[email protected]e09cee42010-11-09 01:50:08165void GpuWatchdogThread::OnCheck() {
[email protected]981c1c52010-12-01 20:09:24166 if (armed_)
167 return;
[email protected]49eab482010-11-24 00:07:43168
[email protected]981c1c52010-12-01 20:09:24169 // Must set armed before posting the task. This task might be the only task
170 // that will activate the TaskObserver on the watched thread and it must not
171 // miss the false -> true transition.
172 armed_ = true;
[email protected]e09cee42010-11-09 01:50:08173
[email protected]981c1c52010-12-01 20:09:24174#if defined(OS_WIN)
[email protected]995a7f12011-02-11 23:07:17175 arm_cpu_time_ = GetWatchedThreadTime();
[email protected]981c1c52010-12-01 20:09:24176#endif
177
[email protected]995a7f12011-02-11 23:07:17178 arm_absolute_time_ = base::Time::Now();
179
[email protected]981c1c52010-12-01 20:09:24180 // Post a task to the monitored thread that does nothing but wake up the
181 // TaskObserver. Any other tasks that are pending on the watched thread will
182 // also wake up the observer. This simply ensures there is at least one.
183 watched_message_loop_->PostTask(
184 FROM_HERE,
185 NewRunnableFunction(DoNothing));
186
187 // Post a task to the watchdog thread to exit if the monitored thread does
188 // not respond in time.
189 message_loop()->PostDelayedTask(
190 FROM_HERE,
[email protected]cff2ac8e2011-02-25 22:08:49191 method_factory_->NewRunnableMethod(
[email protected]f9a7e08f2011-08-18 21:20:16192 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang),
[email protected]981c1c52010-12-01 20:09:24193 timeout_);
[email protected]e09cee42010-11-09 01:50:08194}
195
[email protected]e09cee42010-11-09 01:50:08196// Use the --disable-gpu-watchdog command line switch to disable this.
[email protected]f9a7e08f2011-08-18 21:20:16197void GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang() {
[email protected]981c1c52010-12-01 20:09:24198#if defined(OS_WIN)
[email protected]995a7f12011-02-11 23:07:17199 // Defer termination until a certain amount of CPU time has elapsed on the
200 // watched thread.
201 int64 time_since_arm = GetWatchedThreadTime() - arm_cpu_time_;
[email protected]981c1c52010-12-01 20:09:24202 if (time_since_arm < timeout_) {
203 message_loop()->PostDelayedTask(
204 FROM_HERE,
[email protected]cff2ac8e2011-02-25 22:08:49205 method_factory_->NewRunnableMethod(
[email protected]f9a7e08f2011-08-18 21:20:16206 &GpuWatchdogThread::DeliberatelyTerminateToRecoverFromHang),
[email protected]981c1c52010-12-01 20:09:24207 timeout_ - time_since_arm);
208 return;
209 }
210#endif
211
[email protected]995a7f12011-02-11 23:07:17212 // If the watchdog woke up significantly behind schedule, disarm and reset
213 // the watchdog check. This is to prevent the watchdog thread from terminating
214 // when a machine wakes up from sleep or hibernation, which would otherwise
215 // appear to be a hang.
216 if ((base::Time::Now() - arm_absolute_time_).InMilliseconds() >
217 timeout_ * 2) {
218 armed_ = false;
219 OnCheck();
220 return;
221 }
222
[email protected]f9a7e08f2011-08-18 21:20:16223 // For minimal developer annoyance, don't keep terminating. You need to skip
224 // the call to base::Process::Terminate below in a debugger for this to be
225 // useful.
226 static bool terminated = false;
227 if (terminated)
[email protected]e09cee42010-11-09 01:50:08228 return;
229
230#if defined(OS_WIN)
231 if (IsDebuggerPresent())
232 return;
233#endif
234
[email protected]e8ea65a2011-01-19 01:24:49235 LOG(ERROR) << "The GPU process hung. Terminating after "
236 << timeout_ << " ms.";
[email protected]e09cee42010-11-09 01:50:08237
[email protected]f9a7e08f2011-08-18 21:20:16238 base::Process current_process(base::GetCurrentProcessHandle());
239 current_process.Terminate(content::RESULT_CODE_HUNG);
[email protected]e09cee42010-11-09 01:50:08240
[email protected]f9a7e08f2011-08-18 21:20:16241 terminated = true;
[email protected]e09cee42010-11-09 01:50:08242}