blob: bb0109706d1f344a10d88abf0bd67393d65050f9 [file] [log] [blame]
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#if defined(OS_WIN)
6#include <windows.h>
7#endif
8
9#include "chrome/gpu/gpu_watchdog_thread.h"
10
11#include "base/compiler_specific.h"
12#include "build/build_config.h"
13
14namespace {
15const int64 kCheckPeriod = 2000;
[email protected]49eab482010-11-24 00:07:4316
17void DoNothing() {
18}
[email protected]e09cee42010-11-09 01:50:0819}
20
[email protected]981c1c52010-12-01 20:09:2421GpuWatchdogThread::GpuWatchdogThread(int timeout)
[email protected]e09cee42010-11-09 01:50:0822 : base::Thread("Watchdog"),
[email protected]981c1c52010-12-01 20:09:2423 watched_message_loop_(MessageLoop::current()),
[email protected]49eab482010-11-24 00:07:4324 timeout_(timeout),
25 armed_(false),
[email protected]981c1c52010-12-01 20:09:2426#if defined(OS_WIN)
27 watched_thread_handle_(0),
[email protected]995a7f12011-02-11 23:07:1728 arm_cpu_time_(0),
[email protected]981c1c52010-12-01 20:09:2429#endif
[email protected]49eab482010-11-24 00:07:4330 ALLOW_THIS_IN_INITIALIZER_LIST(task_observer_(this)) {
[email protected]e09cee42010-11-09 01:50:0831 DCHECK(timeout >= 0);
[email protected]49eab482010-11-24 00:07:4332
[email protected]981c1c52010-12-01 20:09:2433#if defined(OS_WIN)
34 // GetCurrentThread returns a pseudo-handle that cannot be used by one thread
35 // to identify another. DuplicateHandle creates a "real" handle that can be
36 // used for this purpose.
37 BOOL result = DuplicateHandle(GetCurrentProcess(),
38 GetCurrentThread(),
39 GetCurrentProcess(),
40 &watched_thread_handle_,
41 THREAD_QUERY_INFORMATION,
42 FALSE,
43 0);
44 DCHECK(result);
45#endif
46
[email protected]49eab482010-11-24 00:07:4347 watched_message_loop_->AddTaskObserver(&task_observer_);
[email protected]e09cee42010-11-09 01:50:0848}
49
50GpuWatchdogThread::~GpuWatchdogThread() {
51 // Verify that the thread was explicitly stopped. If the thread is stopped
52 // implicitly by the destructor, CleanUp() will not be called.
53 DCHECK(!method_factory_.get());
[email protected]49eab482010-11-24 00:07:4354
[email protected]981c1c52010-12-01 20:09:2455#if defined(OS_WIN)
56 CloseHandle(watched_thread_handle_);
57#endif
58
[email protected]49eab482010-11-24 00:07:4359 watched_message_loop_->RemoveTaskObserver(&task_observer_);
60}
61
62void GpuWatchdogThread::PostAcknowledge() {
63 // Called on the monitored thread. Responds with OnAcknowledge. Cannot use
64 // the method factory. Rely on reference counting instead.
65 message_loop()->PostTask(
66 FROM_HERE,
67 NewRunnableMethod(this, &GpuWatchdogThread::OnAcknowledge));
[email protected]e09cee42010-11-09 01:50:0868}
69
70void GpuWatchdogThread::Init() {
71 // The method factory must be created on the watchdog thread.
72 method_factory_.reset(new MethodFactory(this));
73
74 // Schedule the first check.
75 OnCheck();
76}
77
78void GpuWatchdogThread::CleanUp() {
79 // The method factory must be destroyed on the watchdog thread.
80 method_factory_->RevokeAll();
81 method_factory_.reset();
[email protected]e09cee42010-11-09 01:50:0882}
83
[email protected]49eab482010-11-24 00:07:4384GpuWatchdogThread::GpuWatchdogTaskObserver::GpuWatchdogTaskObserver(
85 GpuWatchdogThread* watchdog)
86 : watchdog_(watchdog) {
87}
88
89GpuWatchdogThread::GpuWatchdogTaskObserver::~GpuWatchdogTaskObserver() {
90}
91
92void GpuWatchdogThread::GpuWatchdogTaskObserver::WillProcessTask(
93 const Task* task)
94{
95 CheckArmed();
96}
97
98void GpuWatchdogThread::GpuWatchdogTaskObserver::DidProcessTask(
99 const Task* task)
100{
101 CheckArmed();
102}
103
104void GpuWatchdogThread::GpuWatchdogTaskObserver::CheckArmed()
105{
106 // Acknowledge the watchdog if it has armed itself. The watchdog will not
107 // change its armed state until it is acknowledged.
108 if (watchdog_->armed()) {
109 watchdog_->PostAcknowledge();
110 }
111}
112
[email protected]e09cee42010-11-09 01:50:08113void GpuWatchdogThread::OnAcknowledge() {
[email protected]49eab482010-11-24 00:07:43114 // The check has already been acknowledged and another has already been
115 // scheduled by a previous call to OnAcknowledge. It is normal for a
116 // watched thread to see armed_ being true multiple times before
117 // the OnAcknowledge task is run on the watchdog thread.
118 if (!armed_)
119 return;
120
[email protected]cff2ac8e2011-02-25 22:08:49121 // Revoke any pending hang termination.
[email protected]e09cee42010-11-09 01:50:08122 method_factory_->RevokeAll();
[email protected]49eab482010-11-24 00:07:43123 armed_ = false;
[email protected]e09cee42010-11-09 01:50:08124
125 // The monitored thread has responded. Post a task to check it again.
[email protected]981c1c52010-12-01 20:09:24126 message_loop()->PostDelayedTask(
127 FROM_HERE,
128 method_factory_->NewRunnableMethod(&GpuWatchdogThread::OnCheck),
129 kCheckPeriod);
[email protected]e09cee42010-11-09 01:50:08130}
131
#if defined(OS_WIN)
// Returns the CPU time (kernel time plus user time) consumed so far by the
// watched thread, in milliseconds. If the underlying GetThreadTimes call
// fails, the result is 0 rather than an indeterminate value.
int64 GpuWatchdogThread::GetWatchedThreadTime() {
  // Zero-initialize every output. The DCHECK below compiles away in release
  // builds, so without this a failed GetThreadTimes call would leave these
  // structures uninitialized and the hang decision would be based on garbage.
  FILETIME creation_time = {0, 0};
  FILETIME exit_time = {0, 0};
  FILETIME user_time = {0, 0};
  FILETIME kernel_time = {0, 0};
  BOOL result = GetThreadTimes(watched_thread_handle_,
                               &creation_time,
                               &exit_time,
                               &kernel_time,
                               &user_time);
  DCHECK(result);

  // Reassemble each FILETIME's two 32-bit halves into one 64-bit value.
  ULARGE_INTEGER user_time64;
  user_time64.HighPart = user_time.dwHighDateTime;
  user_time64.LowPart = user_time.dwLowDateTime;

  ULARGE_INTEGER kernel_time64;
  kernel_time64.HighPart = kernel_time.dwHighDateTime;
  kernel_time64.LowPart = kernel_time.dwLowDateTime;

  // Time is reported in units of 100 nanoseconds, hence the division by 10000
  // to get milliseconds. Kernel and user time are summed to deal with two
  // kinds of hangs. One is where the GPU process is stuck in user level, never
  // calling into the kernel, so kernel time is not increasing. The other is
  // where either the kernel hangs and never returns to user level, or where
  // user level code calls into kernel level repeatedly, giving up its quanta
  // before it is tracked, for example a loop that repeatedly Sleeps.
  return static_cast<int64>(
      (user_time64.QuadPart + kernel_time64.QuadPart) / 10000);
}
#endif
164
[email protected]e09cee42010-11-09 01:50:08165void GpuWatchdogThread::OnCheck() {
[email protected]981c1c52010-12-01 20:09:24166 if (armed_)
167 return;
[email protected]49eab482010-11-24 00:07:43168
[email protected]981c1c52010-12-01 20:09:24169 // Must set armed before posting the task. This task might be the only task
170 // that will activate the TaskObserver on the watched thread and it must not
171 // miss the false -> true transition.
172 armed_ = true;
[email protected]e09cee42010-11-09 01:50:08173
[email protected]981c1c52010-12-01 20:09:24174#if defined(OS_WIN)
[email protected]995a7f12011-02-11 23:07:17175 arm_cpu_time_ = GetWatchedThreadTime();
[email protected]981c1c52010-12-01 20:09:24176#endif
177
[email protected]995a7f12011-02-11 23:07:17178 arm_absolute_time_ = base::Time::Now();
179
[email protected]981c1c52010-12-01 20:09:24180 // Post a task to the monitored thread that does nothing but wake up the
181 // TaskObserver. Any other tasks that are pending on the watched thread will
182 // also wake up the observer. This simply ensures there is at least one.
183 watched_message_loop_->PostTask(
184 FROM_HERE,
185 NewRunnableFunction(DoNothing));
186
187 // Post a task to the watchdog thread to exit if the monitored thread does
188 // not respond in time.
189 message_loop()->PostDelayedTask(
190 FROM_HERE,
[email protected]cff2ac8e2011-02-25 22:08:49191 method_factory_->NewRunnableMethod(
192 &GpuWatchdogThread::DeliberatelyCrashingToRecoverFromHang),
[email protected]981c1c52010-12-01 20:09:24193 timeout_);
[email protected]e09cee42010-11-09 01:50:08194}
195
[email protected]e09cee42010-11-09 01:50:08196// Use the --disable-gpu-watchdog command line switch to disable this.
[email protected]cff2ac8e2011-02-25 22:08:49197void GpuWatchdogThread::DeliberatelyCrashingToRecoverFromHang() {
[email protected]981c1c52010-12-01 20:09:24198#if defined(OS_WIN)
[email protected]995a7f12011-02-11 23:07:17199 // Defer termination until a certain amount of CPU time has elapsed on the
200 // watched thread.
201 int64 time_since_arm = GetWatchedThreadTime() - arm_cpu_time_;
[email protected]981c1c52010-12-01 20:09:24202 if (time_since_arm < timeout_) {
203 message_loop()->PostDelayedTask(
204 FROM_HERE,
[email protected]cff2ac8e2011-02-25 22:08:49205 method_factory_->NewRunnableMethod(
206 &GpuWatchdogThread::DeliberatelyCrashingToRecoverFromHang),
[email protected]981c1c52010-12-01 20:09:24207 timeout_ - time_since_arm);
208 return;
209 }
210#endif
211
[email protected]995a7f12011-02-11 23:07:17212 // If the watchdog woke up significantly behind schedule, disarm and reset
213 // the watchdog check. This is to prevent the watchdog thread from terminating
214 // when a machine wakes up from sleep or hibernation, which would otherwise
215 // appear to be a hang.
216 if ((base::Time::Now() - arm_absolute_time_).InMilliseconds() >
217 timeout_ * 2) {
218 armed_ = false;
219 OnCheck();
220 return;
221 }
222
[email protected]e09cee42010-11-09 01:50:08223 // Make sure the timeout period is on the stack before crashing.
224 volatile int timeout = timeout_;
225
226 // For minimal developer annoyance, don't keep crashing.
227 static bool crashed = false;
228 if (crashed)
229 return;
230
231#if defined(OS_WIN)
232 if (IsDebuggerPresent())
233 return;
234#endif
235
[email protected]e8ea65a2011-01-19 01:24:49236 LOG(ERROR) << "The GPU process hung. Terminating after "
237 << timeout_ << " ms.";
[email protected]e09cee42010-11-09 01:50:08238
239 volatile int* null_pointer = NULL;
240 *null_pointer = timeout;
241
242 crashed = true;
243}