summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--chrome/browser/metrics/thread_watcher.cc105
-rw-r--r--chrome/browser/metrics/thread_watcher.h98
-rw-r--r--chrome/browser/metrics/thread_watcher_unittest.cc95
-rw-r--r--chrome/common/chrome_switches.cc11
-rw-r--r--chrome/common/chrome_switches.h1
5 files changed, 226 insertions, 84 deletions
diff --git a/chrome/browser/metrics/thread_watcher.cc b/chrome/browser/metrics/thread_watcher.cc
index afc84f2..a76e6e3 100644
--- a/chrome/browser/metrics/thread_watcher.cc
+++ b/chrome/browser/metrics/thread_watcher.cc
@@ -11,6 +11,7 @@
#include "base/debug/alias.h"
#include "base/lazy_instance.h"
#include "base/string_split.h"
+#include "base/stringprintf.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_tokenizer.h"
#include "base/threading/thread_restrictions.h"
@@ -408,6 +409,18 @@ const int ThreadWatcherList::kUnresponsiveCount = 9;
// static
const int ThreadWatcherList::kLiveThreadsThreshold = 3;
+ThreadWatcherList::CrashDataThresholds::CrashDataThresholds(
+ uint32 live_threads_threshold,
+ uint32 unresponsive_threshold)
+ : live_threads_threshold(live_threads_threshold),
+ unresponsive_threshold(unresponsive_threshold) {
+}
+
+ThreadWatcherList::CrashDataThresholds::CrashDataThresholds()
+ : live_threads_threshold(kLiveThreadsThreshold),
+ unresponsive_threshold(kUnresponsiveCount) {
+}
+
// static
void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) {
uint32 unresponsive_threshold;
@@ -498,7 +511,7 @@ void ThreadWatcherList::ParseCommandLine(
const CommandLine& command_line,
uint32* unresponsive_threshold,
CrashOnHangThreadMap* crash_on_hang_threads) {
- // Determine |unresponsive_threshold| based on switches::kCrashOnHangSeconds.
+ // Initialize |unresponsive_threshold| to a default value.
*unresponsive_threshold = kUnresponsiveCount;
// Increase the unresponsive_threshold on the Stable and Beta channels to
@@ -518,47 +531,27 @@ void ThreadWatcherList::ParseCommandLine(
*unresponsive_threshold *= 2;
#endif
- std::string crash_on_hang_seconds =
- command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds);
- if (!crash_on_hang_seconds.empty()) {
- int crash_seconds = atoi(crash_on_hang_seconds.c_str());
- if (crash_seconds > 0) {
- *unresponsive_threshold = static_cast<uint32>(
- ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
- }
- }
-
+ uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds;
std::string crash_on_hang_thread_names;
-
- // Default to crashing the browser if UI or IO threads are not responsive
- // except in stable channel.
- if (channel == chrome::VersionInfo::CHANNEL_STABLE)
- crash_on_hang_thread_names = "";
- else
- crash_on_hang_thread_names = "UI:3,IO:9";
-
bool has_command_line_overwrite = false;
if (command_line.HasSwitch(switches::kCrashOnHangThreads)) {
crash_on_hang_thread_names =
command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads);
has_command_line_overwrite = true;
+ } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) {
+ // Default to crashing the browser if UI or IO or FILE threads are not
+ // responsive except in stable channel.
+ crash_on_hang_thread_names = base::StringPrintf(
+ "UI:%d:%d,IO:%d:%d,FILE:%d:%d",
+ kLiveThreadsThreshold, crash_seconds,
+ kLiveThreadsThreshold, crash_seconds,
+ kLiveThreadsThreshold, crash_seconds * 5);
}
- base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
- std::vector<std::string> values;
- while (tokens.GetNext()) {
- const std::string& token = tokens.token();
- base::SplitString(token, ':', &values);
- if (values.size() != 2)
- continue;
- std::string thread_name = values[0];
- uint32 live_threads_threshold;
- if (!base::StringToUint(values[1], &live_threads_threshold))
- continue;
- CrashOnHangThreadMap::iterator it =
- crash_on_hang_threads->find(thread_name);
- if (crash_on_hang_threads->end() == it)
- (*crash_on_hang_threads)[thread_name] = live_threads_threshold;
- }
+
+ ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names,
+ kLiveThreadsThreshold,
+ crash_seconds,
+ crash_on_hang_threads);
if (channel != chrome::VersionInfo::CHANNEL_CANARY ||
has_command_line_overwrite) {
@@ -576,8 +569,43 @@ void ThreadWatcherList::ParseCommandLine(
"ThreadWatcher", 100, "default_hung_threads",
2013, 10, 30, NULL));
int io_hung_thread_group = field_trial->AppendGroup("io_hung_thread", 10);
- if (field_trial->group() == io_hung_thread_group)
- it->second = INT_MAX; // Crash anytime IO thread hangs.
+ if (field_trial->group() == io_hung_thread_group) {
+ // Crash anytime IO thread hangs.
+ it->second.live_threads_threshold = INT_MAX;
+ }
+}
+
+// static
+void ThreadWatcherList::ParseCommandLineCrashOnHangThreads(
+ const std::string& crash_on_hang_thread_names,
+ uint32 default_live_threads_threshold,
+ uint32 default_crash_seconds,
+ CrashOnHangThreadMap* crash_on_hang_threads) {
+ base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
+ std::vector<std::string> values;
+ while (tokens.GetNext()) {
+ const std::string& token = tokens.token();
+ base::SplitString(token, ':', &values);
+ std::string thread_name = values[0];
+
+ uint32 live_threads_threshold = default_live_threads_threshold;
+ uint32 crash_seconds = default_crash_seconds;
+ if (values.size() >= 2 &&
+ (!base::StringToUint(values[1], &live_threads_threshold))) {
+ continue;
+ }
+ if (values.size() >= 3 &&
+ (!base::StringToUint(values[2], &crash_seconds))) {
+ continue;
+ }
+ uint32 unresponsive_threshold = static_cast<uint32>(
+ ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds));
+
+ CrashDataThresholds crash_data(live_threads_threshold,
+ unresponsive_threshold);
+ // Use the last specifier.
+ (*crash_on_hang_threads)[thread_name] = crash_data;
+ }
}
// static
@@ -630,7 +658,8 @@ void ThreadWatcherList::StartWatching(
uint32 live_threads_threshold = 0;
if (it != crash_on_hang_threads.end()) {
crash_on_hang = true;
- live_threads_threshold = it->second;
+ live_threads_threshold = it->second.live_threads_threshold;
+ unresponsive_threshold = it->second.unresponsive_threshold;
}
ThreadWatcher::StartWatching(
diff --git a/chrome/browser/metrics/thread_watcher.h b/chrome/browser/metrics/thread_watcher.h
index c9f824b..39d0468 100644
--- a/chrome/browser/metrics/thread_watcher.h
+++ b/chrome/browser/metrics/thread_watcher.h
@@ -308,10 +308,22 @@ class ThreadWatcherList {
// A map from BrowserThread to the actual instances.
typedef std::map<content::BrowserThread::ID, ThreadWatcher*> RegistrationList;
- // A map from thread names (UI, IO, etc) to |live_threads_threshold|.
+ // A map from thread names (UI, IO, etc) to |CrashDataThresholds|.
// |live_threads_threshold| specifies the maximum number of browser threads
// that have to be responsive when we want to crash the browser because of
- // hung watched thread.
+ // hung watched thread. This threshold allows us to either look for a system
+ // deadlock, or look for a solo hung thread. A small live_threads_threshold
+ // looks for a broad deadlock (few browser threads left running), and a large
+ // threshold looks for a single hung thread (this in only appropriate for a
+ // thread that *should* never have much jank, such as the IO).
+ //
+ // |unresponsive_threshold| specifies the number of unanswered ping messages
+ // after which watched (UI, IO, etc) thread is considered as not responsive.
+ // We translate "time" (given in seconds) into a number of pings. As a result,
+ // we only declare a thread unresponsive when a lot of "time" has passed (many
+ // pings), and yet our pinging thread has continued to process messages (so we
+ // know the entire PC is not hung). Set this number higher to crash less
+ // often, and lower to crash more often.
//
// The map lists all threads (by name) that can induce a crash by hanging. It
// is populated from the command line, or given a default list. See
@@ -319,18 +331,52 @@ class ThreadWatcherList {
// watched, as they provide the system context of how hung *other* threads
// are.
//
- // Example 1: If the value for "IO" was 3, then we would crash if at least one
- // thread is responding and total responding threads is less than or equal to
- // 3 (this thread, plus at least one other thread is unresponsive). We would
- // not crash if none of the threads are not responding, as we'd assume such
- // large hang counts mean that the system is generally unresponsive.
- // Example 2: If the value for "UI" was INT_MAX, then we would always crash if
- // the UI thread was hung, no matter what the other threads are doing.
- // Example 3: If the value of "FILE" was 5, then we would only crash if the
- // FILE thread was the ONLY hung thread (because we watch 6 threads). IF there
- // was another unresponsive thread, we would not consider this a problem worth
- // crashing for.
- typedef std::map<std::string, uint32> CrashOnHangThreadMap;
+ // ThreadWatcher monitors six browser threads (i.e., UI, IO, DB, FILE,
+ // FILE_USER_BLOCKING and CACHE). Out of the 6 threads, any subset may be
+ // watched, to potentially cause a crash. The following example's command line
+ // causes exactly 3 threads to be watched.
+ //
+ // The example command line argument consists of "UI:3:18,IO:3:18,FILE:5:90".
+ // In that string, the first parameter specifies the thread_id: UI, IO or
+ // FILE. The second parameter specifies |live_threads_threshold|. For UI and
+ // IO threads, we would crash if the number of threads responding is less than
+ // or equal to 3. The third parameter specifies the unresponsive threshold
+ // seconds. This number is used to calculate |unresponsive_threshold|. In this
+ // example for UI and IO threads, we would crash if those threads don't
+ // respond for 18 seconds (or 9 unanswered ping messages) and for FILE thread,
+ // crash_seconds is set to 90 seconds (or 45 unanswered ping messages).
+ //
+ // The following examples explain how the data in |CrashDataThresholds|
+ // controls the crashes.
+ //
+ // Example 1: If the |live_threads_threshold| value for "IO" was 3 and
+ // unresponsive threshold seconds is 18 (or |unresponsive_threshold| is 9),
+ // then we would crash if the IO thread was hung (9 unanswered ping messages)
+ // and if at least one thread is responding and total responding threads is
+ // less than or equal to 3 (this thread, plus at least one other thread is
+ // unresponsive). We would not crash if none of the threads are responding, as
+ // we'd assume such large hang counts mean that the system is generally
+ // unresponsive.
+ // Example 2: If the |live_threads_threshold| value for "UI" was any number
+ // higher than 6 and unresponsive threshold seconds is 18 (or
+ // |unresponsive_threshold| is 9), then we would always crash if the UI thread
+ // was hung (9 unanswered ping messages), no matter what the other threads are
+ // doing.
+ // Example 3: If the |live_threads_threshold| value of "FILE" was 5 and
+ // unresponsive threshold seconds is 90 (or |unresponsive_threshold| is 45),
+ // then we would only crash if the FILE thread was the ONLY hung thread
+ // (because we watch 6 threads). If there was another unresponsive thread, we
+ // would not consider this a problem worth crashing for. FILE thread would be
+ // considered as hung if it didn't respond for 45 ping messages.
+ struct CrashDataThresholds {
+ CrashDataThresholds(uint32 live_threads_threshold,
+ uint32 unresponsive_threshold);
+ CrashDataThresholds();
+
+ uint32 live_threads_threshold;
+ uint32 unresponsive_threshold;
+ };
+ typedef std::map<std::string, CrashDataThresholds> CrashOnHangThreadMap;
// This method posts a task on WatchDogThread to start watching all browser
// threads.
@@ -361,7 +407,9 @@ class ThreadWatcherList {
// Allow tests to access our innards for testing purposes.
friend class CustomThreadWatcher;
friend class ThreadWatcherTest;
- FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CommandLineArgs);
+ FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesOnlyArgs);
+ FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs);
+ FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CrashOnHangThreadsAllArgs);
// This singleton holds the global list of registered ThreadWatchers.
ThreadWatcherList();
@@ -369,16 +417,26 @@ class ThreadWatcherList {
// Destructor deletes all registered ThreadWatcher instances.
virtual ~ThreadWatcherList();
- // Parses the command line to get |unresponsive_threshold| from
- // switches::kCrashOnHangSeconds, |crash_on_hang| thread names from
- // switches::kCrashOnHangThreads and |live_threads_threshold| from
- // switches::kCrashOnLive. |crash_on_hang_threads| is a map of
- // |crash_on_hang| thread's names to |live_threads_threshold|.
+ // Parses the command line to get |crash_on_hang_threads| map from
+ // switches::kCrashOnHangThreads. |crash_on_hang_threads| is a map of
+ // |crash_on_hang| thread's names to |CrashDataThresholds|.
static void ParseCommandLine(
const CommandLine& command_line,
uint32* unresponsive_threshold,
CrashOnHangThreadMap* crash_on_hang_threads);
+ // Parses the argument |crash_on_hang_thread_names| and creates
+ // |crash_on_hang_threads| map of |crash_on_hang| thread's names to
+ // |CrashDataThresholds|. If |crash_on_hang_thread_names| doesn't specify
+ // |live_threads_threshold|, then it uses |default_live_threads_threshold| as
+ // the value. If |crash_on_hang_thread_names| doesn't specify |crash_seconds|,
+ // then it uses |default_crash_seconds| as the value.
+ static void ParseCommandLineCrashOnHangThreads(
+ const std::string& crash_on_hang_thread_names,
+ uint32 default_live_threads_threshold,
+ uint32 default_crash_seconds,
+ CrashOnHangThreadMap* crash_on_hang_threads);
+
// This constructs the |ThreadWatcherList| singleton and starts watching
// browser threads by calling StartWatching() on each browser thread that is
// watched. It disarms StartupTimeBomb.
diff --git a/chrome/browser/metrics/thread_watcher_unittest.cc b/chrome/browser/metrics/thread_watcher_unittest.cc
index e097aee..529358d 100644
--- a/chrome/browser/metrics/thread_watcher_unittest.cc
+++ b/chrome/browser/metrics/thread_watcher_unittest.cc
@@ -242,6 +242,8 @@ class ThreadWatcherTest : public ::testing::Test {
static const std::string webkit_thread_name;
static const std::string crash_on_hang_seconds;
static const std::string crash_on_hang_thread_names;
+ static const std::string thread_names_and_live_threshold;
+ static const std::string crash_on_hang_thread_data;
CustomThreadWatcher* io_watcher_;
CustomThreadWatcher* webkit_watcher_;
ThreadWatcherList* thread_watcher_list_;
@@ -326,47 +328,106 @@ const std::string ThreadWatcherTest::io_thread_name = "IO";
const BrowserThread::ID ThreadWatcherTest::webkit_thread_id =
BrowserThread::WEBKIT_DEPRECATED;
const std::string ThreadWatcherTest::webkit_thread_name = "WEBKIT";
-const std::string ThreadWatcherTest::crash_on_hang_seconds = "24";
-const std::string ThreadWatcherTest::crash_on_hang_thread_names = "IO:3,UI:3";
+const std::string ThreadWatcherTest::crash_on_hang_thread_names = "UI,IO";
+const std::string ThreadWatcherTest::thread_names_and_live_threshold =
+ "UI:4,IO:4";
+const std::string ThreadWatcherTest::crash_on_hang_thread_data =
+ "UI:5:12,IO:5:12,FILE:5:12";
-TEST_F(ThreadWatcherTest, CommandLineArgs) {
+TEST_F(ThreadWatcherTest, ThreadNamesOnlyArgs) {
// Setup command_line arguments.
CommandLine command_line(CommandLine::NO_PROGRAM);
- command_line.AppendSwitchASCII(switches::kCrashOnHangSeconds,
- crash_on_hang_seconds);
command_line.AppendSwitchASCII(switches::kCrashOnHangThreads,
crash_on_hang_thread_names);
// Parse command_line arguments.
+ ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads;
uint32 unresponsive_threshold;
+ ThreadWatcherList::ParseCommandLine(command_line,
+ &unresponsive_threshold,
+ &crash_on_hang_threads);
+
+ // Verify the data.
+ base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
+ std::vector<std::string> values;
+ while (tokens.GetNext()) {
+ const std::string& token = tokens.token();
+ base::SplitString(token, ':', &values);
+ std::string thread_name = values[0];
+
+ ThreadWatcherList::CrashOnHangThreadMap::iterator it =
+ crash_on_hang_threads.find(thread_name);
+ bool crash_on_hang = (it != crash_on_hang_threads.end());
+ EXPECT_TRUE(crash_on_hang);
+ EXPECT_LT(0u, it->second.live_threads_threshold);
+ EXPECT_LT(0u, it->second.unresponsive_threshold);
+ }
+}
+
+TEST_F(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs) {
+ // Setup command_line arguments.
+ CommandLine command_line(CommandLine::NO_PROGRAM);
+ command_line.AppendSwitchASCII(switches::kCrashOnHangThreads,
+ thread_names_and_live_threshold);
+
+ // Parse command_line arguments.
ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads;
+ uint32 unresponsive_threshold;
ThreadWatcherList::ParseCommandLine(command_line,
&unresponsive_threshold,
&crash_on_hang_threads);
// Verify the data.
- uint32 crash_on_unresponsive_seconds =
- ThreadWatcherList::kUnresponsiveSeconds * unresponsive_threshold;
- EXPECT_EQ(static_cast<int>(crash_on_unresponsive_seconds),
- atoi(crash_on_hang_seconds.c_str()));
+ base::StringTokenizer tokens(thread_names_and_live_threshold, ",");
+ std::vector<std::string> values;
+ while (tokens.GetNext()) {
+ const std::string& token = tokens.token();
+ base::SplitString(token, ':', &values);
+ std::string thread_name = values[0];
- // Check ThreadWatcherTestList has the right crash_on_hang_thread_names.
- base::StringTokenizer tokens(crash_on_hang_thread_names, ",");
+ ThreadWatcherList::CrashOnHangThreadMap::iterator it =
+ crash_on_hang_threads.find(thread_name);
+ bool crash_on_hang = (it != crash_on_hang_threads.end());
+ EXPECT_TRUE(crash_on_hang);
+ EXPECT_EQ(4u, it->second.live_threads_threshold);
+ EXPECT_LT(0u, it->second.unresponsive_threshold);
+ }
+}
+
+TEST_F(ThreadWatcherTest, CrashOnHangThreadsAllArgs) {
+ // Setup command_line arguments.
+ CommandLine command_line(CommandLine::NO_PROGRAM);
+ command_line.AppendSwitchASCII(switches::kCrashOnHangThreads,
+ crash_on_hang_thread_data);
+
+ // Parse command_line arguments.
+ ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads;
+ uint32 unresponsive_threshold;
+ ThreadWatcherList::ParseCommandLine(command_line,
+ &unresponsive_threshold,
+ &crash_on_hang_threads);
+
+ // Verify the data.
+ base::StringTokenizer tokens(crash_on_hang_thread_data, ",");
std::vector<std::string> values;
while (tokens.GetNext()) {
const std::string& token = tokens.token();
base::SplitString(token, ':', &values);
- if (values.size() != 2)
- continue;
std::string thread_name = values[0];
- uint32 live_threads_threshold;
- if (!base::StringToUint(values[1], &live_threads_threshold))
- continue;
+
ThreadWatcherList::CrashOnHangThreadMap::iterator it =
crash_on_hang_threads.find(thread_name);
+
bool crash_on_hang = (it != crash_on_hang_threads.end());
EXPECT_TRUE(crash_on_hang);
- EXPECT_EQ(it->second, live_threads_threshold);
+
+ uint32 crash_live_threads_threshold = it->second.live_threads_threshold;
+ EXPECT_EQ(5u, crash_live_threads_threshold);
+
+ uint32 crash_unresponsive_threshold = it->second.unresponsive_threshold;
+ uint32 crash_on_unresponsive_seconds =
+ ThreadWatcherList::kUnresponsiveSeconds * crash_unresponsive_threshold;
+ EXPECT_EQ(12u, crash_on_unresponsive_seconds);
}
}
diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc
index 5a301af..e9e2a06 100644
--- a/chrome/common/chrome_switches.cc
+++ b/chrome/common/chrome_switches.cc
@@ -202,19 +202,14 @@ const char kContentSettings2[] = "new-content-settings";
// string value, the 2 letter code from ISO 3166-1.
const char kCountry[] = "country";
-// Causes the browser process to crash if browser threads are not responding
-// for the given number of seconds.
-const char kCrashOnHangSeconds[] = "crash-on-hang-seconds";
-
// Comma-separated list of BrowserThreads that cause browser process to crash
// if the given browser thread is not responsive. UI,IO,DB,FILE,CACHE are the
// list of BrowserThreads that are supported.
//
// For example:
-// --crash-on-hang-threads=UI:3,IO:3 --> Crash the browser if UI or IO thread
-// is not responsive and the number of
-// browser threads that are responding
-// is less than or equal to 3.
+// --crash-on-hang-threads=UI:3:18,IO:3:18 --> Crash the browser if UI or IO
+// is not responsive for 18 seconds and the number of browser threads that
+// are responding is less than or equal to 3.
const char kCrashOnHangThreads[] = "crash-on-hang-threads";
// Some platforms like ChromeOS default to empty desktop.
diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h
index 9d7f1df..df5c0c3 100644
--- a/chrome/common/chrome_switches.h
+++ b/chrome/common/chrome_switches.h
@@ -69,7 +69,6 @@ extern const char kComponentUpdaterDebug[];
extern const char kConflictingModulesCheck[];
extern const char kContentSettings2[];
extern const char kCountry[];
-extern const char kCrashOnHangSeconds[];
extern const char kCrashOnHangThreads[];
extern const char kCreateBrowserOnStartupForTests[];
extern const char kDebugEnableFrameToggle[];