diff options
-rw-r--r-- | chrome/browser/metrics/thread_watcher.cc | 105 | ||||
-rw-r--r-- | chrome/browser/metrics/thread_watcher.h | 98 | ||||
-rw-r--r-- | chrome/browser/metrics/thread_watcher_unittest.cc | 95 | ||||
-rw-r--r-- | chrome/common/chrome_switches.cc | 11 | ||||
-rw-r--r-- | chrome/common/chrome_switches.h | 1 |
5 files changed, 226 insertions, 84 deletions
diff --git a/chrome/browser/metrics/thread_watcher.cc b/chrome/browser/metrics/thread_watcher.cc index afc84f2..a76e6e3 100644 --- a/chrome/browser/metrics/thread_watcher.cc +++ b/chrome/browser/metrics/thread_watcher.cc @@ -11,6 +11,7 @@ #include "base/debug/alias.h" #include "base/lazy_instance.h" #include "base/string_split.h" +#include "base/stringprintf.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_tokenizer.h" #include "base/threading/thread_restrictions.h" @@ -408,6 +409,18 @@ const int ThreadWatcherList::kUnresponsiveCount = 9; // static const int ThreadWatcherList::kLiveThreadsThreshold = 3; +ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( + uint32 live_threads_threshold, + uint32 unresponsive_threshold) + : live_threads_threshold(live_threads_threshold), + unresponsive_threshold(unresponsive_threshold) { +} + +ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() + : live_threads_threshold(kLiveThreadsThreshold), + unresponsive_threshold(kUnresponsiveCount) { +} + // static void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { uint32 unresponsive_threshold; @@ -498,7 +511,7 @@ void ThreadWatcherList::ParseCommandLine( const CommandLine& command_line, uint32* unresponsive_threshold, CrashOnHangThreadMap* crash_on_hang_threads) { - // Determine |unresponsive_threshold| based on switches::kCrashOnHangSeconds. + // Initialize |unresponsive_threshold| to a default value. *unresponsive_threshold = kUnresponsiveCount; // Increase the unresponsive_threshold on the Stable and Beta channels to @@ -518,47 +531,27 @@ void ThreadWatcherList::ParseCommandLine( *unresponsive_threshold *= 2; #endif - std::string crash_on_hang_seconds = - command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds); - if (!crash_on_hang_seconds.empty()) { - int crash_seconds = atoi(crash_on_hang_seconds.c_str()); - if (crash_seconds > 0) { - *unresponsive_threshold = static_cast<uint32>( - ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); - } - } - + uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; std::string crash_on_hang_thread_names; - - // Default to crashing the browser if UI or IO threads are not responsive - // except in stable channel. - if (channel == chrome::VersionInfo::CHANNEL_STABLE) - crash_on_hang_thread_names = ""; - else - crash_on_hang_thread_names = "UI:3,IO:9"; - bool has_command_line_overwrite = false; if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { crash_on_hang_thread_names = command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); has_command_line_overwrite = true; + } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { + // Default to crashing the browser if UI or IO or FILE threads are not + // responsive except in stable channel. + crash_on_hang_thread_names = base::StringPrintf( + "UI:%d:%d,IO:%d:%d,FILE:%d:%d", + kLiveThreadsThreshold, crash_seconds, + kLiveThreadsThreshold, crash_seconds, + kLiveThreadsThreshold, crash_seconds * 5); } - base::StringTokenizer tokens(crash_on_hang_thread_names, ","); - std::vector<std::string> values; - while (tokens.GetNext()) { - const std::string& token = tokens.token(); - base::SplitString(token, ':', &values); - if (values.size() != 2) - continue; - std::string thread_name = values[0]; - uint32 live_threads_threshold; - if (!base::StringToUint(values[1], &live_threads_threshold)) - continue; - CrashOnHangThreadMap::iterator it = - crash_on_hang_threads->find(thread_name); - if (crash_on_hang_threads->end() == it) - (*crash_on_hang_threads)[thread_name] = live_threads_threshold; - } + + ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, + kLiveThreadsThreshold, + crash_seconds, + crash_on_hang_threads); if (channel != chrome::VersionInfo::CHANNEL_CANARY || has_command_line_overwrite) { @@ -576,8 +569,43 @@ void ThreadWatcherList::ParseCommandLine( "ThreadWatcher", 100, "default_hung_threads", 2013, 10, 30, NULL)); int io_hung_thread_group = field_trial->AppendGroup("io_hung_thread", 10); - if (field_trial->group() == io_hung_thread_group) - it->second = INT_MAX; // Crash anytime IO thread hangs. + if (field_trial->group() == io_hung_thread_group) { + // Crash anytime IO thread hangs. + it->second.live_threads_threshold = INT_MAX; + } +} + +// static +void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( + const std::string& crash_on_hang_thread_names, + uint32 default_live_threads_threshold, + uint32 default_crash_seconds, + CrashOnHangThreadMap* crash_on_hang_threads) { + base::StringTokenizer tokens(crash_on_hang_thread_names, ","); + std::vector<std::string> values; + while (tokens.GetNext()) { + const std::string& token = tokens.token(); + base::SplitString(token, ':', &values); + std::string thread_name = values[0]; + + uint32 live_threads_threshold = default_live_threads_threshold; + uint32 crash_seconds = default_crash_seconds; + if (values.size() >= 2 && + (!base::StringToUint(values[1], &live_threads_threshold))) { + continue; + } + if (values.size() >= 3 && + (!base::StringToUint(values[2], &crash_seconds))) { + continue; + } + uint32 unresponsive_threshold = static_cast<uint32>( + ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); + + CrashDataThresholds crash_data(live_threads_threshold, + unresponsive_threshold); + // Use the last specifier. + (*crash_on_hang_threads)[thread_name] = crash_data; + } } // static @@ -630,7 +658,8 @@ void ThreadWatcherList::StartWatching( uint32 live_threads_threshold = 0; if (it != crash_on_hang_threads.end()) { crash_on_hang = true; - live_threads_threshold = it->second; + live_threads_threshold = it->second.live_threads_threshold; + unresponsive_threshold = it->second.unresponsive_threshold; } ThreadWatcher::StartWatching( diff --git a/chrome/browser/metrics/thread_watcher.h b/chrome/browser/metrics/thread_watcher.h index c9f824b..39d0468 100644 --- a/chrome/browser/metrics/thread_watcher.h +++ b/chrome/browser/metrics/thread_watcher.h @@ -308,10 +308,22 @@ class ThreadWatcherList { // A map from BrowserThread to the actual instances. typedef std::map<content::BrowserThread::ID, ThreadWatcher*> RegistrationList; - // A map from thread names (UI, IO, etc) to |live_threads_threshold|. + // A map from thread names (UI, IO, etc) to |CrashDataThresholds|. // |live_threads_threshold| specifies the maximum number of browser threads // that have to be responsive when we want to crash the browser because of - // hung watched thread. + // hung watched thread. This threshold allows us to either look for a system + // deadlock, or look for a solo hung thread. A small live_threads_threshold + // looks for a broad deadlock (few browser threads left running), and a large + // threshold looks for a single hung thread (this in only appropriate for a + // thread that *should* never have much jank, such as the IO). + // + // |unresponsive_threshold| specifies the number of unanswered ping messages + // after which watched (UI, IO, etc) thread is considered as not responsive. + // We translate "time" (given in seconds) into a number of pings. As a result, + // we only declare a thread unresponsive when a lot of "time" has passed (many + // pings), and yet our pinging thread has continued to process messages (so we + // know the entire PC is not hung). Set this number higher to crash less + // often, and lower to crash more often. // // The map lists all threads (by name) that can induce a crash by hanging. It // is populated from the command line, or given a default list. See @@ -319,18 +331,52 @@ class ThreadWatcherList { // watched, as they provide the system context of how hung *other* threads // are. // - // Example 1: If the value for "IO" was 3, then we would crash if at least one - // thread is responding and total responding threads is less than or equal to - // 3 (this thread, plus at least one other thread is unresponsive). We would - // not crash if none of the threads are not responding, as we'd assume such - // large hang counts mean that the system is generally unresponsive. - // Example 2: If the value for "UI" was INT_MAX, then we would always crash if - // the UI thread was hung, no matter what the other threads are doing. - // Example 3: If the value of "FILE" was 5, then we would only crash if the - // FILE thread was the ONLY hung thread (because we watch 6 threads). IF there - // was another unresponsive thread, we would not consider this a problem worth - // crashing for. - typedef std::map<std::string, uint32> CrashOnHangThreadMap; + // ThreadWatcher monitors six browser threads (i.e., UI, IO, DB, FILE, + // FILE_USER_BLOCKING and CACHE). Out of the 6 threads, any subset may be + // watched, to potentially cause a crash. The following example's command line + // causes exactly 3 threads to be watched. + // + // The example command line argument consists of "UI:3:18,IO:3:18,FILE:5:90". + // In that string, the first parameter specifies the thread_id: UI, IO or + // FILE. The second parameter specifies |live_threads_threshold|. For UI and + // IO threads, we would crash if the number of threads responding is less than + // or equal to 3. The third parameter specifies the unresponsive threshold + // seconds. This number is used to calculate |unresponsive_threshold|. In this + // example for UI and IO threads, we would crash if those threads don't + // respond for 18 seconds (or 9 unanswered ping messages) and for FILE thread, + // crash_seconds is set to 90 seconds (or 45 unanswered ping messages). + // + // The following examples explain how the data in |CrashDataThresholds| + // controls the crashes. + // + // Example 1: If the |live_threads_threshold| value for "IO" was 3 and + // unresponsive threshold seconds is 18 (or |unresponsive_threshold| is 9), + // then we would crash if the IO thread was hung (9 unanswered ping messages) + // and if at least one thread is responding and total responding threads is + // less than or equal to 3 (this thread, plus at least one other thread is + // unresponsive). We would not crash if none of the threads are responding, as + // we'd assume such large hang counts mean that the system is generally + // unresponsive. + // Example 2: If the |live_threads_threshold| value for "UI" was any number + // higher than 6 and unresponsive threshold seconds is 18 (or + // |unresponsive_threshold| is 9), then we would always crash if the UI thread + // was hung (9 unanswered ping messages), no matter what the other threads are + // doing. + // Example 3: If the |live_threads_threshold| value of "FILE" was 5 and + // unresponsive threshold seconds is 90 (or |unresponsive_threshold| is 45), + // then we would only crash if the FILE thread was the ONLY hung thread + // (because we watch 6 threads). If there was another unresponsive thread, we + // would not consider this a problem worth crashing for. FILE thread would be + // considered as hung if it didn't respond for 45 ping messages. + struct CrashDataThresholds { + CrashDataThresholds(uint32 live_threads_threshold, + uint32 unresponsive_threshold); + CrashDataThresholds(); + + uint32 live_threads_threshold; + uint32 unresponsive_threshold; + }; + typedef std::map<std::string, CrashDataThresholds> CrashOnHangThreadMap; // This method posts a task on WatchDogThread to start watching all browser // threads. @@ -361,7 +407,9 @@ class ThreadWatcherList { // Allow tests to access our innards for testing purposes. friend class CustomThreadWatcher; friend class ThreadWatcherTest; - FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CommandLineArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesOnlyArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CrashOnHangThreadsAllArgs); // This singleton holds the global list of registered ThreadWatchers. ThreadWatcherList(); @@ -369,16 +417,26 @@ class ThreadWatcherList { // Destructor deletes all registered ThreadWatcher instances. virtual ~ThreadWatcherList(); - // Parses the command line to get |unresponsive_threshold| from - // switches::kCrashOnHangSeconds, |crash_on_hang| thread names from - // switches::kCrashOnHangThreads and |live_threads_threshold| from - // switches::kCrashOnLive. |crash_on_hang_threads| is a map of - // |crash_on_hang| thread's names to |live_threads_threshold|. + // Parses the command line to get |crash_on_hang_threads| map from + // switches::kCrashOnHangThreads. |crash_on_hang_threads| is a map of + // |crash_on_hang| thread's names to |CrashDataThresholds|. static void ParseCommandLine( const CommandLine& command_line, uint32* unresponsive_threshold, CrashOnHangThreadMap* crash_on_hang_threads); + // Parses the argument |crash_on_hang_thread_names| and creates + // |crash_on_hang_threads| map of |crash_on_hang| thread's names to + // |CrashDataThresholds|. If |crash_on_hang_thread_names| doesn't specify + // |live_threads_threshold|, then it uses |default_live_threads_threshold| as + // the value. If |crash_on_hang_thread_names| doesn't specify |crash_seconds|, + // then it uses |default_crash_seconds| as the value. + static void ParseCommandLineCrashOnHangThreads( + const std::string& crash_on_hang_thread_names, + uint32 default_live_threads_threshold, + uint32 default_crash_seconds, + CrashOnHangThreadMap* crash_on_hang_threads); + // This constructs the |ThreadWatcherList| singleton and starts watching // browser threads by calling StartWatching() on each browser thread that is // watched. It disarms StartupTimeBomb. diff --git a/chrome/browser/metrics/thread_watcher_unittest.cc b/chrome/browser/metrics/thread_watcher_unittest.cc index e097aee..529358d 100644 --- a/chrome/browser/metrics/thread_watcher_unittest.cc +++ b/chrome/browser/metrics/thread_watcher_unittest.cc @@ -242,6 +242,8 @@ class ThreadWatcherTest : public ::testing::Test { static const std::string webkit_thread_name; static const std::string crash_on_hang_seconds; static const std::string crash_on_hang_thread_names; + static const std::string thread_names_and_live_threshold; + static const std::string crash_on_hang_thread_data; CustomThreadWatcher* io_watcher_; CustomThreadWatcher* webkit_watcher_; ThreadWatcherList* thread_watcher_list_; @@ -326,47 +328,106 @@ const std::string ThreadWatcherTest::io_thread_name = "IO"; const BrowserThread::ID ThreadWatcherTest::webkit_thread_id = BrowserThread::WEBKIT_DEPRECATED; const std::string ThreadWatcherTest::webkit_thread_name = "WEBKIT"; -const std::string ThreadWatcherTest::crash_on_hang_seconds = "24"; -const std::string ThreadWatcherTest::crash_on_hang_thread_names = "IO:3,UI:3"; +const std::string ThreadWatcherTest::crash_on_hang_thread_names = "UI,IO"; +const std::string ThreadWatcherTest::thread_names_and_live_threshold = + "UI:4,IO:4"; +const std::string ThreadWatcherTest::crash_on_hang_thread_data = + "UI:5:12,IO:5:12,FILE:5:12"; -TEST_F(ThreadWatcherTest, CommandLineArgs) { +TEST_F(ThreadWatcherTest, ThreadNamesOnlyArgs) { // Setup command_line arguments. CommandLine command_line(CommandLine::NO_PROGRAM); - command_line.AppendSwitchASCII(switches::kCrashOnHangSeconds, - crash_on_hang_seconds); command_line.AppendSwitchASCII(switches::kCrashOnHangThreads, crash_on_hang_thread_names); // Parse command_line arguments. + ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads; uint32 unresponsive_threshold; + ThreadWatcherList::ParseCommandLine(command_line, + &unresponsive_threshold, + &crash_on_hang_threads); + + // Verify the data. + base::StringTokenizer tokens(crash_on_hang_thread_names, ","); + std::vector<std::string> values; + while (tokens.GetNext()) { + const std::string& token = tokens.token(); + base::SplitString(token, ':', &values); + std::string thread_name = values[0]; + + ThreadWatcherList::CrashOnHangThreadMap::iterator it = + crash_on_hang_threads.find(thread_name); + bool crash_on_hang = (it != crash_on_hang_threads.end()); + EXPECT_TRUE(crash_on_hang); + EXPECT_LT(0u, it->second.live_threads_threshold); + EXPECT_LT(0u, it->second.unresponsive_threshold); + } +} + +TEST_F(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs) { + // Setup command_line arguments. + CommandLine command_line(CommandLine::NO_PROGRAM); + command_line.AppendSwitchASCII(switches::kCrashOnHangThreads, + thread_names_and_live_threshold); + + // Parse command_line arguments. ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads; + uint32 unresponsive_threshold; ThreadWatcherList::ParseCommandLine(command_line, &unresponsive_threshold, &crash_on_hang_threads); // Verify the data. - uint32 crash_on_unresponsive_seconds = - ThreadWatcherList::kUnresponsiveSeconds * unresponsive_threshold; - EXPECT_EQ(static_cast<int>(crash_on_unresponsive_seconds), - atoi(crash_on_hang_seconds.c_str())); + base::StringTokenizer tokens(thread_names_and_live_threshold, ","); + std::vector<std::string> values; + while (tokens.GetNext()) { + const std::string& token = tokens.token(); + base::SplitString(token, ':', &values); + std::string thread_name = values[0]; - // Check ThreadWatcherTestList has the right crash_on_hang_thread_names. - base::StringTokenizer tokens(crash_on_hang_thread_names, ","); + ThreadWatcherList::CrashOnHangThreadMap::iterator it = + crash_on_hang_threads.find(thread_name); + bool crash_on_hang = (it != crash_on_hang_threads.end()); + EXPECT_TRUE(crash_on_hang); + EXPECT_EQ(4u, it->second.live_threads_threshold); + EXPECT_LT(0u, it->second.unresponsive_threshold); + } +} + +TEST_F(ThreadWatcherTest, CrashOnHangThreadsAllArgs) { + // Setup command_line arguments. + CommandLine command_line(CommandLine::NO_PROGRAM); + command_line.AppendSwitchASCII(switches::kCrashOnHangThreads, + crash_on_hang_thread_data); + + // Parse command_line arguments. + ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads; + uint32 unresponsive_threshold; + ThreadWatcherList::ParseCommandLine(command_line, + &unresponsive_threshold, + &crash_on_hang_threads); + + // Verify the data. + base::StringTokenizer tokens(crash_on_hang_thread_data, ","); std::vector<std::string> values; while (tokens.GetNext()) { const std::string& token = tokens.token(); base::SplitString(token, ':', &values); - if (values.size() != 2) - continue; std::string thread_name = values[0]; - uint32 live_threads_threshold; - if (!base::StringToUint(values[1], &live_threads_threshold)) - continue; + ThreadWatcherList::CrashOnHangThreadMap::iterator it = crash_on_hang_threads.find(thread_name); + bool crash_on_hang = (it != crash_on_hang_threads.end()); EXPECT_TRUE(crash_on_hang); - EXPECT_EQ(it->second, live_threads_threshold); + + uint32 crash_live_threads_threshold = it->second.live_threads_threshold; + EXPECT_EQ(5u, crash_live_threads_threshold); + + uint32 crash_unresponsive_threshold = it->second.unresponsive_threshold; + uint32 crash_on_unresponsive_seconds = + ThreadWatcherList::kUnresponsiveSeconds * crash_unresponsive_threshold; + EXPECT_EQ(12u, crash_on_unresponsive_seconds); } } diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc index 5a301af..e9e2a06 100644 --- a/chrome/common/chrome_switches.cc +++ b/chrome/common/chrome_switches.cc @@ -202,19 +202,14 @@ const char kContentSettings2[] = "new-content-settings"; // string value, the 2 letter code from ISO 3166-1. const char kCountry[] = "country"; -// Causes the browser process to crash if browser threads are not responding -// for the given number of seconds. -const char kCrashOnHangSeconds[] = "crash-on-hang-seconds"; - // Comma-separated list of BrowserThreads that cause browser process to crash // if the given browser thread is not responsive. UI,IO,DB,FILE,CACHE are the // list of BrowserThreads that are supported. // // For example: -// --crash-on-hang-threads=UI:3,IO:3 --> Crash the browser if UI or IO thread -// is not responsive and the number of -// browser threads that are responding -// is less than or equal to 3. +// --crash-on-hang-threads=UI:3:18,IO:3:18 --> Crash the browser if UI or IO +// is not responsive for 18 seconds and the number of browser threads that +// are responding is less than or equal to 3. const char kCrashOnHangThreads[] = "crash-on-hang-threads"; // Some platforms like ChromeOS default to empty desktop. diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h index 9d7f1df..df5c0c3 100644 --- a/chrome/common/chrome_switches.h +++ b/chrome/common/chrome_switches.h @@ -69,7 +69,6 @@ extern const char kComponentUpdaterDebug[]; extern const char kConflictingModulesCheck[]; extern const char kContentSettings2[]; extern const char kCountry[]; -extern const char kCrashOnHangSeconds[]; extern const char kCrashOnHangThreads[]; extern const char kCreateBrowserOnStartupForTests[]; extern const char kDebugEnableFrameToggle[]; |