diff options
author | rtenneti@chromium.org <rtenneti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-02-12 05:29:32 +0000 |
---|---|---|
committer | rtenneti@chromium.org <rtenneti@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2013-02-12 05:29:32 +0000 |
commit | b1c20f53f919d5d14694562b659d0957fcd30997 (patch) | |
tree | ba7e80e35de7d92e6c5c2595a488cdc0e599577f | |
parent | d02f31dc8c5daeeb431f7f9820dd9b5d9705cdc9 (diff) | |
download | chromium_src-b1c20f53f919d5d14694562b659d0957fcd30997.zip chromium_src-b1c20f53f919d5d14694562b659d0957fcd30997.tar.gz chromium_src-b1c20f53f919d5d14694562b659d0957fcd30997.tar.bz2 |
ThreadWatcher - Added jankiness monitor for FILE thread.
Extended crash-on-hang-threads command line switch to
accept the unresponsive seconds.
For example, if --crash-on-hang-threads=UI:3:18,IO:3:18 is passed as
command line argument, ThreadWatcher crashes the browser if UI or IO
is not responsive for 18 seconds and the number of browser threads that
are responding is less than or equal to 3.
If there is no command line switch, it defaults to
"UI:3:18,IO:3:18,FILE:3,90" as the value in Canary and Dev.
It defaults to empty string in STABLE channel (i.e., we don't crash
the browser due to jankiness).
For beta channel it defaults to "UI:3:36,IO:3:36,FILE:3:180".
Deleted crash-on-hang-threads command line switch because
crash-on-hang-threads supports this.
R=jar@chromium.org
TEST=unit tests for ThreadWatcher
Review URL: https://chromiumcodereview.appspot.com/12183008
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@181874 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | chrome/browser/metrics/thread_watcher.cc | 105 | ||||
-rw-r--r-- | chrome/browser/metrics/thread_watcher.h | 98 | ||||
-rw-r--r-- | chrome/browser/metrics/thread_watcher_unittest.cc | 95 | ||||
-rw-r--r-- | chrome/common/chrome_switches.cc | 11 | ||||
-rw-r--r-- | chrome/common/chrome_switches.h | 1 |
5 files changed, 226 insertions, 84 deletions
diff --git a/chrome/browser/metrics/thread_watcher.cc b/chrome/browser/metrics/thread_watcher.cc index afc84f2..a76e6e3 100644 --- a/chrome/browser/metrics/thread_watcher.cc +++ b/chrome/browser/metrics/thread_watcher.cc @@ -11,6 +11,7 @@ #include "base/debug/alias.h" #include "base/lazy_instance.h" #include "base/string_split.h" +#include "base/stringprintf.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_tokenizer.h" #include "base/threading/thread_restrictions.h" @@ -408,6 +409,18 @@ const int ThreadWatcherList::kUnresponsiveCount = 9; // static const int ThreadWatcherList::kLiveThreadsThreshold = 3; +ThreadWatcherList::CrashDataThresholds::CrashDataThresholds( + uint32 live_threads_threshold, + uint32 unresponsive_threshold) + : live_threads_threshold(live_threads_threshold), + unresponsive_threshold(unresponsive_threshold) { +} + +ThreadWatcherList::CrashDataThresholds::CrashDataThresholds() + : live_threads_threshold(kLiveThreadsThreshold), + unresponsive_threshold(kUnresponsiveCount) { +} + // static void ThreadWatcherList::StartWatchingAll(const CommandLine& command_line) { uint32 unresponsive_threshold; @@ -498,7 +511,7 @@ void ThreadWatcherList::ParseCommandLine( const CommandLine& command_line, uint32* unresponsive_threshold, CrashOnHangThreadMap* crash_on_hang_threads) { - // Determine |unresponsive_threshold| based on switches::kCrashOnHangSeconds. + // Initialize |unresponsive_threshold| to a default value. *unresponsive_threshold = kUnresponsiveCount; // Increase the unresponsive_threshold on the Stable and Beta channels to @@ -518,47 +531,27 @@ void ThreadWatcherList::ParseCommandLine( *unresponsive_threshold *= 2; #endif - std::string crash_on_hang_seconds = - command_line.GetSwitchValueASCII(switches::kCrashOnHangSeconds); - if (!crash_on_hang_seconds.empty()) { - int crash_seconds = atoi(crash_on_hang_seconds.c_str()); - if (crash_seconds > 0) { - *unresponsive_threshold = static_cast<uint32>( - ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); - } - } - + uint32 crash_seconds = *unresponsive_threshold * kUnresponsiveSeconds; std::string crash_on_hang_thread_names; - - // Default to crashing the browser if UI or IO threads are not responsive - // except in stable channel. - if (channel == chrome::VersionInfo::CHANNEL_STABLE) - crash_on_hang_thread_names = ""; - else - crash_on_hang_thread_names = "UI:3,IO:9"; - bool has_command_line_overwrite = false; if (command_line.HasSwitch(switches::kCrashOnHangThreads)) { crash_on_hang_thread_names = command_line.GetSwitchValueASCII(switches::kCrashOnHangThreads); has_command_line_overwrite = true; + } else if (channel != chrome::VersionInfo::CHANNEL_STABLE) { + // Default to crashing the browser if UI or IO or FILE threads are not + // responsive except in stable channel. + crash_on_hang_thread_names = base::StringPrintf( + "UI:%d:%d,IO:%d:%d,FILE:%d:%d", + kLiveThreadsThreshold, crash_seconds, + kLiveThreadsThreshold, crash_seconds, + kLiveThreadsThreshold, crash_seconds * 5); } - base::StringTokenizer tokens(crash_on_hang_thread_names, ","); - std::vector<std::string> values; - while (tokens.GetNext()) { - const std::string& token = tokens.token(); - base::SplitString(token, ':', &values); - if (values.size() != 2) - continue; - std::string thread_name = values[0]; - uint32 live_threads_threshold; - if (!base::StringToUint(values[1], &live_threads_threshold)) - continue; - CrashOnHangThreadMap::iterator it = - crash_on_hang_threads->find(thread_name); - if (crash_on_hang_threads->end() == it) - (*crash_on_hang_threads)[thread_name] = live_threads_threshold; - } + + ParseCommandLineCrashOnHangThreads(crash_on_hang_thread_names, + kLiveThreadsThreshold, + crash_seconds, + crash_on_hang_threads); if (channel != chrome::VersionInfo::CHANNEL_CANARY || has_command_line_overwrite) { @@ -576,8 +569,43 @@ void ThreadWatcherList::ParseCommandLine( "ThreadWatcher", 100, "default_hung_threads", 2013, 10, 30, NULL)); int io_hung_thread_group = field_trial->AppendGroup("io_hung_thread", 10); - if (field_trial->group() == io_hung_thread_group) - it->second = INT_MAX; // Crash anytime IO thread hangs. + if (field_trial->group() == io_hung_thread_group) { + // Crash anytime IO thread hangs. + it->second.live_threads_threshold = INT_MAX; + } +} + +// static +void ThreadWatcherList::ParseCommandLineCrashOnHangThreads( + const std::string& crash_on_hang_thread_names, + uint32 default_live_threads_threshold, + uint32 default_crash_seconds, + CrashOnHangThreadMap* crash_on_hang_threads) { + base::StringTokenizer tokens(crash_on_hang_thread_names, ","); + std::vector<std::string> values; + while (tokens.GetNext()) { + const std::string& token = tokens.token(); + base::SplitString(token, ':', &values); + std::string thread_name = values[0]; + + uint32 live_threads_threshold = default_live_threads_threshold; + uint32 crash_seconds = default_crash_seconds; + if (values.size() >= 2 && + (!base::StringToUint(values[1], &live_threads_threshold))) { + continue; + } + if (values.size() >= 3 && + (!base::StringToUint(values[2], &crash_seconds))) { + continue; + } + uint32 unresponsive_threshold = static_cast<uint32>( + ceil(static_cast<float>(crash_seconds) / kUnresponsiveSeconds)); + + CrashDataThresholds crash_data(live_threads_threshold, + unresponsive_threshold); + // Use the last specifier. + (*crash_on_hang_threads)[thread_name] = crash_data; + } } // static @@ -630,7 +658,8 @@ void ThreadWatcherList::StartWatching( uint32 live_threads_threshold = 0; if (it != crash_on_hang_threads.end()) { crash_on_hang = true; - live_threads_threshold = it->second; + live_threads_threshold = it->second.live_threads_threshold; + unresponsive_threshold = it->second.unresponsive_threshold; } ThreadWatcher::StartWatching( diff --git a/chrome/browser/metrics/thread_watcher.h b/chrome/browser/metrics/thread_watcher.h index c9f824b..39d0468 100644 --- a/chrome/browser/metrics/thread_watcher.h +++ b/chrome/browser/metrics/thread_watcher.h @@ -308,10 +308,22 @@ class ThreadWatcherList { // A map from BrowserThread to the actual instances. typedef std::map<content::BrowserThread::ID, ThreadWatcher*> RegistrationList; - // A map from thread names (UI, IO, etc) to |live_threads_threshold|. + // A map from thread names (UI, IO, etc) to |CrashDataThresholds|. // |live_threads_threshold| specifies the maximum number of browser threads // that have to be responsive when we want to crash the browser because of - // hung watched thread. + // hung watched thread. This threshold allows us to either look for a system + // deadlock, or look for a solo hung thread. A small live_threads_threshold + // looks for a broad deadlock (few browser threads left running), and a large + // threshold looks for a single hung thread (this in only appropriate for a + // thread that *should* never have much jank, such as the IO). + // + // |unresponsive_threshold| specifies the number of unanswered ping messages + // after which watched (UI, IO, etc) thread is considered as not responsive. + // We translate "time" (given in seconds) into a number of pings. As a result, + // we only declare a thread unresponsive when a lot of "time" has passed (many + // pings), and yet our pinging thread has continued to process messages (so we + // know the entire PC is not hung). Set this number higher to crash less + // often, and lower to crash more often. // // The map lists all threads (by name) that can induce a crash by hanging. It // is populated from the command line, or given a default list. See @@ -319,18 +331,52 @@ class ThreadWatcherList { // watched, as they provide the system context of how hung *other* threads // are. // - // Example 1: If the value for "IO" was 3, then we would crash if at least one - // thread is responding and total responding threads is less than or equal to - // 3 (this thread, plus at least one other thread is unresponsive). We would - // not crash if none of the threads are not responding, as we'd assume such - // large hang counts mean that the system is generally unresponsive. - // Example 2: If the value for "UI" was INT_MAX, then we would always crash if - // the UI thread was hung, no matter what the other threads are doing. - // Example 3: If the value of "FILE" was 5, then we would only crash if the - // FILE thread was the ONLY hung thread (because we watch 6 threads). IF there - // was another unresponsive thread, we would not consider this a problem worth - // crashing for. - typedef std::map<std::string, uint32> CrashOnHangThreadMap; + // ThreadWatcher monitors six browser threads (i.e., UI, IO, DB, FILE, + // FILE_USER_BLOCKING and CACHE). Out of the 6 threads, any subset may be + // watched, to potentially cause a crash. The following example's command line + // causes exactly 3 threads to be watched. + // + // The example command line argument consists of "UI:3:18,IO:3:18,FILE:5:90". + // In that string, the first parameter specifies the thread_id: UI, IO or + // FILE. The second parameter specifies |live_threads_threshold|. For UI and + // IO threads, we would crash if the number of threads responding is less than + // or equal to 3. The third parameter specifies the unresponsive threshold + // seconds. This number is used to calculate |unresponsive_threshold|. In this + // example for UI and IO threads, we would crash if those threads don't + // respond for 18 seconds (or 9 unanswered ping messages) and for FILE thread, + // crash_seconds is set to 90 seconds (or 45 unanswered ping messages). + // + // The following examples explain how the data in |CrashDataThresholds| + // controls the crashes. + // + // Example 1: If the |live_threads_threshold| value for "IO" was 3 and + // unresponsive threshold seconds is 18 (or |unresponsive_threshold| is 9), + // then we would crash if the IO thread was hung (9 unanswered ping messages) + // and if at least one thread is responding and total responding threads is + // less than or equal to 3 (this thread, plus at least one other thread is + // unresponsive). We would not crash if none of the threads are responding, as + // we'd assume such large hang counts mean that the system is generally + // unresponsive. + // Example 2: If the |live_threads_threshold| value for "UI" was any number + // higher than 6 and unresponsive threshold seconds is 18 (or + // |unresponsive_threshold| is 9), then we would always crash if the UI thread + // was hung (9 unanswered ping messages), no matter what the other threads are + // doing. + // Example 3: If the |live_threads_threshold| value of "FILE" was 5 and + // unresponsive threshold seconds is 90 (or |unresponsive_threshold| is 45), + // then we would only crash if the FILE thread was the ONLY hung thread + // (because we watch 6 threads). If there was another unresponsive thread, we + // would not consider this a problem worth crashing for. FILE thread would be + // considered as hung if it didn't respond for 45 ping messages. + struct CrashDataThresholds { + CrashDataThresholds(uint32 live_threads_threshold, + uint32 unresponsive_threshold); + CrashDataThresholds(); + + uint32 live_threads_threshold; + uint32 unresponsive_threshold; + }; + typedef std::map<std::string, CrashDataThresholds> CrashOnHangThreadMap; // This method posts a task on WatchDogThread to start watching all browser // threads. @@ -361,7 +407,9 @@ class ThreadWatcherList { // Allow tests to access our innards for testing purposes. friend class CustomThreadWatcher; friend class ThreadWatcherTest; - FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CommandLineArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesOnlyArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CrashOnHangThreadsAllArgs); // This singleton holds the global list of registered ThreadWatchers. ThreadWatcherList(); @@ -369,16 +417,26 @@ class ThreadWatcherList { // Destructor deletes all registered ThreadWatcher instances. virtual ~ThreadWatcherList(); - // Parses the command line to get |unresponsive_threshold| from - // switches::kCrashOnHangSeconds, |crash_on_hang| thread names from - // switches::kCrashOnHangThreads and |live_threads_threshold| from - // switches::kCrashOnLive. |crash_on_hang_threads| is a map of - // |crash_on_hang| thread's names to |live_threads_threshold|. + // Parses the command line to get |crash_on_hang_threads| map from + // switches::kCrashOnHangThreads. |crash_on_hang_threads| is a map of + // |crash_on_hang| thread's names to |CrashDataThresholds|. static void ParseCommandLine( const CommandLine& command_line, uint32* unresponsive_threshold, CrashOnHangThreadMap* crash_on_hang_threads); + // Parses the argument |crash_on_hang_thread_names| and creates + // |crash_on_hang_threads| map of |crash_on_hang| thread's names to + // |CrashDataThresholds|. If |crash_on_hang_thread_names| doesn't specify + // |live_threads_threshold|, then it uses |default_live_threads_threshold| as + // the value. If |crash_on_hang_thread_names| doesn't specify |crash_seconds|, + // then it uses |default_crash_seconds| as the value. + static void ParseCommandLineCrashOnHangThreads( + const std::string& crash_on_hang_thread_names, + uint32 default_live_threads_threshold, + uint32 default_crash_seconds, + CrashOnHangThreadMap* crash_on_hang_threads); + // This constructs the |ThreadWatcherList| singleton and starts watching // browser threads by calling StartWatching() on each browser thread that is // watched. It disarms StartupTimeBomb. diff --git a/chrome/browser/metrics/thread_watcher_unittest.cc b/chrome/browser/metrics/thread_watcher_unittest.cc index e097aee..529358d 100644 --- a/chrome/browser/metrics/thread_watcher_unittest.cc +++ b/chrome/browser/metrics/thread_watcher_unittest.cc @@ -242,6 +242,8 @@ class ThreadWatcherTest : public ::testing::Test { static const std::string webkit_thread_name; static const std::string crash_on_hang_seconds; static const std::string crash_on_hang_thread_names; + static const std::string thread_names_and_live_threshold; + static const std::string crash_on_hang_thread_data; CustomThreadWatcher* io_watcher_; CustomThreadWatcher* webkit_watcher_; ThreadWatcherList* thread_watcher_list_; @@ -326,47 +328,106 @@ const std::string ThreadWatcherTest::io_thread_name = "IO"; const BrowserThread::ID ThreadWatcherTest::webkit_thread_id = BrowserThread::WEBKIT_DEPRECATED; const std::string ThreadWatcherTest::webkit_thread_name = "WEBKIT"; -const std::string ThreadWatcherTest::crash_on_hang_seconds = "24"; -const std::string ThreadWatcherTest::crash_on_hang_thread_names = "IO:3,UI:3"; +const std::string ThreadWatcherTest::crash_on_hang_thread_names = "UI,IO"; +const std::string ThreadWatcherTest::thread_names_and_live_threshold = + "UI:4,IO:4"; +const std::string ThreadWatcherTest::crash_on_hang_thread_data = + "UI:5:12,IO:5:12,FILE:5:12"; -TEST_F(ThreadWatcherTest, CommandLineArgs) { +TEST_F(ThreadWatcherTest, ThreadNamesOnlyArgs) { // Setup command_line arguments. CommandLine command_line(CommandLine::NO_PROGRAM); - command_line.AppendSwitchASCII(switches::kCrashOnHangSeconds, - crash_on_hang_seconds); command_line.AppendSwitchASCII(switches::kCrashOnHangThreads, crash_on_hang_thread_names); // Parse command_line arguments. + ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads; uint32 unresponsive_threshold; + ThreadWatcherList::ParseCommandLine(command_line, + &unresponsive_threshold, + &crash_on_hang_threads); + + // Verify the data. + base::StringTokenizer tokens(crash_on_hang_thread_names, ","); + std::vector<std::string> values; + while (tokens.GetNext()) { + const std::string& token = tokens.token(); + base::SplitString(token, ':', &values); + std::string thread_name = values[0]; + + ThreadWatcherList::CrashOnHangThreadMap::iterator it = + crash_on_hang_threads.find(thread_name); + bool crash_on_hang = (it != crash_on_hang_threads.end()); + EXPECT_TRUE(crash_on_hang); + EXPECT_LT(0u, it->second.live_threads_threshold); + EXPECT_LT(0u, it->second.unresponsive_threshold); + } +} + +TEST_F(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs) { + // Setup command_line arguments. + CommandLine command_line(CommandLine::NO_PROGRAM); + command_line.AppendSwitchASCII(switches::kCrashOnHangThreads, + thread_names_and_live_threshold); + + // Parse command_line arguments. ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads; + uint32 unresponsive_threshold; ThreadWatcherList::ParseCommandLine(command_line, &unresponsive_threshold, &crash_on_hang_threads); // Verify the data. - uint32 crash_on_unresponsive_seconds = - ThreadWatcherList::kUnresponsiveSeconds * unresponsive_threshold; - EXPECT_EQ(static_cast<int>(crash_on_unresponsive_seconds), - atoi(crash_on_hang_seconds.c_str())); + base::StringTokenizer tokens(thread_names_and_live_threshold, ","); + std::vector<std::string> values; + while (tokens.GetNext()) { + const std::string& token = tokens.token(); + base::SplitString(token, ':', &values); + std::string thread_name = values[0]; - // Check ThreadWatcherTestList has the right crash_on_hang_thread_names. - base::StringTokenizer tokens(crash_on_hang_thread_names, ","); + ThreadWatcherList::CrashOnHangThreadMap::iterator it = + crash_on_hang_threads.find(thread_name); + bool crash_on_hang = (it != crash_on_hang_threads.end()); + EXPECT_TRUE(crash_on_hang); + EXPECT_EQ(4u, it->second.live_threads_threshold); + EXPECT_LT(0u, it->second.unresponsive_threshold); + } +} + +TEST_F(ThreadWatcherTest, CrashOnHangThreadsAllArgs) { + // Setup command_line arguments. + CommandLine command_line(CommandLine::NO_PROGRAM); + command_line.AppendSwitchASCII(switches::kCrashOnHangThreads, + crash_on_hang_thread_data); + + // Parse command_line arguments. + ThreadWatcherList::CrashOnHangThreadMap crash_on_hang_threads; + uint32 unresponsive_threshold; + ThreadWatcherList::ParseCommandLine(command_line, + &unresponsive_threshold, + &crash_on_hang_threads); + + // Verify the data. + base::StringTokenizer tokens(crash_on_hang_thread_data, ","); std::vector<std::string> values; while (tokens.GetNext()) { const std::string& token = tokens.token(); base::SplitString(token, ':', &values); - if (values.size() != 2) - continue; std::string thread_name = values[0]; - uint32 live_threads_threshold; - if (!base::StringToUint(values[1], &live_threads_threshold)) - continue; + ThreadWatcherList::CrashOnHangThreadMap::iterator it = crash_on_hang_threads.find(thread_name); + bool crash_on_hang = (it != crash_on_hang_threads.end()); EXPECT_TRUE(crash_on_hang); - EXPECT_EQ(it->second, live_threads_threshold); + + uint32 crash_live_threads_threshold = it->second.live_threads_threshold; + EXPECT_EQ(5u, crash_live_threads_threshold); + + uint32 crash_unresponsive_threshold = it->second.unresponsive_threshold; + uint32 crash_on_unresponsive_seconds = + ThreadWatcherList::kUnresponsiveSeconds * crash_unresponsive_threshold; + EXPECT_EQ(12u, crash_on_unresponsive_seconds); } } diff --git a/chrome/common/chrome_switches.cc b/chrome/common/chrome_switches.cc index 5a301af..e9e2a06 100644 --- a/chrome/common/chrome_switches.cc +++ b/chrome/common/chrome_switches.cc @@ -202,19 +202,14 @@ const char kContentSettings2[] = "new-content-settings"; // string value, the 2 letter code from ISO 3166-1. const char kCountry[] = "country"; -// Causes the browser process to crash if browser threads are not responding -// for the given number of seconds. -const char kCrashOnHangSeconds[] = "crash-on-hang-seconds"; - // Comma-separated list of BrowserThreads that cause browser process to crash // if the given browser thread is not responsive. UI,IO,DB,FILE,CACHE are the // list of BrowserThreads that are supported. // // For example: -// --crash-on-hang-threads=UI:3,IO:3 --> Crash the browser if UI or IO thread -// is not responsive and the number of -// browser threads that are responding -// is less than or equal to 3. +// --crash-on-hang-threads=UI:3:18,IO:3:18 --> Crash the browser if UI or IO +// is not responsive for 18 seconds and the number of browser threads that +// are responding is less than or equal to 3. const char kCrashOnHangThreads[] = "crash-on-hang-threads"; // Some platforms like ChromeOS default to empty desktop. diff --git a/chrome/common/chrome_switches.h b/chrome/common/chrome_switches.h index 9d7f1df..df5c0c3 100644 --- a/chrome/common/chrome_switches.h +++ b/chrome/common/chrome_switches.h @@ -69,7 +69,6 @@ extern const char kComponentUpdaterDebug[]; extern const char kConflictingModulesCheck[]; extern const char kContentSettings2[]; extern const char kCountry[]; -extern const char kCrashOnHangSeconds[]; extern const char kCrashOnHangThreads[]; extern const char kCreateBrowserOnStartupForTests[]; extern const char kDebugEnableFrameToggle[]; |