diff options
Diffstat (limited to 'chrome/browser/metrics/thread_watcher.h')
-rw-r--r-- | chrome/browser/metrics/thread_watcher.h | 98 |
1 files changed, 78 insertions, 20 deletions
diff --git a/chrome/browser/metrics/thread_watcher.h b/chrome/browser/metrics/thread_watcher.h index c9f824b..39d0468 100644 --- a/chrome/browser/metrics/thread_watcher.h +++ b/chrome/browser/metrics/thread_watcher.h @@ -308,10 +308,22 @@ class ThreadWatcherList { // A map from BrowserThread to the actual instances. typedef std::map<content::BrowserThread::ID, ThreadWatcher*> RegistrationList; - // A map from thread names (UI, IO, etc) to |live_threads_threshold|. + // A map from thread names (UI, IO, etc) to |CrashDataThresholds|. // |live_threads_threshold| specifies the maximum number of browser threads // that have to be responsive when we want to crash the browser because of - // hung watched thread. + // hung watched thread. This threshold allows us to either look for a system + // deadlock, or look for a solo hung thread. A small live_threads_threshold + // looks for a broad deadlock (few browser threads left running), and a large + // threshold looks for a single hung thread (this is only appropriate for a + // thread that *should* never have much jank, such as the IO). + // + // |unresponsive_threshold| specifies the number of unanswered ping messages + // after which watched (UI, IO, etc) thread is considered as not responsive. + // We translate "time" (given in seconds) into a number of pings. As a result, + // we only declare a thread unresponsive when a lot of "time" has passed (many + // pings), and yet our pinging thread has continued to process messages (so we + // know the entire PC is not hung). Set this number higher to crash less + // often, and lower to crash more often. // // The map lists all threads (by name) that can induce a crash by hanging. It // is populated from the command line, or given a default list. See @@ -319,18 +331,52 @@ class ThreadWatcherList { // watched, as they provide the system context of how hung *other* threads // are. 
// - // Example 1: If the value for "IO" was 3, then we would crash if at least one - // thread is responding and total responding threads is less than or equal to - // 3 (this thread, plus at least one other thread is unresponsive). We would - // not crash if none of the threads are not responding, as we'd assume such - // large hang counts mean that the system is generally unresponsive. - // Example 2: If the value for "UI" was INT_MAX, then we would always crash if - // the UI thread was hung, no matter what the other threads are doing. - // Example 3: If the value of "FILE" was 5, then we would only crash if the - // FILE thread was the ONLY hung thread (because we watch 6 threads). IF there - // was another unresponsive thread, we would not consider this a problem worth - // crashing for. - typedef std::map<std::string, uint32> CrashOnHangThreadMap; + // ThreadWatcher monitors six browser threads (i.e., UI, IO, DB, FILE, + // FILE_USER_BLOCKING and CACHE). Out of the 6 threads, any subset may be + // watched, to potentially cause a crash. The following example's command line + // causes exactly 3 threads to be watched. + // + // The example command line argument consists of "UI:3:18,IO:3:18,FILE:5:90". + // In that string, the first parameter specifies the thread_id: UI, IO or + // FILE. The second parameter specifies |live_threads_threshold|. For UI and + // IO threads, we would crash if the number of threads responding is less than + // or equal to 3. The third parameter specifies the unresponsive threshold + // seconds. This number is used to calculate |unresponsive_threshold|. In this + // example for UI and IO threads, we would crash if those threads don't + // respond for 18 seconds (or 9 unanswered ping messages) and for FILE thread, + // crash_seconds is set to 90 seconds (or 45 unanswered ping messages). + // + // The following examples explain how the data in |CrashDataThresholds| + // controls the crashes. 
+ // + // Example 1: If the |live_threads_threshold| value for "IO" was 3 and + // unresponsive threshold seconds is 18 (or |unresponsive_threshold| is 9), + // then we would crash if the IO thread was hung (9 unanswered ping messages) + // and if at least one thread is responding and total responding threads is + // less than or equal to 3 (this thread, plus at least one other thread is + // unresponsive). We would not crash if none of the threads are responding, as + // we'd assume such large hang counts mean that the system is generally + // unresponsive. + // Example 2: If the |live_threads_threshold| value for "UI" was any number + // higher than 6 and unresponsive threshold seconds is 18 (or + // |unresponsive_threshold| is 9), then we would always crash if the UI thread + // was hung (9 unanswered ping messages), no matter what the other threads are + // doing. + // Example 3: If the |live_threads_threshold| value of "FILE" was 5 and + // unresponsive threshold seconds is 90 (or |unresponsive_threshold| is 45), + // then we would only crash if the FILE thread was the ONLY hung thread + // (because we watch 6 threads). If there was another unresponsive thread, we + // would not consider this a problem worth crashing for. FILE thread would be + // considered as hung if it didn't respond for 45 ping messages. + struct CrashDataThresholds { + CrashDataThresholds(uint32 live_threads_threshold, + uint32 unresponsive_threshold); + CrashDataThresholds(); + + uint32 live_threads_threshold; + uint32 unresponsive_threshold; + }; + typedef std::map<std::string, CrashDataThresholds> CrashOnHangThreadMap; // This method posts a task on WatchDogThread to start watching all browser // threads. @@ -361,7 +407,9 @@ class ThreadWatcherList { // Allow tests to access our innards for testing purposes. 
friend class CustomThreadWatcher; friend class ThreadWatcherTest; - FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CommandLineArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesOnlyArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, ThreadNamesAndLiveThresholdArgs); + FRIEND_TEST_ALL_PREFIXES(ThreadWatcherTest, CrashOnHangThreadsAllArgs); // This singleton holds the global list of registered ThreadWatchers. ThreadWatcherList(); @@ -369,16 +417,26 @@ class ThreadWatcherList { // Destructor deletes all registered ThreadWatcher instances. virtual ~ThreadWatcherList(); - // Parses the command line to get |unresponsive_threshold| from - // switches::kCrashOnHangSeconds, |crash_on_hang| thread names from - // switches::kCrashOnHangThreads and |live_threads_threshold| from - // switches::kCrashOnLive. |crash_on_hang_threads| is a map of - // |crash_on_hang| thread's names to |live_threads_threshold|. + // Parses the command line to get |crash_on_hang_threads| map from + // switches::kCrashOnHangThreads. |crash_on_hang_threads| is a map of + // |crash_on_hang| thread's names to |CrashDataThresholds|. static void ParseCommandLine( const CommandLine& command_line, uint32* unresponsive_threshold, CrashOnHangThreadMap* crash_on_hang_threads); + // Parses the argument |crash_on_hang_thread_names| and creates + // |crash_on_hang_threads| map of |crash_on_hang| thread's names to + // |CrashDataThresholds|. If |crash_on_hang_thread_names| doesn't specify + // |live_threads_threshold|, then it uses |default_live_threads_threshold| as + // the value. If |crash_on_hang_thread_names| doesn't specify |crash_seconds|, + // then it uses |default_crash_seconds| as the value. 
+ static void ParseCommandLineCrashOnHangThreads( + const std::string& crash_on_hang_thread_names, + uint32 default_live_threads_threshold, + uint32 default_crash_seconds, + CrashOnHangThreadMap* crash_on_hang_threads); + // This constructs the |ThreadWatcherList| singleton and starts watching // browser threads by calling StartWatching() on each browser thread that is // watched. It disarms StartupTimeBomb. |