From 443b80e00142dab08385eba06fcea79c96bbee6b Mon Sep 17 00:00:00 2001 From: "gspencer@chromium.org" Date: Tue, 14 Dec 2010 00:42:23 +0000 Subject: This adds some plumbing for propagating the status and error code of a renderer process that went away so that we can tell at the UI level what happened to the tab: did it crash, or was it killed by the OOM killer (or some other reason). This is in preparation for implementing a new UI for when a process is killed by the OOM on ChromeOS which handles it differently from a crash. Most of the changes are modifications of the argument list to include a status and error code for the exited process, but in addition the following was done: - Changed the name of DidProcessCrash to GetTerminationStatus. - Added TerminationStatus enum in process_util.h, so it can be used as the status returned by GetTerminationStatus. - Improved process_util_unittest to actually test for crashing and terminated processes on all platforms. - Added a new notification for renderers that were killed. - Added error code information to crash notification. - Added status and error code information to renderer IPC message for RenderViewGone. - Added a UMA histogram count for number of renderer kills. BUG=http://crosbug.com/8505 TEST=ran new unit test. Test passes on try servers. Review URL: http://codereview.chromium.org/5172009 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@69082 0039d316-1c4b-4281-b951-d872f2087c98 --- base/process_util.h | 31 +++++--- base/process_util_posix.cc | 41 +++++----- base/process_util_unittest.cc | 172 +++++++++++++++++++++++++++++++++++++----- base/process_util_win.cc | 86 +++++++++++++-------- 4 files changed, 252 insertions(+), 78 deletions(-) (limited to 'base') diff --git a/base/process_util.h b/base/process_util.h index ca43289..ccbb687 100644 --- a/base/process_util.h +++ b/base/process_util.h @@ -118,13 +118,15 @@ const uint32 kProcessAccessQueryLimitedInfomation = 0; const uint32 kProcessAccessWaitForTermination = 0; #endif // defined(OS_POSIX) -// A minimalistic but hopefully cross-platform set of exit codes. -// Do not change the enumeration values or you will break third-party -// installers. -enum { - PROCESS_END_NORMAL_TERMINATION = 0, - PROCESS_END_KILLED_BY_USER = 1, - PROCESS_END_PROCESS_WAS_HUNG = 2 +// Return status values from GetTerminationStatus. Don't use these as +// exit code arguments to KillProcess*(), use platform/application +// specific values instead. +enum TerminationStatus { + TERMINATION_STATUS_NORMAL_TERMINATION, // zero exit status + TERMINATION_STATUS_ABNORMAL_TERMINATION, // non-zero exit status + TERMINATION_STATUS_PROCESS_WAS_KILLED, // e.g. SIGKILL or task manager kill + TERMINATION_STATUS_PROCESS_CRASHED, // e.g. Segmentation fault + TERMINATION_STATUS_STILL_RUNNING // child hasn't exited yet }; // Returns the id of the current process. @@ -180,7 +182,7 @@ bool AdjustOOMScore(ProcessId process, int score); #endif #if defined(OS_POSIX) -// Close all file descriptors, expect those which are a destination in the +// Close all file descriptors, except those which are a destination in the // given multimap. Only call this function in a child process where you know // that there aren't any other threads. void CloseSuperfluousFds(const InjectiveMultimap& saved_map); @@ -347,10 +349,15 @@ bool KillProcessGroup(ProcessHandle process_group_id); bool KillProcessById(ProcessId process_id, int exit_code, bool wait); #endif -// Get the termination status (exit code) of the process and return true if the -// status indicates the process crashed. |child_exited| is set to true iff the -// child process has terminated. (|child_exited| may be NULL.) -bool DidProcessCrash(bool* child_exited, ProcessHandle handle); +// Get the termination status of the process by interpreting the +// circumstances of the child process' death. |exit_code| is set to +// the status returned by waitpid() on POSIX, and from +// GetExitCodeProcess() on Windows. |exit_code| may be NULL if the +// caller is not interested in it. Note that on Linux, this function +// will only return a useful result the first time it is called after +// the child exits (because it will reap the child and the information +// will no longer be available). +TerminationStatus GetTerminationStatus(ProcessHandle handle, int* exit_code); // Waits for process to exit. In POSIX systems, if the process hasn't been // signaled then puts the exit code in |exit_code|; otherwise it's considered diff --git a/base/process_util_posix.cc b/base/process_util_posix.cc index 92ef964..3d29674 100644 --- a/base/process_util_posix.cc +++ b/base/process_util_posix.cc @@ -237,6 +237,8 @@ bool KillProcess(ProcessHandle process_id, int exit_code, bool wait) { sleep_ms *= 2; } + // If we're waiting and the child hasn't died by now, force it + // with a SIGKILL. if (!exited) result = kill(process_id, SIGKILL) == 0; } @@ -634,40 +636,45 @@ void RaiseProcessToHighPriority() { // setpriority() or sched_getscheduler, but these all require extra rights. } -bool DidProcessCrash(bool* child_exited, ProcessHandle handle) { - int status; +TerminationStatus GetTerminationStatus(ProcessHandle handle, int* exit_code) { + int status = 0; const pid_t result = HANDLE_EINTR(waitpid(handle, &status, WNOHANG)); if (result == -1) { PLOG(ERROR) << "waitpid(" << handle << ")"; - if (child_exited) - *child_exited = false; - return false; + if (exit_code) + *exit_code = 0; + return TERMINATION_STATUS_NORMAL_TERMINATION; } else if (result == 0) { // the child hasn't exited yet. - if (child_exited) - *child_exited = false; - return false; + if (exit_code) + *exit_code = 0; + return TERMINATION_STATUS_STILL_RUNNING; } - if (child_exited) - *child_exited = true; + if (exit_code) + *exit_code = status; if (WIFSIGNALED(status)) { switch (WTERMSIG(status)) { - case SIGSEGV: - case SIGILL: case SIGABRT: + case SIGBUS: case SIGFPE: - return true; + case SIGILL: + case SIGSEGV: + return TERMINATION_STATUS_PROCESS_CRASHED; + case SIGINT: + case SIGKILL: + case SIGTERM: + return TERMINATION_STATUS_PROCESS_WAS_KILLED; default: - return false; + break; } } - if (WIFEXITED(status)) - return WEXITSTATUS(status) != 0; + if (WIFEXITED(status) && WEXITSTATUS(status) != 0) + return TERMINATION_STATUS_ABNORMAL_TERMINATION; - return false; + return TERMINATION_STATUS_NORMAL_TERMINATION; } bool WaitForExitCode(ProcessHandle handle, int* exit_code) { diff --git a/base/process_util_unittest.cc b/base/process_util_unittest.cc index 34c444c..0eaf5d4 100644 --- a/base/process_util_unittest.cc +++ b/base/process_util_unittest.cc @@ -7,6 +7,7 @@ #include #include "base/command_line.h" +#include "base/debug_util.h" #include "base/eintr_wrapper.h" #include "base/file_path.h" #include "base/logging.h" @@ -15,6 +16,7 @@ #include "base/process_util.h" #include "base/scoped_ptr.h" #include "base/test/multiprocess_test.h" +#include "base/test/test_timeouts.h" #include "base/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/multiprocess_func_list.h" @@ -27,6 +29,7 @@ #if defined(OS_POSIX) #include #include +#include #include #include #endif @@ -41,11 +44,25 @@ namespace { #if defined(OS_WIN) -const wchar_t* const kProcessName = L"base_unittests.exe"; +const wchar_t kProcessName[] = L"base_unittests.exe"; #else -const wchar_t* const kProcessName = L"base_unittests"; +const wchar_t kProcessName[] = L"base_unittests"; #endif // defined(OS_WIN) +const char kSignalFileSlow[] = "SlowChildProcess.die"; +const char kSignalFileCrash[] = "CrashingChildProcess.die"; +const char kSignalFileKill[] = "KilledChildProcess.die"; + +#if defined(OS_WIN) +const int kExpectedStillRunningExitCode = 0x102; +const int kExpectedKilledExitCode = 1; +#else +const int kExpectedStillRunningExitCode = 0; +#endif + +// The longest we'll wait for a process, in milliseconds. +const int kMaxWaitTimeMs = TestTimeouts::action_max_timeout_ms(); + // Sleeps until file filename is created. void WaitToDie(const char* filename) { FILE *fp; @@ -62,6 +79,27 @@ void SignalChildren(const char* filename) { fclose(fp); } +// Using a pipe to the child to wait for an event was considered, but +// there were cases in the past where pipes caused problems (other +// libraries closing the fds, child deadlocking). This is a simple +// case, so it's not worth the risk. Using wait loops is discouraged +// in most instances. +base::TerminationStatus WaitForChildTermination(base::ProcessHandle handle, + int* exit_code) { + // Now we wait until the result is something other than STILL_RUNNING. + base::TerminationStatus status = base::TERMINATION_STATUS_STILL_RUNNING; + const int kIntervalMs = 20; + int waited = 0; + do { + status = base::GetTerminationStatus(handle, exit_code); + PlatformThread::Sleep(kIntervalMs); + waited += kIntervalMs; + } while (status == base::TERMINATION_STATUS_STILL_RUNNING && + waited < kMaxWaitTimeMs); + + return status; +} + } // namespace class ProcessUtilTest : public base::MultiProcessTest { @@ -79,40 +117,140 @@ MULTIPROCESS_TEST_MAIN(SimpleChildProcess) { TEST_F(ProcessUtilTest, SpawnChild) { base::ProcessHandle handle = this->SpawnChild("SimpleChildProcess", false); ASSERT_NE(base::kNullProcessHandle, handle); - EXPECT_TRUE(base::WaitForSingleProcess(handle, 5000)); + EXPECT_TRUE(base::WaitForSingleProcess(handle, kMaxWaitTimeMs)); base::CloseProcessHandle(handle); } MULTIPROCESS_TEST_MAIN(SlowChildProcess) { - WaitToDie("SlowChildProcess.die"); + WaitToDie(kSignalFileSlow); return 0; } TEST_F(ProcessUtilTest, KillSlowChild) { - remove("SlowChildProcess.die"); + remove(kSignalFileSlow); base::ProcessHandle handle = this->SpawnChild("SlowChildProcess", false); ASSERT_NE(base::kNullProcessHandle, handle); - SignalChildren("SlowChildProcess.die"); - EXPECT_TRUE(base::WaitForSingleProcess(handle, 5000)); + SignalChildren(kSignalFileSlow); + EXPECT_TRUE(base::WaitForSingleProcess(handle, kMaxWaitTimeMs)); base::CloseProcessHandle(handle); - remove("SlowChildProcess.die"); + remove(kSignalFileSlow); } -TEST_F(ProcessUtilTest, DidProcessCrash) { - remove("SlowChildProcess.die"); +TEST_F(ProcessUtilTest, GetTerminationStatusExit) { + remove(kSignalFileSlow); base::ProcessHandle handle = this->SpawnChild("SlowChildProcess", false); ASSERT_NE(base::kNullProcessHandle, handle); - bool child_exited = true; - EXPECT_FALSE(base::DidProcessCrash(&child_exited, handle)); - EXPECT_FALSE(child_exited); + int exit_code = 42; + EXPECT_EQ(base::TERMINATION_STATUS_STILL_RUNNING, + base::GetTerminationStatus(handle, &exit_code)); + EXPECT_EQ(kExpectedStillRunningExitCode, exit_code); + + SignalChildren(kSignalFileSlow); + exit_code = 42; + base::TerminationStatus status = + WaitForChildTermination(handle, &exit_code); + EXPECT_EQ(base::TERMINATION_STATUS_NORMAL_TERMINATION, status); + EXPECT_EQ(0, exit_code); + base::CloseProcessHandle(handle); + remove(kSignalFileSlow); +} - SignalChildren("SlowChildProcess.die"); - EXPECT_TRUE(base::WaitForSingleProcess(handle, 5000)); +#if !defined(OS_MACOSX) +// This test is disabled on Mac, since it's flaky due to ReportCrash +// taking a variable amount of time to parse and load the debug and +// symbol data for this unit test's executable before firing the +// signal handler. +// +// TODO(gspencer): turn this test process into a very small program +// with no symbols (instead of using the multiprocess testing +// framework) to reduce the ReportCrash overhead. + +MULTIPROCESS_TEST_MAIN(CrashingChildProcess) { + WaitToDie(kSignalFileCrash); +#if defined(OS_POSIX) + // Have to disable to signal handler for segv so we can get a crash + // instead of an abnormal termination through the crash dump handler. + ::signal(SIGSEGV, SIG_DFL); +#endif + // Make this process have a segmentation fault. + int* oops = NULL; + *oops = 0xDEAD; + return 1; +} + +TEST_F(ProcessUtilTest, GetTerminationStatusCrash) { + remove(kSignalFileCrash); + base::ProcessHandle handle = this->SpawnChild("CrashingChildProcess", + false); + ASSERT_NE(base::kNullProcessHandle, handle); - EXPECT_FALSE(base::DidProcessCrash(&child_exited, handle)); + int exit_code = 42; + EXPECT_EQ(base::TERMINATION_STATUS_STILL_RUNNING, + base::GetTerminationStatus(handle, &exit_code)); + EXPECT_EQ(kExpectedStillRunningExitCode, exit_code); + + SignalChildren(kSignalFileCrash); + exit_code = 42; + base::TerminationStatus status = + WaitForChildTermination(handle, &exit_code); + EXPECT_EQ(base::TERMINATION_STATUS_PROCESS_CRASHED, status); + +#if defined(OS_WIN) + EXPECT_EQ(0xc0000005, exit_code); +#elif defined(OS_POSIX) + int signaled = WIFSIGNALED(exit_code); + EXPECT_NE(0, signaled); + int signal = WTERMSIG(exit_code); + EXPECT_EQ(SIGSEGV, signal); +#endif + base::CloseProcessHandle(handle); + + // Reset signal handlers back to "normal". + base::EnableInProcessStackDumping(); + remove(kSignalFileCrash); +} +#endif // !defined(OS_MACOSX) + +MULTIPROCESS_TEST_MAIN(KilledChildProcess) { + WaitToDie(kSignalFileKill); +#if defined(OS_WIN) + // Kill ourselves. + HANDLE handle = ::OpenProcess(PROCESS_ALL_ACCESS, 0, ::GetCurrentProcessId()); + ::TerminateProcess(handle, kExpectedKilledExitCode); +#elif defined(OS_POSIX) + // Send a SIGKILL to this process, just like the OOM killer would. + ::kill(getpid(), SIGKILL); +#endif + return 1; +} + +TEST_F(ProcessUtilTest, GetTerminationStatusKill) { + remove(kSignalFileKill); + base::ProcessHandle handle = this->SpawnChild("KilledChildProcess", + false); + ASSERT_NE(base::kNullProcessHandle, handle); + + int exit_code = 42; + EXPECT_EQ(base::TERMINATION_STATUS_STILL_RUNNING, + base::GetTerminationStatus(handle, &exit_code)); + EXPECT_EQ(kExpectedStillRunningExitCode, exit_code); + + SignalChildren(kSignalFileKill); + exit_code = 42; + base::TerminationStatus status = + WaitForChildTermination(handle, &exit_code); + EXPECT_EQ(base::TERMINATION_STATUS_PROCESS_WAS_KILLED, status); +#if defined(OS_WIN) + EXPECT_EQ(kExpectedKilledExitCode, exit_code); +#elif defined(OS_POSIX) + int signaled = WIFSIGNALED(exit_code); + EXPECT_NE(0, signaled); + int signal = WTERMSIG(exit_code); + EXPECT_EQ(SIGKILL, signal); +#endif base::CloseProcessHandle(handle); - remove("SlowChildProcess.die"); + remove(kSignalFileKill); } // Ensure that the priority of a process is restored correctly after diff --git a/base/process_util_win.cc b/base/process_util_win.cc index 097888e..71f0a1a9 100644 --- a/base/process_util_win.cc +++ b/base/process_util_win.cc @@ -30,6 +30,20 @@ namespace { // System pagesize. This value remains constant on x86/64 architectures. const int PAGESIZE_KB = 4; +// Exit codes with special meanings on Windows. +const DWORD kNormalTerminationExitCode = 0; +const DWORD kDebuggerInactiveExitCode = 0xC0000354; +const DWORD kKeyboardInterruptExitCode = 0xC000013A; +const DWORD kDebuggerTerminatedExitCode = 0x40010004; + +// This exit code is used by the Windows task manager when it kills a +// process. It's value is obviously not that unique, and it's +// surprising to me that the task manager uses this value, but it +// seems to be common practice on Windows to test for it as an +// indication that the task manager has killed something if the +// process goes away. +const DWORD kProcessKilledExitCode = 1; + // HeapSetInformation function pointer. typedef BOOL (WINAPI* HeapSetFn)(HANDLE, HEAP_INFORMATION_CLASS, PVOID, SIZE_T); @@ -105,10 +119,10 @@ bool OpenProcessHandle(ProcessId pid, ProcessHandle* handle) { bool OpenPrivilegedProcessHandle(ProcessId pid, ProcessHandle* handle) { ProcessHandle result = OpenProcess(PROCESS_DUP_HANDLE | - PROCESS_TERMINATE | - PROCESS_QUERY_INFORMATION | - PROCESS_VM_READ | - SYNCHRONIZE, + PROCESS_TERMINATE | + PROCESS_QUERY_INFORMATION | + PROCESS_VM_READ | + SYNCHRONIZE, FALSE, pid); if (result == INVALID_HANDLE_VALUE) @@ -394,22 +408,33 @@ bool KillProcess(ProcessHandle process, int exit_code, bool wait) { return result; } -bool DidProcessCrash(bool* child_exited, ProcessHandle handle) { - DWORD exitcode = 0; +TerminationStatus GetTerminationStatus(ProcessHandle handle, int* exit_code) { + DWORD tmp_exit_code = 0; - if (!::GetExitCodeProcess(handle, &exitcode)) { + if (!::GetExitCodeProcess(handle, &tmp_exit_code)) { NOTREACHED(); - // Assume the child has exited. - if (child_exited) - *child_exited = true; - return false; + if (exit_code) { + // This really is a random number. We haven't received any + // information about the exit code, presumably because this + // process doesn't have permission to get the exit code, or + // because of some other cause for GetExitCodeProcess to fail + // (MSDN docs don't give the possible failure error codes for + // this function, so it could be anything). But we don't want + // to leave exit_code uninitialized, since that could cause + // random interpretations of the exit code. So we assume it + // terminated "normally" in this case. + *exit_code = kNormalTerminationExitCode; + } + // Assume the child has exited normally if we can't get the exit + // code. + return TERMINATION_STATUS_NORMAL_TERMINATION; } - if (exitcode == STILL_ACTIVE) { + if (tmp_exit_code == STILL_ACTIVE) { DWORD wait_result = WaitForSingleObject(handle, 0); if (wait_result == WAIT_TIMEOUT) { - if (child_exited) - *child_exited = false; - return false; + if (exit_code) + *exit_code = wait_result; + return TERMINATION_STATUS_STILL_RUNNING; } DCHECK_EQ(WAIT_OBJECT_0, wait_result); @@ -417,27 +442,24 @@ bool DidProcessCrash(bool* child_exited, ProcessHandle handle) { // Strange, the process used 0x103 (STILL_ACTIVE) as exit code. NOTREACHED(); - return false; + return TERMINATION_STATUS_ABNORMAL_TERMINATION; } - // We're sure the child has exited. - if (child_exited) - *child_exited = true; + if (exit_code) + *exit_code = tmp_exit_code; - // Warning, this is not generic code; it heavily depends on the way - // the rest of the code kills a process. - - if (exitcode == PROCESS_END_NORMAL_TERMINATION || - exitcode == PROCESS_END_KILLED_BY_USER || - exitcode == PROCESS_END_PROCESS_WAS_HUNG || - exitcode == 0xC0000354 || // STATUS_DEBUGGER_INACTIVE. - exitcode == 0xC000013A || // Control-C/end session. - exitcode == 0x40010004) { // Debugger terminated process/end session. - return false; + switch (tmp_exit_code) { + case kNormalTerminationExitCode: + return TERMINATION_STATUS_NORMAL_TERMINATION; + case kDebuggerInactiveExitCode: // STATUS_DEBUGGER_INACTIVE. + case kKeyboardInterruptExitCode: // Control-C/end session. + case kDebuggerTerminatedExitCode: // Debugger terminated process. + case kProcessKilledExitCode: // Task manager kill. + return TERMINATION_STATUS_PROCESS_WAS_KILLED; + default: + // All other exit codes indicate crashes. + return TERMINATION_STATUS_PROCESS_CRASHED; } - - // All other exit codes indicate crashes. - return true; } bool WaitForExitCode(ProcessHandle handle, int* exit_code) { -- cgit v1.1