Fixed a startup race condition.

Although the new test was written in a platform independent way, it is only added to the Widows specific portion of the ui_test target in the gyp file because it wasn't tried yet on the other platforms. The bug was found and the fix was written in the windows specific version of the process singleton anyway... But if people working on the other platforms would like to try the test there, that would be great. :-) BUG=9593 TEST=A new test have been created to validate this. Review URL: http://codereview.chromium.org/661339 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@40629 0039d316-1c4b-4281-b951-d872f2087c98
author: mad@chromium.org <mad@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-03-04 16:16:19 +0000
committer: mad@chromium.org <mad@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-03-04 16:16:19 +0000
commit: bbef41f026b0c7aecac8169e40020b892163fd08 (patch)
tree: 6a1e7fbd24b55e871cf776e4ada46b95a80bfddb
parent: 40b01df66eca8855d00a65baf27bd6bfd16454f6 (diff)
download: chromium_src-bbef41f026b0c7aecac8169e40020b892163fd08.zip
chromium_src-bbef41f026b0c7aecac8169e40020b892163fd08.tar.gz
chromium_src-bbef41f026b0c7aecac8169e40020b892163fd08.tar.bz2
3 files changed, 305 insertions, 6 deletions
diff --git a/chrome/browser/process_singleton_win.cc b/chrome/browser/process_singleton_win.cc
index 0f2b9af..68844235 100644
--- a/chrome/browser/process_singleton_win.cc
+++ b/chrome/browser/process_singleton_win.cc
@@ -10,6 +10,7 @@
 #include "base/command_line.h"
 #include "base/path_service.h"
 #include "base/process_util.h"
+#include "base/scoped_handle.h"
 #include "base/win_util.h"
 #include "chrome/browser/browser_init.h"
 #include "chrome/browser/browser_process.h"
@@ -18,6 +19,7 @@
 #include "chrome/common/chrome_constants.h"
 #include "chrome/common/chrome_paths.h"
 #include "chrome/common/result_codes.h"
+#include "chrome/installer/util/browser_distribution.h"
 #include "grit/chromium_strings.h"
 #include "grit/generated_resources.h"
 
@@ -37,12 +39,38 @@ BOOL CALLBACK BrowserWindowEnumeration(HWND window, LPARAM param) {
 // Look for a Chrome instance that uses the same profile directory.
 ProcessSingleton::ProcessSingleton(const FilePath& user_data_dir)
     : window_(NULL), locked_(false), foreground_window_(NULL) {
-  // FindWindoEx and Create() should be one atomic operation in order to not
-  // have a race condition.
-  remote_window_ = FindWindowEx(HWND_MESSAGE, NULL, chrome::kMessageWindowClass,
-                                user_data_dir.ToWStringHack().c_str());
-  if (!remote_window_)
-    Create();
+  std::wstring user_data_dir_str(user_data_dir.ToWStringHack());
+  remote_window_ = FindWindowEx(HWND_MESSAGE, NULL,
+                                chrome::kMessageWindowClass,
+                                user_data_dir_str.c_str());
+  if (!remote_window_) {
+    // Make sure we will be the one and only process creating the window.
+    // We use a named Mutex since we are protecting against multi-process
+    // access. As documented, it's clearer to NOT request ownership on creation
+    // since it isn't guaranteed we will get it. It is better to create it
+    // without ownership and explicitly get the ownership afterward.
+    std::wstring mutex_name(L"Local\\ProcessSingletonStartup!");
+    mutex_name += BrowserDistribution::GetDistribution()->GetAppGuid();
+    ScopedHandle only_me(CreateMutex(NULL, FALSE, mutex_name.c_str()));
+    DCHECK(only_me.Get() != NULL) << "GetLastError = " << GetLastError();
+
+    // This is how we acquire the mutex (as opposed to the initial ownership).
+    DWORD result = WaitForSingleObject(only_me, INFINITE);
+    DCHECK(result == WAIT_OBJECT_0) << "Result = " << result <<
+        "GetLastError = " << GetLastError();
+
+    // We now own the mutex so we are the only process that can create the
+    // window at this time, but we must still check if someone created it
+    // between the time where we looked for it above and the time the mutex
+    // was given to us.
+    remote_window_ = FindWindowEx(HWND_MESSAGE, NULL,
+                                  chrome::kMessageWindowClass,
+                                  user_data_dir_str.c_str());
+    if (!remote_window_)
+      Create();
+    BOOL success = ReleaseMutex(only_me);
+    DCHECK(success) << "GetLastError = " << GetLastError();
+  }
 }
 
 ProcessSingleton::~ProcessSingleton() {
diff --git a/chrome/browser/process_singleton_win_uitest.cc b/chrome/browser/process_singleton_win_uitest.cc
new file mode 100644
index 0000000..55ca2e6
--- /dev/null
+++ b/chrome/browser/process_singleton_win_uitest.cc
@@ -0,0 +1,267 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This test validates that the ProcessSingleton class properly makes sure
+// that there is only one main browser process.
+//
+// It is currently compiled and ran on the windows platform only but has been
+// written in a platform independent way (using the process/threads/sync
+// routines from base). So it does compile fine on Mac and Linux but fails to
+// launch the app and thus have not been tested for success/failures. Since it
+// was written to validate a change made to fix a bug only seen on Windows, it
+// was left as is until it gets to be needed on the other platforms.
+
+
+#include <list>
+
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/process_util.h"
+#include "base/ref_counted.h"
+#include "base/thread.h"
+#include "base/waitable_event.h"
+#include "chrome/common/chrome_constants.h"
+#include "chrome/test/ui/ui_test.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace {
+
+// This is for the code that is to be ran in multiple threads at once,
+// to stress a race condition on first process start.
+// We use the thread safe ref counted base class so that we can use the
+// NewRunnableMethod class to run the StartChrome methods in many threads.
+class ChromeStarter : public base::RefCountedThreadSafe<ChromeStarter> {
+ public:
+  ChromeStarter()
+      : ready_event_(false /* manual */, false /* signaled */),
+        done_event_(false /* manual */, false /* signaled */),
+        process_handle_(NULL),
+        process_terminated_(false) {
+  }
+
+  // We must reset some data members since we reuse the same ChromeStarter
+  // object and start/stop it a few times. We must start fresh! :-)
+  void Reset() {
+    ready_event_.Reset();
+    done_event_.Reset();
+    if (process_handle_ != NULL)
+      base::CloseProcessHandle(process_handle_);
+    process_handle_ = NULL;
+    process_terminated_ = false;
+  }
+
+  void StartChrome(base::WaitableEvent* start_event) {
+    // TODO(port): For some reason the LaunchApp call below fails even though
+    // we use the platform independent constant for the executable path.
+    // This is the current blocker for running this test on Mac & Linux.
+    CommandLine command_line(FilePath::FromWStringHack(
+        chrome::kBrowserProcessExecutablePath));
+
+    // Try to get all threads to launch the app at the same time.
+    // So let the test know we are ready.
+    ready_event_.Signal();
+    // And then wait for the test to tell us to GO!
+    ASSERT_NE(static_cast<base::WaitableEvent*>(NULL), start_event);
+    ASSERT_TRUE(start_event->Wait());
+
+    // Here we don't wait for the app to be terminated because one of the
+    // process will stay alive while the others will be restarted. If we would
+    // wait here, we would never get a handle to the main process...
+    base::LaunchApp(command_line, false /* wait */,
+                    false /* hidden */, &process_handle_);
+    ASSERT_NE(static_cast<base::ProcessHandle>(NULL), process_handle_);
+
+    // We can wait on the handle here, we should get stuck on one and only
+    // one process. The test below will take care of killing that process
+    // to unstuck us once it confirms there is only one.
+    static const int64 kWaitForProcessDeath = 5000;
+    process_terminated_ = base::WaitForSingleProcess(process_handle_,
+                                                     kWaitForProcessDeath);
+    // Let the test know we are done.
+    done_event_.Signal();
+  }
+
+  // Public access to simplify the test code using them.
+  base::WaitableEvent ready_event_;
+  base::WaitableEvent done_event_;
+  base::ProcessHandle process_handle_;
+  bool process_terminated_;
+
+ private:
+  friend class base::RefCountedThreadSafe<ChromeStarter>;
+  ~ChromeStarter() {
+    if (process_handle_ != NULL)
+      base::CloseProcessHandle(process_handle_);
+  }
+  DISALLOW_COPY_AND_ASSIGN(ChromeStarter);
+};
+
+// Our test fixture that initializes and holds onto a few global vars.
+class ProcessSingletonWinTest : public UITest {
+ public:
+  ProcessSingletonWinTest()
+      // We use a manual reset so that all threads wake up at once when signaled
+      // and thus we must manually reset it for each attempt.
+      : threads_waker_(true /* manual */, false /* signaled */) {
+  }
+
+  void SetUp() {
+    // Start the threads and create the starters.
+    for (size_t i = 0; i < kNbThreads; ++i) {
+      chrome_starter_threads_[i].reset(new base::Thread("ChromeStarter"));
+      ASSERT_TRUE(chrome_starter_threads_[i]->Start());
+      chrome_starters_[i] = new ChromeStarter;
+    }
+  }
+
+  void TearDown() {
+    // Stop the threads.
+    for (size_t i = 0; i < kNbThreads; ++i)
+      chrome_starter_threads_[i]->Stop();
+  }
+
+  // This method is used to make sure we kill the main browser process after
+  // all of its child processes have successfully attached to it. This was added
+  // when we realized that if we just kill the parent process right away, we
+  // sometimes end up with dangling child processes. If we Sleep for a certain
+  // amount of time, we are OK... So we introduced this method to avoid a
+  // flaky wait. Instead, we kill all descendants of the main process after we
+  // killed it, relying on the fact that we can still get the parent id of a
+  // child process, even when the parent dies.
+  void KillProcessTree(base::ProcessHandle process_handle) {
+    class ProcessTreeFilter : public base::ProcessFilter {
+     public:
+      explicit ProcessTreeFilter(base::ProcessId parent_pid) {
+        ancestor_pids_.insert(parent_pid);
+      }
+      virtual bool Includes(base::ProcessId pid,
+                            base::ProcessId parent_pid) const {
+        if (ancestor_pids_.find(parent_pid) != ancestor_pids_.end()) {
+          ancestor_pids_.insert(pid);
+          return true;
+        } else {
+          return false;
+        }
+      }
+     private:
+      mutable std::set<base::ProcessId> ancestor_pids_;
+    } process_tree_filter(base::GetProcId(process_handle));
+
+    // Start by explicitly killing the main process we know about...
+    static const int kExitCode = 42;
+    EXPECT_TRUE(base::KillProcess(process_handle, kExitCode, true /* wait */));
+
+    // Then loop until we can't find any of its descendant.
+    // But don't try more than kNbTries times...
+    static const int kNbTries = 10;
+    int num_tries = 0;
+    while (base::GetProcessCount(chrome::kBrowserProcessExecutablePath,
+        &process_tree_filter) > 0 && num_tries++ < kNbTries) {
+      base::KillProcesses(chrome::kBrowserProcessExecutablePath,
+                          kExitCode, &process_tree_filter);
+    }
+    DLOG_IF(ERROR, num_tries >= kNbTries) << "Failed to kill all processes!";
+  }
+
+  // Since this is a hard to reproduce problem, we make a few attempts.
+  // We stop the attempts at the first error, and when there are no errors,
+  // we don't time-out of any wait, so it executes quite fast anyway.
+  static const size_t kNbAttempts = 5;
+
+  // The idea is to start chrome from multiple threads all at once.
+  static const size_t kNbThreads = 5;
+  scoped_refptr<ChromeStarter> chrome_starters_[kNbThreads];
+  scoped_ptr<base::Thread> chrome_starter_threads_[kNbThreads];
+
+  // The event that will get all threads to wake up simultaneously and try
+  // to start a chrome process at the same time.
+  base::WaitableEvent threads_waker_;
+};
+
+
+TEST_F(ProcessSingletonWinTest, StartupRaceCondition) {
+  // We use this to stop the attempts loop on the first failure.
+  bool failed = false;
+  for (size_t attempt = 0; attempt < kNbAttempts && !failed; ++attempt) {
+    SCOPED_TRACE(testing::Message() << "Attempt: " << attempt << ".");
+    // We use a single event to get all threads to do the AppLaunch at the same
+    // time...
+    threads_waker_.Reset();
+
+    // Here we prime all the threads with a ChromeStarter that will wait for
+    // our signal to launch its chrome process.
+    for (size_t i = 0; i < kNbThreads; ++i) {
+      ASSERT_NE(static_cast<ChromeStarter*>(NULL), chrome_starters_[i].get());
+      chrome_starters_[i]->Reset();
+
+      ASSERT_TRUE(chrome_starter_threads_[i]->IsRunning());
+      ASSERT_NE(static_cast<MessageLoop*>(NULL),
+                chrome_starter_threads_[i]->message_loop());
+
+      chrome_starter_threads_[i]->message_loop()->PostTask(
+          FROM_HERE, NewRunnableMethod(chrome_starters_[i].get(),
+                                       &ChromeStarter::StartChrome,
+                                       &threads_waker_));
+    }
+
+    // Wait for all the starters to be ready.
+    // We could replace this loop if we ever implement a WaitAll().
+    for (size_t i = 0; i < kNbThreads; ++i) {
+      SCOPED_TRACE(testing::Message() << "Waiting on thread: " << i << ".");
+      ASSERT_TRUE(chrome_starters_[i]->ready_event_.Wait());
+    }
+    // GO!
+    threads_waker_.Signal();
+
+    // As we wait for all threads to signal that they are done, we remove their
+    // index from this vector so that we get left with only the index of
+    // the thread that started the main process.
+    std::vector<size_t> pending_starters(kNbThreads);
+    for (size_t i = 0; i < kNbThreads; ++i)
+      pending_starters[i] = i;
+
+    // We use a local array of starter's done events we must wait on...
+    // These are collected from the starters that we have not yet been removed
+    // from the pending_starters vector.
+    base::WaitableEvent* starters_done_events[kNbThreads];
+    // At the end, "There can be only one" main browser process alive.
+    while (pending_starters.size() > 1) {
+      SCOPED_TRACE(testing::Message() << pending_starters.size() <<
+                   " starters left.");
+      for (size_t i = 0; i < pending_starters.size(); ++i) {
+        starters_done_events[i] =
+            &chrome_starters_[pending_starters[i]]->done_event_;
+      }
+      size_t done_index = base::WaitableEvent::WaitMany(
+          starters_done_events, pending_starters.size());
+      size_t starter_index = pending_starters[done_index];
+      // If the starter is done but has not marked itself as terminated,
+      // it is because it timed out of its WaitForSingleProcess(). Only the
+      // last one standing should be left waiting... So we failed...
+      EXPECT_TRUE(chrome_starters_[starter_index]->process_terminated_ ||
+                  failed) << "There is more than one main process.";
+      if (!chrome_starters_[starter_index]->process_terminated_) {
+        // This will stop the "for kNbAttempts" loop.
+        failed = true;
+        // But we let the last loop turn finish so that we can properly
+        // kill all remaining processes. Starting with this one...
+        if (chrome_starters_[starter_index]->process_handle_ != NULL) {
+          KillProcessTree(chrome_starters_[starter_index]->process_handle_);
+        }
+      }
+      pending_starters.erase(pending_starters.begin() + done_index);
+    }
+
+    // "There can be only one!" :-)
+    ASSERT_EQ(static_cast<size_t>(1), pending_starters.size());
+    size_t last_index = pending_starters.front();
+    pending_starters.empty();
+    if (chrome_starters_[last_index]->process_handle_ != NULL) {
+      KillProcessTree(chrome_starters_[last_index]->process_handle_);
+      chrome_starters_[last_index]->done_event_.Wait();
+    }
+  }
+}
+
+}  // namespace
diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi
index 537ab5c..b18ff4c 100644
--- a/chrome/chrome_tests.gypi
+++ b/chrome/chrome_tests.gypi
@@ -291,6 +291,7 @@
         'browser/pref_service_uitest.cc',
         'browser/printing/printing_layout_uitest.cc',
         'browser/process_singleton_linux_uitest.cc',
+        'browser/process_singleton_win_uitest.cc',
         'browser/renderer_host/resource_dispatcher_host_uitest.cc',
         'browser/sanity_uitest.cc',
         'browser/session_history_uitest.cc',
@@ -391,6 +392,9 @@
             'browser/extensions/extension_uitest.cc',
             'browser/media_uitest.cc',
             'browser/printing/printing_layout_uitest.cc',
+            # TODO(port)? (this one compiles fine on mac and linux, but it fails
+            # to LaunchApp and thus have not been tested for success either).
+            'browser/process_singleton_win_uitest.cc',
             'browser/views/find_bar_host_uitest.cc',
             'common/logging_chrome_uitest.cc',
             'test/ui/sandbox_uitests.cc',
author	mad@chromium.org <mad@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-03-04 16:16:19 +0000
committer	mad@chromium.org <mad@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-03-04 16:16:19 +0000
commit	bbef41f026b0c7aecac8169e40020b892163fd08 (patch)
tree	6a1e7fbd24b55e871cf776e4ada46b95a80bfddb
parent	40b01df66eca8855d00a65baf27bd6bfd16454f6 (diff)
download	chromium_src-bbef41f026b0c7aecac8169e40020b892163fd08.zip chromium_src-bbef41f026b0c7aecac8169e40020b892163fd08.tar.gz chromium_src-bbef41f026b0c7aecac8169e40020b892163fd08.tar.bz2