base: Add perf tests target and threading perf-test.

Some base utilities are inherently expensive. Part of this is the underlying operating system cost, but there is also some overhead in our own code. Even for operations we can't improve, it's nice to have visibility into their cost, to guide designs and optimizations. This patch adds a "base_perftests" target for all platforms, and adds tests for our task-posting performance. base::WaitableEvent, ConditionVariable, and a minimal posix WaitableEvent are also tested for comparison. BUG=345471 Review URL: https://codereview.chromium.org/187833004 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@256197 0039d316-1c4b-4281-b951-d872f2087c98
author: epenner@chromium.org <epenner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2014-03-11 11:22:10 +0000
committer: epenner@chromium.org <epenner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2014-03-11 11:22:10 +0000
commit: a54ae59eaa5a46c78fceda6214cc11f9e573bc3c (patch)
tree: bafd931df2a5d10814cd95a8a26222983a0a3967 /base/threading
parent: 65a8738ea33cf2ed401f6a157cff3a6f349c66f0 (diff)
download: chromium_src-a54ae59eaa5a46c78fceda6214cc11f9e573bc3c.zip
chromium_src-a54ae59eaa5a46c78fceda6214cc11f9e573bc3c.tar.gz
chromium_src-a54ae59eaa5a46c78fceda6214cc11f9e573bc3c.tar.bz2
1 files changed, 288 insertions, 0 deletions
diff --git a/base/threading/thread_perftest.cc b/base/threading/thread_perftest.cc
new file mode 100644
index 0000000..b525722
--- /dev/null
+++ b/base/threading/thread_perftest.cc
@@ -0,0 +1,288 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/base_switches.h"
+#include "base/bind.h"
+#include "base/command_line.h"
+#include "base/memory/scoped_vector.h"
+#include "base/synchronization/condition_variable.h"
+#include "base/synchronization/lock.h"
+#include "base/synchronization/waitable_event.h"
+#include "base/threading/thread.h"
+#include "base/time/time.h"
+#include "build/build_config.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/perf/perf_test.h"
+
+#if defined(OS_POSIX)
+#include <pthread.h>
+#endif
+
+namespace base {
+
+namespace {
+
+const int kNumRuns = 100000;  // Hops per measured run; results are per-hop.
+
+// Base class for a threading perf-test. This sets up some threads for the
+// test and measures the clock-time in addition to time spent on each thread.
+class ThreadPerfTest : public testing::Test {
+ public:
+  ThreadPerfTest()
+      : done_(false, false) {
+    // Disable the task profiler as it adds significant cost!
+    CommandLine::Init(0, NULL);
+    CommandLine::ForCurrentProcess()->AppendSwitchASCII(
+        switches::kProfilerTiming,
+        switches::kProfilerTimingDisabledValue);
+  }
+
+  // Hooks implemented by each test. Subclasses must use threads_ so that
+  // their cpu-time can be measured. A test must return from PingPong() _and_
+  // call FinishMeasurement() from any thread to complete the test.
+  virtual void Init() {}  // Runs after threads_ are started, before timing.
+  virtual void PingPong(int hops) = 0;  // Starts |hops| hops of timed work.
+  virtual void Reset() {}  // Runs after threads_ has been emptied.
+  // Stores the calling thread's ThreadNow() in |ticks|, then signals |done|.
+  void TimeOnThread(base::TimeTicks* ticks, base::WaitableEvent* done) {
+    *ticks = base::TimeTicks::ThreadNow();
+    done->Signal();
+  }
+  // Posts TimeOnThread() to |thread| and blocks until it reports its value.
+  base::TimeTicks ThreadNow(base::Thread* thread) {
+    base::WaitableEvent done(false, false);
+    base::TimeTicks ticks;
+    thread->message_loop_proxy()->PostTask(
+        FROM_HERE,
+        base::Bind(&ThreadPerfTest::TimeOnThread,
+                   base::Unretained(this),
+                   &ticks,
+                   &done));
+    done.Wait();
+    return ticks;
+  }
+  // Times PingPong(kNumRuns) on |num_threads| threads; prints us/hop results.
+  void RunPingPongTest(const std::string& name, unsigned num_threads) {
+    // Create threads and collect starting cpu-time for each thread.
+    std::vector<base::TimeTicks> thread_starts;
+    while (threads_.size() < num_threads) {
+      threads_.push_back(new base::Thread("PingPonger"));
+      threads_.back()->Start();
+      if (base::TimeTicks::IsThreadNowSupported())
+        thread_starts.push_back(ThreadNow(threads_.back()));
+    }
+
+    Init();
+
+    base::TimeTicks start = base::TimeTicks::HighResNow();
+    PingPong(kNumRuns);
+    done_.Wait();  // Signaled via FinishMeasurement().
+    base::TimeTicks end = base::TimeTicks::HighResNow();
+
+    // Gather the cpu-time spent on each thread. This does one extra task,
+    // but that should be in the noise given enough runs.
+    base::TimeDelta thread_time;
+    while (threads_.size()) {  // Back-to-front, matching thread_starts.
+      if (base::TimeTicks::IsThreadNowSupported()) {
+        thread_time += ThreadNow(threads_.back()) - thread_starts.back();
+        thread_starts.pop_back();
+      }
+      threads_.pop_back();
+    }
+
+    Reset();
+
+    double num_runs = static_cast<double>(kNumRuns);
+    double us_per_task_clock = (end - start).InMicroseconds() / num_runs;
+    double us_per_task_cpu = thread_time.InMicroseconds() / num_runs;
+
+    // Wall-clock time per hop.
+    perf_test::PrintResult(
+        "task", "", name + "_time ", us_per_task_clock, "us/hop", true);
+
+    // Total cpu-time per hop summed across threads, if available (likely
+    // higher than the wall-clock figure).
+    if (base::TimeTicks::IsThreadNowSupported()) {
+      perf_test::PrintResult(
+          "task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true);
+    }
+  }
+
+ protected:
+  void FinishMeasurement() { done_.Signal(); }  // Wakes RunPingPongTest().
+  ScopedVector<base::Thread> threads_;  // Created in RunPingPongTest().
+
+ private:
+  base::WaitableEvent done_;  // Signaled when the timed work completes.
+};
+
+// Class to test task performance by posting empty tasks back and forth.
+class TaskPerfTest : public ThreadPerfTest {
+  base::Thread* NextThread(int count) {
+    return threads_[count % threads_.size()];  // Round-robin over threads_.
+  }
+
+  virtual void PingPong(int hops) OVERRIDE {
+    if (!hops) {  // No hops left: the measurement is complete.
+      FinishMeasurement();
+      return;
+    }
+    NextThread(hops)->message_loop_proxy()->PostTask(  // One hop per task.
+        FROM_HERE,
+        base::Bind(
+            &ThreadPerfTest::PingPong, base::Unretained(this), hops - 1));
+  }
+};
+
+// This tries to test the 'best-case' as well as the 'worst-case' task posting
+// performance. The best-case keeps one thread alive such that it never yields,
+// while the worst-case forces a context switch for every task. Four threads are
+// used to ensure the threads do yield (with just two it might be possible for
+// both threads to stay awake if they can signal each other fast enough).
+TEST_F(TaskPerfTest, TaskPingPong) {
+  RunPingPongTest("1_Task_Threads", 1);  // Best case: a single thread.
+  RunPingPongTest("4_Task_Threads", 4);  // Worst case: four threads.
+}
+
+// Class to test our WaitableEvent performance by signaling back and forth.
+// WaitableEvent is templated so we can also compare with other versions.
+template <typename WaitableEventType>
+class EventPerfTest : public ThreadPerfTest {
+ public:
+  virtual void Init() OVERRIDE {  // Creates one event per thread in threads_.
+    for (size_t i = 0; i < threads_.size(); i++)
+      events_.push_back(new WaitableEventType(false, false));
+  }
+
+  virtual void Reset() OVERRIDE { events_.clear(); }
+  // Body run on thread |event|: waits on its event, then signals the next.
+  void WaitAndSignalOnThread(size_t event) {
+    int next_event = (event + 1) % events_.size();  // Wraps around to 0.
+    int my_hops = 0;
+    do {
+      events_[event]->Wait();
+      my_hops = --remaining_hops_;  // We own 'hops' between Wait and Signal.
+      events_[next_event]->Signal();
+    } while (my_hops > 0);
+    // Once we are done, all threads will signal as hops passes zero.
+    // We only signal completion once, on the thread that reaches zero.
+    if (!my_hops)
+      FinishMeasurement();
+  }
+
+  virtual void PingPong(int hops) OVERRIDE {
+    remaining_hops_ = hops;
+    for (size_t i = 0; i < threads_.size(); i++) {
+      threads_[i]->message_loop_proxy()->PostTask(
+          FROM_HERE,
+          base::Bind(&EventPerfTest::WaitAndSignalOnThread,
+                     base::Unretained(this),
+                     i));
+    }
+
+    // Kick off the Signal ping-ponging.
+    events_.front()->Signal();
+  }
+
+  int remaining_hops_;  // Shared countdown; set in PingPong().
+  ScopedVector<WaitableEventType> events_;  // events_[i] wakes threads_[i].
+};
+
+// Similar to the task posting test, this tests the equivalent ping-pong
+// using WaitableEvents. We only test four threads (worst-case), but we
+// might want to craft a way to test the best-case (where the thread doesn't
+// end up blocking because the event is already signaled).
+typedef EventPerfTest<base::WaitableEvent> WaitableEventPerfTest;
+TEST_F(WaitableEventPerfTest, EventPingPong) {
+  RunPingPongTest("4_WaitableEvent_Threads", 4);
+}
+
+// Minimal auto-reset event built from base::Lock and ConditionVariable.
+class ConditionVariableEvent {
+ public:
+  ConditionVariableEvent(bool manual_reset, bool initially_signaled)
+      : cond_(&lock_), signaled_(false) {
+    DCHECK(!manual_reset);  // Only auto-reset is implemented.
+    DCHECK(!initially_signaled);  // Always starts unsignaled.
+  }
+
+  void Signal() {
+    {
+      base::AutoLock scoped_lock(lock_);
+      signaled_ = true;
+    }
+    cond_.Signal();  // Notify after lock_ has been released.
+  }
+
+  void Wait() {
+    base::AutoLock scoped_lock(lock_);
+    while (!signaled_)  // Re-check the flag after every wakeup.
+      cond_.Wait();
+    signaled_ = false;  // Auto-reset: consume the signal.
+  }
+
+ private:
+  base::Lock lock_;
+  base::ConditionVariable cond_;
+  bool signaled_;  // Read and written under lock_ in Signal() and Wait().
+};
+
+// This is meant to test the absolute minimal context switching time
+// using our own base synchronization code (base::Lock + ConditionVariable).
+typedef EventPerfTest<ConditionVariableEvent> ConditionVariablePerfTest;
+TEST_F(ConditionVariablePerfTest, EventPingPong) {
+  RunPingPongTest("4_ConditionVariable_Threads", 4);
+}
+
+#if defined(OS_POSIX)
+
+// Absolutely 100% minimal posix waitable event. If there is a better/faster
+// way to force a context switch, we should use that instead.
+class PthreadEvent {
+ public:
+  PthreadEvent(bool manual_reset, bool initially_signaled) {
+    DCHECK(!manual_reset);  // Only auto-reset is implemented.
+    DCHECK(!initially_signaled);  // Always starts unsignaled.
+    pthread_mutex_init(&mutex_, 0);  // 0: default mutex attributes.
+    pthread_cond_init(&cond_, 0);  // 0: default condition attributes.
+    signaled_ = false;
+  }
+
+  ~PthreadEvent() {
+    pthread_cond_destroy(&cond_);
+    pthread_mutex_destroy(&mutex_);
+  }
+
+  void Signal() {
+    pthread_mutex_lock(&mutex_);
+    signaled_ = true;
+    pthread_mutex_unlock(&mutex_);
+    pthread_cond_signal(&cond_);  // Notify after mutex_ has been released.
+  }
+
+  void Wait() {
+    pthread_mutex_lock(&mutex_);
+    while (!signaled_)  // Re-check the flag after every wakeup.
+      pthread_cond_wait(&cond_, &mutex_);
+    signaled_ = false;  // Auto-reset: consume the signal.
+    pthread_mutex_unlock(&mutex_);
+  }
+
+ private:
+  bool signaled_;  // Read and written under mutex_ in Signal() and Wait().
+  pthread_mutex_t mutex_;
+  pthread_cond_t cond_;
+};
+
+// This is meant to test the absolute minimal context switching time.
+// If there is any faster way to do this we should substitute it in.
+typedef EventPerfTest<PthreadEvent> PthreadEventPerfTest;
+TEST_F(PthreadEventPerfTest, EventPingPong) {
+  RunPingPongTest("4_PthreadCondVar_Threads", 4);
+}
+
+#endif  // defined(OS_POSIX)
+
+}  // namespace
+
+}  // namespace base
author	epenner@chromium.org <epenner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2014-03-11 11:22:10 +0000
committer	epenner@chromium.org <epenner@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2014-03-11 11:22:10 +0000
commit	a54ae59eaa5a46c78fceda6214cc11f9e573bc3c (patch)
tree	bafd931df2a5d10814cd95a8a26222983a0a3967 /base/threading
parent	65a8738ea33cf2ed401f6a157cff3a6f349c66f0 (diff)
download	chromium_src-a54ae59eaa5a46c78fceda6214cc11f9e573bc3c.zip chromium_src-a54ae59eaa5a46c78fceda6214cc11f9e573bc3c.tar.gz chromium_src-a54ae59eaa5a46c78fceda6214cc11f9e573bc3c.tar.bz2