summaryrefslogtreecommitdiffstats
path: root/base/time
diff options
context:
space:
mode:
authorfdoray <fdoray@chromium.org>2015-10-14 20:46:40 -0700
committerCommit bot <commit-bot@chromium.org>2015-10-15 03:47:30 +0000
commitdc2a659b0ad54c3dcb632b876e40a71fa03019e9 (patch)
treee3f3a87c7e8a03716389609435214f0be6bd87bf /base/time
parent5d49f7978f42d05431b8ed851050ce0687938512 (diff)
downloadchromium_src-dc2a659b0ad54c3dcb632b876e40a71fa03019e9.zip
chromium_src-dc2a659b0ad54c3dcb632b876e40a71fa03019e9.tar.gz
chromium_src-dc2a659b0ad54c3dcb632b876e40a71fa03019e9.tar.bz2
Implement ThreadTicks::Now on Windows.
Use QueryThreadCycleTime() to get the number of CPU clock cycles used by the current thread. Convert it to microseconds using a measured TSC frequency.

The value returned by QueryThreadCycleTime() is based on the rdtsc instruction. For several years, Intel has been shipping CPUs with a constant-rate counter, which means that the QueryThreadCycleTime() results are directly proportional to wall-clock time on most systems (see crbug.com/280743#c15).

ThreadTicks::IsSupported() will return false if the CPU doesn't have a constant-rate TSC.

BUG=280743
CQ_INCLUDE_TRYBOTS=tryserver.blink:linux_blink_rel

Review URL: https://codereview.chromium.org/1390743002

Cr-Commit-Position: refs/heads/master@{#354213}
Diffstat (limited to 'base/time')
-rw-r--r--base/time/time.h29
-rw-r--r--base/time/time_unittest.cc1
-rw-r--r--base/time/time_win.cc114
-rw-r--r--base/time/time_win_unittest.cc57
4 files changed, 178 insertions, 23 deletions
diff --git a/base/time/time.h b/base/time/time.h
index e0435d0..8da08ff 100644
--- a/base/time/time.h
+++ b/base/time/time.h
@@ -79,6 +79,8 @@
// For FILETIME in FromFileTime, until it moves to a new converter class.
// See TODO(iyengar) below.
#include <windows.h>
+
+#include "base/gtest_prod_util.h"
#endif
#include <limits>
@@ -733,16 +735,28 @@ class BASE_EXPORT ThreadTicks : public time_internal::TimeBase<ThreadTicks> {
#if (defined(_POSIX_THREAD_CPUTIME) && (_POSIX_THREAD_CPUTIME >= 0)) || \
(defined(OS_MACOSX) && !defined(OS_IOS)) || defined(OS_ANDROID)
return true;
+#elif defined(OS_WIN)
+ return IsSupportedWin();
#else
return false;
#endif
}
+ // Waits until the initialization is completed. Needs to be guarded with a
+ // call to IsSupported().
+ static void WaitUntilInitialized() {
+#if defined(OS_WIN)
+ WaitUntilInitializedWin();
+#endif
+ }
+
// Returns thread-specific CPU-time on systems that support this feature.
// Needs to be guarded with a call to IsSupported(). Use this timer
// to (approximately) measure how much time the calling thread spent doing
// actual work vs. being de-scheduled. May return bogus results if the thread
- // migrates to another CPU between two calls.
+ // migrates to another CPU between two calls. Returns an empty ThreadTicks
+ // object until the initialization is completed. If a clock reading is
+ // absolutely needed, call WaitUntilInitialized() before this method.
static ThreadTicks Now();
private:
@@ -752,6 +766,19 @@ class BASE_EXPORT ThreadTicks : public time_internal::TimeBase<ThreadTicks> {
// and testing.
explicit ThreadTicks(int64 us) : TimeBase(us) {
}
+
+#if defined(OS_WIN)
+ FRIEND_TEST_ALL_PREFIXES(TimeTicks, TSCTicksPerSecond);
+
+ // Returns the frequency of the TSC in ticks per second, or 0 if it hasn't
+ // been measured yet. Needs to be guarded with a call to IsSupported().
+ // This method is declared here rather than in the anonymous namespace to
+ // allow testing.
+ static double TSCTicksPerSecond();
+
+ static bool IsSupportedWin();
+ static void WaitUntilInitializedWin();
+#endif
};
// For logging use only.
diff --git a/base/time/time_unittest.cc b/base/time/time_unittest.cc
index 512fc37..c8b403b 100644
--- a/base/time/time_unittest.cc
+++ b/base/time/time_unittest.cc
@@ -646,6 +646,7 @@ TEST(TimeTicks, HighRes) {
#endif
TEST(ThreadTicks, MAYBE_ThreadNow) {
if (ThreadTicks::IsSupported()) {
+ ThreadTicks::WaitUntilInitialized();
TimeTicks begin = TimeTicks::Now();
ThreadTicks begin_thread = ThreadTicks::Now();
// Make sure that ThreadNow value is non-zero.
diff --git a/base/time/time_win.cc b/base/time/time_win.cc
index 45436807..cadf4b6 100644
--- a/base/time/time_win.cc
+++ b/base/time/time_win.cc
@@ -100,6 +100,26 @@ uint32_t g_high_res_timer_count = 0;
base::LazyInstance<base::Lock>::Leaky g_high_res_lock =
LAZY_INSTANCE_INITIALIZER;
+// Returns a pointer to the QueryThreadCycleTime() function from Windows.
+// Can't statically link to it because it is not available on XP.
+using QueryThreadCycleTimePtr = decltype(::QueryThreadCycleTime)*;
+QueryThreadCycleTimePtr GetQueryThreadCycleTimeFunction() {
+ static const QueryThreadCycleTimePtr query_thread_cycle_time_fn =
+ reinterpret_cast<QueryThreadCycleTimePtr>(::GetProcAddress(
+ ::GetModuleHandle(L"kernel32.dll"), "QueryThreadCycleTime"));
+ return query_thread_cycle_time_fn;
+}
+
+// Returns the current value of the performance counter.
+uint64 QPCNowRaw() {
+ LARGE_INTEGER perf_counter_now = {};
+ // According to the MSDN documentation for QueryPerformanceCounter(), this
+ // will never fail on systems that run XP or later.
+ // https://msdn.microsoft.com/library/windows/desktop/ms644904.aspx
+ ::QueryPerformanceCounter(&perf_counter_now);
+ return perf_counter_now.QuadPart;
+}
+
} // namespace
// Time -----------------------------------------------------------------------
@@ -415,9 +435,7 @@ TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) {
}
TimeDelta QPCNow() {
- LARGE_INTEGER now;
- QueryPerformanceCounter(&now);
- return QPCValueToTimeDelta(now.QuadPart);
+ return QPCValueToTimeDelta(QPCNowRaw());
}
bool IsBuggyAthlon(const base::CPU& cpu) {
@@ -504,8 +522,94 @@ bool TimeTicks::IsHighResolution() {
// static
ThreadTicks ThreadTicks::Now() {
- NOTREACHED();
- return ThreadTicks();
+ DCHECK(IsSupported());
+
+ // Get the number of TSC ticks used by the current thread.
+ ULONG64 thread_cycle_time = 0;
+ GetQueryThreadCycleTimeFunction()(::GetCurrentThread(), &thread_cycle_time);
+
+ // Get the frequency of the TSC.
+ double tsc_ticks_per_second = TSCTicksPerSecond();
+ if (tsc_ticks_per_second == 0)
+ return ThreadTicks();
+
+ // Return the CPU time of the current thread.
+ double thread_time_seconds = thread_cycle_time / tsc_ticks_per_second;
+ return ThreadTicks(static_cast<int64>(
+ thread_time_seconds * Time::kMicrosecondsPerSecond));
+}
+
+// static
+bool ThreadTicks::IsSupportedWin() {
+ static bool is_supported = GetQueryThreadCycleTimeFunction() &&
+ base::CPU().has_non_stop_time_stamp_counter() &&
+ !IsBuggyAthlon(base::CPU());
+ return is_supported;
+}
+
+// static
+void ThreadTicks::WaitUntilInitializedWin() {
+ while (TSCTicksPerSecond() == 0)
+ ::Sleep(10);
+}
+
+double ThreadTicks::TSCTicksPerSecond() {
+ DCHECK(IsSupported());
+
+ // The value returned by QueryPerformanceFrequency() cannot be used as the TSC
+ // frequency, because there is no guarantee that the TSC frequency is equal to
+ // the performance counter frequency.
+
+ // The TSC frequency is cached in a static variable because it takes some time
+ // to compute it.
+ static double tsc_ticks_per_second = 0;
+ if (tsc_ticks_per_second != 0)
+ return tsc_ticks_per_second;
+
+ // Increase the thread priority to reduce the chances of having a context
+ // switch during a reading of the TSC and the performance counter.
+ int previous_priority = ::GetThreadPriority(::GetCurrentThread());
+ ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_HIGHEST);
+
+ // The first time that this function is called, make an initial reading of the
+ // TSC and the performance counter.
+ static const uint64 tsc_initial = __rdtsc();
+ static const uint64 perf_counter_initial = QPCNowRaw();
+
+ // Make another reading of the TSC and the performance counter every time
+ // that this function is called.
+ uint64 tsc_now = __rdtsc();
+ uint64 perf_counter_now = QPCNowRaw();
+
+ // Reset the thread priority.
+ ::SetThreadPriority(::GetCurrentThread(), previous_priority);
+
+ // Make sure that at least 50 ms elapsed between the 2 readings. The first
+ // time that this function is called, we don't expect this to be the case.
+ // Note: The longer the elapsed time between the 2 readings is, the more
+ // accurate the computed TSC frequency will be. The 50 ms value was
+ // chosen because local benchmarks show that it allows us to get a
+ // stddev of less than 1 tick/us between multiple runs.
+ // Note: According to the MSDN documentation for QueryPerformanceFrequency(),
+ // this will never fail on systems that run XP or later.
+ // https://msdn.microsoft.com/library/windows/desktop/ms644905.aspx
+ LARGE_INTEGER perf_counter_frequency = {};
+ ::QueryPerformanceFrequency(&perf_counter_frequency);
+ DCHECK_GE(perf_counter_now, perf_counter_initial);
+ uint64 perf_counter_ticks = perf_counter_now - perf_counter_initial;
+ double elapsed_time_seconds =
+ perf_counter_ticks / static_cast<double>(perf_counter_frequency.QuadPart);
+
+ const double kMinimumEvaluationPeriodSeconds = 0.05;
+ if (elapsed_time_seconds < kMinimumEvaluationPeriodSeconds)
+ return 0;
+
+ // Compute the frequency of the TSC.
+ DCHECK_GE(tsc_now, tsc_initial);
+ uint64 tsc_ticks = tsc_now - tsc_initial;
+ tsc_ticks_per_second = tsc_ticks / elapsed_time_seconds;
+
+ return tsc_ticks_per_second;
}
// static
diff --git a/base/time/time_win_unittest.cc b/base/time/time_win_unittest.cc
index c894b68..6f8a9b7 100644
--- a/base/time/time_win_unittest.cc
+++ b/base/time/time_win_unittest.cc
@@ -12,13 +12,10 @@
#include "base/threading/platform_thread.h"
#include "base/time/time.h"
+#include "base/win/registry.h"
#include "testing/gtest/include/gtest/gtest.h"
-using base::Time;
-using base::TimeDelta;
-using base::TimeTicks;
-using base::TraceTicks;
-
+namespace base {
namespace {
class MockTimeTicks : public TimeTicks {
@@ -192,18 +189,22 @@ TEST(TimeTicks, TimerPerformance) {
// in order to create a single test case list.
COMPILE_ASSERT(sizeof(TimeTicks) == sizeof(Time),
test_only_works_with_same_sizes);
- TestCase cases[] = {
- { reinterpret_cast<TestFunc>(&Time::Now), "Time::Now" },
- { &TimeTicks::Now, "TimeTicks::Now" },
- { reinterpret_cast<TestFunc>(&TraceTicks::Now), "TraceTicks::Now" },
- { NULL, "" }
- };
+ std::vector<TestCase> cases;
+ cases.push_back({reinterpret_cast<TestFunc>(&Time::Now), "Time::Now"});
+ cases.push_back({&TimeTicks::Now, "TimeTicks::Now"});
+ cases.push_back(
+ {reinterpret_cast<TestFunc>(&TraceTicks::Now), "TraceTicks::Now"});
+
+ if (ThreadTicks::IsSupported()) {
+ ThreadTicks::WaitUntilInitialized();
+ cases.push_back(
+ {reinterpret_cast<TestFunc>(&ThreadTicks::Now), "ThreadTicks::Now"});
+ }
- int test_case = 0;
- while (cases[test_case].func) {
+ for (const auto& test_case : cases) {
TimeTicks start = TimeTicks::Now();
for (int index = 0; index < kLoops; index++)
- cases[test_case].func();
+ test_case.func();
TimeTicks stop = TimeTicks::Now();
// Turning off the check for acceptible delays. Without this check,
// the test really doesn't do much other than measure. But the
@@ -213,9 +214,29 @@ TEST(TimeTicks, TimerPerformance) {
// slow, and there is really no value for checking against a max timer.
//const int kMaxTime = 35; // Maximum acceptible milliseconds for test.
//EXPECT_LT((stop - start).InMilliseconds(), kMaxTime);
- printf("%s: %1.2fus per call\n", cases[test_case].description,
- (stop - start).InMillisecondsF() * 1000 / kLoops);
- test_case++;
+ printf("%s: %1.2fus per call\n", test_case.description,
+ (stop - start).InMillisecondsF() * 1000 / kLoops);
+ }
+}
+
+TEST(TimeTicks, TSCTicksPerSecond) {
+ if (ThreadTicks::IsSupported()) {
+ ThreadTicks::WaitUntilInitialized();
+
+ // Read the CPU frequency from the registry.
+ base::win::RegKey processor_key(
+ HKEY_LOCAL_MACHINE,
+ L"Hardware\\Description\\System\\CentralProcessor\\0", KEY_QUERY_VALUE);
+ ASSERT_TRUE(processor_key.Valid());
+ DWORD processor_mhz_from_registry;
+ ASSERT_EQ(ERROR_SUCCESS,
+ processor_key.ReadValueDW(L"~MHz", &processor_mhz_from_registry));
+
+ // Expect the measured TSC frequency to be similar to the processor
+ // frequency from the registry (0.5% error).
+ double tsc_mhz_measured = ThreadTicks::TSCTicksPerSecond() / 1e6;
+ EXPECT_NEAR(tsc_mhz_measured, processor_mhz_from_registry,
+ 0.005 * processor_mhz_from_registry);
}
}
@@ -273,3 +294,5 @@ TEST(TimeTicks, FromQPCValue) {
<< (ticks < Time::kQPCOverflowThreshold ? "FAST" : "SAFE");
}
}
+
+} // namespace base