diff options
author | fdoray <fdoray@chromium.org> | 2015-10-14 20:46:40 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-10-15 03:47:30 +0000 |
commit | dc2a659b0ad54c3dcb632b876e40a71fa03019e9 (patch) | |
tree | e3f3a87c7e8a03716389609435214f0be6bd87bf /base/time | |
parent | 5d49f7978f42d05431b8ed851050ce0687938512 (diff) | |
download | chromium_src-dc2a659b0ad54c3dcb632b876e40a71fa03019e9.zip chromium_src-dc2a659b0ad54c3dcb632b876e40a71fa03019e9.tar.gz chromium_src-dc2a659b0ad54c3dcb632b876e40a71fa03019e9.tar.bz2 |
Implement ThreadTicks::Now on Windows.
Use QueryThreadCycleTime() to get the number of CPU clock cycles used
by the current thread. Convert it to microseconds using a measured
TSC frequency.
The value returned by QueryThreadCycleTime() is based on the rdtsc
instruction. For several years, Intel has been shipping CPUs with a
constant-rate counter, which means that the QueryThreadCycleTime()
results are directly proportional to wall-clock time on most systems
(see crbug.com/280743#c15). ThreadTicks::IsSupported() will return
false if the CPU doesn't have a constant-rate TSC.
BUG=280743
CQ_INCLUDE_TRYBOTS=tryserver.blink:linux_blink_rel
Review URL: https://codereview.chromium.org/1390743002
Cr-Commit-Position: refs/heads/master@{#354213}
Diffstat (limited to 'base/time')
-rw-r--r-- | base/time/time.h | 29 | ||||
-rw-r--r-- | base/time/time_unittest.cc | 1 | ||||
-rw-r--r-- | base/time/time_win.cc | 114 | ||||
-rw-r--r-- | base/time/time_win_unittest.cc | 57 |
4 files changed, 178 insertions, 23 deletions
diff --git a/base/time/time.h b/base/time/time.h index e0435d0..8da08ff 100644 --- a/base/time/time.h +++ b/base/time/time.h @@ -79,6 +79,8 @@ // For FILETIME in FromFileTime, until it moves to a new converter class. // See TODO(iyengar) below. #include <windows.h> + +#include "base/gtest_prod_util.h" #endif #include <limits> @@ -733,16 +735,28 @@ class BASE_EXPORT ThreadTicks : public time_internal::TimeBase<ThreadTicks> { #if (defined(_POSIX_THREAD_CPUTIME) && (_POSIX_THREAD_CPUTIME >= 0)) || \ (defined(OS_MACOSX) && !defined(OS_IOS)) || defined(OS_ANDROID) return true; +#elif defined(OS_WIN) + return IsSupportedWin(); #else return false; #endif } + // Waits until the initialization is completed. Needs to be guarded with a + // call to IsSupported(). + static void WaitUntilInitialized() { +#if defined(OS_WIN) + WaitUntilInitializedWin(); +#endif + } + // Returns thread-specific CPU-time on systems that support this feature. // Needs to be guarded with a call to IsSupported(). Use this timer // to (approximately) measure how much time the calling thread spent doing // actual work vs. being de-scheduled. May return bogus results if the thread - // migrates to another CPU between two calls. + // migrates to another CPU between two calls. Returns an empty ThreadTicks + // object until the initialization is completed. If a clock reading is + // absolutely needed, call WaitUntilInitialized() before this method. static ThreadTicks Now(); private: @@ -752,6 +766,19 @@ class BASE_EXPORT ThreadTicks : public time_internal::TimeBase<ThreadTicks> { // and testing. explicit ThreadTicks(int64 us) : TimeBase(us) { } + +#if defined(OS_WIN) + FRIEND_TEST_ALL_PREFIXES(TimeTicks, TSCTicksPerSecond); + + // Returns the frequency of the TSC in ticks per second, or 0 if it hasn't + // been measured yet. Needs to be guarded with a call to IsSupported(). + // This method is declared here rather than in the anonymous namespace to + // allow testing. 
+ static double TSCTicksPerSecond(); + + static bool IsSupportedWin(); + static void WaitUntilInitializedWin(); +#endif }; // For logging use only. diff --git a/base/time/time_unittest.cc b/base/time/time_unittest.cc index 512fc37..c8b403b 100644 --- a/base/time/time_unittest.cc +++ b/base/time/time_unittest.cc @@ -646,6 +646,7 @@ TEST(TimeTicks, HighRes) { #endif TEST(ThreadTicks, MAYBE_ThreadNow) { if (ThreadTicks::IsSupported()) { + ThreadTicks::WaitUntilInitialized(); TimeTicks begin = TimeTicks::Now(); ThreadTicks begin_thread = ThreadTicks::Now(); // Make sure that ThreadNow value is non-zero. diff --git a/base/time/time_win.cc b/base/time/time_win.cc index 45436807..cadf4b6 100644 --- a/base/time/time_win.cc +++ b/base/time/time_win.cc @@ -100,6 +100,26 @@ uint32_t g_high_res_timer_count = 0; base::LazyInstance<base::Lock>::Leaky g_high_res_lock = LAZY_INSTANCE_INITIALIZER; +// Returns a pointer to the QueryThreadCycleTime() function from Windows. +// Can't statically link to it because it is not available on XP. +using QueryThreadCycleTimePtr = decltype(::QueryThreadCycleTime)*; +QueryThreadCycleTimePtr GetQueryThreadCycleTimeFunction() { + static const QueryThreadCycleTimePtr query_thread_cycle_time_fn = + reinterpret_cast<QueryThreadCycleTimePtr>(::GetProcAddress( + ::GetModuleHandle(L"kernel32.dll"), "QueryThreadCycleTime")); + return query_thread_cycle_time_fn; +} + +// Returns the current value of the performance counter. +uint64 QPCNowRaw() { + LARGE_INTEGER perf_counter_now = {}; + // According to the MSDN documentation for QueryPerformanceCounter(), this + // will never fail on systems that run XP or later. 
+ // https://msdn.microsoft.com/library/windows/desktop/ms644904.aspx + ::QueryPerformanceCounter(&perf_counter_now); + return perf_counter_now.QuadPart; +} + } // namespace // Time ----------------------------------------------------------------------- @@ -415,9 +435,7 @@ TimeDelta QPCValueToTimeDelta(LONGLONG qpc_value) { } TimeDelta QPCNow() { - LARGE_INTEGER now; - QueryPerformanceCounter(&now); - return QPCValueToTimeDelta(now.QuadPart); + return QPCValueToTimeDelta(QPCNowRaw()); } bool IsBuggyAthlon(const base::CPU& cpu) { @@ -504,8 +522,94 @@ bool TimeTicks::IsHighResolution() { // static ThreadTicks ThreadTicks::Now() { - NOTREACHED(); - return ThreadTicks(); + DCHECK(IsSupported()); + + // Get the number of TSC ticks used by the current thread. + ULONG64 thread_cycle_time = 0; + GetQueryThreadCycleTimeFunction()(::GetCurrentThread(), &thread_cycle_time); + + // Get the frequency of the TSC. + double tsc_ticks_per_second = TSCTicksPerSecond(); + if (tsc_ticks_per_second == 0) + return ThreadTicks(); + + // Return the CPU time of the current thread. + double thread_time_seconds = thread_cycle_time / tsc_ticks_per_second; + return ThreadTicks(static_cast<int64>( + thread_time_seconds * Time::kMicrosecondsPerSecond)); +} + +// static +bool ThreadTicks::IsSupportedWin() { + static bool is_supported = GetQueryThreadCycleTimeFunction() && + base::CPU().has_non_stop_time_stamp_counter() && + !IsBuggyAthlon(base::CPU()); + return is_supported; +} + +// static +void ThreadTicks::WaitUntilInitializedWin() { + while (TSCTicksPerSecond() == 0) + ::Sleep(10); +} + +double ThreadTicks::TSCTicksPerSecond() { + DCHECK(IsSupported()); + + // The value returned by QueryPerformanceFrequency() cannot be used as the TSC + // frequency, because there is no guarantee that the TSC frequency is equal to + // the performance counter frequency. + + // The TSC frequency is cached in a static variable because it takes some time + // to compute it. 
+ static double tsc_ticks_per_second = 0; + if (tsc_ticks_per_second != 0) + return tsc_ticks_per_second; + + // Increase the thread priority to reduces the chances of having a context + // switch during a reading of the TSC and the performance counter. + int previous_priority = ::GetThreadPriority(::GetCurrentThread()); + ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_HIGHEST); + + // The first time that this function is called, make an initial reading of the + // TSC and the performance counter. + static const uint64 tsc_initial = __rdtsc(); + static const uint64 perf_counter_initial = QPCNowRaw(); + + // Make a another reading of the TSC and the performance counter every time + // that this function is called. + uint64 tsc_now = __rdtsc(); + uint64 perf_counter_now = QPCNowRaw(); + + // Reset the thread priority. + ::SetThreadPriority(::GetCurrentThread(), previous_priority); + + // Make sure that at least 50 ms elapsed between the 2 readings. The first + // time that this function is called, we don't expect this to be the case. + // Note: The longer the elapsed time between the 2 readings is, the more + // accurate the computed TSC frequency will be. The 50 ms value was + // chosen because local benchmarks show that it allows us to get a + // stddev of less than 1 tick/us between multiple runs. + // Note: According to the MSDN documentation for QueryPerformanceFrequency(), + // this will never fail on systems that run XP or later. 
+ // https://msdn.microsoft.com/library/windows/desktop/ms644905.aspx + LARGE_INTEGER perf_counter_frequency = {}; + ::QueryPerformanceFrequency(&perf_counter_frequency); + DCHECK_GE(perf_counter_now, perf_counter_initial); + uint64 perf_counter_ticks = perf_counter_now - perf_counter_initial; + double elapsed_time_seconds = + perf_counter_ticks / static_cast<double>(perf_counter_frequency.QuadPart); + + const double kMinimumEvaluationPeriodSeconds = 0.05; + if (elapsed_time_seconds < kMinimumEvaluationPeriodSeconds) + return 0; + + // Compute the frequency of the TSC. + DCHECK_GE(tsc_now, tsc_initial); + uint64 tsc_ticks = tsc_now - tsc_initial; + tsc_ticks_per_second = tsc_ticks / elapsed_time_seconds; + + return tsc_ticks_per_second; } // static diff --git a/base/time/time_win_unittest.cc b/base/time/time_win_unittest.cc index c894b68..6f8a9b7 100644 --- a/base/time/time_win_unittest.cc +++ b/base/time/time_win_unittest.cc @@ -12,13 +12,10 @@ #include "base/threading/platform_thread.h" #include "base/time/time.h" +#include "base/win/registry.h" #include "testing/gtest/include/gtest/gtest.h" -using base::Time; -using base::TimeDelta; -using base::TimeTicks; -using base::TraceTicks; - +namespace base { namespace { class MockTimeTicks : public TimeTicks { @@ -192,18 +189,22 @@ TEST(TimeTicks, TimerPerformance) { // in order to create a single test case list. 
COMPILE_ASSERT(sizeof(TimeTicks) == sizeof(Time), test_only_works_with_same_sizes); - TestCase cases[] = { - { reinterpret_cast<TestFunc>(&Time::Now), "Time::Now" }, - { &TimeTicks::Now, "TimeTicks::Now" }, - { reinterpret_cast<TestFunc>(&TraceTicks::Now), "TraceTicks::Now" }, - { NULL, "" } - }; + std::vector<TestCase> cases; + cases.push_back({reinterpret_cast<TestFunc>(&Time::Now), "Time::Now"}); + cases.push_back({&TimeTicks::Now, "TimeTicks::Now"}); + cases.push_back( + {reinterpret_cast<TestFunc>(&TraceTicks::Now), "TraceTicks::Now"}); + + if (ThreadTicks::IsSupported()) { + ThreadTicks::WaitUntilInitialized(); + cases.push_back( + {reinterpret_cast<TestFunc>(&ThreadTicks::Now), "ThreadTicks::Now"}); + } - int test_case = 0; - while (cases[test_case].func) { + for (const auto& test_case : cases) { TimeTicks start = TimeTicks::Now(); for (int index = 0; index < kLoops; index++) - cases[test_case].func(); + test_case.func(); TimeTicks stop = TimeTicks::Now(); // Turning off the check for acceptible delays. Without this check, // the test really doesn't do much other than measure. But the @@ -213,9 +214,29 @@ TEST(TimeTicks, TimerPerformance) { // slow, and there is really no value for checking against a max timer. //const int kMaxTime = 35; // Maximum acceptible milliseconds for test. //EXPECT_LT((stop - start).InMilliseconds(), kMaxTime); - printf("%s: %1.2fus per call\n", cases[test_case].description, - (stop - start).InMillisecondsF() * 1000 / kLoops); - test_case++; + printf("%s: %1.2fus per call\n", test_case.description, + (stop - start).InMillisecondsF() * 1000 / kLoops); + } +} + +TEST(TimeTicks, TSCTicksPerSecond) { + if (ThreadTicks::IsSupported()) { + ThreadTicks::WaitUntilInitialized(); + + // Read the CPU frequency from the registry. 
+ base::win::RegKey processor_key( + HKEY_LOCAL_MACHINE, + L"Hardware\\Description\\System\\CentralProcessor\\0", KEY_QUERY_VALUE); + ASSERT_TRUE(processor_key.Valid()); + DWORD processor_mhz_from_registry; + ASSERT_EQ(ERROR_SUCCESS, + processor_key.ReadValueDW(L"~MHz", &processor_mhz_from_registry)); + + // Expect the measured TSC frequency to be similar to the processor + // frequency from the registry (0.5% error). + double tsc_mhz_measured = ThreadTicks::TSCTicksPerSecond() / 1e6; + EXPECT_NEAR(tsc_mhz_measured, processor_mhz_from_registry, + 0.005 * processor_mhz_from_registry); } } @@ -273,3 +294,5 @@ TEST(TimeTicks, FromQPCValue) { << (ticks < Time::kQPCOverflowThreshold ? "FAST" : "SAFE"); } } + +} // namespace base |