diff options
author | siggi@chromium.org <siggi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-12-07 21:44:28 +0000 |
---|---|---|
committer | siggi@chromium.org <siggi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-12-07 21:44:28 +0000 |
commit | da00f0032bbce4026ab4f7c3d2336ea99833ac4c (patch) | |
tree | aef10a5e61081c996551bccd60a4542d9838562e /base/win | |
parent | d7f7524bb07b7789490e2c5532c139f81b69578f (diff) | |
download | chromium_src-da00f0032bbce4026ab4f7c3d2336ea99833ac4c.zip chromium_src-da00f0032bbce4026ab4f7c3d2336ea99833ac4c.tar.gz chromium_src-da00f0032bbce4026ab4f7c3d2336ea99833ac4c.tar.bz2 |
Windows-native sampling profiler wrapper class.
Review URL: http://codereview.chromium.org/8803022
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@113473 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/win')
-rw-r--r-- | base/win/sampling_profiler.cc | 233 | ||||
-rw-r--r-- | base/win/sampling_profiler.h | 74 | ||||
-rw-r--r-- | base/win/sampling_profiler_unittest.cc | 116 |
3 files changed, 423 insertions, 0 deletions
diff --git a/base/win/sampling_profiler.cc b/base/win/sampling_profiler.cc
new file mode 100644
index 0000000..d1f6fbc
--- /dev/null
+++ b/base/win/sampling_profiler.cc
@@ -0,0 +1,233 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/win/sampling_profiler.h"
+
+#include <winternl.h>  // for NTSTATUS.
+
+#include "base/lazy_instance.h"
+
+// Copied from wdm.h in the WDK as we don't want to take
+// a dependency on the WDK.
+// Presumably selects the event source a profile samples on (TODO confirm
+// against the WDK); the code in this file only ever passes ProfileTime.
+typedef enum _KPROFILE_SOURCE {
+  ProfileTime,
+  ProfileAlignmentFixup,
+  ProfileTotalIssues,
+  ProfilePipelineDry,
+  ProfileLoadInstructions,
+  ProfilePipelineFrozen,
+  ProfileBranchInstructions,
+  ProfileTotalNonissues,
+  ProfileDcacheMisses,
+  ProfileIcacheMisses,
+  ProfileCacheMisses,
+  ProfileBranchMispredictions,
+  ProfileStoreInstructions,
+  ProfileFpInstructions,
+  ProfileIntegerInstructions,
+  Profile2Issue,
+  Profile3Issue,
+  Profile4Issue,
+  ProfileSpecialInstructions,
+  ProfileTotalCycles,
+  ProfileIcacheIssues,
+  ProfileDcacheAccesses,
+  ProfileMemoryBarrierCycles,
+  ProfileLoadLinkedIssues,
+  ProfileMaximum
+} KPROFILE_SOURCE;
+
+
+namespace {
+
+// Signatures for the native functions we need to access the sampling profiler.
+// The functions themselves are looked up by name in ntdll.dll at runtime.
+
+// Called in this file as (interval, ProfileTime) to set the sampling interval.
+typedef NTSTATUS (NTAPI *ZwSetIntervalProfileFunc)(ULONG, KPROFILE_SOURCE);
+// Called in this file as (ProfileTime, &interval) to read the interval back.
+typedef NTSTATUS (NTAPI *ZwQueryIntervalProfileFunc)(KPROFILE_SOURCE, PULONG);
+
+// Creates a profile object and returns its handle through |profile|.
+// This file passes the log2 of the bucket size as |eip_bucket_shift|, and a
+// caller-owned array of ULONG counters as |buckets|, whose size in bytes is
+// |buckets_byte_size|.
+typedef NTSTATUS (NTAPI *ZwCreateProfileFunc)(PHANDLE profile,
+                                              HANDLE process,
+                                              PVOID code_start,
+                                              ULONG code_size,
+                                              ULONG eip_bucket_shift,
+                                              PULONG buckets,
+                                              ULONG buckets_byte_size,
+                                              KPROFILE_SOURCE source,
+                                              DWORD_PTR processor_mask);
+
+// Both take the profile handle produced by ZwCreateProfile.
+typedef NTSTATUS (NTAPI *ZwStartProfileFunc)(HANDLE);
+typedef NTSTATUS (NTAPI *ZwStopProfileFunc)(HANDLE);
+
+// This class is used to lazy-initialize pointers to the native
+// functions we need to access.
+// Callers must check |initialized_| before using any of the pointers; they
+// are all NULL unless ntdll.dll was found, and |initialized_| is only set
+// when every one of them resolved.
+class ProfilerFuncs {
+ public:
+  ProfilerFuncs();
+
+  ZwSetIntervalProfileFunc ZwSetIntervalProfile;
+  ZwQueryIntervalProfileFunc ZwQueryIntervalProfile;
+  ZwCreateProfileFunc ZwCreateProfile;
+  ZwStartProfileFunc ZwStartProfile;
+  ZwStopProfileFunc ZwStopProfile;
+
+  // True iff all of the function pointers above were successfully initialized.
+  bool initialized_;
+};
+
+// Resolves the native entry points by name from ntdll.dll. Every pointer is
+// first set to NULL so that none is left indeterminate if the module lookup
+// fails.
+ProfilerFuncs::ProfilerFuncs()
+    : ZwSetIntervalProfile(NULL),
+      ZwQueryIntervalProfile(NULL),
+      ZwCreateProfile(NULL),
+      ZwStartProfile(NULL),
+      ZwStopProfile(NULL),
+      initialized_(false) {
+  HMODULE ntdll = ::GetModuleHandle(L"ntdll.dll");
+  if (ntdll == NULL)
+    return;
+
+  ZwSetIntervalProfile = reinterpret_cast<ZwSetIntervalProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwSetIntervalProfile"));
+  ZwQueryIntervalProfile = reinterpret_cast<ZwQueryIntervalProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwQueryIntervalProfile"));
+  ZwCreateProfile = reinterpret_cast<ZwCreateProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwCreateProfile"));
+  ZwStartProfile = reinterpret_cast<ZwStartProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwStartProfile"));
+  ZwStopProfile = reinterpret_cast<ZwStopProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwStopProfile"));
+
+  initialized_ = ZwSetIntervalProfile != NULL &&
+                 ZwQueryIntervalProfile != NULL &&
+                 ZwCreateProfile != NULL &&
+                 ZwStartProfile != NULL &&
+                 ZwStopProfile != NULL;
+}
+
+// Lazily-constructed table of the native profiler entry points, shared by
+// every SamplingProfiler. It uses the leaky traits, so presumably it is never
+// destroyed (TODO confirm against base/lazy_instance.h).
+base::LazyInstance<ProfilerFuncs, base::LeakyLazyInstanceTraits<ProfilerFuncs>>
+    funcs = LAZY_INSTANCE_INITIALIZER;
+
+}  // namespace
+
+
+namespace base {
+namespace win {
+
+SamplingProfiler::SamplingProfiler() : is_started_(false) {
+}
+
+// A profiler that is still running when destroyed is stopped here. The
+// kernel profile object was handed a pointer into |buckets_|, so failing to
+// stop it before the buckets go away is treated as fatal.
+SamplingProfiler::~SamplingProfiler() {
+  if (is_started_) {
+    CHECK(Stop()) <<
+        "Unable to stop sampling profiler, this will cause memory corruption.";
+  }
+}
+
+// Creates the kernel profile object covering |size| bytes from |start| in
+// |process|, with one ULONG counter per 2^|log2_bucket_size| bytes.
+// Returns false if the native functions are unavailable, if the process
+// affinity mask can't be read, or if the profile can't be created. On
+// failure no buckets are left allocated.
+bool SamplingProfiler::Initialize(HANDLE process,
+                                  void* start,
+                                  size_t size,
+                                  size_t log2_bucket_size) {
+  // You only get to initialize each instance once.
+  DCHECK(!profile_handle_.IsValid());
+  DCHECK(!is_started_);
+  DCHECK(start != NULL);
+  DCHECK_NE(0U, size);
+  // Bucket sizes range from 4 bytes (2^2) through 2 gigabytes (2^31).
+  DCHECK_LE(2U, log2_bucket_size);
+  DCHECK_GE(31U, log2_bucket_size);
+
+  // Bail if the native functions weren't found.
+  if (!funcs.Get().initialized_)
+    return false;
+
+  // Get our affinity mask for the call below. This is done before the
+  // buckets are allocated so that a failure here leaves nothing to clean up.
+  DWORD_PTR process_affinity = 0;
+  DWORD_PTR system_affinity = 0;
+  if (!::GetProcessAffinityMask(process, &process_affinity, &system_affinity)) {
+    LOG(ERROR) << "Failed to get process affinity mask.";
+    return false;
+  }
+
+  // Shift a size_t rather than an int so that 2^31 is well defined, and
+  // round the bucket count up without adding to |size|, which could wrap.
+  const size_t bucket_size = static_cast<size_t>(1) << log2_bucket_size;
+  size_t num_buckets = size >> log2_bucket_size;
+  if ((size & (bucket_size - 1)) != 0)
+    ++num_buckets;
+  DCHECK(num_buckets != 0);
+  buckets_.resize(num_buckets);
+
+  HANDLE profile = NULL;
+  NTSTATUS status =
+      funcs.Get().ZwCreateProfile(&profile,
+                                  process,
+                                  start,
+                                  static_cast<ULONG>(size),
+                                  static_cast<ULONG>(log2_bucket_size),
+                                  &buckets_[0],
+                                  static_cast<ULONG>(
+                                      sizeof(buckets_[0]) * num_buckets),
+                                  ProfileTime,
+                                  process_affinity);
+
+  if (!NT_SUCCESS(status)) {
+    // Might as well deallocate the buckets. Swapping with an empty vector
+    // releases the storage, which resize(0) is not required to do.
+    std::vector<ULONG>().swap(buckets_);
+    LOG(ERROR) << "Failed to create profile, error 0x" << std::hex << status;
+    return false;
+  }
+
+  DCHECK(profile != NULL);
+  profile_handle_.Set(profile);
+
+  return true;
+}
+
+// Starts sampling. The profiler must be initialized and not already started.
+// Returns false, leaving the profiler stopped, if the native call fails.
+bool SamplingProfiler::Start() {
+  DCHECK(profile_handle_.IsValid());
+  DCHECK(!is_started_);
+  DCHECK(funcs.Get().initialized_);
+
+  NTSTATUS status = funcs.Get().ZwStartProfile(profile_handle_.Get());
+  if (!NT_SUCCESS(status))
+    return false;
+
+  is_started_ = true;
+
+  return true;
+}
+
+// Stops sampling. The profiler must be started.
+// Returns false, leaving the profiler marked as started, if the native call
+// fails.
+bool SamplingProfiler::Stop() {
+  DCHECK(profile_handle_.IsValid());
+  DCHECK(is_started_);
+  DCHECK(funcs.Get().initialized_);
+
+  NTSTATUS status = funcs.Get().ZwStopProfile(profile_handle_.Get());
+  if (!NT_SUCCESS(status))
+    return false;
+  is_started_ = false;
+
+  return true;
+}
+
+// Sets the sampling interval for the ProfileTime source.
+// Returns false if the native functions are unavailable, if the interval is
+// negative or too large to express as a ULONG count of 100ns units, or if
+// the native call fails.
+bool SamplingProfiler::SetSamplingInterval(base::TimeDelta sampling_interval) {
+  if (!funcs.Get().initialized_)
+    return false;
+
+  // According to Nebbet, the sampling interval is in units of 100ns.
+  const int64 interval_100ns = sampling_interval.InMicroseconds() * 10;
+  const ULONG interval = static_cast<ULONG>(interval_100ns);
+  // Reject values that don't survive the narrowing instead of silently
+  // handing a wrapped interval to the kernel.
+  if (interval_100ns < 0 || static_cast<int64>(interval) != interval_100ns)
+    return false;
+
+  NTSTATUS status = funcs.Get().ZwSetIntervalProfile(interval, ProfileTime);
+  if (!NT_SUCCESS(status))
+    return false;
+
+  return true;
+}
+
+// Reads the sampling interval for the ProfileTime source into
+// |sampling_interval|, which must not be NULL.
+// Returns false, leaving |sampling_interval| untouched, if the native
+// functions are unavailable or the native call fails.
+bool SamplingProfiler::GetSamplingInterval(base::TimeDelta* sampling_interval) {
+  DCHECK(sampling_interval != NULL);
+
+  if (!funcs.Get().initialized_)
+    return false;
+
+  ULONG interval = 0;
+  NTSTATUS status = funcs.Get().ZwQueryIntervalProfile(ProfileTime, &interval);
+  if (!NT_SUCCESS(status))
+    return false;
+
+  // According to Nebbet, the sampling interval is in units of 100ns.
+  *sampling_interval = base::TimeDelta::FromMicroseconds(interval / 10);
+
+  return true;
+}
+
+}  // namespace win
+}  // namespace base
diff --git a/base/win/sampling_profiler.h b/base/win/sampling_profiler.h
new file mode 100644
index 0000000..bf1be8c
--- /dev/null
+++ b/base/win/sampling_profiler.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_WIN_SAMPLING_PROFILER_H_
+#define BASE_WIN_SAMPLING_PROFILER_H_
+#pragma once
+
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/time.h"
+#include "base/win/scoped_handle.h"
+
+namespace base {
+namespace win {
+
+// This class exposes the functionality of Windows' built-in sampling profiler.
+// Each profiler instance covers a range of memory, and while the profiler is
+// running, its buckets will count the number of times the instruction counter
+// lands in the associated range of memory on a sample.
+// The sampling interval is settable, but the setting is system-wide.
+class BASE_EXPORT SamplingProfiler {
+ public:
+  // Create an uninitialized sampling profiler.
+  SamplingProfiler();
+  // Stops the profiler if it is still started.
+  ~SamplingProfiler();
+
+  // Initializes the profiler to cover the memory range |start| through
+  // |start| + |size|, in the process |process_handle|, with bucket size
+  // |2^log2_bucket_size|.
+  // |log2_bucket_size| must be in the range 2-31, for bucket sizes of
+  // 4 bytes to 2 gigabytes.
+  // The process handle must grant at least PROCESS_QUERY_INFORMATION.
+  // The memory range should be executable code, e.g. the text segment
+  // of an executable (whether DLL or EXE).
+  // Each instance may only be initialized once.
+  // Returns true on success.
+  bool Initialize(HANDLE process_handle,
+                  void* start,
+                  size_t size,
+                  size_t log2_bucket_size);
+
+  // Start this profiler, which must be initialized and not started.
+  // Returns true on success.
+  bool Start();
+  // Stop this profiler, which must be started.
+  // Returns true on success.
+  bool Stop();
+
+  // Get and set the sampling interval.
+  // Note that this is a system-wide setting.
+  // Both return true on success.
+  static bool SetSamplingInterval(base::TimeDelta sampling_interval);
+  static bool GetSamplingInterval(base::TimeDelta* sampling_interval);
+
+  // Accessors.
+  bool is_started() const { return is_started_; }
+
+  // It is safe to read the counts in the sampling buckets at any time.
+  // Note however that there's no guarantee that you'll read consistent counts
+  // until the profiler has been stopped, as the counts may be updating on other
+  // CPU cores.
+  const std::vector<ULONG>& buckets() const { return buckets_; }
+
+ private:
+  // Handle to the corresponding kernel object.
+  ScopedHandle profile_handle_;
+  // True iff this profiler is started.
+  bool is_started_;
+  // One sample counter per bucket of the profiled range; sized by
+  // Initialize().
+  std::vector<ULONG> buckets_;
+
+  DISALLOW_COPY_AND_ASSIGN(SamplingProfiler);
+};
+
+}  // namespace win
+}  // namespace base
+
+#endif  // BASE_WIN_SAMPLING_PROFILER_H_
diff --git a/base/win/sampling_profiler_unittest.cc b/base/win/sampling_profiler_unittest.cc
new file mode 100644
index 0000000..caa4c63
--- /dev/null
+++ b/base/win/sampling_profiler_unittest.cc
@@ -0,0 +1,116 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/win/sampling_profiler.h"
+
+#include <string.h>  // for strncmp.
+
+#include "base/test/test_timeouts.h"
+#include "base/win/pe_image.h"
+#include "base/win/scoped_handle.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+// The address of our image base.
+extern "C" IMAGE_DOS_HEADER __ImageBase;
+
+namespace base {
+namespace win {
+
+namespace {
+
+// Fixture that opens a handle to the current process and locates the .text
+// section of the test binary, for the tests to profile.
+class SamplingProfilerTest : public testing::Test {
+ public:
+  SamplingProfilerTest() : code_start(NULL), code_size(0) {
+  }
+
+  virtual void SetUp() {
+    process.Set(::OpenProcess(PROCESS_QUERY_INFORMATION,
+                              FALSE,
+                              ::GetCurrentProcessId()));
+    ASSERT_TRUE(process.IsValid());
+
+    PEImage image(&__ImageBase);
+
+    // Get the address of the .text section, which is the first section output
+    // by the VS tools.
+    ASSERT_TRUE(image.GetNumSections() > 0);
+    const IMAGE_SECTION_HEADER* text_section = image.GetSectionHeader(0);
+    ASSERT_EQ(0, strncmp(".text",
+                         reinterpret_cast<const char*>(text_section->Name),
+                         arraysize(text_section->Name)));
+    ASSERT_NE(0U, text_section->Characteristics & IMAGE_SCN_MEM_EXECUTE);
+
+    code_start = reinterpret_cast<uint8*>(&__ImageBase) +
+        text_section->VirtualAddress;
+    code_size = text_section->Misc.VirtualSize;
+  }
+
+ protected:
+  // Handle to the current process, opened with PROCESS_QUERY_INFORMATION.
+  ScopedHandle process;
+  // Start address and size in bytes of this image's .text section.
+  void* code_start;
+  size_t code_size;
+};
+
+}  // namespace
+
+TEST_F(SamplingProfilerTest, Initialize) {
+  SamplingProfiler profiler;
+
+  ASSERT_TRUE(profiler.Initialize(process.Get(), code_start, code_size, 8));
+}
+
+TEST_F(SamplingProfilerTest, Sample) {
+  SamplingProfiler profiler;
+
+  // Initialize with a huge bucket size, aiming for a single bucket.
+  ASSERT_TRUE(
+      profiler.Initialize(process.Get(), code_start, code_size, 31));
+
+  // Unsigned literals, to match the unsigned values they are compared to.
+  ASSERT_EQ(1U, profiler.buckets().size());
+  ASSERT_EQ(0U, profiler.buckets()[0]);
+
+  // We use a roomy timeout to make sure this test is not flaky.
+  // On the buildbots, there may not be a whole lot of CPU time
+  // allotted to our process in this wall-clock time duration,
+  // and samples will only accrue while this thread is busy on
+  // a CPU core.
+  base::TimeDelta spin_time =
+      base::TimeDelta::FromMilliseconds(TestTimeouts::action_timeout_ms());
+
+  base::TimeDelta save_sampling_interval;
+  ASSERT_TRUE(SamplingProfiler::GetSamplingInterval(&save_sampling_interval));
+
+  // Sample every 0.5 millisecs.
+  ASSERT_TRUE(SamplingProfiler::SetSamplingInterval(
+      base::TimeDelta::FromMicroseconds(500)));
+
+  // Start the profiler.
+  ASSERT_TRUE(profiler.Start());
+
+  // Get a volatile pointer to our bucket to make sure that the compiler
+  // doesn't optimize out the test in the loop that follows.
+  volatile const ULONG* bucket_ptr = &profiler.buckets()[0];
+
+  // Spin for spin_time wall-clock seconds, or until we get some samples.
+  // Note that sleeping isn't going to do us any good, the samples only
+  // accrue while we're executing code.
+  base::Time start = base::Time::Now();
+  base::TimeDelta elapsed;
+  do {
+    elapsed = base::Time::Now() - start;
+  } while ((elapsed < spin_time) && *bucket_ptr == 0);
+
+  // Stop the profiler.
+  ASSERT_TRUE(profiler.Stop());
+
+  // Restore the sampling interval we found.
+  ASSERT_TRUE(SamplingProfiler::SetSamplingInterval(save_sampling_interval));
+
+  // Check that we got some samples.
+  ASSERT_NE(0U, profiler.buckets()[0]);
+}
+
+}  // namespace win
+}  // namespace base