summaryrefslogtreecommitdiffstats
path: root/base/win
diff options
context:
space:
mode:
author siggi@chromium.org <siggi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-12-07 21:44:28 +0000
committer siggi@chromium.org <siggi@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-12-07 21:44:28 +0000
commit da00f0032bbce4026ab4f7c3d2336ea99833ac4c (patch)
tree aef10a5e61081c996551bccd60a4542d9838562e /base/win
parent d7f7524bb07b7789490e2c5532c139f81b69578f (diff)
downloadchromium_src-da00f0032bbce4026ab4f7c3d2336ea99833ac4c.zip
chromium_src-da00f0032bbce4026ab4f7c3d2336ea99833ac4c.tar.gz
chromium_src-da00f0032bbce4026ab4f7c3d2336ea99833ac4c.tar.bz2
Windows-native sampling profiler wrapper class.
Review URL: http://codereview.chromium.org/8803022 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@113473 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/win')
-rw-r--r--base/win/sampling_profiler.cc233
-rw-r--r--base/win/sampling_profiler.h74
-rw-r--r--base/win/sampling_profiler_unittest.cc116
3 files changed, 423 insertions, 0 deletions
diff --git a/base/win/sampling_profiler.cc b/base/win/sampling_profiler.cc
new file mode 100644
index 0000000..d1f6fbc
--- /dev/null
+++ b/base/win/sampling_profiler.cc
@@ -0,0 +1,233 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/win/sampling_profiler.h"
+
+#include <winternl.h> // for NTSTATUS.
+
+#include "base/lazy_instance.h"
+
+// Copied from wdm.h in the WDK as we don't want to take
+// a dependency on the WDK.
+// Names the event source a profile samples on. The enumerators carry no
+// explicit values, so their numeric values follow from the order below;
+// keep that order exactly as copied. This file only ever passes ProfileTime.
+typedef enum _KPROFILE_SOURCE {
+ ProfileTime,
+ ProfileAlignmentFixup,
+ ProfileTotalIssues,
+ ProfilePipelineDry,
+ ProfileLoadInstructions,
+ ProfilePipelineFrozen,
+ ProfileBranchInstructions,
+ ProfileTotalNonissues,
+ ProfileDcacheMisses,
+ ProfileIcacheMisses,
+ ProfileCacheMisses,
+ ProfileBranchMispredictions,
+ ProfileStoreInstructions,
+ ProfileFpInstructions,
+ ProfileIntegerInstructions,
+ Profile2Issue,
+ Profile3Issue,
+ Profile4Issue,
+ ProfileSpecialInstructions,
+ ProfileTotalCycles,
+ ProfileIcacheIssues,
+ ProfileDcacheAccesses,
+ ProfileMemoryBarrierCycles,
+ ProfileLoadLinkedIssues,
+ ProfileMaximum
+} KPROFILE_SOURCE;
+
+
+namespace {
+
+// Signatures for the native functions we need to access the sampling profiler.
+// These are resolved from ntdll.dll at runtime, so each typedef must match
+// the exported function it is cast to.
+
+// Called below as (interval, source) to set the sampling interval.
+typedef NTSTATUS (NTAPI *ZwSetIntervalProfileFunc)(ULONG, KPROFILE_SOURCE);
+// Called below as (source, &interval) to read the sampling interval back.
+typedef NTSTATUS (NTAPI *ZwQueryIntervalProfileFunc)(KPROFILE_SOURCE, PULONG);
+
+// Creates a profile object; on success the new handle is written to
+// |profile|. |buckets| points at |buckets_byte_size| bytes of ULONG counters.
+typedef NTSTATUS (NTAPI *ZwCreateProfileFunc)(PHANDLE profile,
+ HANDLE process,
+ PVOID code_start,
+ ULONG code_size,
+ ULONG eip_bucket_shift,
+ PULONG buckets,
+ ULONG buckets_byte_size,
+ KPROFILE_SOURCE source,
+ DWORD_PTR processor_mask);
+
+// Both take the profile handle produced by ZwCreateProfile.
+typedef NTSTATUS (NTAPI *ZwStartProfileFunc)(HANDLE);
+typedef NTSTATUS (NTAPI *ZwStopProfileFunc)(HANDLE);
+
+// This class is used to lazy-initialize pointers to the native
+// functions we need to access.
+// Callers must check |initialized_| before invoking any of the pointers.
+class ProfilerFuncs {
+ public:
+ // Resolves the function pointers below from ntdll.dll.
+ ProfilerFuncs();
+
+ // Entry points looked up by name in ntdll.dll; each member is named after
+ // the export it points to.
+ ZwSetIntervalProfileFunc ZwSetIntervalProfile;
+ ZwQueryIntervalProfileFunc ZwQueryIntervalProfile;
+ ZwCreateProfileFunc ZwCreateProfile;
+ ZwStartProfileFunc ZwStartProfile;
+ ZwStopProfileFunc ZwStopProfile;
+
+ // True iff all of the function pointers above were successfully initialized.
+ bool initialized_;
+};
+
+// Looks up the native profiler entry points exported by ntdll.dll.
+// Every pointer starts out NULL so that no member is left indeterminate when
+// ntdll.dll cannot be located; |initialized_| becomes true only if all five
+// lookups succeed.
+ProfilerFuncs::ProfilerFuncs()
+    : ZwSetIntervalProfile(NULL),
+      ZwQueryIntervalProfile(NULL),
+      ZwCreateProfile(NULL),
+      ZwStartProfile(NULL),
+      ZwStopProfile(NULL),
+      initialized_(false) {
+  HMODULE ntdll = ::GetModuleHandle(L"ntdll.dll");
+  if (ntdll == NULL)
+    return;
+
+  ZwSetIntervalProfile = reinterpret_cast<ZwSetIntervalProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwSetIntervalProfile"));
+  ZwQueryIntervalProfile = reinterpret_cast<ZwQueryIntervalProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwQueryIntervalProfile"));
+  ZwCreateProfile = reinterpret_cast<ZwCreateProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwCreateProfile"));
+  ZwStartProfile = reinterpret_cast<ZwStartProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwStartProfile"));
+  ZwStopProfile = reinterpret_cast<ZwStopProfileFunc>(
+      ::GetProcAddress(ntdll, "ZwStopProfile"));
+
+  // A single missing export makes the whole profiler unusable.
+  initialized_ = ZwSetIntervalProfile != NULL &&
+                 ZwQueryIntervalProfile != NULL &&
+                 ZwCreateProfile != NULL &&
+                 ZwStartProfile != NULL &&
+                 ZwStopProfile != NULL;
+}
+
+// File-local table of the native profiler functions, accessed through
+// funcs.Get() by the SamplingProfiler methods below.
+// NOTE(review): the leaky traits presumably mean the instance is never
+// destroyed at process exit - confirm against base/lazy_instance.h.
+base::LazyInstance<ProfilerFuncs, base::LeakyLazyInstanceTraits<ProfilerFuncs>>
+ funcs = LAZY_INSTANCE_INITIALIZER;
+
+} // namespace
+
+
+namespace base {
+namespace win {
+
+// Creates a profiler that is neither initialized nor started.
+SamplingProfiler::SamplingProfiler()
+    : is_started_(false) {}
+
+// Stops the profiler if it is still running. A failed stop is fatal: as the
+// message below says, leaving the profiler running would corrupt memory.
+SamplingProfiler::~SamplingProfiler() {
+  if (!is_started_)
+    return;
+
+  CHECK(Stop()) <<
+      "Unable to stop sampling profiler, this will cause memory corruption.";
+}
+
+// Creates (but does not start) a kernel profile object covering |size| bytes
+// starting at |start| in |process|, with one ULONG counter per
+// 2^|log2_bucket_size| bytes. |log2_bucket_size| must be in the range 2-31.
+// Returns false, leaving no buckets allocated, if the native profiler
+// functions are unavailable, the affinity mask of |process| cannot be
+// queried, or the profile object cannot be created.
+bool SamplingProfiler::Initialize(HANDLE process,
+                                  void* start,
+                                  size_t size,
+                                  size_t log2_bucket_size) {
+  // You only get to initialize each instance once.
+  DCHECK(!profile_handle_.IsValid());
+  DCHECK(!is_started_);
+  DCHECK(start != NULL);
+  DCHECK_NE(0U, size);
+  // The documented range is 2-31; shifting by 32 or more would be undefined.
+  DCHECK_LE(2U, log2_bucket_size);
+  DCHECK_GE(31U, log2_bucket_size);
+
+  // Bail if the native functions weren't found.
+  if (!funcs.Get().initialized_)
+    return false;
+
+  // Get our affinity mask for the call below. This is done before the
+  // buckets are allocated so that a failure here leaves them empty.
+  DWORD_PTR process_affinity = 0;
+  DWORD_PTR system_affinity = 0;
+  if (!::GetProcessAffinityMask(process, &process_affinity, &system_affinity)) {
+    LOG(ERROR) << "Failed to get process affinity mask.";
+    return false;
+  }
+
+  // Shift a size_t rather than an int, so that a |log2_bucket_size| of 31
+  // does not overflow a signed int.
+  const size_t bucket_size = static_cast<size_t>(1) << log2_bucket_size;
+  // Round up without computing |size + bucket_size - 1|, which can wrap.
+  const size_t num_buckets =
+      size / bucket_size + ((size % bucket_size) != 0 ? 1 : 0);
+  DCHECK(num_buckets != 0);
+  buckets_.resize(num_buckets);
+
+  HANDLE profile = NULL;
+  NTSTATUS status =
+      funcs.Get().ZwCreateProfile(&profile,
+                                  process,
+                                  start,
+                                  static_cast<ULONG>(size),
+                                  static_cast<ULONG>(log2_bucket_size),
+                                  &buckets_[0],
+                                  static_cast<ULONG>(
+                                      sizeof(buckets_[0]) * num_buckets),
+                                  ProfileTime,
+                                  process_affinity);
+
+  if (!NT_SUCCESS(status)) {
+    // Might as well deallocate the buckets.
+    buckets_.resize(0);
+    LOG(ERROR) << "Failed to create profile, error 0x" << std::hex << status;
+    return false;
+  }
+
+  DCHECK(profile != NULL);
+  profile_handle_.Set(profile);
+
+  return true;
+}
+
+// Starts sampling. The profiler must already be initialized and must not be
+// running. Returns false, leaving the profiler stopped, if the native call
+// fails.
+bool SamplingProfiler::Start() {
+  DCHECK(profile_handle_.IsValid());
+  DCHECK(!is_started_);
+  DCHECK(funcs.Get().initialized_);
+
+  const NTSTATUS start_status =
+      funcs.Get().ZwStartProfile(profile_handle_.Get());
+  if (NT_SUCCESS(start_status)) {
+    is_started_ = true;
+    return true;
+  }
+
+  return false;
+}
+
+// Stops sampling. The profiler must currently be running. Returns false,
+// leaving the profiler marked as started, if the native call fails.
+bool SamplingProfiler::Stop() {
+  DCHECK(profile_handle_.IsValid());
+  DCHECK(is_started_);
+  DCHECK(funcs.Get().initialized_);
+
+  const NTSTATUS stop_status =
+      funcs.Get().ZwStopProfile(profile_handle_.Get());
+  if (NT_SUCCESS(stop_status)) {
+    is_started_ = false;
+    return true;
+  }
+
+  return false;
+}
+
+// Sets the sampling interval for ProfileTime. Returns false if the native
+// functions are unavailable, if |sampling_interval| is negative or too large
+// to express as a ULONG count of 100ns units, or if the native call fails.
+bool SamplingProfiler::SetSamplingInterval(base::TimeDelta sampling_interval) {
+  if (!funcs.Get().initialized_)
+    return false;
+
+  // According to Nebbet, the sampling interval is in units of 100ns.
+  // Reject values that don't fit in a ULONG after scaling, rather than
+  // silently truncating them to some unrelated interval.
+  const int64 kMaxMicroseconds =
+      static_cast<int64>(~static_cast<ULONG>(0)) / 10;
+  const int64 microseconds = sampling_interval.InMicroseconds();
+  if (microseconds < 0 || microseconds > kMaxMicroseconds)
+    return false;
+
+  ULONG interval = static_cast<ULONG>(microseconds * 10);
+  NTSTATUS status = funcs.Get().ZwSetIntervalProfile(interval, ProfileTime);
+  if (!NT_SUCCESS(status))
+    return false;
+
+  return true;
+}
+
+// Reads the current ProfileTime sampling interval into |sampling_interval|,
+// which must not be NULL. Returns false, leaving |*sampling_interval|
+// untouched, if the native functions are unavailable or the query fails.
+bool SamplingProfiler::GetSamplingInterval(base::TimeDelta* sampling_interval) {
+  DCHECK(sampling_interval != NULL);
+
+  if (!funcs.Get().initialized_)
+    return false;
+
+  ULONG raw_interval = 0;
+  const NTSTATUS query_status =
+      funcs.Get().ZwQueryIntervalProfile(ProfileTime, &raw_interval);
+  if (NT_SUCCESS(query_status)) {
+    // According to Nebbet, the sampling interval is in units of 100ns.
+    *sampling_interval = base::TimeDelta::FromMicroseconds(raw_interval / 10);
+    return true;
+  }
+
+  return false;
+}
+
+} // namespace win
+} // namespace base
diff --git a/base/win/sampling_profiler.h b/base/win/sampling_profiler.h
new file mode 100644
index 0000000..bf1be8c
--- /dev/null
+++ b/base/win/sampling_profiler.h
@@ -0,0 +1,74 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_WIN_SAMPLING_PROFILER_H_
+#define BASE_WIN_SAMPLING_PROFILER_H_
+#pragma once
+
+#include <vector>
+
+#include "base/basictypes.h"
+#include "base/time.h"
+#include "base/win/scoped_handle.h"
+
+namespace base {
+namespace win {
+
+// This class exposes the functionality of Windows' built-in sampling profiler.
+// Each profiler instance covers a range of memory, and while the profiler is
+// running, its buckets will count the number of times the instruction counter
+// lands in the associated range of memory on a sample.
+// The sampling interval is settable, but the setting is system-wide.
+class BASE_EXPORT SamplingProfiler {
+ public:
+ // Create an uninitialized sampling profiler.
+ SamplingProfiler();
+ // Stops the profiler if it is still started; a failure to stop is fatal.
+ ~SamplingProfiler();
+
+ // Initializes the profiler to cover the memory range |start| through
+ // |start| + |size|, in the process |process_handle| with bucket size
+ // |2^log2_bucket_size|, |log2_bucket_size| must be in the range 2-31,
+ // for bucket sizes of 4 bytes to 2 gigabytes.
+ // The process handle must grant at least PROCESS_QUERY_INFORMATION.
+ // The memory range should be executable code, like e.g. the text segment
+ // of an executable (whether DLL or EXE).
+ // Each instance may only be initialized once.
+ // Returns true on success.
+ bool Initialize(HANDLE process_handle,
+ void* start,
+ size_t size,
+ size_t log2_bucket_size);
+
+ // Start this profiler, which must be initialized and not started.
+ // Returns true on success.
+ bool Start();
+ // Stop this profiler, which must be started.
+ // Returns true on success.
+ bool Stop();
+
+ // Get and set the sampling interval.
+ // Note that this is a system-wide setting.
+ // Both return true on success.
+ static bool SetSamplingInterval(base::TimeDelta sampling_interval);
+ static bool GetSamplingInterval(base::TimeDelta* sampling_interval);
+
+ // Accessors.
+ bool is_started() const { return is_started_; }
+
+ // It is safe to read the counts in the sampling buckets at any time.
+ // Note however that there's no guarantee that you'll read consistent counts
+ // until the profiler has been stopped, as the counts may be updating on other
+ // CPU cores.
+ const std::vector<ULONG>& buckets() const { return buckets_; }
+
+ private:
+ // Handle to the corresponding kernel object.
+ ScopedHandle profile_handle_;
+ // True iff this profiler is started.
+ bool is_started_;
+ // One sample counter per bucket; sized by Initialize().
+ std::vector<ULONG> buckets_;
+
+ DISALLOW_COPY_AND_ASSIGN(SamplingProfiler);
+};
+
+} // namespace win
+} // namespace base
+
+#endif // BASE_WIN_SAMPLING_PROFILER_H_
diff --git a/base/win/sampling_profiler_unittest.cc b/base/win/sampling_profiler_unittest.cc
new file mode 100644
index 0000000..caa4c63
--- /dev/null
+++ b/base/win/sampling_profiler_unittest.cc
@@ -0,0 +1,116 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/win/sampling_profiler.h"
+
+#include "base/test/test_timeouts.h"
+#include "base/win/pe_image.h"
+#include "base/win/scoped_handle.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+// The address of our image base.
+extern "C" IMAGE_DOS_HEADER __ImageBase;
+
+namespace base {
+namespace win {
+
+namespace {
+
+// Test fixture that opens a handle to the current process and locates the
+// .text section of the test image, so that each test has a real range of
+// code to profile.
+class SamplingProfilerTest : public testing::Test {
+ public:
+ SamplingProfilerTest() : code_start(NULL), code_size(0) {
+ }
+
+ // Populates |process|, |code_start| and |code_size|; any failed assertion
+ // aborts the set-up.
+ virtual void SetUp() {
+ process.Set(::OpenProcess(PROCESS_QUERY_INFORMATION,
+ FALSE,
+ ::GetCurrentProcessId()));
+ ASSERT_TRUE(process.IsValid());
+
+ PEImage image(&__ImageBase);
+
+ // Get the address of the .text section, which is the first section output
+ // by the VS tools.
+ ASSERT_TRUE(image.GetNumSections() > 0);
+ const IMAGE_SECTION_HEADER* text_section = image.GetSectionHeader(0);
+ // Verify that assumption: the first section must be named ".text" and be
+ // marked executable.
+ ASSERT_EQ(0, strncmp(".text",
+ reinterpret_cast<const char*>(text_section->Name),
+ arraysize(text_section->Name)));
+ ASSERT_NE(0U, text_section->Characteristics & IMAGE_SCN_MEM_EXECUTE);
+
+ // The section's VirtualAddress is an offset from the image base.
+ code_start = reinterpret_cast<uint8*>(&__ImageBase) +
+ text_section->VirtualAddress;
+ code_size = text_section->Misc.VirtualSize;
+ }
+
+ protected:
+ // Handle to the current process, opened with PROCESS_QUERY_INFORMATION.
+ ScopedHandle process;
+ // Start address and byte size of the .text section of this image.
+ void* code_start;
+ size_t code_size;
+};
+
+} // namespace
+
+// Checks that a profiler can be initialized over this image's .text section
+// using 2^8 = 256 byte buckets.
+TEST_F(SamplingProfilerTest, Initialize) {
+ SamplingProfiler profiler;
+
+ ASSERT_TRUE(profiler.Initialize(process.Get(), code_start, code_size, 8));
+}
+
+// Checks that a started profiler actually accrues samples: profiles this
+// image's .text section as a single bucket and spins until that bucket
+// becomes non-zero or the timeout expires.
+TEST_F(SamplingProfilerTest, Sample) {
+  SamplingProfiler profiler;
+
+  // Initialize with a huge bucket size, aiming for a single bucket.
+  ASSERT_TRUE(
+      profiler.Initialize(process.Get(), code_start, code_size, 31));
+
+  // Unsigned literals, to match the unsigned values they are compared with.
+  ASSERT_EQ(1U, profiler.buckets().size());
+  ASSERT_EQ(0U, profiler.buckets()[0]);
+
+  // We use a roomy timeout to make sure this test is not flaky.
+  // On the buildbots, there may not be a whole lot of CPU time
+  // allotted to our process in this wall-clock time duration,
+  // and samples will only accrue while this thread is busy on
+  // a CPU core.
+  base::TimeDelta spin_time =
+      base::TimeDelta::FromMilliseconds(TestTimeouts::action_timeout_ms());
+
+  // The interval is a system-wide setting, so remember it for restoring.
+  base::TimeDelta save_sampling_interval;
+  ASSERT_TRUE(SamplingProfiler::GetSamplingInterval(&save_sampling_interval));
+
+  // Sample every 0.5 millisecs.
+  ASSERT_TRUE(SamplingProfiler::SetSamplingInterval(
+      base::TimeDelta::FromMicroseconds(500)));
+
+  // Start the profiler.
+  ASSERT_TRUE(profiler.Start());
+
+  // Get a volatile pointer to our bucket to make sure that the compiler
+  // doesn't optimize out the test in the loop that follows.
+  volatile const ULONG* bucket_ptr = &profiler.buckets()[0];
+
+  // Spin for spin_time of wall-clock time, or until we get some samples.
+  // Note that sleeping isn't going to do us any good, the samples only
+  // accrue while we're executing code.
+  base::Time start = base::Time::Now();
+  base::TimeDelta elapsed;
+  do {
+    elapsed = base::Time::Now() - start;
+  } while ((elapsed < spin_time) && *bucket_ptr == 0);
+
+  // Stop the profiler.
+  ASSERT_TRUE(profiler.Stop());
+
+  // Restore the sampling interval we found.
+  ASSERT_TRUE(SamplingProfiler::SetSamplingInterval(save_sampling_interval));
+
+  // Check that we got some samples.
+  ASSERT_NE(0U, profiler.buckets()[0]);
+}
+
+} // namespace win
+} // namespace base