diff options
author | wittman <wittman@chromium.org> | 2015-03-20 15:52:19 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-03-20 22:53:13 +0000 |
commit | b450e197e878aec522e7299004dba511c65f254c (patch) | |
tree | 272675488a53a0205d536dd074eddd7959d01364 | |
parent | 19faf60586e58f244371697b49eb61959d9c7be9 (diff) | |
download | chromium_src-b450e197e878aec522e7299004dba511c65f254c.zip chromium_src-b450e197e878aec522e7299004dba511c65f254c.tar.gz chromium_src-b450e197e878aec522e7299004dba511c65f254c.tar.bz2 |
Statistical stack profiler for Windows x64
Provides the collection part of a client stack collection pipeline for
Win x64. This is intended to be used initially to provide real-world
stack trace data to UMA to facilitate implementation of server-side
processing. This CL has just the profiler implementation. Instantiation
and use of the profiler in Chrome, and handling by UMA, will be
addressed in follow-on CLs.
BUG=464929
Review URL: https://codereview.chromium.org/1016563004
Cr-Commit-Position: refs/heads/master@{#321658}
-rw-r--r-- | base/BUILD.gn | 5 | ||||
-rw-r--r-- | base/base.gyp | 1 | ||||
-rw-r--r-- | base/base.gypi | 4 | ||||
-rw-r--r-- | base/profiler/stack_sampling_profiler.cc | 261 | ||||
-rw-r--r-- | base/profiler/stack_sampling_profiler.h | 206 | ||||
-rw-r--r-- | base/profiler/stack_sampling_profiler_posix.cc | 14 | ||||
-rw-r--r-- | base/profiler/stack_sampling_profiler_unittest.cc | 324 | ||||
-rw-r--r-- | base/profiler/stack_sampling_profiler_win.cc | 325 |
8 files changed, 1140 insertions, 0 deletions
diff --git a/base/BUILD.gn b/base/BUILD.gn index b95ff72..8b9b5bf 100644 --- a/base/BUILD.gn +++ b/base/BUILD.gn @@ -370,6 +370,10 @@ component("base") { "profiler/scoped_profile.h", "profiler/scoped_tracker.cc", "profiler/scoped_tracker.h", + "profiler/stack_sampling_profiler.cc", + "profiler/stack_sampling_profiler.h", + "profiler/stack_sampling_profiler_posix.cc", + "profiler/stack_sampling_profiler_win.cc", "profiler/tracked_time.cc", "profiler/tracked_time.h", "rand_util.cc", @@ -1191,6 +1195,7 @@ test("base_unittests") { "process/process_unittest.cc", "process/process_util_unittest.cc", "process/process_util_unittest_ios.cc", + "profiler/stack_sampling_profiler_unittest.cc", "profiler/tracked_time_unittest.cc", "rand_util_unittest.cc", "scoped_clear_errno_unittest.cc", diff --git a/base/base.gyp b/base/base.gyp index 6f8102c..ac37a89 100644 --- a/base/base.gyp +++ b/base/base.gyp @@ -595,6 +595,7 @@ 'process/process_metrics_unittest_ios.cc', 'process/process_unittest.cc', 'process/process_util_unittest.cc', + 'profiler/stack_sampling_profiler_unittest.cc', 'profiler/tracked_time_unittest.cc', 'rand_util_unittest.cc', 'scoped_clear_errno_unittest.cc', diff --git a/base/base.gypi b/base/base.gypi index 40c7d35..825bca1 100644 --- a/base/base.gypi +++ b/base/base.gypi @@ -488,6 +488,10 @@ 'profiler/scoped_profile.h', 'profiler/scoped_tracker.cc', 'profiler/scoped_tracker.h', + 'profiler/stack_sampling_profiler.cc', + 'profiler/stack_sampling_profiler.h', + 'profiler/stack_sampling_profiler_posix.cc', + 'profiler/stack_sampling_profiler_win.cc', 'profiler/tracked_time.cc', 'profiler/tracked_time.h', 'rand_util.cc', diff --git a/base/profiler/stack_sampling_profiler.cc b/base/profiler/stack_sampling_profiler.cc new file mode 100644 index 0000000..57b7b35 --- /dev/null +++ b/base/profiler/stack_sampling_profiler.cc @@ -0,0 +1,261 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/profiler/stack_sampling_profiler.h" + +#include <algorithm> + +#include "base/bind.h" +#include "base/callback.h" +#include "base/memory/singleton.h" +#include "base/synchronization/lock.h" +#include "base/synchronization/waitable_event.h" +#include "base/timer/elapsed_timer.h" + +template <typename T> struct DefaultSingletonTraits; + +namespace base { + +namespace { + +// Thread-safe singleton class that stores collected profiles waiting to be +// processed. +class PendingProfiles { + public: + PendingProfiles(); + ~PendingProfiles(); + + static PendingProfiles* GetInstance(); + + // Appends |profiles|. This function is thread safe. + void PutProfiles(const std::vector<StackSamplingProfiler::Profile>& profiles); + // Gets the pending profiles into *|profiles|. This function is thread safe. + void GetProfiles(std::vector<StackSamplingProfiler::Profile>* profiles); + + private: + Lock profiles_lock_; + std::vector<StackSamplingProfiler::Profile> profiles_; + + DISALLOW_COPY_AND_ASSIGN(PendingProfiles); +}; + +PendingProfiles::PendingProfiles() {} + +PendingProfiles::~PendingProfiles() {} + +// static +PendingProfiles* PendingProfiles::GetInstance() { + return Singleton<PendingProfiles>::get(); +} + +void PendingProfiles::PutProfiles( + const std::vector<StackSamplingProfiler::Profile>& profiles) { + AutoLock scoped_lock(profiles_lock_); + profiles_.insert(profiles_.end(), profiles.begin(), profiles.end()); +} + +void PendingProfiles::GetProfiles( + std::vector<StackSamplingProfiler::Profile>* profiles) { + profiles->clear(); + + AutoLock scoped_lock(profiles_lock_); + profiles_.swap(*profiles); +} +} // namespace + +StackSamplingProfiler::Module::Module() : base_address(nullptr) {} + +StackSamplingProfiler::Module::~Module() {} + +StackSamplingProfiler::Frame::Frame() + : instruction_pointer(nullptr), + module_index(-1) {} + 
+StackSamplingProfiler::Frame::~Frame() {} + +StackSamplingProfiler::Profile::Profile() : preserve_sample_ordering(false) {} + +StackSamplingProfiler::Profile::~Profile() {} + +class StackSamplingProfiler::SamplingThread : public PlatformThread::Delegate { + public: + // Samples stacks using |native_sampler|. When complete, invokes + // |completed_callback| with the collected profiles. |completed_callback| must + // be thread-safe and may consume the contents of the vector. + SamplingThread( + scoped_ptr<NativeStackSampler> native_sampler, + const SamplingParams& params, + Callback<void(const std::vector<Profile>&)> completed_callback); + ~SamplingThread() override; + + // Implementation of PlatformThread::Delegate: + void ThreadMain() override; + + void Stop(); + + private: + // Collects a profile from a single burst. Returns true if the profile was + // collected, or false if collection was stopped before it completed. + bool CollectProfile(Profile* profile, TimeDelta* elapsed_time); + // Collects profiles from all bursts, or until the sampling is stopped. If + // stopped before complete, |profiles| will contain only full bursts. 
+ void CollectProfiles(std::vector<Profile>* profiles); + + scoped_ptr<NativeStackSampler> native_sampler_; + + const SamplingParams params_; + + WaitableEvent stop_event_; + + Callback<void(const std::vector<Profile>&)> completed_callback_; + + DISALLOW_COPY_AND_ASSIGN(SamplingThread); +}; + +StackSamplingProfiler::SamplingThread::SamplingThread( + scoped_ptr<NativeStackSampler> native_sampler, + const SamplingParams& params, + Callback<void(const std::vector<Profile>&)> completed_callback) + : native_sampler_(native_sampler.Pass()), + params_(params), + stop_event_(false, false), + completed_callback_(completed_callback) { +} + +StackSamplingProfiler::SamplingThread::~SamplingThread() {} + +void StackSamplingProfiler::SamplingThread::ThreadMain() { + PlatformThread::SetName("Chrome_SamplingProfilerThread"); + + std::vector<Profile> profiles; + CollectProfiles(&profiles); + completed_callback_.Run(profiles); +} + +bool StackSamplingProfiler::SamplingThread::CollectProfile( + Profile* profile, + TimeDelta* elapsed_time) { + ElapsedTimer profile_timer; + Profile current_profile; + native_sampler_->ProfileRecordingStarting(&current_profile); + current_profile.sampling_period = params_.sampling_interval; + bool stopped_early = false; + for (int i = 0; i < params_.samples_per_burst; ++i) { + ElapsedTimer sample_timer; + current_profile.samples.push_back(Sample()); + native_sampler_->RecordStackSample(&current_profile.samples.back()); + TimeDelta elapsed_sample_time = sample_timer.Elapsed(); + if (i != params_.samples_per_burst - 1) { + if (stop_event_.TimedWait( + std::max(params_.sampling_interval - elapsed_sample_time, + TimeDelta()))) { + stopped_early = true; + break; + } + } + } + + *elapsed_time = profile_timer.Elapsed(); + current_profile.profile_duration = *elapsed_time; + native_sampler_->ProfileRecordingStopped(); + + if (!stopped_early) + *profile = current_profile; + + return !stopped_early; +} + +void StackSamplingProfiler::SamplingThread::CollectProfiles( + 
std::vector<Profile>* profiles) { + if (stop_event_.TimedWait(params_.initial_delay)) + return; + + for (int i = 0; i < params_.bursts; ++i) { + Profile profile; + TimeDelta elapsed_profile_time; + if (CollectProfile(&profile, &elapsed_profile_time)) + profiles->push_back(profile); + else + return; + + if (stop_event_.TimedWait( + std::max(params_.burst_interval - elapsed_profile_time, + TimeDelta()))) + return; + } +} + +void StackSamplingProfiler::SamplingThread::Stop() { + stop_event_.Signal(); +} + +void StackSamplingProfiler::SamplingThreadDeleter::operator()( + SamplingThread* thread) const { + delete thread; +} + +StackSamplingProfiler::NativeStackSampler::NativeStackSampler() {} + +StackSamplingProfiler::NativeStackSampler::~NativeStackSampler() {} + +StackSamplingProfiler::SamplingParams::SamplingParams() + : initial_delay(TimeDelta::FromMilliseconds(0)), + bursts(1), + burst_interval(TimeDelta::FromMilliseconds(10000)), + samples_per_burst(300), + sampling_interval(TimeDelta::FromMilliseconds(100)), + preserve_sample_ordering(false) { +} + +StackSamplingProfiler::StackSamplingProfiler(PlatformThreadId thread_id, + const SamplingParams& params) + : thread_id_(thread_id), params_(params) {} + +StackSamplingProfiler::~StackSamplingProfiler() {} + +void StackSamplingProfiler::Start() { + native_sampler_ = NativeStackSampler::Create(thread_id_); + if (!native_sampler_) + return; + + sampling_thread_.reset( + new SamplingThread( + native_sampler_.Pass(), params_, + (custom_completed_callback_.is_null() ? 
+ Bind(&PendingProfiles::PutProfiles, + Unretained(PendingProfiles::GetInstance())) : + custom_completed_callback_))); + if (!PlatformThread::CreateNonJoinable(0, sampling_thread_.get())) + LOG(ERROR) << "failed to create thread"; +} + +void StackSamplingProfiler::Stop() { + if (sampling_thread_) + sampling_thread_->Stop(); +} + +// static +void StackSamplingProfiler::GetPendingProfiles(std::vector<Profile>* profiles) { + PendingProfiles::GetInstance()->GetProfiles(profiles); +} + +void StackSamplingProfiler::SetCustomCompletedCallback( + Callback<void(const std::vector<Profile>&)> callback) { + custom_completed_callback_ = callback; +} + +bool operator==(const StackSamplingProfiler::Frame &a, + const StackSamplingProfiler::Frame &b) { + return a.instruction_pointer == b.instruction_pointer && + a.module_index == b.module_index; +} + +bool operator<(const StackSamplingProfiler::Frame &a, + const StackSamplingProfiler::Frame &b) { + return (a.module_index < b.module_index) || + (a.module_index == b.module_index && + a.instruction_pointer < b.instruction_pointer); +} + +} // namespace base diff --git a/base/profiler/stack_sampling_profiler.h b/base/profiler/stack_sampling_profiler.h new file mode 100644 index 0000000..60faa51 --- /dev/null +++ b/base/profiler/stack_sampling_profiler.h @@ -0,0 +1,206 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef BASE_PROFILER_STACK_SAMPLING_PROFILER_H_ +#define BASE_PROFILER_STACK_SAMPLING_PROFILER_H_ + +#include <string> +#include <vector> + +#include "base/base_export.h" +#include "base/callback.h" +#include "base/files/file_path.h" +#include "base/memory/scoped_ptr.h" +#include "base/strings/string16.h" +#include "base/threading/platform_thread.h" +#include "base/time/time.h" + +namespace base { + +// StackSamplingProfiler periodically stops a thread to sample its stack, for +// the purpose of collecting information about which code paths are +// executing. This information is used in aggregate by UMA to identify hot +// and/or janky code paths. +// +// Sample StackSamplingProfiler usage: +// +// // Create and customize params as desired. +// base::StackSamplingProfiler::SamplingParams params; +// // Any thread's ID may be passed as the target. +// base::StackSamplingProfiler profiler(base::PlatformThread::CurrentId(), +// params); +// +// // To process the profiles within Chrome rather than via UMA, set a custom +// // completed callback: +// base::Callback<void(const std::vector<Profile>&)> +// thread_safe_callback = ...; +// profiler.SetCustomCompletedCallback(thread_safe_callback); +// +// profiler.Start(); +// // ... work being done on the target thread here ... +// profiler.Stop(); // optional, stops collection before complete per params +// +// When all profiles are complete or the profiler is stopped, if the custom +// completed callback was set it will be called from the profiler thread with +// the completed profiles. If no callback was set, the profiles are stored +// internally and retrieved for UMA through +// GetPendingProfiles(). GetPendingProfiles() should never be called by other +// code; to retrieve profiles for in-process processing, set a completed +// callback. +class BASE_EXPORT StackSamplingProfiler { + public: + // Module represents the module (DLL or exe) corresponding to a stack frame. 
+ struct Module { + Module(); + ~Module(); + + // Points to the base address of the module. + const void* base_address; + // An opaque binary string that uniquely identifies a particular program + // version with high probability. This is parsed from headers of the loaded + // module. + // For binaries generated by GNU tools: + // Contents of the .note.gnu.build-id field. + // On Windows: + // GUID + AGE in the debug image headers of a module. + std::string id; + // The filename of the module. + FilePath filename; + }; + + // Frame represents an individual sampled stack frame with module information. + struct Frame { + Frame(); + ~Frame(); + + // The sampled instruction pointer within the function. + const void* instruction_pointer; + // Index of the module in the array of modules. We don't represent module + // state directly here to save space. + int module_index; + }; + + // Sample represents a set of stack frames. + using Sample = std::vector<Frame>; + + // Profile represents a set of samples. + struct BASE_EXPORT Profile { + Profile(); + ~Profile(); + + std::vector<Module> modules; + std::vector<Sample> samples; + // Duration of this profile. + TimeDelta profile_duration; + // Time between samples. + TimeDelta sampling_period; + // True if sample ordering is important and should be preserved if and when + // this profile is compressed and processed. + bool preserve_sample_ordering; + }; + + // NativeStackSampler abstracts the native implementation required to record a + // stack sample for a given thread. + class NativeStackSampler { + public: + virtual ~NativeStackSampler(); + + // Create a stack sampler that records samples for |thread_handle|. Returns + // null if this platform does not support stack sampling. + static scoped_ptr<NativeStackSampler> Create(PlatformThreadId thread_id); + + // Notify the sampler that we're starting to record a new profile. This + // function is called on the SamplingThread. 
+ virtual void ProfileRecordingStarting(Profile* profile) = 0; + + // Record a stack sample. This function is called on the SamplingThread. + virtual void RecordStackSample(Sample* sample) = 0; + + // Notify the sampler that we've stopped recording the current profile. This + // function is called on the SamplingThread. + virtual void ProfileRecordingStopped() = 0; + + protected: + NativeStackSampler(); + + private: + DISALLOW_COPY_AND_ASSIGN(NativeStackSampler); + }; + + // Represents parameters that configure the sampling. + struct BASE_EXPORT SamplingParams { + SamplingParams(); + + // Time to delay before first samples are taken. Defaults to 0. + TimeDelta initial_delay; + // Number of sampling bursts to perform. Defaults to 1. + int bursts; + // Interval between sampling bursts. This is the desired duration from the + // start of one burst to the start of the next burst. Defaults to 10s. + TimeDelta burst_interval; + // Number of samples to record per burst. Defaults to 300. + int samples_per_burst; + // Interval between samples during a sampling burst. This is the desired + // duration from the start of one sample to the start of the next + // sample. Defaults to 100ms. + TimeDelta sampling_interval; + // True if sample ordering is important and should be preserved if and when + // this profile is compressed and processed. Defaults to false. + bool preserve_sample_ordering; + }; + + StackSamplingProfiler(PlatformThreadId thread_id, + const SamplingParams& params); + ~StackSamplingProfiler(); + + // Initializes the profiler and starts sampling. + void Start(); + // Stops the profiler and any ongoing sampling. Calling this function is + // optional; if not invoked profiling will terminate when all the profiling + // bursts specified in the SamplingParams are completed. + void Stop(); + + // Gets the pending profiles into *|profiles| and clears the internal + // storage. This function is thread safe. 
+ // + // ***This is intended for use only by UMA.*** Callers who want to process the + // collected profiles should use SetCustomCompletedCallback. + static void GetPendingProfiles(std::vector<Profile>* profiles); + + // By default, collected profiles are stored internally and can be retrieved + // by GetPendingProfiles. If a callback is provided via this function, + // however, it will be called with the collected profiles instead. Note that + // this call to the callback occurs *on the profiler thread*. + void SetCustomCompletedCallback( + Callback<void(const std::vector<Profile>&)> callback); + + private: + class SamplingThread; + struct SamplingThreadDeleter { + void operator() (SamplingThread* thread) const; + }; + + // The thread whose stack will be sampled. + PlatformThreadId thread_id_; + + const SamplingParams params_; + + scoped_ptr<SamplingThread, SamplingThreadDeleter> sampling_thread_; + scoped_ptr<NativeStackSampler> native_sampler_; + + Callback<void(const std::vector<Profile>&)> custom_completed_callback_; + + DISALLOW_COPY_AND_ASSIGN(StackSamplingProfiler); +}; + +// Defined to allow equality check of Samples. +BASE_EXPORT bool operator==(const StackSamplingProfiler::Frame& a, + const StackSamplingProfiler::Frame& b); +// Defined to allow ordering of Samples. +BASE_EXPORT bool operator<(const StackSamplingProfiler::Frame& a, + const StackSamplingProfiler::Frame& b); + +} // namespace base + +#endif // BASE_PROFILER_STACK_SAMPLING_PROFILER_H_ diff --git a/base/profiler/stack_sampling_profiler_posix.cc b/base/profiler/stack_sampling_profiler_posix.cc new file mode 100644 index 0000000..6a44d7e --- /dev/null +++ b/base/profiler/stack_sampling_profiler_posix.cc @@ -0,0 +1,14 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/profiler/stack_sampling_profiler.h" + +namespace base { + +scoped_ptr<StackSamplingProfiler::NativeStackSampler> +StackSamplingProfiler::NativeStackSampler::Create(PlatformThreadId thread_id) { + return scoped_ptr<NativeStackSampler>(); +} + +} // namespace base diff --git a/base/profiler/stack_sampling_profiler_unittest.cc b/base/profiler/stack_sampling_profiler_unittest.cc new file mode 100644 index 0000000..dfccab4 --- /dev/null +++ b/base/profiler/stack_sampling_profiler_unittest.cc @@ -0,0 +1,324 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <sstream> + +#include "base/bind.h" +#include "base/compiler_specific.h" +#include "base/path_service.h" +#include "base/profiler/stack_sampling_profiler.h" +#include "base/synchronization/waitable_event.h" +#include "base/threading/platform_thread.h" +#include "base/time/time.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace base { + +using Frame = StackSamplingProfiler::Frame; +using Module = StackSamplingProfiler::Module; +using Sample = StackSamplingProfiler::Sample; +using Profile = StackSamplingProfiler::Profile; + +namespace { +// A thread to target for profiling, whose stack is guaranteed to contain +// SignalAndWaitUntilSignaled() when coordinated with the main thread. +class TargetThread : public PlatformThread::Delegate { + public: + TargetThread(); + + // Implementation of PlatformThread::Delegate: + void ThreadMain() override; + + // Wait for the thread to have started and be executing in + // SignalAndWaitUntilSignaled(). + void WaitForThreadStart(); + // Allow the thread to return from SignalAndWaitUntilSignaled() and finish + // execution. + void SignalThreadToFinish(); + + // This function is guaranteed to be executing between calls to + // WaitForThreadStart() and SignalThreadToFinish(). 
+ static void SignalAndWaitUntilSignaled(WaitableEvent* thread_started_event, + WaitableEvent* finish_event); + + PlatformThreadId id() const { return id_; } + + private: + WaitableEvent thread_started_event_; + WaitableEvent finish_event_; + PlatformThreadId id_; + + DISALLOW_COPY_AND_ASSIGN(TargetThread); +}; + +TargetThread::TargetThread() + : thread_started_event_(false, false), finish_event_(false, false), + id_(0) {} + +void TargetThread::ThreadMain() { + id_ = PlatformThread::CurrentId(); + SignalAndWaitUntilSignaled(&thread_started_event_, &finish_event_); +} + +void TargetThread::WaitForThreadStart() { + thread_started_event_.Wait(); +} + +void TargetThread::SignalThreadToFinish() { + finish_event_.Signal(); +} + +// static +#if defined(_WIN64) +// Disable optimizations for this function so that it gets its own stack frame. +#pragma optimize("", off) +#endif +void TargetThread::SignalAndWaitUntilSignaled( + WaitableEvent* thread_started_event, + WaitableEvent* finish_event) { + thread_started_event->Signal(); + finish_event->Wait(); +} +#if defined(_WIN64) +#pragma optimize("", on) +#endif + +// Called on the profiler thread when complete. Collects profiles produced by +// the profiler, and signals an event to allow the main thread to know that +// the profiler is done. +void SaveProfilesAndSignalEvent(std::vector<Profile>* profiles, + WaitableEvent* event, + const std::vector<Profile>& pending_profiles) { + *profiles = pending_profiles; + event->Signal(); +} + +// Captures profiles as specified by |params| on the TargetThread, and returns +// them in |profiles|. Waits up to |profiler_wait_time| for the profiler to +// complete. 
+void CaptureProfiles(const StackSamplingProfiler::SamplingParams& params, + std::vector<Profile>* profiles, + TimeDelta profiler_wait_time) { + TargetThread target_thread; + PlatformThreadHandle target_thread_handle; + EXPECT_TRUE(PlatformThread::Create(0, &target_thread, &target_thread_handle)); + + target_thread.WaitForThreadStart(); + + WaitableEvent sampling_thread_completed(true, false); + profiles->clear(); + StackSamplingProfiler profiler(target_thread.id(), params); + profiler.SetCustomCompletedCallback( + Bind(&SaveProfilesAndSignalEvent, Unretained(profiles), + Unretained(&sampling_thread_completed))); + profiler.Start(); + sampling_thread_completed.TimedWait(profiler_wait_time); + profiler.Stop(); + sampling_thread_completed.Wait(); + + target_thread.SignalThreadToFinish(); + + PlatformThread::Join(target_thread_handle); +} + +// If this executable was linked with /INCREMENTAL (the default for non-official +// debug and release builds on Windows), function addresses do not correspond to +// function code itself, but instead to instructions in the Incremental Link +// Table that jump to the functions. Check for a jump instruction and if present +// do a little decompilation to find the function's actual starting address. +const void* MaybeFixupFunctionAddressForILT(const void* function_address) { +#if defined(_WIN64) + const unsigned char* opcode = + reinterpret_cast<const unsigned char*>(function_address); + if (*opcode == 0xe9) { + // This is a relative jump instruction. Assume we're in the ILT and compute + // the function start address from the instruction offset. 
+ const unsigned char* offset = opcode + 1; + const unsigned char* next_instruction = opcode + 5; + return next_instruction + + static_cast<int64>(*reinterpret_cast<const int32*>(offset)); + } +#endif + return function_address; +} + +// Searches through the frames in |sample|, returning an iterator to the first +// frame that has an instruction pointer between |function_address| and +// |function_address| + |size|. Returns sample.end() if no such frames are +// found. +Sample::const_iterator FindFirstFrameWithinFunction( + const Sample& sample, + const void* function_address, + int function_size) { + function_address = MaybeFixupFunctionAddressForILT(function_address); + for (auto it = sample.begin(); it != sample.end(); ++it) { + if ((reinterpret_cast<const unsigned char*>(it->instruction_pointer) >= + reinterpret_cast<const unsigned char*>(function_address)) && + (reinterpret_cast<const unsigned char*>(it->instruction_pointer) < + (reinterpret_cast<const unsigned char*>(function_address) + + function_size))) + return it; + } + return sample.end(); +} + +// Formats a sample into a string that can be output for test diagnostics. +std::string FormatSampleForDiagnosticOutput( + const Sample& sample, + const std::vector<Module>& modules) { + std::ostringstream stream; + for (const Frame& frame: sample) { + stream << frame.instruction_pointer << " " + << modules[frame.module_index].filename.value() << std::endl; + } + return stream.str(); +} + +// Returns a duration that is longer than the test timeout. We would use +// TimeDelta::Max() but https://crbug.com/465948. +TimeDelta AVeryLongTimeDelta() { return TimeDelta::FromDays(1); } +} // namespace + + +// The tests below are enabled for Win x64 only, pending implementation of the +// tested functionality on other platforms/architectures. + +// Checks that the basic expected information is present in a sampled profile. 
+#if defined(_WIN64) +#define MAYBE_Basic Basic +#else +#define MAYBE_Basic DISABLED_Basic +#endif +TEST(StackSamplingProfilerTest, MAYBE_Basic) { + StackSamplingProfiler::SamplingParams params; + params.initial_delay = params.burst_interval = params.sampling_interval = + TimeDelta::FromMilliseconds(0); + params.bursts = 1; + params.samples_per_burst = 1; + + std::vector<Profile> profiles; + CaptureProfiles(params, &profiles, AVeryLongTimeDelta()); + + // Check that the profile and samples sizes are correct, and the module + // indices are in range. + + ASSERT_EQ(1u, profiles.size()); + const Profile& profile = profiles[0]; + ASSERT_EQ(1u, profile.samples.size()); + EXPECT_EQ(params.sampling_interval, profile.sampling_period); + const Sample& sample = profile.samples[0]; + for (const auto& frame : sample) { + ASSERT_GE(frame.module_index, 0); + ASSERT_LT(frame.module_index, static_cast<int>(profile.modules.size())); + } + + // Check that the stack contains a frame for + // TargetThread::SignalAndWaitUntilSignaled() and that the frame has this + // executable's module. + + // Since we don't have a good way to know the function size, use 100 bytes as + // a reasonable window to locate the instruction pointer. + Sample::const_iterator loc = FindFirstFrameWithinFunction( + sample, + reinterpret_cast<const void*>(&TargetThread::SignalAndWaitUntilSignaled), + 100); + ASSERT_TRUE(loc != sample.end()) + << "Function at " + << MaybeFixupFunctionAddressForILT( + reinterpret_cast<const void*>( + &TargetThread::SignalAndWaitUntilSignaled)) + << " was not found in stack:" << std::endl + << FormatSampleForDiagnosticOutput(sample, profile.modules); + + FilePath executable_path; + bool got_executable_path = PathService::Get(FILE_EXE, &executable_path); + EXPECT_TRUE(got_executable_path); + EXPECT_EQ(executable_path, profile.modules[loc->module_index].filename); +} + +// Checks that the expected number of profiles and samples are present in the +// profiles produced. 
+#if defined(_WIN64) +#define MAYBE_MultipleProfilesAndSamples MultipleProfilesAndSamples +#else +#define MAYBE_MultipleProfilesAndSamples DISABLED_MultipleProfilesAndSamples +#endif +TEST(StackSamplingProfilerTest, MAYBE_MultipleProfilesAndSamples) { + StackSamplingProfiler::SamplingParams params; + params.initial_delay = params.burst_interval = params.sampling_interval = + TimeDelta::FromMilliseconds(0); + params.bursts = 2; + params.samples_per_burst = 3; + + std::vector<Profile> profiles; + CaptureProfiles(params, &profiles, AVeryLongTimeDelta()); + + ASSERT_EQ(2u, profiles.size()); + EXPECT_EQ(3u, profiles[0].samples.size()); + EXPECT_EQ(3u, profiles[1].samples.size()); +} + +// Checks that no profiles are captured if the profiling is stopped during the +// initial delay. +#if defined(_WIN64) +#define MAYBE_StopDuringInitialDelay StopDuringInitialDelay +#else +#define MAYBE_StopDuringInitialDelay DISABLED_StopDuringInitialDelay +#endif +TEST(StackSamplingProfilerTest, MAYBE_StopDuringInitialDelay) { + StackSamplingProfiler::SamplingParams params; + params.burst_interval = params.sampling_interval = + TimeDelta::FromMilliseconds(0); + params.initial_delay = TimeDelta::FromSeconds(60); + params.bursts = params.samples_per_burst = 1; + + std::vector<Profile> profiles; + CaptureProfiles(params, &profiles, TimeDelta::FromMilliseconds(0)); + + EXPECT_TRUE(profiles.empty()); +} + +// Checks that the single completed profile is captured if the profiling is +// stopped between bursts. 
+#if defined(_WIN64) +#define MAYBE_StopDuringInterBurstInterval StopDuringInterBurstInterval +#else +#define MAYBE_StopDuringInterBurstInterval DISABLED_StopDuringInterBurstInterval +#endif +TEST(StackSamplingProfilerTest, MAYBE_StopDuringInterBurstInterval) { + StackSamplingProfiler::SamplingParams params; + params.initial_delay = params.sampling_interval = + TimeDelta::FromMilliseconds(0); + params.burst_interval = TimeDelta::FromSeconds(60); + params.bursts = 2; + params.samples_per_burst = 1; + + std::vector<Profile> profiles; + CaptureProfiles(params, &profiles, TimeDelta::FromMilliseconds(50)); + + ASSERT_EQ(1u, profiles.size()); + EXPECT_EQ(1u, profiles[0].samples.size()); +} + +// Checks that only completed profiles are captured. +#if defined(_WIN64) +#define MAYBE_StopDuringInterSampleInterval StopDuringInterSampleInterval +#else +#define MAYBE_StopDuringInterSampleInterval \ + DISABLED_StopDuringInterSampleInterval +#endif +TEST(StackSamplingProfilerTest, MAYBE_StopDuringInterSampleInterval) { + StackSamplingProfiler::SamplingParams params; + params.initial_delay = params.burst_interval = TimeDelta::FromMilliseconds(0); + params.sampling_interval = TimeDelta::FromSeconds(60); + params.bursts = 1; + params.samples_per_burst = 2; + + std::vector<Profile> profiles; + CaptureProfiles(params, &profiles, TimeDelta::FromMilliseconds(50)); + + EXPECT_TRUE(profiles.empty()); +} + +} // namespace base diff --git a/base/profiler/stack_sampling_profiler_win.cc b/base/profiler/stack_sampling_profiler_win.cc new file mode 100644 index 0000000..ba46cf0 --- /dev/null +++ b/base/profiler/stack_sampling_profiler_win.cc @@ -0,0 +1,325 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/profiler/stack_sampling_profiler.h" + +#include <dbghelp.h> +#include <map> +#include <utility> +#include <windows.h> + +#include "base/logging.h" +#include "base/time/time.h" +#include "base/win/pe_image.h" +#include "base/win/scoped_handle.h" + +namespace base { + +namespace { + +class NativeStackSamplerWin : public StackSamplingProfiler::NativeStackSampler { + public: + explicit NativeStackSamplerWin(win::ScopedHandle thread_handle); + ~NativeStackSamplerWin() override; + + // StackSamplingProfiler::NativeStackSampler: + void ProfileRecordingStarting( + StackSamplingProfiler::Profile* profile) override; + void RecordStackSample(StackSamplingProfiler::Sample* sample) override; + void ProfileRecordingStopped() override; + + private: + static bool GetModuleInfo(HMODULE module, + StackSamplingProfiler::Module* module_info); + + void CopyToSample(const void* const instruction_pointers[], + const HMODULE modules[], + int stack_depth, + StackSamplingProfiler::Sample* sample, + std::vector<StackSamplingProfiler::Module>* module_infos); + + win::ScopedHandle thread_handle_; + // Weak. Points to the profile being recorded between + // ProfileRecordingStarting() and ProfileRecordingStopped(). + StackSamplingProfiler::Profile* current_profile_; + // Maps a module to the module's index within current_profile_->modules. + std::map<HMODULE, int> profile_module_index_; + + DISALLOW_COPY_AND_ASSIGN(NativeStackSamplerWin); +}; + +// Walk the stack represented by |context| from the current frame downwards, +// recording the instruction pointers for each frame in |instruction_pointers|. 
+int RecordStack(CONTEXT* context, + int max_stack_size, + const void* instruction_pointers[], + bool* last_frame_is_unknown_function) { +#ifdef _WIN64 + *last_frame_is_unknown_function = false; + + IMAGEHLP_SYMBOL64 sym; + sym.SizeOfStruct = sizeof(sym); + sym.MaxNameLength = 0; + + for (int i = 0; i < max_stack_size; ++i) { + // Try to look up unwind metadata for the current function. + ULONG64 image_base; + PRUNTIME_FUNCTION runtime_function = + RtlLookupFunctionEntry(context->Rip, &image_base, nullptr); + + instruction_pointers[i] = reinterpret_cast<void*>(context->Rip); + + if (runtime_function) { + KNONVOLATILE_CONTEXT_POINTERS nvcontext = {0}; + void* handler_data; + ULONG64 establisher_frame; + RtlVirtualUnwind(0, image_base, context->Rip, runtime_function, context, + &handler_data, &establisher_frame, &nvcontext); + } else { + // If we don't have a RUNTIME_FUNCTION, then we've encountered + // a leaf function. Adjust the stack appropriately. + context->Rip = *reinterpret_cast<PDWORD64>(context->Rsp); + context->Rsp += 8; + *last_frame_is_unknown_function = true; + } + + if (!context->Rip) + return i; + } + return max_stack_size; +#else + return 0; +#endif +} + +// Fills in |modules| corresponding to the pointers to code in |addresses|. The +// modules are returned with reference counts incremented should be freed with +// FreeModules. +void FindModulesForAddresses(const void* const addresses[], HMODULE modules[], + int stack_depth, + bool last_frame_is_unknown_function) { + const int module_frames = last_frame_is_unknown_function ? stack_depth - 1 : + stack_depth; + for (int i = 0; i < module_frames; ++i) { + HMODULE module = NULL; + if (GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, + reinterpret_cast<LPCTSTR>(addresses[i]), + &module)) { + // HMODULE is the base address of the module. + DCHECK_LT(reinterpret_cast<const void*>(module), addresses[i]); + modules[i] = module; + } + } +} + +// Free the modules returned by FindModulesForAddresses. 
+void FreeModules(int stack_depth, HMODULE modules[]) { + for (int i = 0; i < stack_depth; ++i) { + if (modules[i]) + ::FreeLibrary(modules[i]); + } +} + +// Disables priority boost on a thread for the lifetime of the object. +class ScopedDisablePriorityBoost { + public: + ScopedDisablePriorityBoost(HANDLE thread_handle); + ~ScopedDisablePriorityBoost(); + + private: + HANDLE thread_handle_; + BOOL got_previous_boost_state_; + BOOL boost_state_was_disabled_; + + DISALLOW_COPY_AND_ASSIGN(ScopedDisablePriorityBoost); +}; + +ScopedDisablePriorityBoost::ScopedDisablePriorityBoost(HANDLE thread_handle) + : thread_handle_(thread_handle), + got_previous_boost_state_(false), + boost_state_was_disabled_(false) { + got_previous_boost_state_ = + ::GetThreadPriorityBoost(thread_handle_, &boost_state_was_disabled_); + if (got_previous_boost_state_ && !boost_state_was_disabled_) { + // Confusingly, TRUE disables priority boost ... + ::SetThreadPriorityBoost(thread_handle_, TRUE); + } +} + +ScopedDisablePriorityBoost::~ScopedDisablePriorityBoost() { + if (got_previous_boost_state_ && !boost_state_was_disabled_) { + // ... and FALSE enables priority boost. + ::SetThreadPriorityBoost(thread_handle_, FALSE); + } +} + +// Suspends the thread with |thread_handle|, records the stack into +// |instruction_pointers|, then resumes the thread. Returns the size of the +// stack. 
+int SuspendThreadAndRecordStack(HANDLE thread_handle, int max_stack_size, + const void* instruction_pointers[], + bool* last_frame_is_unknown_function) { +#if defined(_WIN64) + if (RtlVirtualUnwind == nullptr || RtlLookupFunctionEntry == nullptr) + return 0; +#endif + + if (::SuspendThread(thread_handle) == -1) { + LOG(ERROR) << "SuspendThread failed: " << GetLastError(); + return 0; + } + + CONTEXT thread_context = {0}; + thread_context.ContextFlags = CONTEXT_FULL; + if (!::GetThreadContext(thread_handle, &thread_context)) { + LOG(ERROR) << "GetThreadContext failed: " << GetLastError(); + } + + int stack_depth = RecordStack(&thread_context, max_stack_size, + instruction_pointers, + last_frame_is_unknown_function); + + { + ScopedDisablePriorityBoost disable_priority_boost(thread_handle); + if (::ResumeThread(thread_handle) == -1) + LOG(ERROR) << "ResumeThread failed: " << GetLastError(); + } + + return stack_depth; +} + +} // namespace + +scoped_ptr<StackSamplingProfiler::NativeStackSampler> +StackSamplingProfiler::NativeStackSampler::Create(PlatformThreadId thread_id) { +#if _WIN64 + // Get the thread's handle. + HANDLE thread_handle = ::OpenThread( + THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION, + FALSE, + thread_id); + DCHECK(thread_handle) << "OpenThread failed"; + + return scoped_ptr<NativeStackSampler>(new NativeStackSamplerWin( + win::ScopedHandle(thread_handle))); +#else + return scoped_ptr<NativeStackSampler>(); +#endif +} + +NativeStackSamplerWin::NativeStackSamplerWin(win::ScopedHandle thread_handle) + : thread_handle_(thread_handle.Take()) { +#ifdef _WIN64 + if (RtlVirtualUnwind == nullptr && RtlLookupFunctionEntry == nullptr) { + const HMODULE nt_dll_handle = ::GetModuleHandle(L"ntdll.dll"); + // This should always be non-null, but handle just in case. 
+ if (nt_dll_handle) { + reinterpret_cast<void*&>(RtlVirtualUnwind) = + ::GetProcAddress(nt_dll_handle, "RtlVirtualUnwind"); + reinterpret_cast<void*&>(RtlLookupFunctionEntry) = + ::GetProcAddress(nt_dll_handle, "RtlLookupFunctionEntry"); + } + } +#endif +} + +NativeStackSamplerWin::~NativeStackSamplerWin() { +} + +void NativeStackSamplerWin::ProfileRecordingStarting( + StackSamplingProfiler::Profile* profile) { + current_profile_ = profile; + profile_module_index_.clear(); +} + +void NativeStackSamplerWin::RecordStackSample( + StackSamplingProfiler::Sample* sample) { + DCHECK(current_profile_); + + const int max_stack_size = 64; + const void* instruction_pointers[max_stack_size] = {0}; + HMODULE modules[max_stack_size] = {0}; + + bool last_frame_is_unknown_function = false; + int stack_depth = SuspendThreadAndRecordStack( + thread_handle_.Get(), max_stack_size, instruction_pointers, + &last_frame_is_unknown_function); + FindModulesForAddresses(instruction_pointers, modules, stack_depth, + last_frame_is_unknown_function); + CopyToSample(instruction_pointers, modules, stack_depth, sample, + ¤t_profile_->modules); + FreeModules(stack_depth, modules); +} + +void NativeStackSamplerWin::ProfileRecordingStopped() { + current_profile_ = nullptr; +} + +// static +bool NativeStackSamplerWin::GetModuleInfo( + HMODULE module, + StackSamplingProfiler::Module* module_info) { + wchar_t module_name[MAX_PATH]; + DWORD result_length = + GetModuleFileName(module, module_name, arraysize(module_name)); + if (result_length == 0) + return false; + + module_info->filename = base::FilePath(module_name); + + module_info->base_address = reinterpret_cast<const void*>(module); + + GUID guid; + DWORD age; + win::PEImage(module).GetDebugId(&guid, &age); + module_info->id.insert(module_info->id.end(), + reinterpret_cast<char*>(&guid), + reinterpret_cast<char*>(&guid + 1)); + module_info->id.insert(module_info->id.end(), + reinterpret_cast<char*>(&age), + reinterpret_cast<char*>(&age + 1)); + + 
return true; +} + +void NativeStackSamplerWin::CopyToSample( + const void* const instruction_pointers[], + const HMODULE modules[], + int stack_depth, + StackSamplingProfiler::Sample* sample, + std::vector<StackSamplingProfiler::Module>* module_infos) { + sample->clear(); + sample->reserve(stack_depth); + + for (int i = 0; i < stack_depth; ++i) { + sample->push_back(StackSamplingProfiler::Frame()); + StackSamplingProfiler::Frame& frame = sample->back(); + + frame.instruction_pointer = instruction_pointers[i]; + + // Record an invalid module index if we don't have a valid module. + if (!modules[i]) { + frame.module_index = -1; + continue; + } + + auto loc = profile_module_index_.find(modules[i]); + if (loc == profile_module_index_.end()) { + StackSamplingProfiler::Module module_info; + // Record an invalid module index if we have a module but can't find + // information on it. + if (!GetModuleInfo(modules[i], &module_info)) { + frame.module_index = -1; + continue; + } + module_infos->push_back(module_info); + loc = profile_module_index_.insert(std::make_pair( + modules[i], static_cast<int>(module_infos->size() - 1))).first; + } + + frame.module_index = loc->second; + } +} + +} // namespace base |