diff options
-rw-r--r-- | base/allocator/allocator.gyp | 2 | ||||
-rw-r--r-- | base/allocator/allocator_shim.cc | 18 | ||||
-rw-r--r-- | base/allocator/allocator_shim.h | 2 | ||||
-rw-r--r-- | base/base.gypi | 2 | ||||
-rw-r--r-- | base/profiler/alternate_timer.cc | 25 | ||||
-rw-r--r-- | base/profiler/alternate_timer.h | 36 | ||||
-rw-r--r-- | base/profiler/tracked_time.h | 2 | ||||
-rw-r--r-- | base/tracked_objects.cc | 52 | ||||
-rw-r--r-- | base/tracked_objects.h | 11 | ||||
-rw-r--r-- | third_party/tcmalloc/chromium/src/tcmalloc.cc | 8 | ||||
-rw-r--r-- | third_party/tcmalloc/chromium/src/thread_cache.cc | 6 | ||||
-rw-r--r-- | third_party/tcmalloc/chromium/src/thread_cache.h | 23 |
12 files changed, 183 insertions, 4 deletions
diff --git a/base/allocator/allocator.gyp b/base/allocator/allocator.gyp index b72f6f7..5aef0a6 100644 --- a/base/allocator/allocator.gyp +++ b/base/allocator/allocator.gyp @@ -416,6 +416,8 @@ ], 'sources': [ 'allocator_unittests.cc', + '../profiler/alternate_timer.cc', + '../profiler/alternate_timer.h', ], }, ], diff --git a/base/allocator/allocator_shim.cc b/base/allocator/allocator_shim.cc index 097fff2..d61a9a4 100644 --- a/base/allocator/allocator_shim.cc +++ b/base/allocator/allocator_shim.cc @@ -5,6 +5,7 @@ #include "base/allocator/allocator_shim.h" #include <config.h> +#include "base/profiler/alternate_timer.h" #include "base/sysinfo.h" // When defined, different heap allocators can be used via an environment @@ -48,8 +49,8 @@ static Allocator allocator = TCMALLOC; // selection of the allocator. The primary may be used to control overall // allocator selection, and the secondary can be used to specify an allocator // to use in sub-processes. -static const char* primary_name = "CHROME_ALLOCATOR"; -static const char* secondary_name = "CHROME_ALLOCATOR_2"; +static const char primary_name[] = "CHROME_ALLOCATOR"; +static const char secondary_name[] = "CHROME_ALLOCATOR_2"; // We include tcmalloc and the win_allocator to get as much inlining as // possible. @@ -261,6 +262,17 @@ extern "C" int _heap_init() { // lifetime. Trying to teardown at _heap_term() is so late that // you can't do anything useful anyway. new TCMallocGuard(); + + // Provide optional hook for monitoring allocation quantities on a per-thread + // basis. Only set the hook if the environment indicates this needs to be + // enabled. + const char* profiling = + GetenvBeforeMain(tracked_objects::kAlternateProfilerTime); + if (profiling && *profiling == '1') { + tracked_objects::SetAlternateTimeSource( + tcmalloc::ThreadCache::GetBytesAllocatedOnCurrentThread); + } + return 1; } @@ -302,5 +314,5 @@ void SetupSubprocessAllocator() { #endif // ENABLE_DYNAMIC_ALLOCATOR_SWITCHING } -} // namespace base. 
} // namespace allocator. +} // namespace base. diff --git a/base/allocator/allocator_shim.h b/base/allocator/allocator_shim.h index 342710f..b16f6ce 100644 --- a/base/allocator/allocator_shim.h +++ b/base/allocator/allocator_shim.h @@ -14,7 +14,7 @@ namespace allocator { // then a default value (typically set to TCMALLOC). void SetupSubprocessAllocator(); -} // namespace base. } // namespace allocator. +} // namespace base. #endif // BASE_ALLOCATOR_ALLOCATOR_SHIM_H_ diff --git a/base/base.gypi b/base/base.gypi index a5e819b..9d9d3f5 100644 --- a/base/base.gypi +++ b/base/base.gypi @@ -238,6 +238,8 @@ 'process_win.cc', 'profiler/scoped_profile.cc', 'profiler/scoped_profile.h', + 'profiler/alternate_timer.cc', + 'profiler/alternate_timer.h', 'profiler/tracked_time.cc', 'profiler/tracked_time.h', 'property_bag.cc', diff --git a/base/profiler/alternate_timer.cc b/base/profiler/alternate_timer.cc new file mode 100644 index 0000000..05a983c --- /dev/null +++ b/base/profiler/alternate_timer.cc @@ -0,0 +1,25 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/profiler/alternate_timer.h" + +#include "base/logging.h" + +namespace tracked_objects { + +static NowFunction* g_time_function = NULL; + +const char kAlternateProfilerTime[] = "CHROME_PROFILER_TIME"; + +// Set an alternate timer function to replace the OS time function when +// profiling. 
+void SetAlternateTimeSource(NowFunction* now_function) { + DCHECK_EQ(g_time_function, reinterpret_cast<NowFunction*>(NULL)); + g_time_function = now_function; +} + +extern NowFunction* GetAlternateTimeSource() { + return g_time_function; +} +} // tracked_objects diff --git a/base/profiler/alternate_timer.h b/base/profiler/alternate_timer.h new file mode 100644 index 0000000..883b24f --- /dev/null +++ b/base/profiler/alternate_timer.h @@ -0,0 +1,36 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// This is a glue file, which allows third party code to call into our profiler +// without having to include most any functions from base. + + +#ifndef BASE_PROFILER_ALTERNATE_TIMER_H_ +#define BASE_PROFILER_ALTERNATE_TIMER_H_ + +namespace tracked_objects { + +// Provide type for an alternate timer function. +typedef unsigned int NowFunction(); + +// Set an alternate timer function to replace the OS time function when +// profiling. Typically this is called by an allocator that is providing a +// function that indicates how much memory has been allocated on any given +// thread. +extern void SetAlternateTimeSource(NowFunction* now_function); + +// Gets the pointer to a function that was set via SetAlternateTimeSource(). +// Returns NULL if no set was done prior to calling GetAlternateTimeSource. +extern NowFunction* GetAlternateTimeSource(); + +// Environment variable name that is used to activate alternate timer profiling +// (such as using TCMalloc allocations to provide a pseudo-timer) for tasks +// instead of wall clock profiling. 
+extern const char kAlternateProfilerTime[]; + + + +} // tracked_objects + +#endif // BASE_PROFILER_ALTERNATE_TIMER_H_ diff --git a/base/profiler/tracked_time.h b/base/profiler/tracked_time.h index 6f4bc7e..5493e20 100644 --- a/base/profiler/tracked_time.h +++ b/base/profiler/tracked_time.h @@ -62,6 +62,8 @@ class BASE_EXPORT TrackedTime { // Similar to base::TimeTicks. TrackedTime operator+(const Duration& other) const; bool is_null() const; + static TrackedTime FromMilliseconds(int32 ms) { return TrackedTime(ms); } + private: friend class Duration; explicit TrackedTime(int32 ms); diff --git a/base/tracked_objects.cc b/base/tracked_objects.cc index 87af8dc..c837b9a 100644 --- a/base/tracked_objects.cc +++ b/base/tracked_objects.cc @@ -8,6 +8,7 @@ #include "base/format_macros.h" #include "base/message_loop.h" +#include "base/profiler/alternate_timer.h" #include "base/stringprintf.h" #include "base/third_party/valgrind/memcheck.h" #include "base/threading/thread_restrictions.h" @@ -35,6 +36,14 @@ const bool kTrackParentChildLinks = false; const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_CHILDREN_ACTIVE; +// Control whether an alternate time source (Now() function) is supported by +// the ThreadData class. This compile time flag should be set to true if we +// want other modules (such as a memory allocator, or a thread-specific CPU time +// clock) to be able to provide a thread-specific Now() function. Without this +// compile-time flag, the code will only support the wall-clock time. This flag +// can be flipped to efficiently disable this path (if there is a performance +// problem with its presence). +static const bool kAllowAlternateTimeSourceHandling = true; } // namespace //------------------------------------------------------------------------------ @@ -176,6 +185,9 @@ void Births::Clear() { birth_count_ = 0; } // optimize layout so that we benefit from locality of reference during accesses // to them. 
+// static +NowFunction* ThreadData::now_function_ = NULL; + // A TLS slot which points to the ThreadData instance for the current thread. We // do a fake initialization here (zeroing out data), and then the real in-place // construction happens when we call tls_index_.Initialize(). @@ -370,6 +382,12 @@ void ThreadData::TallyADeath(const Births& birth, // An address is going to have some randomness to it as well ;-). random_number_ ^= static_cast<int32>(&birth - reinterpret_cast<Births*>(0)); + // We don't have queue durations without OS timer. OS timer is automatically + // used for task-post-timing, so the use of an alternate timer implies all + // queue times are invalid. + if (kAllowAlternateTimeSourceHandling && now_function_) + queue_duration = 0; + DeathMap::iterator it = death_map_.find(&birth); DeathData* death_data; if (it != death_map_.end()) { @@ -579,6 +597,24 @@ void ThreadData::Reset() { it->second->Clear(); } +static void OptionallyInitializeAlternateTimer() { + char* alternate_selector = getenv(kAlternateProfilerTime); + if (!alternate_selector) + return; + switch (*alternate_selector) { + case '0': // This is the default value, and uses the wall clock time. + break; + case '1': { + // Use the TCMalloc allocations-on-thread as a pseudo-time. + ThreadData::SetAlternateTimeSource(GetAlternateTimeSource()); + break; + } + default: + NOTREACHED(); + break; + } +} + bool ThreadData::Initialize() { if (!kTrackAllTaskObjects) return false; // Not compiled in. @@ -594,6 +630,13 @@ bool ThreadData::Initialize() { if (status_ >= DEACTIVATED) return true; // Someone raced in here and beat us. + // Put an alternate timer in place if the environment calls for it, such as + // for tracking TCMalloc allocations. This insertion is idempotent, so we + // don't mind if there is a race, and we'd prefer not to be in a lock while + // doing this work. 
+ if (kAllowAlternateTimeSourceHandling) + OptionallyInitializeAlternateTimer(); + // Perform the "real" TLS initialization now, and leave it intact through // process termination. if (!tls_index_.initialized()) { // Testing may have initialized this. @@ -666,7 +709,16 @@ TrackedTime ThreadData::NowForEndOfRun() { } // static +void ThreadData::SetAlternateTimeSource(NowFunction* now_function) { + DCHECK(now_function); + if (kAllowAlternateTimeSourceHandling) + now_function_ = now_function; +} + +// static TrackedTime ThreadData::Now() { + if (kAllowAlternateTimeSourceHandling && now_function_) + return TrackedTime::FromMilliseconds((*now_function_)()); if (kTrackAllTaskObjects && TrackingStatus()) return TrackedTime::Now(); return TrackedTime(); // Super fast when disabled, or not compiled. diff --git a/base/tracked_objects.h b/base/tracked_objects.h index 96814f6..cd50594 100644 --- a/base/tracked_objects.h +++ b/base/tracked_objects.h @@ -17,6 +17,7 @@ #include "base/gtest_prod_util.h" #include "base/lazy_instance.h" #include "base/location.h" +#include "base/profiler/alternate_timer.h" #include "base/profiler/tracked_time.h" #include "base/time.h" #include "base/synchronization/lock.h" @@ -467,6 +468,12 @@ class BASE_EXPORT ThreadData { // the code). static TrackedTime Now(); + // Use the function |now| to provide current times, instead of calling the + // TrackedTime::Now() function. Since this alternate function is being used, + // the other time arguments (used for calculating queueing delay) will be + // ignored. + static void SetAlternateTimeSource(NowFunction* now); + // This function can be called at process termination to validate that thread // cleanup routines have been called for at least some number of named // threads. @@ -543,6 +550,10 @@ class BASE_EXPORT ThreadData { // ThreadData instances. 
static void ShutdownSingleThreadedCleanup(bool leak); + // When non-null, this specifies an external function that supplies monotone + // increasing time function. + static NowFunction* now_function_; + // We use thread local store to identify which ThreadData to interact with. static base::ThreadLocalStorage::StaticSlot tls_index_; diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc index 51fe5b3..9381aaf 100644 --- a/third_party/tcmalloc/chromium/src/tcmalloc.cc +++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc @@ -1130,6 +1130,8 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) { Length num_pages = tcmalloc::pages(size); size = num_pages << kPageShift; + heap->AddToByteAllocatedTotal(size); // Chromium profiling. + if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { result = DoSampledAllocation(size); @@ -1159,6 +1161,12 @@ inline void* do_malloc(size_t size) { size_t cl = Static::sizemap()->SizeClass(size); size = Static::sizemap()->class_to_size(cl); + // TODO(jar): If this has any detectable performance impact, it can be + // optimized by only tallying sizes if the profiler was activated to recall + // these tallies. I don't think this is performance critical, but we really + // should measure it. + heap->AddToByteAllocatedTotal(size); // Chromium profiling. 
+ if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { ret = DoSampledAllocation(size); MarkAllocatedRegion(ret); diff --git a/third_party/tcmalloc/chromium/src/thread_cache.cc b/third_party/tcmalloc/chromium/src/thread_cache.cc index a951f77..1c189f3 100644 --- a/third_party/tcmalloc/chromium/src/thread_cache.cc +++ b/third_party/tcmalloc/chromium/src/thread_cache.cc @@ -103,6 +103,7 @@ bool kernel_supports_tls = false; // be conservative void ThreadCache::Init(pthread_t tid) { size_ = 0; + total_bytes_allocated_ = 0; max_size_ = 0; IncreaseCacheLimitLocked(); @@ -303,6 +304,11 @@ int ThreadCache::GetSamplePeriod() { return sampler_.GetSamplePeriod(); } +// static +unsigned int ThreadCache::GetBytesAllocatedOnCurrentThread() { + return ThreadCache::GetThreadHeap()->GetTotalBytesAllocated(); +} + void ThreadCache::InitModule() { SpinLockHolder h(Static::pageheap_lock()); if (!phinited) { diff --git a/third_party/tcmalloc/chromium/src/thread_cache.h b/third_party/tcmalloc/chromium/src/thread_cache.h index 9220aab..d631f45 100644 --- a/third_party/tcmalloc/chromium/src/thread_cache.h +++ b/third_party/tcmalloc/chromium/src/thread_cache.h @@ -96,6 +96,17 @@ class ThreadCache { // should be sampled bool SampleAllocation(size_t k); + // Record additional bytes allocated. + void AddToByteAllocatedTotal(size_t k) { total_bytes_allocated_ += k; } + + // Return the total number of bytes allocated from this heap. The value will + // wrap when there is an overflow, and so only the differences between two + // values should be relied on (and even then, modulo 2^32). + uint32 GetTotalBytesAllocated() const; + + // On the current thread, return GetTotalBytesAllocated(). 
+ static uint32 GetBytesAllocatedOnCurrentThread(); + static void InitModule(); static void InitTSD(); static ThreadCache* GetThreadHeap(); @@ -291,6 +302,14 @@ class ThreadCache { size_t size_; // Combined size of data size_t max_size_; // size_ > max_size_ --> Scavenge() + // The following is the tally of bytes allocated on a thread as a response to + // any flavor of malloc() call. The aggregated amount includes all padding to + // the smallest class that can hold the request, or to the nearest whole page + // when a large allocation is made without using a class. This sum is + // currently used for Chromium profiling, where tallies are kept of the amount + // of memory allocated during the running of each task on each thread. + uint32 total_bytes_allocated_; // Total, modulo 2^32. + // We sample allocations, biased by the size of the allocation Sampler sampler_; // A sampler @@ -327,6 +346,10 @@ inline bool ThreadCache::SampleAllocation(size_t k) { return sampler_.SampleAllocation(k); } +inline uint32 ThreadCache::GetTotalBytesAllocated() const { + return total_bytes_allocated_; +} + inline void* ThreadCache::Allocate(size_t size, size_t cl) { ASSERT(size <= kMaxSize); ASSERT(size == Static::sizemap()->ByteSizeForClass(cl)); |