summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-02-15 23:05:01 +0000
committerjar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2012-02-15 23:05:01 +0000
commit90895d0f3265926221f00b3571332066a0bcb375 (patch)
treeee36e96f7bc1bfd52d0370cfc9324bee7ac1d631
parenta8c119da0066b18c472559dd80e58e9895d5ee06 (diff)
downloadchromium_src-90895d0f3265926221f00b3571332066a0bcb375.zip
chromium_src-90895d0f3265926221f00b3571332066a0bcb375.tar.gz
chromium_src-90895d0f3265926221f00b3571332066a0bcb375.tar.bz2
Support use of third party time function for about:profiler
This uses TCMalloc to provide a time function, allowing us to see how much memory was allocated on a single thread during the running of a task. The alternate time function is put in place only when a specific environment variable is detected during TCMalloc startup. This change currently is activated only in Windows/Linux, as it is based on changes to TCMalloc (not used on Mac). We also create an infrastructure for using any alternate timer, to replace the "wall clock time," on a per-thread basis, in the about:profiler infrastructure. That interface may be used on other platforms, including scenarios where we have a per-thread-CPU-time function to replace the wall-clock timer. In all cases, when this alternate timer is activated, we lose the ability to calculate queueing time. Queueing time is based on a time snapshot taken on a second thread, and hence is not comparable to the alternate timer (when the alternate timer is engaged). r=rtenneti BUG=103321 Review URL: https://chromiumcodereview.appspot.com/9212025 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@122180 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--base/allocator/allocator.gyp2
-rw-r--r--base/allocator/allocator_shim.cc18
-rw-r--r--base/allocator/allocator_shim.h2
-rw-r--r--base/base.gypi2
-rw-r--r--base/profiler/alternate_timer.cc25
-rw-r--r--base/profiler/alternate_timer.h36
-rw-r--r--base/profiler/tracked_time.h2
-rw-r--r--base/tracked_objects.cc52
-rw-r--r--base/tracked_objects.h11
-rw-r--r--third_party/tcmalloc/chromium/src/tcmalloc.cc8
-rw-r--r--third_party/tcmalloc/chromium/src/thread_cache.cc6
-rw-r--r--third_party/tcmalloc/chromium/src/thread_cache.h23
12 files changed, 183 insertions, 4 deletions
diff --git a/base/allocator/allocator.gyp b/base/allocator/allocator.gyp
index b72f6f7..5aef0a6 100644
--- a/base/allocator/allocator.gyp
+++ b/base/allocator/allocator.gyp
@@ -416,6 +416,8 @@
],
'sources': [
'allocator_unittests.cc',
+ '../profiler/alternate_timer.cc',
+ '../profiler/alternate_timer.h',
],
},
],
diff --git a/base/allocator/allocator_shim.cc b/base/allocator/allocator_shim.cc
index 097fff2..d61a9a4 100644
--- a/base/allocator/allocator_shim.cc
+++ b/base/allocator/allocator_shim.cc
@@ -5,6 +5,7 @@
#include "base/allocator/allocator_shim.h"
#include <config.h>
+#include "base/profiler/alternate_timer.h"
#include "base/sysinfo.h"
// When defined, different heap allocators can be used via an environment
@@ -48,8 +49,8 @@ static Allocator allocator = TCMALLOC;
// selection of the allocator. The primary may be used to control overall
// allocator selection, and the secondary can be used to specify an allocator
// to use in sub-processes.
-static const char* primary_name = "CHROME_ALLOCATOR";
-static const char* secondary_name = "CHROME_ALLOCATOR_2";
+static const char primary_name[] = "CHROME_ALLOCATOR";
+static const char secondary_name[] = "CHROME_ALLOCATOR_2";
// We include tcmalloc and the win_allocator to get as much inlining as
// possible.
@@ -261,6 +262,17 @@ extern "C" int _heap_init() {
// lifetime. Trying to teardown at _heap_term() is so late that
// you can't do anything useful anyway.
new TCMallocGuard();
+
+ // Provide optional hook for monitoring allocation quantities on a per-thread
+ // basis. Only set the hook if the environment indicates this needs to be
+ // enabled.
+ const char* profiling =
+ GetenvBeforeMain(tracked_objects::kAlternateProfilerTime);
+ if (profiling && *profiling == '1') {
+ tracked_objects::SetAlternateTimeSource(
+ tcmalloc::ThreadCache::GetBytesAllocatedOnCurrentThread);
+ }
+
return 1;
}
@@ -302,5 +314,5 @@ void SetupSubprocessAllocator() {
#endif // ENABLE_DYNAMIC_ALLOCATOR_SWITCHING
}
-} // namespace base.
} // namespace allocator.
+} // namespace base.
diff --git a/base/allocator/allocator_shim.h b/base/allocator/allocator_shim.h
index 342710f..b16f6ce 100644
--- a/base/allocator/allocator_shim.h
+++ b/base/allocator/allocator_shim.h
@@ -14,7 +14,7 @@ namespace allocator {
// then a default value (typically set to TCMALLOC).
void SetupSubprocessAllocator();
-} // namespace base.
} // namespace allocator.
+} // namespace base.
#endif // BASE_ALLOCATOR_ALLOCATOR_SHIM_H_
diff --git a/base/base.gypi b/base/base.gypi
index a5e819b..9d9d3f5 100644
--- a/base/base.gypi
+++ b/base/base.gypi
@@ -238,6 +238,8 @@
'process_win.cc',
'profiler/scoped_profile.cc',
'profiler/scoped_profile.h',
+ 'profiler/alternate_timer.cc',
+ 'profiler/alternate_timer.h',
'profiler/tracked_time.cc',
'profiler/tracked_time.h',
'property_bag.cc',
diff --git a/base/profiler/alternate_timer.cc b/base/profiler/alternate_timer.cc
new file mode 100644
index 0000000..05a983c
--- /dev/null
+++ b/base/profiler/alternate_timer.cc
@@ -0,0 +1,25 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/profiler/alternate_timer.h"
+
+#include "base/logging.h"
+
+namespace tracked_objects {
+
+// The alternate time source, if any.  Stays NULL until
+// SetAlternateTimeSource() stores a function here.
+static NowFunction* g_time_function = NULL;
+
+// Name of the environment variable that activates alternate timer profiling.
+const char kAlternateProfilerTime[] = "CHROME_PROFILER_TIME";
+
+// Set an alternate timer function to replace the OS time function when
+// profiling.  A source is expected to be installed at most once; installing
+// one when another is already present trips the DCHECK in debug builds.
+void SetAlternateTimeSource(NowFunction* now_function) {
+  DCHECK(!g_time_function);
+  g_time_function = now_function;
+}
+
+// Returns the function stored by SetAlternateTimeSource(), or NULL when no
+// alternate source has been installed.
+NowFunction* GetAlternateTimeSource() {
+  return g_time_function;
+}
+
+}  // namespace tracked_objects
diff --git a/base/profiler/alternate_timer.h b/base/profiler/alternate_timer.h
new file mode 100644
index 0000000..883b24f
--- /dev/null
+++ b/base/profiler/alternate_timer.h
@@ -0,0 +1,36 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// This is a glue file, which allows third party code to call into our profiler
+// without having to include most any functions from base.
+
+
+#ifndef BASE_PROFILER_ALTERNATE_TIMER_H_
+#define BASE_PROFILER_ALTERNATE_TIMER_H_
+
+namespace tracked_objects {
+
+// Provide type for an alternate timer function.  Such a function takes no
+// arguments and returns its current reading as an unsigned int.
+typedef unsigned int NowFunction();
+
+// Set an alternate timer function to replace the OS time function when
+// profiling. Typically this is called by an allocator that is providing a
+// function that indicates how much memory has been allocated on any given
+// thread.
+extern void SetAlternateTimeSource(NowFunction* now_function);
+
+// Gets the pointer to a function that was set via SetAlternateTimeSource().
+// Returns NULL if no set was done prior to calling GetAlternateTimeSource().
+extern NowFunction* GetAlternateTimeSource();
+
+// Environment variable name that is used to activate alternate timer profiling
+// (such as using TCMalloc allocations to provide a pseudo-timer) for tasks
+// instead of wall clock profiling.
+extern const char kAlternateProfilerTime[];
+
+
+
+} // namespace tracked_objects
+
+#endif // BASE_PROFILER_ALTERNATE_TIMER_H_
diff --git a/base/profiler/tracked_time.h b/base/profiler/tracked_time.h
index 6f4bc7e..5493e20 100644
--- a/base/profiler/tracked_time.h
+++ b/base/profiler/tracked_time.h
@@ -62,6 +62,8 @@ class BASE_EXPORT TrackedTime { // Similar to base::TimeTicks.
TrackedTime operator+(const Duration& other) const;
bool is_null() const;
+ // Builds a TrackedTime directly from the raw count |ms| by forwarding to the
+ // private TrackedTime(int32) constructor.
+ static TrackedTime FromMilliseconds(int32 ms) { return TrackedTime(ms); }
+
private:
friend class Duration;
explicit TrackedTime(int32 ms);
diff --git a/base/tracked_objects.cc b/base/tracked_objects.cc
index 87af8dc..c837b9a 100644
--- a/base/tracked_objects.cc
+++ b/base/tracked_objects.cc
@@ -8,6 +8,7 @@
#include "base/format_macros.h"
#include "base/message_loop.h"
+#include "base/profiler/alternate_timer.h"
#include "base/stringprintf.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/thread_restrictions.h"
@@ -35,6 +36,14 @@ const bool kTrackParentChildLinks = false;
const ThreadData::Status kInitialStartupState =
ThreadData::PROFILING_CHILDREN_ACTIVE;
+// Control whether an alternate time source (Now() function) is supported by
+// the ThreadData class. This compile time flag should be set to true if we
+// want other modules (such as a memory allocator, or a thread-specific CPU time
+// clock) to be able to provide a thread-specific Now() function. Without this
+// compile-time flag, the code will only support the wall-clock time. This flag
+// can be flipped to efficiently disable this path (if there is a performance
+// problem with its presence).
+static const bool kAllowAlternateTimeSourceHandling = true;
} // namespace
//------------------------------------------------------------------------------
@@ -176,6 +185,9 @@ void Births::Clear() { birth_count_ = 0; }
// optimize layout so that we benefit from locality of reference during accesses
// to them.
+// static
+NowFunction* ThreadData::now_function_ = NULL;
+
// A TLS slot which points to the ThreadData instance for the current thread. We
// do a fake initialization here (zeroing out data), and then the real in-place
// construction happens when we call tls_index_.Initialize().
@@ -370,6 +382,12 @@ void ThreadData::TallyADeath(const Births& birth,
// An address is going to have some randomness to it as well ;-).
random_number_ ^= static_cast<int32>(&birth - reinterpret_cast<Births*>(0));
+ // We don't have queue durations without OS timer. OS timer is automatically
+ // used for task-post-timing, so the use of an alternate timer implies all
+ // queue times are invalid.
+ if (kAllowAlternateTimeSourceHandling && now_function_)
+ queue_duration = 0;
+
DeathMap::iterator it = death_map_.find(&birth);
DeathData* death_data;
if (it != death_map_.end()) {
@@ -579,6 +597,24 @@ void ThreadData::Reset() {
it->second->Clear();
}
+// Consults the kAlternateProfilerTime environment variable and, when it asks
+// for it, installs the alternate time source into ThreadData.  An unset
+// variable or a value starting with '0' leaves the wall clock in place; a
+// value starting with '1' selects the function returned by
+// GetAlternateTimeSource().
+static void OptionallyInitializeAlternateTimer() {
+  const char* alternate_selector = getenv(kAlternateProfilerTime);
+  if (!alternate_selector)
+    return;
+  switch (*alternate_selector) {
+    case '0':  // This is the default value, and uses the wall clock time.
+      break;
+    case '1': {
+      // Use the alternate source (e.g. TCMalloc allocations-on-thread) as a
+      // pseudo-time.  GetAlternateTimeSource() returns NULL when nothing was
+      // set; ThreadData::SetAlternateTimeSource() DCHECKs its argument, so
+      // stay on the wall clock instead of forwarding NULL.
+      NowFunction* alternate_time_source = GetAlternateTimeSource();
+      if (alternate_time_source)
+        ThreadData::SetAlternateTimeSource(alternate_time_source);
+      break;
+    }
+    default:
+      NOTREACHED();
+      break;
+  }
+}
+
bool ThreadData::Initialize() {
if (!kTrackAllTaskObjects)
return false; // Not compiled in.
@@ -594,6 +630,13 @@ bool ThreadData::Initialize() {
if (status_ >= DEACTIVATED)
return true; // Someone raced in here and beat us.
+ // Put an alternate timer in place if the environment calls for it, such as
+ // for tracking TCMalloc allocations. This insertion is idempotent, so we
+ // don't mind if there is a race, and we'd prefer not to be in a lock while
+ // doing this work.
+ if (kAllowAlternateTimeSourceHandling)
+ OptionallyInitializeAlternateTimer();
+
// Perform the "real" TLS initialization now, and leave it intact through
// process termination.
if (!tls_index_.initialized()) { // Testing may have initialized this.
@@ -666,7 +709,16 @@ TrackedTime ThreadData::NowForEndOfRun() {
}
// static
+void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
+  // A NULL source is a caller error.
+  DCHECK(now_function);
+  // When alternate sources are compiled out, keep whatever is in place.
+  if (!kAllowAlternateTimeSourceHandling)
+    return;
+  now_function_ = now_function;
+}
+
+// static
TrackedTime ThreadData::Now() {
+ if (kAllowAlternateTimeSourceHandling && now_function_)
+ return TrackedTime::FromMilliseconds((*now_function_)());
if (kTrackAllTaskObjects && TrackingStatus())
return TrackedTime::Now();
return TrackedTime(); // Super fast when disabled, or not compiled.
diff --git a/base/tracked_objects.h b/base/tracked_objects.h
index 96814f6..cd50594 100644
--- a/base/tracked_objects.h
+++ b/base/tracked_objects.h
@@ -17,6 +17,7 @@
#include "base/gtest_prod_util.h"
#include "base/lazy_instance.h"
#include "base/location.h"
+#include "base/profiler/alternate_timer.h"
#include "base/profiler/tracked_time.h"
#include "base/time.h"
#include "base/synchronization/lock.h"
@@ -467,6 +468,12 @@ class BASE_EXPORT ThreadData {
// the code).
static TrackedTime Now();
+ // Use the function |now| to provide current times, instead of calling the
+ // TrackedTime::Now() function. Since this alternate function is being used,
+ // the other time arguments (used for calculating queueing delay) will be
+ // ignored.
+ static void SetAlternateTimeSource(NowFunction* now);
+
// This function can be called at process termination to validate that thread
// cleanup routines have been called for at least some number of named
// threads.
@@ -543,6 +550,10 @@ class BASE_EXPORT ThreadData {
// ThreadData instances.
static void ShutdownSingleThreadedCleanup(bool leak);
+ // When non-null, this specifies an external function that supplies a monotone
+ // increasing time value.
+ static NowFunction* now_function_;
+
// We use thread local store to identify which ThreadData to interact with.
static base::ThreadLocalStorage::StaticSlot tls_index_;
diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
index 51fe5b3..9381aaf 100644
--- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
+++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
@@ -1130,6 +1130,8 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
Length num_pages = tcmalloc::pages(size);
size = num_pages << kPageShift;
+ heap->AddToByteAllocatedTotal(size); // Chromium profiling.
+
if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
result = DoSampledAllocation(size);
@@ -1159,6 +1161,12 @@ inline void* do_malloc(size_t size) {
size_t cl = Static::sizemap()->SizeClass(size);
size = Static::sizemap()->class_to_size(cl);
+ // TODO(jar): If this has any detectable performance impact, it can be
+ // optimized by only tallying sizes if the profiler was activated to recall
+ // these tallies. I don't think this is performance critical, but we really
+ // should measure it.
+ heap->AddToByteAllocatedTotal(size); // Chromium profiling.
+
if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
ret = DoSampledAllocation(size);
MarkAllocatedRegion(ret);
diff --git a/third_party/tcmalloc/chromium/src/thread_cache.cc b/third_party/tcmalloc/chromium/src/thread_cache.cc
index a951f77..1c189f3 100644
--- a/third_party/tcmalloc/chromium/src/thread_cache.cc
+++ b/third_party/tcmalloc/chromium/src/thread_cache.cc
@@ -103,6 +103,7 @@ bool kernel_supports_tls = false; // be conservative
void ThreadCache::Init(pthread_t tid) {
size_ = 0;
+ total_bytes_allocated_ = 0;
max_size_ = 0;
IncreaseCacheLimitLocked();
@@ -303,6 +304,11 @@ int ThreadCache::GetSamplePeriod() {
return sampler_.GetSamplePeriod();
}
+// static
+// Returns the calling thread's tally of allocated bytes, as reported by
+// GetTotalBytesAllocated() on that thread's heap.
+unsigned int ThreadCache::GetBytesAllocatedOnCurrentThread() {
+  // NOTE(review): GetThreadHeap() presumably yields NULL for a thread whose
+  // cache has not been created yet -- confirm.  Report zero in that case
+  // rather than dereferencing NULL; Init() also starts the tally at zero, so
+  // later readings remain consistent with this one.
+  const ThreadCache* heap = ThreadCache::GetThreadHeap();
+  return heap ? heap->GetTotalBytesAllocated() : 0;
+}
+
void ThreadCache::InitModule() {
SpinLockHolder h(Static::pageheap_lock());
if (!phinited) {
diff --git a/third_party/tcmalloc/chromium/src/thread_cache.h b/third_party/tcmalloc/chromium/src/thread_cache.h
index 9220aab..d631f45 100644
--- a/third_party/tcmalloc/chromium/src/thread_cache.h
+++ b/third_party/tcmalloc/chromium/src/thread_cache.h
@@ -96,6 +96,17 @@ class ThreadCache {
// should be sampled
bool SampleAllocation(size_t k);
+ // Record additional bytes allocated.
+ void AddToByteAllocatedTotal(size_t k) { total_bytes_allocated_ += k; }
+
+ // Return the total number of bytes allocated from this heap. The value will
+ // wrap when there is an overflow, and so only the differences between two
+ // values should be relied on (and even then, modulo 2^32).
+ uint32 GetTotalBytesAllocated() const;
+
+ // On the current thread, return GetTotalBytesAllocated().
+ static uint32 GetBytesAllocatedOnCurrentThread();
+
static void InitModule();
static void InitTSD();
static ThreadCache* GetThreadHeap();
@@ -291,6 +302,14 @@ class ThreadCache {
size_t size_; // Combined size of data
size_t max_size_; // size_ > max_size_ --> Scavenge()
+ // The following is the tally of bytes allocated on a thread as a response to
+ // any flavor of malloc() call. The aggregated amount includes all padding to
+ // the smallest class that can hold the request, or to the nearest whole page
+ // when a large allocation is made without using a class. This sum is
+ // currently used for Chromium profiling, where tallies are kept of the amount
+ // of memory allocated during the running of each task on each thread.
+ uint32 total_bytes_allocated_; // Total, modulo 2^32.
+
// We sample allocations, biased by the size of the allocation
Sampler sampler_; // A sampler
@@ -327,6 +346,10 @@ inline bool ThreadCache::SampleAllocation(size_t k) {
return sampler_.SampleAllocation(k);
}
+// Returns the tally accumulated by AddToByteAllocatedTotal() for this heap.
+// The counter is a uint32, so the value is the total modulo 2^32.
+inline uint32 ThreadCache::GetTotalBytesAllocated() const {
+ return total_bytes_allocated_;
+}
+
inline void* ThreadCache::Allocate(size_t size, size_t cl) {
ASSERT(size <= kMaxSize);
ASSERT(size == Static::sizemap()->ByteSizeForClass(cl));