author | glider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-24 12:07:37 +0000 |
---|---|---|
committer | glider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-24 12:07:37 +0000 |
commit | 0f3eaca32c31fdbacbedb6638c43984c11fcd191 (patch) | |
tree | 32ec0c611c25a1508b48e973a552f8df52dd9b77 /third_party/tcmalloc | |
parent | f6c3483efa3d0cb4a7f49c2e3fc563100722e21b (diff) | |
Revert 48024 - Reland http://codereview.chromium.org/1735024/show to assess the performance.
Review URL: http://codereview.chromium.org/2164001
TBR=antonm,willchan
Review URL: http://codereview.chromium.org/2155002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@48032 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/tcmalloc')
52 files changed, 1748 insertions, 2154 deletions
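Much of this revert reshapes the dynamic-annotations interface (see the dynamic_annotations.h changes below). For orientation, this is how client code uses the happens-before macros. The sketch makes the header's own reference-counting example compilable; GCC's `__sync` builtins stand in for the comment's hypothetical `AtomicDecrementByOne()`, and the class itself is illustrative, not part of the patch:

```cpp
#include "base/dynamic_annotations.h"

// A compilable take on the reference-counting example from the
// dynamic_annotations.h comment block. The race detector pairs the
// HAPPENS_BEFORE and HAPPENS_AFTER calls that share the same address,
// so the thread that drops the last reference is known to observe
// every other thread's writes before running the destructor.
class RefCounted {
 public:
  RefCounted() : refcount_(1) {}
  void Ref() { __sync_add_and_fetch(&refcount_, 1); }
  void Unref() {
    ANNOTATE_HAPPENS_BEFORE(&refcount_);
    if (__sync_sub_and_fetch(&refcount_, 1) == 0) {
      ANNOTATE_HAPPENS_AFTER(&refcount_);
      delete this;
    }
  }
 private:
  ~RefCounted() {}  // heap-only: destroyed via Unref()
  long refcount_;
};
```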
diff --git a/third_party/tcmalloc/README.chromium b/third_party/tcmalloc/README.chromium
index 1ceda91..a8352a9 100644
--- a/third_party/tcmalloc/README.chromium
+++ b/third_party/tcmalloc/README.chromium
@@ -22,8 +22,9 @@ Contents:
 The current revision is:
-  Last Changed Rev: 94
-  Last Changed Date: 2010-05-08 01:53:24 +0400 (Sat, 08 May 2010)
+  Last Changed Rev: 77
+  Last Changed Date: 2009-10-27 10:30:52 -0700 (Tue, 27 Oct 2009)
+
 HOWTOs:
diff --git a/third_party/tcmalloc/chromium/src/base/basictypes.h b/third_party/tcmalloc/chromium/src/base/basictypes.h
index ab9cdabc..9991413 100644
--- a/third_party/tcmalloc/chromium/src/base/basictypes.h
+++ b/third_party/tcmalloc/chromium/src/base/basictypes.h
@@ -308,14 +308,6 @@ class AssignAttributeStartEnd {
 #endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
-#if defined(HAVE___ATTRIBUTE__) && (defined(__i386__) || defined(__x86_64__))
-# define CACHELINE_SIZE 64
-# define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
-#else
-# define CACHELINE_ALIGNED
-#endif // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__)
-
-
 // The following enum should be used only as a constructor argument to indicate
 // that the variable has static storage class, and that the constructor should
 // do nothing to its state. It indicates to the reader that it is legal to
diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc
index e69de29..c8bbcd7 100644
--- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc
+++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc
@@ -0,0 +1,110 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Kostya Serebryany
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "base/dynamic_annotations.h"
+#include "base/sysinfo.h"
+
+// Each function is empty and called (via a macro) only in debug mode.
+// The arguments are captured by dynamic tools at runtime.
+
+extern "C" void AnnotateRWLockCreate(const char *file, int line,
+                                     const volatile void *lock){}
+extern "C" void AnnotateRWLockDestroy(const char *file, int line,
+                                      const volatile void *lock){}
+extern "C" void AnnotateRWLockAcquired(const char *file, int line,
+                                       const volatile void *lock, long is_w){}
+extern "C" void AnnotateRWLockReleased(const char *file, int line,
+                                       const volatile void *lock, long is_w){}
+extern "C" void AnnotateCondVarWait(const char *file, int line,
+                                    const volatile void *cv,
+                                    const volatile void *lock){}
+extern "C" void AnnotateCondVarSignal(const char *file, int line,
+                                      const volatile void *cv){}
+extern "C" void AnnotateCondVarSignalAll(const char *file, int line,
+                                         const volatile void *cv){}
+extern "C" void AnnotatePublishMemoryRange(const char *file, int line,
+                                           const volatile void *address,
+                                           long size){}
+extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line,
+                                             const volatile void *address,
+                                             long size){}
+extern "C" void AnnotatePCQCreate(const char *file, int line,
+                                  const volatile void *pcq){}
+extern "C" void AnnotatePCQDestroy(const char *file, int line,
+                                   const volatile void *pcq){}
+extern "C" void AnnotatePCQPut(const char *file, int line,
+                               const volatile void *pcq){}
+extern "C" void AnnotatePCQGet(const char *file, int line,
+                               const volatile void *pcq){}
+extern "C" void AnnotateNewMemory(const char *file, int line,
+                                  const volatile void *mem,
+                                  long size){}
+extern "C" void AnnotateExpectRace(const char *file, int line,
+                                   const volatile void *mem,
+                                   const char *description){}
+extern "C" void AnnotateBenignRace(const char *file, int line,
+                                   const volatile void *mem,
+                                   const char *description){}
+extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
+                                             const volatile void *mu){}
+extern "C" void AnnotateTraceMemory(const char *file, int line,
+                                    const volatile void *arg){}
+extern "C" void AnnotateThreadName(const char *file, int line,
+                                   const char *name){}
+extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line){}
+extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line){}
+extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line){}
+extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line){}
+extern "C" void AnnotateNoOp(const char *file, int line,
+                             const volatile void *arg){}
+
+static int GetRunningOnValgrind() {
+  const char *running_on_valgrind_str = GetenvBeforeMain("RUNNING_ON_VALGRIND");
+  if (running_on_valgrind_str) {
+    return strcmp(running_on_valgrind_str, "0") != 0;
+  }
+  return 0;
+}
+
+// When running under valgrind, this function will be intercepted
+// and a non-zero value will be returned.
+// Some valgrind-based tools (e.g. callgrind) do not intercept functions,
+// so we also read environment variable.
+extern "C" int RunningOnValgrind() {
+  static int running_on_valgrind = GetRunningOnValgrind();
+  return running_on_valgrind;
+}
diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
index dae1a14..a2a268f 100644
--- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
+++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
@@ -1,10 +1,10 @@
 /* Copyright (c) 2008, Google Inc.
  * All rights reserved.
- * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * + * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above @@ -14,7 +14,7 @@ * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -31,471 +31,445 @@ * Author: Kostya Serebryany */ -/* This file defines dynamic annotations for use with dynamic analysis - tool such as valgrind, PIN, etc. - - Dynamic annotation is a source code annotation that affects - the generated code (that is, the annotation is not a comment). - Each such annotation is attached to a particular - instruction and/or to a particular object (address) in the program. - - The annotations that should be used by users are macros in all upper-case - (e.g., ANNOTATE_NEW_MEMORY). - - Actual implementation of these macros may differ depending on the - dynamic analysis tool being used. - - See http://code.google.com/p/data-race-test/ for more information. - - This file supports the following dynamic analysis tools: - - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero). - Macros are defined empty. - - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1). - Macros are defined as calls to non-inlinable empty functions - that are intercepted by Valgrind. */ - +// This file defines dynamic annotations for use with dynamic analysis +// tool such as valgrind, PIN, etc. +// +// Dynamic annotation is a source code annotation that affects +// the generated code (that is, the annotation is not a comment). +// Each such annotation is attached to a particular +// instruction and/or to a particular object (address) in the program. +// +// The annotations that should be used by users are macros in all upper-case +// (e.g., ANNOTATE_NEW_MEMORY). +// +// Actual implementation of these macros may differ depending on the +// dynamic analysis tool being used. +// +// This file supports the following dynamic analysis tools: +// - None (NDEBUG is defined). +// Macros are defined empty. +// - Helgrind (NDEBUG is not defined). +// Macros are defined as calls to non-inlinable empty functions +// that are intercepted by helgrind. +// #ifndef BASE_DYNAMIC_ANNOTATIONS_H_ #define BASE_DYNAMIC_ANNOTATIONS_H_ -#ifndef DYNAMIC_ANNOTATIONS_ENABLED -# define DYNAMIC_ANNOTATIONS_ENABLED 0 -#endif - -#if DYNAMIC_ANNOTATIONS_ENABLED != 0 - - /* ------------------------------------------------------------- - Annotations useful when implementing condition variables such as CondVar, - using conditional critical sections (Await/LockWhen) and when constructing - user-defined synchronization mechanisms. - - The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can - be used to define happens-before arcs in user-defined synchronization - mechanisms: the race detector will infer an arc from the former to the - latter when they share the same argument pointer. 
- - Example 1 (reference counting): - - void Unref() { - ANNOTATE_HAPPENS_BEFORE(&refcount_); - if (AtomicDecrementByOne(&refcount_) == 0) { - ANNOTATE_HAPPENS_AFTER(&refcount_); - delete this; - } - } - - Example 2 (message queue): - - void MyQueue::Put(Type *e) { - MutexLock lock(&mu_); - ANNOTATE_HAPPENS_BEFORE(e); - PutElementIntoMyQueue(e); - } - - Type *MyQueue::Get() { - MutexLock lock(&mu_); - Type *e = GetElementFromMyQueue(); - ANNOTATE_HAPPENS_AFTER(e); - return e; - } - - Note: when possible, please use the existing reference counting and message - queue implementations instead of inventing new ones. */ - - /* Report that wait on the condition variable at address "cv" has succeeded - and the lock at address "lock" is held. */ +#include "base/thread_annotations.h" + +// All the annotation macros are in effect only in debug mode. +#ifndef NDEBUG + + // ------------------------------------------------------------- + // Annotations useful when implementing condition variables such as CondVar, + // using conditional critical sections (Await/LockWhen) and when constructing + // user-defined synchronization mechanisms. + // + // The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can + // be used to define happens-before arcs in user-defined synchronization + // mechanisms: the race detector will infer an arc from the former to the + // latter when they share the same argument pointer. + // + // Example 1 (reference counting): + // + // void Unref() { + // ANNOTATE_HAPPENS_BEFORE(&refcount_); + // if (AtomicDecrementByOne(&refcount_) == 0) { + // ANNOTATE_HAPPENS_AFTER(&refcount_); + // delete this; + // } + // } + // + // Example 2 (message queue): + // + // void MyQueue::Put(Type *e) { + // MutexLock lock(&mu_); + // ANNOTATE_HAPPENS_BEFORE(e); + // PutElementIntoMyQueue(e); + // } + // + // Type *MyQueue::Get() { + // MutexLock lock(&mu_); + // Type *e = GetElementFromMyQueue(); + // ANNOTATE_HAPPENS_AFTER(e); + // return e; + // } + // + // Note: when possible, please use the existing reference counting and message + // queue implementations instead of inventing new ones. + + // Report that wait on the condition variable at address "cv" has succeeded + // and the lock at address "lock" is held. #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) - /* Report that wait on the condition variable at "cv" has succeeded. Variant - w/o lock. */ + // Report that wait on the condition variable at "cv" has succeeded. Variant + // w/o lock. #define ANNOTATE_CONDVAR_WAIT(cv) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) - /* Report that we are about to signal on the condition variable at address - "cv". */ + // Report that we are about to signal on the condition variable at address + // "cv". #define ANNOTATE_CONDVAR_SIGNAL(cv) \ AnnotateCondVarSignal(__FILE__, __LINE__, cv) - /* Report that we are about to signal_all on the condition variable at "cv". */ + // Report that we are about to signal_all on the condition variable at "cv". #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) - /* Annotations for user-defined synchronization mechanisms. */ + // Annotations for user-defined synchronization mechanisms. #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) - /* Report that the bytes in the range [pointer, pointer+size) are about - to be published safely. 
The race checker will create a happens-before - arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to - subsequent accesses to this memory. - Note: this annotation may not work properly if the race detector uses - sampling, i.e. does not observe all memory accesses. - */ + // Report that the bytes in the range [pointer, pointer+size) are about + // to be published safely. The race checker will create a happens-before + // arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + // subsequent accesses to this memory. #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) - /* DEPRECATED. Don't use it. */ + // Report that the bytes in the range [pointer, pointer+size) are not shared + // between threads any more and can be safely used by the current thread w/o + // synchronization. The race checker will create a happens-before arc from + // all previous accesses to this memory to this call. + // + // This annotation could be applied to complex objects, such as STL + // containers, with one condition: the accesses to the object itself + // and its internal data should not be separated with any synchronization. + // + // Example that works: + // + // map<int, int> the_map; + // void Thread1() { + // MutexLock lock(&mu); + // // Ok: accesses to the_map and its internal data is not separated by + // // synchronization. + // the_map[1]++; + // } + // void Thread2() { + // { + // MutexLock lock(&mu); + // ... + // // because of some reason we know that the_map will not be used by + // // other threads any more + // ANNOTATE_UNPUBLISH_MEMORY_RANGE(&the_map, sizeof(the_map)); + // } + // the_map->DoSomething(); + // } + // + // Example that does not work (due to the way happens-before arcs are + // represented in some race detectors): + // + // void Thread1() { + // MutexLock lock(&mu); + // int *guts_of_the_map = &(*the_map)[1]; + // // we have some synchronization between access to 'c' and its guts. + // // This will make ANNOTATE_UNPUBLISH_MEMORY_RANGE in Thread2 useless. + // some_other_lock_or_other_synchronization_utility.Lock(); + // (*guts_of_the_map)++; + // ... + // } + // + // void Thread1() { // same as above... #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) - /* DEPRECATED. Don't use it. */ + // This annotation should be used to annotate thread-safe swapping of + // containers. Required only when using hybrid (i.e. not pure happens-before) + // detectors. + // + // This annotation has the same limitation as ANNOTATE_UNPUBLISH_MEMORY_RANGE + // (see above). + // + // Example: + // map<int, int> the_map; + // void Thread1() { + // MutexLock lock(&mu); + // the_map[1]++; + // } + // void Thread2() { + // map<int,int> tmp; + // { + // MutexLock lock(&mu); + // the_map.swap(tmp); + // ANNOTATE_SWAP_MEMORY_RANGE(&the_map, sizeof(the_map)); + // } + // tmp->DoSomething(); + // } #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ do { \ ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ } while (0) - /* Instruct the tool to create a happens-before arc between mu->Unlock() and - mu->Lock(). This annotation may slow down the race detector and hide real - races. Normally it is used only when it would be difficult to annotate each - of the mutex's critical sections individually using the annotations above. - This annotation makes sense only for hybrid race detectors. 
For pure - happens-before detectors this is a no-op. For more details see - http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */ + // Instruct the tool to create a happens-before arc between mu->Unlock() and + // mu->Lock(). This annotation may slow down the race detector and hide real + // races. Normally it is used only when it would be difficult to annotate each + // of the mutex's critical sections individually using the annotations above. + // This annotation makes sense only for hybrid race detectors. For pure + // happens-before detectors this is a no-op. For more details see + // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */ + // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - /* ------------------------------------------------------------- - Annotations useful when defining memory allocators, or when memory that - was protected in one way starts to be protected in another. */ + // ------------------------------------------------------------- + // Annotations useful when defining memory allocators, or when memory that + // was protected in one way starts to be protected in another. - /* Report that a new memory at "address" of size "size" has been allocated. - This might be used when the memory has been retrieved from a free list and - is about to be reused, or when a the locking discipline for a variable - changes. */ + // Report that a new memory at "address" of size "size" has been allocated. + // This might be used when the memory has been retrieved from a free list and + // is about to be reused, or when a the locking discipline for a variable + // changes. #define ANNOTATE_NEW_MEMORY(address, size) \ AnnotateNewMemory(__FILE__, __LINE__, address, size) - /* ------------------------------------------------------------- - Annotations useful when defining FIFO queues that transfer data between - threads. */ + // ------------------------------------------------------------- + // Annotations useful when defining FIFO queues that transfer data between + // threads. - /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at - address "pcq" has been created. The ANNOTATE_PCQ_* annotations - should be used only for FIFO queues. For non-FIFO queues use - ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */ + // Report that the producer-consumer queue (such as ProducerConsumerQueue) at + // address "pcq" has been created. The ANNOTATE_PCQ_* annotations + // should be used only for FIFO queues. For non-FIFO queues use + // ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). #define ANNOTATE_PCQ_CREATE(pcq) \ AnnotatePCQCreate(__FILE__, __LINE__, pcq) - /* Report that the queue at address "pcq" is about to be destroyed. */ + // Report that the queue at address "pcq" is about to be destroyed. #define ANNOTATE_PCQ_DESTROY(pcq) \ AnnotatePCQDestroy(__FILE__, __LINE__, pcq) - /* Report that we are about to put an element into a FIFO queue at address - "pcq". */ + // Report that we are about to put an element into a FIFO queue at address + // "pcq". #define ANNOTATE_PCQ_PUT(pcq) \ AnnotatePCQPut(__FILE__, __LINE__, pcq) - /* Report that we've just got an element from a FIFO queue at address "pcq". 
*/ + // Report that we've just got an element from a FIFO queue at address "pcq". #define ANNOTATE_PCQ_GET(pcq) \ AnnotatePCQGet(__FILE__, __LINE__, pcq) - /* ------------------------------------------------------------- - Annotations that suppress errors. It is usually better to express the - program's synchronization using the other annotations, but these can - be used when all else fails. */ - - /* Report that we may have a benign race at "pointer", with size - "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the - point where "pointer" has been allocated, preferably close to the point - where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */ - #define ANNOTATE_BENIGN_RACE(pointer, description) \ - AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ - sizeof(*(pointer)), description) - - /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to - the memory range [address, address+size). */ - #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ - AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) - - /* Request the analysis tool to ignore all reads in the current thread - until ANNOTATE_IGNORE_READS_END is called. - Useful to ignore intentional racey reads, while still checking - other reads and all writes. - See also ANNOTATE_UNPROTECTED_READ. */ + // ------------------------------------------------------------- + // Annotations that suppress errors. It is usually better to express the + // program's synchronization using the other annotations, but these can + // be used when all else fails. + + // Report that we may have a benign race on at "address". + // Insert at the point where "address" has been allocated, preferably close + // to the point where the race happens. + // See also ANNOTATE_BENIGN_RACE_STATIC. + #define ANNOTATE_BENIGN_RACE(address, description) \ + AnnotateBenignRace(__FILE__, __LINE__, address, description) + + // Request the analysis tool to ignore all reads in the current thread + // until ANNOTATE_IGNORE_READS_END is called. + // Useful to ignore intentional racey reads, while still checking + // other reads and all writes. + // See also ANNOTATE_UNPROTECTED_READ. #define ANNOTATE_IGNORE_READS_BEGIN() \ AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - /* Stop ignoring reads. */ + // Stop ignoring reads. #define ANNOTATE_IGNORE_READS_END() \ AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ + // Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. #define ANNOTATE_IGNORE_WRITES_BEGIN() \ AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - /* Stop ignoring writes. */ + // Stop ignoring writes. #define ANNOTATE_IGNORE_WRITES_END() \ AnnotateIgnoreWritesEnd(__FILE__, __LINE__) - /* Start ignoring all memory accesses (reads and writes). */ + // Start ignoring all memory accesses (reads and writes). #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ do {\ ANNOTATE_IGNORE_READS_BEGIN();\ ANNOTATE_IGNORE_WRITES_BEGIN();\ }while(0)\ - /* Stop ignoring all memory accesses. */ + // Stop ignoring all memory accesses. #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ do {\ ANNOTATE_IGNORE_WRITES_END();\ ANNOTATE_IGNORE_READS_END();\ }while(0)\ - /* Enable (enable!=0) or disable (enable==0) race detection for all threads. - This annotation could be useful if you want to skip expensive race analysis - during some period of program execution, e.g. during initialization. 
*/ - #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ - AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) + // ------------------------------------------------------------- + // Annotations useful for debugging. - /* ------------------------------------------------------------- - Annotations useful for debugging. */ - - /* Request to trace every access to "address". */ + // Request to trace every access to "address". #define ANNOTATE_TRACE_MEMORY(address) \ AnnotateTraceMemory(__FILE__, __LINE__, address) - /* Report the current thread name to a race detector. */ + // Report the current thread name to a race detector. #define ANNOTATE_THREAD_NAME(name) \ AnnotateThreadName(__FILE__, __LINE__, name) - /* ------------------------------------------------------------- - Annotations useful when implementing locks. They are not - normally needed by modules that merely use locks. - The "lock" argument is a pointer to the lock object. */ + // ------------------------------------------------------------- + // Annotations useful when implementing locks. They are not + // normally needed by modules that merely use locks. + // The "lock" argument is a pointer to the lock object. - /* Report that a lock has been created at address "lock". */ + // Report that a lock has been created at address "lock". #define ANNOTATE_RWLOCK_CREATE(lock) \ AnnotateRWLockCreate(__FILE__, __LINE__, lock) - /* Report that the lock at address "lock" is about to be destroyed. */ + // Report that the lock at address "lock" is about to be destroyed. #define ANNOTATE_RWLOCK_DESTROY(lock) \ AnnotateRWLockDestroy(__FILE__, __LINE__, lock) - /* Report that the lock at address "lock" has been acquired. - is_w=1 for writer lock, is_w=0 for reader lock. */ + // Report that the lock at address "lock" has been acquired. + // is_w=1 for writer lock, is_w=0 for reader lock. #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) - /* Report that the lock at address "lock" is about to be released. */ + // Report that the lock at address "lock" is about to be released. #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) - /* ------------------------------------------------------------- - Annotations useful when implementing barriers. They are not - normally needed by modules that merely use barriers. - The "barrier" argument is a pointer to the barrier object. */ - - /* Report that the "barrier" has been initialized with initial "count". - If 'reinitialization_allowed' is true, initialization is allowed to happen - multiple times w/o calling barrier_destroy() */ - #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ - AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ - reinitialization_allowed) - - /* Report that we are about to enter barrier_wait("barrier"). */ - #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ - AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) - - /* Report that we just exited barrier_wait("barrier"). */ - #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ - AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) - - /* Report that the "barrier" has been destroyed. */ - #define ANNOTATE_BARRIER_DESTROY(barrier) \ - AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) - - /* ------------------------------------------------------------- - Annotations useful for testing race detectors. 
*/ + // ------------------------------------------------------------- + // Annotations useful for testing race detectors. - /* Report that we expect a race on the variable at "address". - Use only in unit tests for a race detector. */ + // Report that we expect a race on the variable at "address". + // Use only in unit tests for a race detector. #define ANNOTATE_EXPECT_RACE(address, description) \ AnnotateExpectRace(__FILE__, __LINE__, address, description) - /* A no-op. Insert where you like to test the interceptors. */ + // A no-op. Insert where you like to test the interceptors. #define ANNOTATE_NO_OP(arg) \ AnnotateNoOp(__FILE__, __LINE__, arg) - /* Force the race detector to flush its state. The actual effect depends on - * the implementation of the detector. */ - #define ANNOTATE_FLUSH_STATE() \ - AnnotateFlushState(__FILE__, __LINE__) - - -#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ - - #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ - #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ - #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ - #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ - #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ - #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ - #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ - #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ - #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ - #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ - #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ - #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ - #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_PCQ_CREATE(pcq) /* empty */ - #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ - #define ANNOTATE_PCQ_PUT(pcq) /* empty */ - #define ANNOTATE_PCQ_GET(pcq) /* empty */ - #define ANNOTATE_NEW_MEMORY(address, size) /* empty */ - #define ANNOTATE_EXPECT_RACE(address, description) /* empty */ - #define ANNOTATE_BENIGN_RACE(address, description) /* empty */ - #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ - #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ - #define ANNOTATE_TRACE_MEMORY(arg) /* empty */ - #define ANNOTATE_THREAD_NAME(name) /* empty */ - #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_READS_END() /* empty */ - #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_WRITES_END() /* empty */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ - #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ - #define ANNOTATE_NO_OP(arg) /* empty */ - #define ANNOTATE_FLUSH_STATE() /* empty */ - -#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ - -/* Use the macros above rather than using these functions directly. 
*/ -#ifdef __cplusplus -extern "C" { -#endif -void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w); -void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w); -void AnnotateBarrierInit(const char *file, int line, - const volatile void *barrier, long count, - long reinitialization_allowed); -void AnnotateBarrierWaitBefore(const char *file, int line, - const volatile void *barrier); -void AnnotateBarrierWaitAfter(const char *file, int line, - const volatile void *barrier); -void AnnotateBarrierDestroy(const char *file, int line, - const volatile void *barrier); -void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock); -void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv); -void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv); -void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, - long size); -void AnnotateUnpublishMemoryRange(const char *file, int line, +#else // NDEBUG is defined + + #define ANNOTATE_RWLOCK_CREATE(lock) // empty + #define ANNOTATE_RWLOCK_DESTROY(lock) // empty + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) // empty + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) // empty + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) // empty + #define ANNOTATE_CONDVAR_WAIT(cv) // empty + #define ANNOTATE_CONDVAR_SIGNAL(cv) // empty + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) // empty + #define ANNOTATE_HAPPENS_BEFORE(obj) // empty + #define ANNOTATE_HAPPENS_AFTER(obj) // empty + #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_PCQ_CREATE(pcq) // empty + #define ANNOTATE_PCQ_DESTROY(pcq) // empty + #define ANNOTATE_PCQ_PUT(pcq) // empty + #define ANNOTATE_PCQ_GET(pcq) // empty + #define ANNOTATE_NEW_MEMORY(address, size) // empty + #define ANNOTATE_EXPECT_RACE(address, description) // empty + #define ANNOTATE_BENIGN_RACE(address, description) // empty + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty + #define ANNOTATE_TRACE_MEMORY(arg) // empty + #define ANNOTATE_THREAD_NAME(name) // empty + #define ANNOTATE_IGNORE_READS_BEGIN() // empty + #define ANNOTATE_IGNORE_READS_END() // empty + #define ANNOTATE_IGNORE_WRITES_BEGIN() // empty + #define ANNOTATE_IGNORE_WRITES_END() // empty + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() // empty + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() // empty + #define ANNOTATE_NO_OP(arg) // empty + +#endif // NDEBUG + +// Use the macros above rather than using these functions directly. 
+extern "C" void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock); +extern "C" void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock); +extern "C" void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w); +extern "C" void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w); +extern "C" void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock); +extern "C" void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv); +extern "C" void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv); +extern "C" void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +extern "C" void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotateNewMemory(const char *file, int line, const volatile void *address, long size); -void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq); -void AnnotateNewMemory(const char *file, int line, - const volatile void *address, - long size); -void AnnotateExpectRace(const char *file, int line, - const volatile void *address, - const char *description); -void AnnotateBenignRace(const char *file, int line, - const volatile void *address, - const char *description); -void AnnotateBenignRaceSized(const char *file, int line, - const volatile void *address, - long size, - const char *description); -void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu); -void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg); -void AnnotateThreadName(const char *file, int line, - const char *name); -void AnnotateIgnoreReadsBegin(const char *file, int line); -void AnnotateIgnoreReadsEnd(const char *file, int line); -void AnnotateIgnoreWritesBegin(const char *file, int line); -void AnnotateIgnoreWritesEnd(const char *file, int line); -void AnnotateEnableRaceDetection(const char *file, int line, int enable); -void AnnotateNoOp(const char *file, int line, - const volatile void *arg); -void AnnotateFlushState(const char *file, int line); - -/* Return non-zero value if running under valgrind. - - If "valgrind.h" is included into dynamic_annotations.c, - the regular valgrind mechanism will be used. - See http://valgrind.org/docs/manual/manual-core-adv.html about - RUNNING_ON_VALGRIND and other valgrind "client requests". 
- The file "valgrind.h" may be obtained by doing - svn co svn://svn.valgrind.org/valgrind/trunk/include - - If for some reason you can't use "valgrind.h" or want to fake valgrind, - there are two ways to make this function return non-zero: - - Use environment variable: export RUNNING_ON_VALGRIND=1 - - Make your tool intercept the function RunningOnValgrind() and - change its return value. - */ -int RunningOnValgrind(void); - -#ifdef __cplusplus -} -#endif - -#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus) - - /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. - - Instead of doing - ANNOTATE_IGNORE_READS_BEGIN(); - ... = x; - ANNOTATE_IGNORE_READS_END(); - one can use - ... = ANNOTATE_UNPROTECTED_READ(x); */ +extern "C" void AnnotateExpectRace(const char *file, int line, + const volatile void *address, + const char *description); +extern "C" void AnnotateBenignRace(const char *file, int line, + const volatile void *address, + const char *description); +extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu); +extern "C" void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg); +extern "C" void AnnotateThreadName(const char *file, int line, + const char *name); +extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line); +extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line); +extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line); +extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line); +extern "C" void AnnotateNoOp(const char *file, int line, + const volatile void *arg); + +#ifndef NDEBUG + + // ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. + // + // Instead of doing + // ANNOTATE_IGNORE_READS_BEGIN(); + // ... = x; + // ANNOTATE_IGNORE_READS_END(); + // one can use + // ... = ANNOTATE_UNPROTECTED_READ(x); template <class T> - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) { + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + NO_THREAD_SAFETY_ANALYSIS { ANNOTATE_IGNORE_READS_BEGIN(); T res = x; ANNOTATE_IGNORE_READS_END(); return res; } - /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */ + + // Apply ANNOTATE_BENIGN_RACE to a static variable. #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ namespace { \ class static_var ## _annotator { \ public: \ static_var ## _annotator() { \ - ANNOTATE_BENIGN_RACE_SIZED(&static_var, \ - sizeof(static_var), \ + ANNOTATE_BENIGN_RACE(&static_var, \ # static_var ": " description); \ } \ }; \ static static_var ## _annotator the ## static_var ## _annotator;\ } -#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ +#else // !NDEBUG #define ANNOTATE_UNPROTECTED_READ(x) (x) - #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) // empty + +#endif // !NDEBUG + +// Return non-zero value if running under valgrind. 
+extern "C" int RunningOnValgrind(); -#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ -#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */ +#endif // BASE_DYNAMIC_ANNOTATIONS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc index 7ca3953a..2bbce54 100644 --- a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc +++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc @@ -210,9 +210,8 @@ static const intptr_t kMagicUnallocated = ~kMagicAllocated; namespace { class ArenaLock { public: - explicit ArenaLock(LowLevelAlloc::Arena *arena) - EXCLUSIVE_LOCK_FUNCTION(arena->mu) - : left_(false), mask_valid_(false), arena_(arena) { + explicit ArenaLock(LowLevelAlloc::Arena *arena) : + left_(false), mask_valid_(false), arena_(arena) { if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { // We've decided not to support async-signal-safe arena use until // there a demonstrated need. Here's how one could do it though @@ -229,7 +228,7 @@ namespace { this->arena_->mu.Lock(); } ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); } - void Leave() UNLOCK_FUNCTION(arena_->mu) { + void Leave() { this->arena_->mu.Unlock(); #if 0 if (this->mask_valid_) { diff --git a/third_party/tcmalloc/chromium/src/base/vdso_support.cc b/third_party/tcmalloc/chromium/src/base/vdso_support.cc index fce7c2c..ddaca37 100644 --- a/third_party/tcmalloc/chromium/src/base/vdso_support.cc +++ b/third_party/tcmalloc/chromium/src/base/vdso_support.cc @@ -42,8 +42,8 @@ #include <fcntl.h> #include "base/atomicops.h" // for MemoryBarrier -#include "base/linux_syscall_support.h" #include "base/logging.h" +#include "base/linux_syscall_support.h" #include "base/dynamic_annotations.h" #include "base/basictypes.h" // for COMPILE_ASSERT diff --git a/third_party/tcmalloc/chromium/src/central_freelist.cc b/third_party/tcmalloc/chromium/src/central_freelist.cc index 5b7dfbb..674ff9b 100644 --- a/third_party/tcmalloc/chromium/src/central_freelist.cc +++ b/third_party/tcmalloc/chromium/src/central_freelist.cc @@ -266,7 +266,8 @@ void CentralFreeList::Populate() { Span* span; { SpinLockHolder h(Static::pageheap_lock()); - span = Static::pageheap()->New(npages, size_class_, kPageSize); + span = Static::pageheap()->New(npages); + if (span) Static::pageheap()->RegisterSizeClass(span, size_class_); } if (span == NULL) { MESSAGE("tcmalloc: allocation failed", npages << kPageShift); @@ -274,6 +275,12 @@ void CentralFreeList::Populate() { return; } ASSERT(span->length == npages); + // Cache sizeclass info eagerly. Locking is not necessary. + // (Instead of being eager, we could just replace any stale info + // about this span, but that seems to be no better in practice.) + for (int i = 0; i < npages; i++) { + Static::pageheap()->CacheSizeClass(span->start + i, size_class_); + } // Split the block into pieces and add to the free-list // TODO: coloring of objects to avoid cache conflicts? 
diff --git a/third_party/tcmalloc/chromium/src/common.h b/third_party/tcmalloc/chromium/src/common.h index f9557c9..53a0a0b 100644 --- a/third_party/tcmalloc/chromium/src/common.h +++ b/third_party/tcmalloc/chromium/src/common.h @@ -62,7 +62,6 @@ static const size_t kPageSize = 1 << kPageShift; static const size_t kMaxSize = 8u * kPageSize; static const size_t kAlignment = 8; static const size_t kNumClasses = 61; -static const size_t kLargeSizeClass = 0; // Maximum length we allow a per-thread free-list to have before we // move objects from it into the corresponding central free-list. We diff --git a/third_party/tcmalloc/chromium/src/config.h.in b/third_party/tcmalloc/chromium/src/config.h.in index 49bbf0d..1ad2642 100644 --- a/third_party/tcmalloc/chromium/src/config.h.in +++ b/third_party/tcmalloc/chromium/src/config.h.in @@ -132,7 +132,7 @@ /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H -/* <sys/ucontext.h> is broken on redhat 7 */ +/* Define to 1 if you have the <sys/ucontext.h> header file. */ #undef HAVE_SYS_UCONTEXT_H /* Define to 1 if you have the <sys/wait.h> header file. */ @@ -150,9 +150,6 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H -/* Define to 1 if you have the <valgrind.h> header file. */ -#undef HAVE_VALGRIND_H - /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ diff --git a/third_party/tcmalloc/chromium/src/config_linux.h b/third_party/tcmalloc/chromium/src/config_linux.h index 9786b3e..398f303 100644 --- a/third_party/tcmalloc/chromium/src/config_linux.h +++ b/third_party/tcmalloc/chromium/src/config_linux.h @@ -136,7 +136,7 @@ /* Define to 1 if compiler supports __thread */ #define HAVE_TLS 1 -/* <sys/ucontext.h> is broken on redhat 7 */ +/* Define to 1 if you have the <ucontext.h> header file. */ #define HAVE_UCONTEXT_H 1 /* Define to 1 if you have the <unistd.h> header file. */ @@ -145,9 +145,6 @@ /* Define to 1 if you have the <unwind.h> header file. */ #define HAVE_UNWIND_H 1 -/* Define to 1 if you have the <valgrind.h> header file. */ -#undef HAVE_VALGRIND_H - /* define if your compiler has __attribute__ */ #define HAVE___ATTRIBUTE__ 1 diff --git a/third_party/tcmalloc/chromium/src/config_win.h b/third_party/tcmalloc/chromium/src/config_win.h index 236bd6b..30daf4f 100644 --- a/third_party/tcmalloc/chromium/src/config_win.h +++ b/third_party/tcmalloc/chromium/src/config_win.h @@ -255,12 +255,10 @@ // --------------------------------------------------------------------- // Extra stuff not found in config.h.in -// This must be defined before the windows.h is included. We need at -// least 0x0400 for mutex.h to have access to TryLock, and at least -// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. -// (This latter is an optimization we could take out if need be.) +// This must be defined before the windows.h is included. It's needed +// for mutex.h, to give access to the TryLock method. 
#ifndef _WIN32_WINNT -# define _WIN32_WINNT 0x0501 +# define _WIN32_WINNT 0x0400 #endif // We want to make sure not to ever try to #include heap-checker.h diff --git a/third_party/tcmalloc/chromium/src/debugallocation.cc b/third_party/tcmalloc/chromium/src/debugallocation.cc index 949fbe9..1a9ddcb 100644 --- a/third_party/tcmalloc/chromium/src/debugallocation.cc +++ b/third_party/tcmalloc/chromium/src/debugallocation.cc @@ -1010,7 +1010,7 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) __THROW ATTRIBUTE_SECTION(google_malloc); -void* operator new(size_t size) throw (std::bad_alloc) +void* operator new(size_t size) ATTRIBUTE_SECTION(google_malloc); void* operator new(size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1018,7 +1018,7 @@ void operator delete(void* p) __THROW ATTRIBUTE_SECTION(google_malloc); void operator delete(void* p, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); -void* operator new[](size_t size) throw (std::bad_alloc) +void* operator new[](size_t size) ATTRIBUTE_SECTION(google_malloc); void* operator new[](size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1176,12 +1176,12 @@ extern "C" void* pvalloc(size_t size) __THROW { return p; } -extern "C" int mallopt(int cmd, int value) __THROW { +extern "C" int mallopt(int cmd, int value) { return BASE_MALLOPT(cmd, value); } #ifdef HAVE_STRUCT_MALLINFO -extern "C" struct mallinfo mallinfo(void) __THROW { +extern "C" struct mallinfo mallinfo(void) { return BASE_MALLINFO(); } #endif @@ -1239,7 +1239,7 @@ inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) { } } -void* operator new(size_t size) throw (std::bad_alloc) { +void* operator new(size_t size) { void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, false); MallocHook::InvokeNewHook(ptr, size); if (ptr == NULL) { @@ -1259,8 +1259,7 @@ void operator delete(void* ptr) __THROW { DebugDeallocate(ptr, MallocBlock::kNewType); } -// Some STL implementations explicitly invoke this. -// It is completely equivalent to a normal delete (delete never throws). +// Compilers use this, though I can't see how it differs from normal delete. void operator delete(void* ptr, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kNewType); @@ -1270,7 +1269,7 @@ void operator delete(void* ptr, const std::nothrow_t&) __THROW { // Alloc/free stuff for debug operator new[] & friends -void* operator new[](size_t size) throw (std::bad_alloc) { +void* operator new[](size_t size) { void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, false); MallocHook::InvokeNewHook(ptr, size); if (ptr == NULL) { @@ -1290,8 +1289,7 @@ void operator delete[](void* ptr) __THROW { DebugDeallocate(ptr, MallocBlock::kArrayNewType); } -// Some STL implementations explicitly invoke this. -// It is completely equivalent to a normal delete (delete never throws). +// Compilers use this, though I can't see how it differs from normal delete. void operator delete[](void* ptr, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kArrayNewType); @@ -1361,22 +1359,17 @@ class DebugMallocImplementation : public ParentImplementation { static DebugMallocImplementation debug_malloc_implementation; REGISTER_MODULE_INITIALIZER(debugallocation, { - // Either we or valgrind will control memory management. We - // register our extension if we're the winner. 
- if (RunningOnValgrind()) { - // Let Valgrind uses its own malloc (so don't register our extension). - } else { - MallocExtension::Register(&debug_malloc_implementation); - // When the program exits, check all blocks still in the free - // queue for corruption. - atexit(DanglingWriteChecker); - } + MallocExtension::Register(&debug_malloc_implementation); + + // When the program exits, check all blocks still in the free queue for + // corruption. + atexit(DanglingWriteChecker); }); #ifdef TCMALLOC_FOR_DEBUGALLOCATION // Redefine malloc_stats to use tcmalloc's implementation: -extern "C" void malloc_stats(void) __THROW { +extern "C" void malloc_stats(void) { do_malloc_stats(); } diff --git a/third_party/tcmalloc/chromium/src/google/heap-profiler.h b/third_party/tcmalloc/chromium/src/google/heap-profiler.h index 57cb97a..5efaf64 100644 --- a/third_party/tcmalloc/chromium/src/google/heap-profiler.h +++ b/third_party/tcmalloc/chromium/src/google/heap-profiler.h @@ -71,13 +71,12 @@ extern "C" { */ PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix); -/* Returns non-zero if we are currently profiling the heap. (Returns - * an int rather than a bool so it's usable from C.) This is true +/* Returns true if we are currently profiling the heap. This is true * between calls to HeapProfilerStart() and HeapProfilerStop(), and * also if the program has been run with HEAPPROFILER, or some other * way to turn on whole-program profiling. */ -int IsHeapProfilerRunning(); +bool IsHeapProfilerRunning(); /* Stop heap profiling. Can be restarted again with HeapProfilerStart(), * but the currently accumulated profiling information will be cleared. diff --git a/third_party/tcmalloc/chromium/src/google/profiler.h b/third_party/tcmalloc/chromium/src/google/profiler.h index a6883f4..74b936f 100644 --- a/third_party/tcmalloc/chromium/src/google/profiler.h +++ b/third_party/tcmalloc/chromium/src/google/profiler.h @@ -108,15 +108,13 @@ struct ProfilerOptions { void *filter_in_thread_arg; }; -/* Start profiling and write profile info into fname, discarding any - * existing profiling data in that file. +/* Start profiling and write profile info into fname. * * This is equivalent to calling ProfilerStartWithOptions(fname, NULL). */ PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname); -/* Start profiling and write profile into fname, discarding any - * existing profiling data in that file. +/* Start profiling and write profile into fname. * * The profiler is configured using the options given by 'options'. * Options which are not specified are given default values. diff --git a/third_party/tcmalloc/chromium/src/google/stacktrace.h b/third_party/tcmalloc/chromium/src/google/stacktrace.h index fd186d6..8188ce3 100644 --- a/third_party/tcmalloc/chromium/src/google/stacktrace.h +++ b/third_party/tcmalloc/chromium/src/google/stacktrace.h @@ -49,23 +49,23 @@ // Skips the most recent "skip_count" stack frames (also skips the // frame generated for the "GetStackFrames" routine itself), and then // records the pc values for up to the next "max_depth" frames in -// "result", and the corresponding stack frame sizes in "sizes". -// Returns the number of values recorded in "result"/"sizes". +// "pcs", and the corresponding stack frame sizes in "sizes". Returns +// the number of values recorded in "pcs"/"sizes". 
// // Example: // main() { foo(); } // foo() { bar(); } // bar() { -// void* result[10]; +// void* pcs[10]; // int sizes[10]; -// int depth = GetStackFrames(result, sizes, 10, 1); +// int depth = GetStackFrames(pcs, sizes, 10, 1); // } // // The GetStackFrames call will skip the frame for "bar". It will // return 2 and will produce pc values that map to the following // procedures: -// result[0] foo -// result[1] main +// pcs[0] foo +// pcs[1] main // (Actually, there may be a few more entries after "main" to account for // startup procedures.) // And corresponding stack frame sizes will also be recorded: @@ -76,15 +76,15 @@ // be identified. // // This routine may return fewer stack frame entries than are -// available. Also note that "result" and "sizes" must both be non-NULL. -extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, +// available. Also note that "pcs" and "sizes" must both be non-NULL. +extern PERFTOOLS_DLL_DECL int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count); // Same as above, but to be used from a signal handler. The "uc" parameter // should be the pointer to ucontext_t which was passed as the 3rd parameter // to sa_sigaction signal handler. It may help the unwinder to get a // better stack trace under certain conditions. The "uc" may safely be NULL. -extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, +extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** pcs, int* sizes, int max_depth, int skip_count, const void *uc); // This is similar to the GetStackFrames routine, except that it returns diff --git a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in index fbb70ab..e5c873d 100644 --- a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in +++ b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in @@ -60,8 +60,7 @@ #endif #ifdef __cplusplus -#include <new> // for std::nothrow_t - +#include <new> // for nothrow_t extern "C" { #endif // Returns a human-readable version string. If major, minor, @@ -92,15 +91,16 @@ extern "C" { #ifdef __cplusplus PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); - PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, - const std::nothrow_t&) __THROW; PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; - PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, - const std::nothrow_t&) __THROW; PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; + + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) __THROW; PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW; - PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) __THROW; PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW; } diff --git a/third_party/tcmalloc/chromium/src/heap-checker.cc b/third_party/tcmalloc/chromium/src/heap-checker.cc index 2779c97..82a7adb 100644 --- a/third_party/tcmalloc/chromium/src/heap-checker.cc +++ b/third_party/tcmalloc/chromium/src/heap-checker.cc @@ -159,23 +159,6 @@ DEFINE_bool(heap_check_test_pointer_alignment, "Set to true to check if the found leak can be due to " "use of unaligned pointers"); -// Alignment at which all pointers in memory are supposed to be located; -// use 1 if any alignment is ok. 
-// heap_check_test_pointer_alignment flag guides if we try the value of 1. -// The larger it can be, the lesser is the chance of missing real leaks. -// -// sizeof(void)* is correct. However gold (the new linker) has a bug where it -// sometimes places global pointers on 4-byte boundaries, even when pointers -// are 8 bytes long. While we are fixing the linker, degrade to 4-byte -// alignment on all targets. http://b/1226481 -// -static const size_t kPointerSourceAlignment = sizeof(void*); -DEFINE_int32(heap_check_pointer_source_alignment, - EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT", - kPointerSourceAlignment), - "Alignment at which all pointers in memory are supposed to be " - "located. Use 1 if any alignment is ok."); - // A reasonable default to handle pointers inside of typical class objects: // Too low and we won't be able to traverse pointers to normally-used // nested objects and base parts of multiple-inherited objects. @@ -262,6 +245,13 @@ static bool constructor_heap_profiling = false; static const int heap_checker_info_level = 0; //---------------------------------------------------------------------- + +// Alignment at which all pointers in memory are supposed to be located; +// use 1 if any alignment is ok. +// heap_check_test_pointer_alignment flag guides if we try the value of 1. +// The larger it can be, the lesser is the chance of missing real leaks. +static const size_t kPointerSourceAlignment = sizeof(void*); + // Cancel our InitialMallocHook_* if present. static void CancelInitialMallocHooks(); // defined below @@ -494,7 +484,7 @@ HeapLeakChecker::Disabler::Disabler() { // in a thread-safe manner. int counter = get_thread_disable_counter(); set_thread_disable_counter(counter + 1); - RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1); + RAW_VLOG(1, "Increasing thread disable counter to %d", counter + 1); } HeapLeakChecker::Disabler::~Disabler() { @@ -502,7 +492,7 @@ HeapLeakChecker::Disabler::~Disabler() { RAW_DCHECK(counter > 0, ""); if (counter > 0) { set_thread_disable_counter(counter - 1); - RAW_VLOG(10, "Decreasing thread disable counter to %d", counter); + RAW_VLOG(1, "Decreasing thread disable counter to %d", counter); } else { RAW_VLOG(0, "Thread disable counter underflow : %d", counter); } @@ -535,7 +525,7 @@ static void NewHook(const void* ptr, size_t size) { if (ptr != NULL) { const int counter = get_thread_disable_counter(); const bool ignore = (counter > 0); - RAW_VLOG(16, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, + RAW_VLOG(7, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, int(counter)); { SpinLockHolder l(&heap_checker_lock); if (size > max_heap_object_size) max_heap_object_size = size; @@ -550,17 +540,17 @@ static void NewHook(const void* ptr, size_t size) { } } } - RAW_VLOG(17, "Alloc Recorded: %p of %"PRIuS"", ptr, size); + RAW_VLOG(8, "Alloc Recorded: %p of %"PRIuS"", ptr, size); } } static void DeleteHook(const void* ptr) { if (ptr != NULL) { - RAW_VLOG(16, "Recording Free %p", ptr); + RAW_VLOG(7, "Recording Free %p", ptr); { SpinLockHolder l(&heap_checker_lock); if (heap_checker_on) heap_profile->RecordFree(ptr); } - RAW_VLOG(17, "Free Recorded: %p", ptr); + RAW_VLOG(8, "Free Recorded: %p", ptr); } } @@ -594,7 +584,7 @@ static StackDirection stack_direction = UNKNOWN_DIRECTION; static void RegisterStackLocked(const void* top_ptr) { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - RAW_VLOG(10, "Thread stack at %p", top_ptr); + RAW_VLOG(1, "Thread stack at %p", top_ptr); 
uintptr_t top = AsInt(top_ptr); stack_tops->insert(top); // add for later use @@ -608,12 +598,12 @@ static void RegisterStackLocked(const void* top_ptr) { if (MemoryRegionMap::FindAndMarkStackRegion(top, ®ion)) { // Make the proper portion of the stack live: if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { - RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", top_ptr, region.end_addr - top); live_objects->push_back(AllocObject(top_ptr, region.end_addr - top, THREAD_DATA)); } else { // GROWS_TOWARDS_HIGH_ADDRESSES - RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", AsPtr(region.start_addr), top - region.start_addr); live_objects->push_back(AllocObject(AsPtr(region.start_addr), @@ -629,7 +619,7 @@ static void RegisterStackLocked(const void* top_ptr) { uintptr_t start = AsInt(span->ptr); uintptr_t end = start + span->size; if (start <= top && top < end) { - RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p", + RAW_VLOG(2, "Stack at %p is inside /proc/self/maps chunk %p..%p", top_ptr, AsPtr(start), AsPtr(end)); // Shrink start..end region by chopping away the memory regions in // MemoryRegionMap that land in it to undo merging of regions @@ -650,17 +640,17 @@ static void RegisterStackLocked(const void* top_ptr) { } } if (stack_start != start || stack_end != end) { - RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p", + RAW_VLOG(2, "Stack at %p is actually inside memory chunk %p..%p", top_ptr, AsPtr(stack_start), AsPtr(stack_end)); } // Make the proper portion of the stack live: if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { - RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", top_ptr, stack_end - top); live_objects->push_back( AllocObject(top_ptr, stack_end - top, THREAD_DATA)); } else { // GROWS_TOWARDS_HIGH_ADDRESSES - RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", AsPtr(stack_start), top - stack_start); live_objects->push_back( AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA)); @@ -733,14 +723,14 @@ static void MakeDisabledLiveCallbackLocked( // and the rest of the region where the stack lives can well // contain outdated stack variables which are not live anymore, // hence should not be treated as such. - RAW_VLOG(11, "Not %s-disabling %"PRIuS" bytes at %p" + RAW_VLOG(2, "Not %s-disabling %"PRIuS" bytes at %p" ": have stack inside: %p", (stack_disable ? "stack" : "range"), info.object_size, ptr, AsPtr(*iter)); return; } } - RAW_VLOG(11, "%s-disabling %"PRIuS" bytes at %p", + RAW_VLOG(2, "%s-disabling %"PRIuS" bytes at %p", (stack_disable ? "Stack" : "Range"), info.object_size, ptr); live_objects->push_back(AllocObject(ptr, info.object_size, MUST_BE_ON_HEAP)); @@ -765,7 +755,7 @@ static void RecordGlobalDataLocked(uintptr_t start_address, // Ignore non-writeable regions. if (strchr(permissions, 'w') == NULL) return; if (filename == NULL || *filename == '\0') filename = "UNNAMED"; - RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, + RAW_VLOG(2, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, filename, start_address, end_address); (*library_live_objects)[filename]. push_back(AllocObject(AsPtr(start_address), @@ -824,12 +814,12 @@ void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, // does not call user code. 
} if (depth) { - RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth); + RAW_VLOG(1, "Disabling allocations from %s at depth %d:", library, depth); DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth); if (IsLibraryNamed(library, "/libpthread") || IsLibraryNamed(library, "/libdl") || IsLibraryNamed(library, "/ld")) { - RAW_VLOG(10, "Global memory regions made by %s will be live data", + RAW_VLOG(1, "Global memory regions made by %s will be live data", library); if (global_region_caller_ranges == NULL) { global_region_caller_ranges = @@ -946,7 +936,7 @@ static enum { va_list /*ap*/) { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); thread_listing_status = CALLBACK_STARTED; - RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid()); + RAW_VLOG(2, "Found %d threads (from pid %d)", num_threads, getpid()); if (FLAGS_heap_check_ignore_global_live) { UseProcMapsLocked(RECORD_GLOBAL_DATA); @@ -961,7 +951,7 @@ static enum { // the leak checking thread itself is handled // specially via self_thread_stack, not here: if (thread_pids[i] == self_thread_pid) continue; - RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]); + RAW_VLOG(2, "Handling thread with pid %d", thread_pids[i]); #if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER) i386_regs thread_regs; #define sys_ptrace(r, p, a, d) syscall(SYS_ptrace, (r), (p), (a), (d)) @@ -977,7 +967,7 @@ static enum { // register pointers still being in the registers and not on the stack): for (void** p = reinterpret_cast<void**>(&thread_regs); p < reinterpret_cast<void**>(&thread_regs + 1); ++p) { - RAW_VLOG(12, "Thread register %p", *p); + RAW_VLOG(3, "Thread register %p", *p); thread_registers.push_back(*p); } } else { @@ -992,7 +982,7 @@ static enum { if (thread_registers.size()) { // Make thread registers be live heap data sources. 
// we rely here on the fact that vector is in one memory chunk: - RAW_VLOG(11, "Live registers at %p of %"PRIuS" bytes", + RAW_VLOG(2, "Live registers at %p of %"PRIuS" bytes", &thread_registers[0], thread_registers.size() * sizeof(void*)); live_objects->push_back(AllocObject(&thread_registers[0], thread_registers.size() * sizeof(void*), @@ -1015,7 +1005,7 @@ static const void* self_thread_stack_top; void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid); + RAW_VLOG(2, "Handling self thread with pid %d", self_thread_pid); // Register our own stack: // Important that all stack ranges (including the one here) @@ -1029,7 +1019,7 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin(); object != ignored_objects->end(); ++object) { const void* ptr = AsPtr(object->first); - RAW_VLOG(11, "Ignored live object at %p of %"PRIuS" bytes", + RAW_VLOG(2, "Ignored live object at %p of %"PRIuS" bytes", ptr, object->second); live_objects-> push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP)); @@ -1142,10 +1132,10 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { } } // Now get and use live_objects from the final version of l->second: - if (VLOG_IS_ON(11)) { + if (VLOG_IS_ON(2)) { for (LiveObjectsStack::const_iterator i = l->second.begin(); i != l->second.end(); ++i) { - RAW_VLOG(11, "Library live region at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Library live region at %p of %"PRIuPTR" bytes", i->ptr, i->size); } } @@ -1250,7 +1240,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { RAW_LOG(ERROR, "Thread stacks not found for %d threads. " "Will likely report false leak positives.", r); } else { - RAW_VLOG(11, "Thread stacks appear to be found for all threads"); + RAW_VLOG(2, "Thread stacks appear to be found for all threads"); } } else { RAW_LOG(WARNING, "Not looking for thread stacks; " @@ -1266,7 +1256,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { IgnoreNonThreadLiveObjectsLocked(); } if (live_objects_total) { - RAW_VLOG(10, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", + RAW_VLOG(1, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", live_objects_total, live_bytes_total); } // Free these: we made them here and heap_profile never saw them @@ -1276,8 +1266,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { } // Alignment at which we should consider pointer positions -// in IgnoreLiveObjectsLocked. Will normally use the value of -// FLAGS_heap_check_pointer_source_alignment. +// in IgnoreLiveObjectsLocked. Use 1 if any alignment is ok. static size_t pointer_source_alignment = kPointerSourceAlignment; // Global lock for HeapLeakChecker::DoNoLeaks // to protect pointer_source_alignment. 
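The hunks above make kPointerSourceAlignment a plain compile-time constant again and drop the --heap_check_pointer_source_alignment flag: pointer_source_alignment is the stride at which the leak checker's scan treats words of memory as candidate pointers, and --heap_check_test_pointer_alignment simply retries the scan with a stride of 1. A minimal sketch of such a scan, assuming a power-of-two alignment; LooksLikeHeapPointer is a hypothetical stand-in for the real HaveOnHeapLocked()/MarkAsLive() lookup:

#include <cstddef>
#include <cstdint>
#include <cstring>

static bool LooksLikeHeapPointer(uintptr_t value);  // hypothetical stand-in

static void ScanForPointers(const char* object, size_t size, size_t alignment) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(object);
  // Round up to the first position inside the object with the given alignment.
  addr = (addr + alignment - 1) & ~uintptr_t(alignment - 1);
  const uintptr_t end = reinterpret_cast<uintptr_t>(object) + size;
  for (; addr + sizeof(void*) <= end; addr += alignment) {
    uintptr_t candidate;
    memcpy(&candidate, reinterpret_cast<const void*>(addr), sizeof(candidate));
    if (LooksLikeHeapPointer(candidate)) {
      // The real checker would mark the pointed-to heap object live here.
    }
  }
}

With alignment == sizeof(void*) only naturally aligned words are visited; with alignment == 1 pointers stored at odd offsets are also caught, at roughly a word-size multiple of the scanning cost.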
@@ -1325,7 +1314,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); live_object_count += 1; live_byte_count += size; } - RAW_VLOG(13, "Looking for heap pointers in %p of %"PRIuS" bytes", + RAW_VLOG(4, "Looking for heap pointers in %p of %"PRIuS" bytes", object, size); const char* const whole_object = object; size_t const whole_size = size; @@ -1362,7 +1351,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); if (can_be_on_heap) { const void* ptr = reinterpret_cast<const void*>(addr); // Too expensive (inner loop): manually uncomment when debugging: - // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object); + // RAW_VLOG(8, "Trying pointer to %p at %p", ptr, object); size_t object_size; if (HaveOnHeapLocked(&ptr, &object_size) && heap_profile->MarkAsLive(ptr)) { @@ -1371,15 +1360,15 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); // a heap object which is in fact leaked. // I.e. in very rare and probably not repeatable/lasting cases // we might miss some real heap memory leaks. - RAW_VLOG(14, "Found pointer to %p of %"PRIuS" bytes at %p " + RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p " "inside %p of size %"PRIuS"", ptr, object_size, object, whole_object, whole_size); - if (VLOG_IS_ON(15)) { + if (VLOG_IS_ON(6)) { // log call stacks to help debug how come something is not a leak HeapProfileTable::AllocInfo alloc; - if (!heap_profile->FindAllocDetails(ptr, &alloc)) { - RAW_LOG(FATAL, "FindAllocDetails failed on ptr %p", ptr); - } + bool r = heap_profile->FindAllocDetails(ptr, &alloc); + r = r; // suppress compiler warning in non-debug mode + RAW_DCHECK(r, ""); // sanity RAW_LOG(INFO, "New live %p object's alloc stack:", ptr); for (int i = 0; i < alloc.stack_depth; ++i) { RAW_LOG(INFO, " @ %p", alloc.call_stack[i]); @@ -1397,7 +1386,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); live_objects_total += live_object_count; live_bytes_total += live_byte_count; if (live_object_count) { - RAW_VLOG(10, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", + RAW_VLOG(1, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", live_object_count, live_byte_count, name, name2); } } @@ -1419,7 +1408,7 @@ void HeapLeakChecker::IgnoreObject(const void* ptr) { if (!HaveOnHeapLocked(&ptr, &object_size)) { RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr); } else { - RAW_VLOG(10, "Going to ignore live object at %p of %"PRIuS" bytes", + RAW_VLOG(1, "Going to ignore live object at %p of %"PRIuS" bytes", ptr, object_size); if (ignored_objects == NULL) { ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap))) @@ -1445,7 +1434,7 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) { if (object != ignored_objects->end() && object_size == object->second) { ignored_objects->erase(object); found = true; - RAW_VLOG(10, "Now not going to ignore live object " + RAW_VLOG(1, "Now not going to ignore live object " "at %p of %"PRIuS" bytes", ptr, object_size); } } @@ -1494,7 +1483,7 @@ void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) { const HeapProfileTable::Stats& t = heap_profile->total(); const size_t start_inuse_bytes = t.alloc_size - t.free_size; const size_t start_inuse_allocs = t.allocs - t.frees; - RAW_VLOG(10, "Start check \"%s\" profile: %"PRIuS" bytes " + RAW_VLOG(1, "Start check \"%s\" profile: %"PRIuS" bytes " "in %"PRIuS" objects", name_, start_inuse_bytes, start_inuse_allocs); } else { @@ -1623,7 +1612,7 @@ bool 
HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { { // Heap activity in other threads is paused during this function // (i.e. until we got all profile difference info). - SpinLockHolder hl(&heap_checker_lock); + SpinLockHolder l(&heap_checker_lock); if (heap_checker_on == false) { if (name_ != NULL) { // leak checking enabled when created the checker RAW_LOG(WARNING, "Heap leak checker got turned off after checker " @@ -1660,8 +1649,6 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { // Make the heap profile, other threads are locked out. HeapProfileTable::Snapshot* base = reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_); - RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); - pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; IgnoreAllLiveObjectsLocked(&a_local_var); leaks = heap_profile->NonLiveSnapshot(base); @@ -1681,28 +1668,23 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { initial_allocs, Allocator::alloc_count()); } } else if (FLAGS_heap_check_test_pointer_alignment) { - if (pointer_source_alignment == 1) { - RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: " - "--heap_check_pointer_source_alignment was already set to 1"); + // Try with reduced pointer aligment + pointer_source_alignment = 1; + IgnoreAllLiveObjectsLocked(&a_local_var); + HeapProfileTable::Snapshot* leaks_wo_align = + heap_profile->NonLiveSnapshot(base); + pointer_source_alignment = kPointerSourceAlignment; + if (leaks_wo_align->Empty()) { + RAW_LOG(WARNING, "Found no leaks without pointer alignment: " + "something might be placing pointers at " + "unaligned addresses! This needs to be fixed."); } else { - // Try with reduced pointer aligment - pointer_source_alignment = 1; - IgnoreAllLiveObjectsLocked(&a_local_var); - HeapProfileTable::Snapshot* leaks_wo_align = - heap_profile->NonLiveSnapshot(base); - pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; - if (leaks_wo_align->Empty()) { - RAW_LOG(WARNING, "Found no leaks without pointer alignment: " - "something might be placing pointers at " - "unaligned addresses! This needs to be fixed."); - } else { - RAW_LOG(INFO, "Found leaks without pointer alignment as well: " - "unaligned pointers must not be the cause of leaks."); - RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " - "to diagnose the leaks."); - } - heap_profile->ReleaseSnapshot(leaks_wo_align); + RAW_LOG(INFO, "Found leaks without pointer alignment as well: " + "unaligned pointers must not be the cause of leaks."); + RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " + "to diagnose the leaks."); } + heap_profile->ReleaseSnapshot(leaks_wo_align); } if (leaks != NULL) { @@ -1759,7 +1741,7 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { SuggestPprofCommand(pprof_file); { - SpinLockHolder hl(&heap_checker_lock); + SpinLockHolder l(&heap_checker_lock); heap_profile->ReleaseSnapshot(leaks); Allocator::Free(pprof_file); } @@ -1892,7 +1874,6 @@ static bool internal_init_start_has_run = false; } // Set all flags - RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); if (FLAGS_heap_check == "minimal") { // The least we can check. 
FLAGS_heap_check_before_constructors = false; // from after main
@@ -2062,7 +2043,7 @@ bool HeapLeakChecker::NoGlobalLeaks() {
// we never delete or change main_heap_checker once it's set:
HeapLeakChecker* main_hc = GlobalChecker();
if (main_hc) {
- RAW_VLOG(10, "Checking for whole-program memory leaks");
+ RAW_VLOG(1, "Checking for whole-program memory leaks");
// The program is over, so it's safe to symbolize addresses (which
// requires a fork) because no serious work is expected to be done
// after this. Symbolizing is really useful -- knowing what
@@ -2184,7 +2165,7 @@ void HeapLeakChecker::BeforeConstructorsLocked() {
RAW_CHECK(heap_profile == NULL, "");
heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable)))
HeapProfileTable(&Allocator::Allocate, &Allocator::Free);
- RAW_VLOG(10, "Starting tracking the heap");
+ RAW_VLOG(1, "Starting tracking the heap");
heap_checker_on = true;
}
@@ -2348,7 +2329,7 @@ void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address,
value.start_address = AsInt(start_address);
value.max_depth = max_depth;
if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) {
- RAW_VLOG(10, "Disabling leak checking in stack traces "
+ RAW_VLOG(1, "Disabling leak checking in stack traces "
"under frame addresses between %p..%p",
start_address, end_address);
} else { // check that this is just a verbatim repetition
@@ -2371,7 +2352,7 @@ inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr,
const uintptr_t addr = AsInt(*ptr);
if (heap_profile->FindInsideAlloc(
*ptr, max_heap_object_size, ptr, object_size)) {
- RAW_VLOG(16, "Got pointer into %p at +%"PRIuPTR" offset",
+ RAW_VLOG(7, "Got pointer into %p at +%"PRIuPTR" offset",
*ptr, addr - AsInt(*ptr));
return true;
}
diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.cc b/third_party/tcmalloc/chromium/src/heap-profile-table.cc
index ecaf75f..66e4f20 100644
--- a/third_party/tcmalloc/chromium/src/heap-profile-table.cc
+++ b/third_party/tcmalloc/chromium/src/heap-profile-table.cc
@@ -99,7 +99,7 @@ const char HeapProfileTable::kFileExt[] = ".heap";
//----------------------------------------------------------------------
static const int kHashTableSize = 179999; // Size for table_.
-/*static*/ const int HeapProfileTable::kMaxStackDepth;
+/*static*/ const int HeapProfileTable::kMaxStackDepth = 32;
//----------------------------------------------------------------------
diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.h b/third_party/tcmalloc/chromium/src/heap-profile-table.h
index c9bee15..5403257 100644
--- a/third_party/tcmalloc/chromium/src/heap-profile-table.h
+++ b/third_party/tcmalloc/chromium/src/heap-profile-table.h
@@ -52,8 +52,8 @@ class HeapProfileTable {
// Extension to be used for heap profile files.
static const char kFileExt[];
- // Longest stack trace we record.
- static const int kMaxStackDepth = 32;
+ // Longest stack trace we record. Defined in the .cc file.
+ static const int kMaxStackDepth; // data types ---------------------------- diff --git a/third_party/tcmalloc/chromium/src/heap-profiler.cc b/third_party/tcmalloc/chromium/src/heap-profiler.cc index 3055f4ce..a1c643a9 100644 --- a/third_party/tcmalloc/chromium/src/heap-profiler.cc +++ b/third_party/tcmalloc/chromium/src/heap-profiler.cc @@ -524,9 +524,9 @@ extern "C" void HeapProfilerStart(const char* prefix) { filename_prefix[prefix_length] = '\0'; } -extern "C" int IsHeapProfilerRunning() { +extern "C" bool IsHeapProfilerRunning() { SpinLockHolder l(&heap_lock); - return is_on ? 1 : 0; // return an int, because C code doesn't have bool + return is_on; } extern "C" void HeapProfilerStop() { diff --git a/third_party/tcmalloc/chromium/src/internal_logging.h b/third_party/tcmalloc/chromium/src/internal_logging.h index 731b2d9..0cb9ba2 100644 --- a/third_party/tcmalloc/chromium/src/internal_logging.h +++ b/third_party/tcmalloc/chromium/src/internal_logging.h @@ -119,9 +119,7 @@ do { \ #ifndef NDEBUG #define ASSERT(cond) CHECK_CONDITION(cond) #else -#define ASSERT(cond) \ - do { \ - } while (0 && (cond)) +#define ASSERT(cond) ((void) 0) #endif // Print into buffer diff --git a/third_party/tcmalloc/chromium/src/malloc_extension.cc b/third_party/tcmalloc/chromium/src/malloc_extension.cc index c2f8b54..4ce262f 100644 --- a/third_party/tcmalloc/chromium/src/malloc_extension.cc +++ b/third_party/tcmalloc/chromium/src/malloc_extension.cc @@ -187,10 +187,7 @@ MallocExtension* MallocExtension::instance() { void MallocExtension::Register(MallocExtension* implementation) { perftools_pthread_once(&module_init, InitModule); // When running under valgrind, our custom malloc is replaced with - // valgrind's one and malloc extensions will not work. (Note: - // callers should be responsible for checking that they are the - // malloc that is really being run, before calling Register. This - // is just here as an extra sanity check.) + // valgrind's one and malloc extensions will not work. 
if (!RunningOnValgrind()) { current_instance = implementation; } diff --git a/third_party/tcmalloc/chromium/src/malloc_hook.cc b/third_party/tcmalloc/chromium/src/malloc_hook.cc index 4315b86..2a7f542 100644 --- a/third_party/tcmalloc/chromium/src/malloc_hook.cc +++ b/third_party/tcmalloc/chromium/src/malloc_hook.cc @@ -326,8 +326,8 @@ extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth, return 0; for (int i = 0; i < depth; ++i) { // stack[0] is our immediate caller if (InHookCaller(stack[i])) { - RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p", - i, stack[i], stack[i+1]); + RAW_VLOG(4, "Found hooked allocator at %d: %p <- %p", + i, stack[i], stack[i+1]); i += 1; // skip hook caller frame depth -= i; // correct depth if (depth > max_depth) depth = max_depth; diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.cc b/third_party/tcmalloc/chromium/src/memory_region_map.cc index f6bed45..05fdc06 100644 --- a/third_party/tcmalloc/chromium/src/memory_region_map.cc +++ b/third_party/tcmalloc/chromium/src/memory_region_map.cc @@ -181,7 +181,7 @@ static MemoryRegionMap::RegionSetRep regions_rep; static bool recursive_insert = false; void MemoryRegionMap::Init(int max_stack_depth) { - RAW_VLOG(10, "MemoryRegionMap Init"); + RAW_VLOG(2, "MemoryRegionMap Init"); RAW_CHECK(max_stack_depth >= 0, ""); // Make sure we don't overflow the memory in region stacks: RAW_CHECK(max_stack_depth <= kMaxStackDepth, @@ -192,7 +192,7 @@ void MemoryRegionMap::Init(int max_stack_depth) { if (client_count_ > 1) { // not first client: already did initialization-proper Unlock(); - RAW_VLOG(10, "MemoryRegionMap Init increment done"); + RAW_VLOG(2, "MemoryRegionMap Init increment done"); return; } // Set our hooks and make sure no other hooks existed: @@ -217,17 +217,17 @@ void MemoryRegionMap::Init(int max_stack_depth) { // recursive_insert = false; as InsertRegionLocked will also construct // regions_ on demand for us. 
Unlock(); - RAW_VLOG(10, "MemoryRegionMap Init done"); + RAW_VLOG(2, "MemoryRegionMap Init done"); } bool MemoryRegionMap::Shutdown() { - RAW_VLOG(10, "MemoryRegionMap Shutdown"); + RAW_VLOG(2, "MemoryRegionMap Shutdown"); Lock(); RAW_CHECK(client_count_ > 0, ""); client_count_ -= 1; if (client_count_ != 0) { // not last client; need not really shutdown Unlock(); - RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done"); + RAW_VLOG(2, "MemoryRegionMap Shutdown decrement done"); return true; } CheckMallocHooks(); // we assume no other hooks @@ -244,7 +244,7 @@ bool MemoryRegionMap::Shutdown() { RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used"); } Unlock(); - RAW_VLOG(10, "MemoryRegionMap Shutdown done"); + RAW_VLOG(2, "MemoryRegionMap Shutdown done"); return deleted_arena; } @@ -336,7 +336,7 @@ bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top, Lock(); const Region* region = DoFindRegionLocked(stack_top); if (region != NULL) { - RAW_VLOG(10, "Stack at %p is inside region %p..%p", + RAW_VLOG(2, "Stack at %p is inside region %p..%p", reinterpret_cast<void*>(stack_top), reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); @@ -361,7 +361,7 @@ MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { } inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { - RAW_VLOG(12, "Inserting region %p..%p from %p", + RAW_VLOG(4, "Inserting region %p..%p from %p", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr), reinterpret_cast<void*>(region.caller())); @@ -385,10 +385,10 @@ inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { // This inserts and allocates permanent storage for region // and its call stack data: it's safe to do it now: regions_->insert(region); - RAW_VLOG(12, "Inserted region %p..%p :", + RAW_VLOG(4, "Inserted region %p..%p :", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr)); - if (VLOG_IS_ON(12)) LogAllLocked(); + if (VLOG_IS_ON(4)) LogAllLocked(); } // These variables are local to MemoryRegionMap::InsertRegionLocked() @@ -425,7 +425,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { // and taken into account when the recursion unwinds. 
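// A minimal sketch of the save-and-replay pattern described above, with
// simplified control flow (illustrative only, not the exact code below):
//
//   if (recursive_insert) {                // re-entered from the insert below
//     saved_regions[saved_regions_count++] = region;  // park it for later
//   } else {
//     recursive_insert = true;             // guard against re-entry
//     regions_->insert(region);            // may allocate and re-enter above
//     recursive_insert = false;
//     while (saved_regions_count > 0)      // replay the parked inserts
//       regions_->insert(saved_regions[--saved_regions_count]);
//   }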
// Do the insert:
if (recursive_insert) { // recursion: save in saved_regions
- RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p",
+ RAW_VLOG(4, "Saving recursive insert of region %p..%p from %p",
reinterpret_cast<void*>(region.start_addr),
reinterpret_cast<void*>(region.end_addr),
reinterpret_cast<void*>(region.caller()));
@@ -436,7 +436,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) {
saved_regions[saved_regions_count++] = region;
} else { // not a recursive call
if (regions_ == NULL) { // init regions_
- RAW_VLOG(12, "Initializing region set");
+ RAW_VLOG(4, "Initializing region set");
regions_ = regions_rep.region_set();
recursive_insert = true;
new(regions_) RegionSet();
@@ -470,7 +470,7 @@ void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) {
max_stack_depth_, kStripFrames + 1) : 0;
region.set_call_stack_depth(depth); // record stack info fully
- RAW_VLOG(10, "New global region %p..%p from %p",
+ RAW_VLOG(2, "New global region %p..%p from %p",
reinterpret_cast<void*>(region.start_addr),
reinterpret_cast<void*>(region.end_addr),
reinterpret_cast<void*>(region.caller()));
@@ -499,7 +499,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
// An exact match, so it's safe to remove.
--saved_regions_count;
--put_pos;
- RAW_VLOG(10, ("Insta-Removing saved region %p..%p; "
+ RAW_VLOG(2, ("Insta-Removing saved region %p..%p; "
"now have %d saved regions"),
reinterpret_cast<void*>(start_addr),
reinterpret_cast<void*>(end_addr),
@@ -523,7 +523,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
uintptr_t start_addr = reinterpret_cast<uintptr_t>(start);
uintptr_t end_addr = start_addr + size;
// subtract start_addr, end_addr from all the regions
- RAW_VLOG(10, "Removing global region %p..%p; have %"PRIuS" regions",
+ RAW_VLOG(2, "Removing global region %p..%p; have %"PRIuS" regions",
reinterpret_cast<void*>(start_addr),
reinterpret_cast<void*>(end_addr),
regions_->size());
@@ -533,12 +533,12 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
for (RegionSet::iterator region = regions_->lower_bound(sample);
region != regions_->end() && region->start_addr < end_addr;
/*noop*/) {
- RAW_VLOG(13, "Looking at region %p..%p",
+ RAW_VLOG(5, "Looking at region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
if (start_addr <= region->start_addr &&
region->end_addr <= end_addr) { // full deletion
- RAW_VLOG(12, "Deleting region %p..%p",
+ RAW_VLOG(4, "Deleting region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
RegionSet::iterator d = region;
@@ -547,7 +547,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
continue;
} else if (region->start_addr < start_addr &&
end_addr < region->end_addr) { // cutting-out split
- RAW_VLOG(12, "Splitting region %p..%p in two",
+ RAW_VLOG(4, "Splitting region %p..%p in two",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
// Make another region for the start portion:
@@ -560,13 +560,13 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
const_cast<Region&>(*region).set_start_addr(end_addr);
} else if (end_addr > region->start_addr &&
start_addr <= region->start_addr) { // cut from start
- RAW_VLOG(12, "Start-chopping region %p..%p",
+ RAW_VLOG(4, "Start-chopping region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr)); const_cast<Region&>(*region).set_start_addr(end_addr); } else if (start_addr > region->start_addr && start_addr < region->end_addr) { // cut from end - RAW_VLOG(12, "End-chopping region %p..%p", + RAW_VLOG(4, "End-chopping region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); // Can't just modify region->end_addr (it's the sorting key): @@ -582,11 +582,11 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { } ++region; } - RAW_VLOG(12, "Removed region %p..%p; have %"PRIuS" regions", + RAW_VLOG(4, "Removed region %p..%p; have %"PRIuS" regions", reinterpret_cast<void*>(start_addr), reinterpret_cast<void*>(end_addr), regions_->size()); - if (VLOG_IS_ON(12)) LogAllLocked(); + if (VLOG_IS_ON(4)) LogAllLocked(); Unlock(); } @@ -596,7 +596,7 @@ void MemoryRegionMap::MmapHook(const void* result, int fd, off_t offset) { // TODO(maxim): replace all 0x%"PRIxS" by %p when RAW_VLOG uses a safe // snprintf reimplementation that does not malloc to pretty-print NULL - RAW_VLOG(10, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " + RAW_VLOG(2, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " "prot %d flags %d fd %d offs %lld", reinterpret_cast<uintptr_t>(result), size, reinterpret_cast<uint64>(start), prot, flags, fd, @@ -607,7 +607,7 @@ void MemoryRegionMap::MmapHook(const void* result, } void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) { - RAW_VLOG(10, "MUnmap of %p %"PRIuS"", ptr, size); + RAW_VLOG(2, "MUnmap of %p %"PRIuS"", ptr, size); if (size != 0) { RecordRegionRemoval(ptr, size); } @@ -617,7 +617,7 @@ void MemoryRegionMap::MremapHook(const void* result, const void* old_addr, size_t old_size, size_t new_size, int flags, const void* new_addr) { - RAW_VLOG(10, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " + RAW_VLOG(2, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " "to %"PRIuS" flags %d new_addr=0x%"PRIxPTR, (uintptr_t)result, (uintptr_t)old_addr, old_size, new_size, flags, @@ -631,7 +631,7 @@ void MemoryRegionMap::MremapHook(const void* result, extern "C" void* __sbrk(ptrdiff_t increment); // defined in libc void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) { - RAW_VLOG(10, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); + RAW_VLOG(2, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); if (result != reinterpret_cast<void*>(-1)) { if (increment > 0) { void* new_end = sbrk(0); diff --git a/third_party/tcmalloc/chromium/src/page_heap.cc b/third_party/tcmalloc/chromium/src/page_heap.cc index a256b64..31130e9 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.cc +++ b/third_party/tcmalloc/chromium/src/page_heap.cc @@ -61,65 +61,50 @@ PageHeap::PageHeap() } } -// Returns the minimum number of pages necessary to ensure that an -// allocation of size n can be aligned to the given alignment. -static Length AlignedAllocationSize(Length n, size_t alignment) { - ASSERT(alignment >= kPageSize); - return n + tcmalloc::pages(alignment - kPageSize); -} - -Span* PageHeap::New(Length n, size_t sc, size_t align) { +Span* PageHeap::New(Length n) { ASSERT(Check()); ASSERT(n > 0); - if (align < kPageSize) { - align = kPageSize; - } - - Length aligned_size = AlignedAllocationSize(n, align); - // Find first size >= n that has a non-empty list - for (Length s = aligned_size; s < kMaxPages; s++) { + for (Length s = n; s < kMaxPages; s++) { Span* ll = &free_[s].normal; // If we're lucky, ll is non-empty, meaning it has a suitable span. 
if (!DLL_IsEmpty(ll)) { ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST); - return Carve(ll->next, n, sc, align); + return Carve(ll->next, n); } // Alternatively, maybe there's a usable returned span. ll = &free_[s].returned; if (!DLL_IsEmpty(ll)) { ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST); - return Carve(ll->next, n, sc, align); + return Carve(ll->next, n); } // Still no luck, so keep looking in larger classes. } - Span* result = AllocLarge(n, sc, align); + Span* result = AllocLarge(n); if (result != NULL) return result; // Grow the heap and try again - if (!GrowHeap(aligned_size)) { + if (!GrowHeap(n)) { ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); ASSERT(Check()); return NULL; } - return AllocLarge(n, sc, align); + return AllocLarge(n); } -Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) { - // Find the best span (closest to n in size). +Span* PageHeap::AllocLarge(Length n) { + // find the best span (closest to n in size). // The following loops implements address-ordered best-fit. Span *best = NULL; - Length aligned_size = AlignedAllocationSize(n, align); - // Search through normal list for (Span* span = large_.normal.next; span != &large_.normal; span = span->next) { - if (span->length >= aligned_size) { + if (span->length >= n) { if ((best == NULL) || (span->length < best->length) || ((span->length == best->length) && (span->start < best->start))) { @@ -133,7 +118,7 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) { for (Span* span = large_.returned.next; span != &large_.returned; span = span->next) { - if (span->length >= aligned_size) { + if (span->length >= n) { if ((best == NULL) || (span->length < best->length) || ((span->length == best->length) && (span->start < best->start))) { @@ -143,18 +128,19 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) { } } - return best == NULL ? NULL : Carve(best, n, sc, align); + return best == NULL ? NULL : Carve(best, n); } Span* PageHeap::Split(Span* span, Length n) { ASSERT(0 < n); ASSERT(n < span->length); - ASSERT((span->location != Span::IN_USE) || span->sizeclass == 0); + ASSERT(span->location == Span::IN_USE); + ASSERT(span->sizeclass == 0); Event(span, 'T', n); const int extra = span->length - n; Span* leftover = NewSpan(span->start + n, extra); - leftover->location = span->location; + ASSERT(leftover->location == Span::IN_USE); Event(leftover, 'U', extra); RecordSpan(leftover); pagemap_.set(span->start + n - 1, span); // Update map from pageid to span @@ -175,71 +161,43 @@ void PageHeap::DecommitSpan(Span* span) { stats_.committed_bytes -= span->length << kPageShift; } -Span* PageHeap::Carve(Span* span, Length n, size_t sc, size_t align) { +Span* PageHeap::Carve(Span* span, Length n) { ASSERT(n > 0); ASSERT(span->location != Span::IN_USE); - ASSERT(align >= kPageSize); const int old_location = span->location; - - Length align_pages = align >> kPageShift; RemoveFromFreeList(span); - - if (span->start & (align_pages - 1)) { - Length skip_for_alignment = align_pages - (span->start & (align_pages - 1)); - Span* aligned = Split(span, skip_for_alignment); - - // The next span of |span| was just splitted -- no need to - // coalesce them. The previous span of |span| was not previously coalesced - // with |span|, i.e. is NULL or has location other than |old_location|. 
- const PageID p = span->start; - const Length n = span->length; - Span* prev = GetDescriptor(p-1); - ASSERT(prev == NULL || - prev->location == Span::IN_USE || - prev->location != old_location); - PrependToFreeList(span); // Skip coalescing - no candidates possible - span = aligned; - } + span->location = Span::IN_USE; + Event(span, 'A', n); const int extra = span->length - n; ASSERT(extra >= 0); if (extra > 0) { - Span* leftover = Split(span, n); + Span* leftover = NewSpan(span->start + n, extra); + leftover->location = old_location; + Event(leftover, 'S', extra); + RecordSpan(leftover); + // The previous span of |leftover| was just splitted -- no need to // coalesce them. The next span of |leftover| was not previously coalesced - // with |span|, i.e. is NULL or has location other than |old_location|. + // with |span|, i.e. is NULL or has got location other than |old_location|. const PageID p = leftover->start; const Length len = leftover->length; Span* next = GetDescriptor(p+len); ASSERT (next == NULL || next->location == Span::IN_USE || next->location != leftover->location); - PrependToFreeList(leftover); - } - + PrependToFreeList(leftover); // Skip coalescing - no candidates possible + span->length = n; + pagemap_.set(span->start + n - 1, span); + } ASSERT(Check()); if (old_location == Span::ON_RETURNED_FREELIST) { // We need to recommit this address space. CommitSpan(span); } - - span->location = Span::IN_USE; - span->sizeclass = sc; - Event(span, 'A', n); - - // Cache sizeclass info eagerly. Locking is not necessary. - // (Instead of being eager, we could just replace any stale info - // about this span, but that seems to be no better in practice.) - CacheSizeClass(span->start, sc); - - if (sc != kLargeSizeClass) { - for (Length i = 1; i < n; i++) { - pagemap_.set(span->start + i, span); - CacheSizeClass(span->start + i, sc); - } - } - + ASSERT(span->location == Span::IN_USE); + ASSERT(span->length == n); ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); return span; } @@ -421,6 +379,18 @@ Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { return released_pages; } +void PageHeap::RegisterSizeClass(Span* span, size_t sc) { + // Associate span object with all interior pages as well + ASSERT(span->location == Span::IN_USE); + ASSERT(GetDescriptor(span->start) == span); + ASSERT(GetDescriptor(span->start+span->length-1) == span); + Event(span, 'C', sc); + span->sizeclass = sc; + for (Length i = 1; i < span->length-1; i++) { + pagemap_.set(span->start+i, span); + } +} + static double MB(uint64_t bytes) { return bytes / 1048576.0; } diff --git a/third_party/tcmalloc/chromium/src/page_heap.h b/third_party/tcmalloc/chromium/src/page_heap.h index 63f21b2..52acedb 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.h +++ b/third_party/tcmalloc/chromium/src/page_heap.h @@ -101,49 +101,21 @@ class PERFTOOLS_DLL_DECL PageHeap { public: PageHeap(); - // Allocate a run of "n" pages. Returns NULL if out of memory. - // Caller should not pass "n == 0" -- instead, n should have been - // rounded up already. The span will be used for allocating objects - // with the specifled sizeclass sc (sc must be zero for large - // objects). The first page of the span will be aligned to the value - // specified by align, which must be a power of two. - Span* New(Length n, size_t sc, size_t align); + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. 
+ Span* New(Length n); // Delete the span "[p, p+n-1]". // REQUIRES: span was returned by earlier call to New() and // has not yet been deleted. void Delete(Span* span); - // Gets either the size class of addr, if it is a small object, or it's span. - // Return: - // if addr is invalid: - // leave *out_sc and *out_span unchanged and return false; - // if addr is valid and has a small size class: - // *out_sc = the size class - // *out_span = <undefined> - // return true - // if addr is valid and has a large size class: - // *out_sc = kLargeSizeClass - // *out_span = the span pointer - // return true - bool GetSizeClassOrSpan(void* addr, size_t* out_sc, Span** out_span) { - const PageID p = reinterpret_cast<uintptr_t>(addr) >> kPageShift; - size_t cl = GetSizeClassIfCached(p); - Span* span = NULL; - - if (cl != kLargeSizeClass) { - ASSERT(cl == GetDescriptor(p)->sizeclass); - } else { - span = GetDescriptor(p); - if (!span) { - return false; - } - cl = span->sizeclass; - } - *out_span = span; - *out_sc = cl; - return true; - } + // Mark an allocated span as being used for small objects of the + // specified size-class. + // REQUIRES: span was returned by an earlier call to New() + // and has not yet been deleted. + void RegisterSizeClass(Span* span, size_t sc); // Split an allocated span into two spans: one of length "n" pages // followed by another span of length "span->length - n" pages. @@ -151,29 +123,14 @@ class PERFTOOLS_DLL_DECL PageHeap { // Returns a pointer to the second span. // // REQUIRES: "0 < n < span->length" - // REQUIRES: a) the span is free or b) sizeclass == 0 + // REQUIRES: span->location == IN_USE + // REQUIRES: span->sizeclass == 0 Span* Split(Span* span, Length n); // Return the descriptor for the specified page. Returns NULL if // this PageID was not allocated previously. inline Span* GetDescriptor(PageID p) const { - Span* ret = reinterpret_cast<Span*>(pagemap_.get(p)); -#ifndef NDEBUG - if (ret != NULL && ret->location == Span::IN_USE) { - size_t cl = GetSizeClassIfCached(p); - // Three cases: - // - The object is not cached - // - The object is cached correctly - // - It is a large object and we're not looking at the first - // page. This happens in coalescing. - ASSERT(cl == kLargeSizeClass || cl == ret->sizeclass || - (ret->start != p && ret->sizeclass == kLargeSizeClass)); - // If the object is sampled, it must have be kLargeSizeClass - ASSERT(ret->sizeclass == kLargeSizeClass || !ret->sample); - } -#endif - - return ret; + return reinterpret_cast<Span*>(pagemap_.get(p)); } // Dump state to stderr @@ -277,7 +234,7 @@ class PERFTOOLS_DLL_DECL PageHeap { // length exactly "n" and mark it as non-free so it can be returned // to the client. After all that, decrease free_pages_ by n and // return span. - Span* Carve(Span* span, Length n, size_t sc, size_t align); + Span* Carve(Span* span, Length n); void RecordSpan(Span* span) { pagemap_.set(span->start, span); @@ -288,7 +245,7 @@ class PERFTOOLS_DLL_DECL PageHeap { // Allocate a large span of length == n. If successful, returns a // span of exactly the specified length. Else, returns NULL. - Span* AllocLarge(Length n, size_t sc, size_t align); + Span* AllocLarge(Length n); // Coalesce span with neighboring spans if possible, prepend to // appropriate free list, and adjust stats. 
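The page_heap.cc and page_heap.h hunks above back out the aligned-span allocation path: New() and Carve() lose their sizeclass and alignment parameters, AllocLarge() no longer over-allocates for alignment, and size-class registration returns as the separate RegisterSizeClass() step. A minimal sketch of the arithmetic the removed path relied on, with simplified types and an assumed 8K page size (the real kPageShift may differ); align must be a power of two that is a multiple of kPageSize:

#include <cstddef>
#include <cstdint>

typedef size_t Length;                // a span length, counted in pages
static const size_t kPageShift = 13;  // assumption: 8K pages
static const size_t kPageSize = 1 << kPageShift;

// Pages to request so that an n-page, align-byte-aligned run can always be
// carved out of the resulting span, wherever that span happens to start.
Length AlignedAllocationSize(Length n, size_t align) {
  return n + ((align - kPageSize) >> kPageShift);
}

// Pages to skip at the front of a span starting at page number `start` so
// that the remainder begins on an align-byte boundary.
Length SkipForAlignment(uintptr_t start, size_t align) {
  const Length align_pages = align >> kPageShift;
  const uintptr_t misfit = start & (align_pages - 1);
  return misfit == 0 ? 0 : align_pages - misfit;
}

In the worst case a span starts one page past an alignment boundary, so align/kPageSize - 1 extra pages are always enough to reach the next boundary, which is exactly what AlignedAllocationSize reserves.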
diff --git a/third_party/tcmalloc/chromium/src/page_heap_allocator.h b/third_party/tcmalloc/chromium/src/page_heap_allocator.h index 3f75939..20e1ab1 100644 --- a/third_party/tcmalloc/chromium/src/page_heap_allocator.h +++ b/third_party/tcmalloc/chromium/src/page_heap_allocator.h @@ -44,7 +44,7 @@ class PageHeapAllocator { // allocated and their constructors might not have run by the time some // other static variable tries to allocate memory. void Init() { - ASSERT(sizeof(T) <= kAllocIncrement); + ASSERT(kAlignedSize <= kAllocIncrement); inuse_ = 0; free_area_ = NULL; free_avail_ = 0; @@ -60,9 +60,8 @@ class PageHeapAllocator { result = free_list_; free_list_ = *(reinterpret_cast<void**>(result)); } else { - if (free_avail_ < sizeof(T)) { - // Need more room. We assume that MetaDataAlloc returns - // suitably aligned memory. + if (free_avail_ < kAlignedSize) { + // Need more room free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); if (free_area_ == NULL) { CRASH("FATAL ERROR: Out of memory trying to allocate internal " @@ -72,8 +71,8 @@ class PageHeapAllocator { free_avail_ = kAllocIncrement; } result = free_area_; - free_area_ += sizeof(T); - free_avail_ -= sizeof(T); + free_area_ += kAlignedSize; + free_avail_ -= kAlignedSize; } inuse_++; return reinterpret_cast<T*>(result); @@ -91,6 +90,10 @@ class PageHeapAllocator { // How much to allocate from system at a time static const int kAllocIncrement = 128 << 10; + // Aligned size of T + static const size_t kAlignedSize + = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); + // Free area from which to carve new objects char* free_area_; size_t free_avail_; diff --git a/third_party/tcmalloc/chromium/src/pprof b/third_party/tcmalloc/chromium/src/pprof index 8aff380..fec0c9e 100755 --- a/third_party/tcmalloc/chromium/src/pprof +++ b/third_party/tcmalloc/chromium/src/pprof @@ -89,10 +89,11 @@ my %obj_tool_map = ( ); my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local my $GV = "gv"; -my $KCACHEGRIND = "kcachegrind"; my $PS2PDF = "ps2pdf"; # These are used for dynamic profiles -my $URL_FETCHER = "curl -s"; +my $WGET = "wget"; +my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets +my $CURL = "curl"; # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -106,12 +107,6 @@ my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; -# These are the web pages that can be named on the command line. -# All the alternatives must begin with /. -my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . - "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . 
- "$FILTEREDPROFILE_PAGE)"; - # default binary name my $UNKNOWN_BINARY = "(unknown)"; @@ -180,14 +175,12 @@ Output type: --text Generate text report --callgrind Generate callgrind format to stdout --gv Generate Postscript and display - --web Generate SVG and display --list=<regexp> Generate source listing of matching routines --disasm=<regexp> Generate disassembly of matching routines --symbols Print demangled symbol names found at given addresses --dot Generate DOT file to stdout --ps Generate Postcript to stdout --pdf Generate PDF to stdout - --svg Generate SVG to stdout --gif Generate GIF to stdout --raw Generate symbolized pprof data (useful with remote fetch) @@ -230,8 +223,6 @@ pprof /bin/ls ls.prof Enters "interactive" mode pprof --text /bin/ls ls.prof Outputs one line per procedure -pprof --web /bin/ls ls.prof - Displays annotated call-graph in web browser pprof --gv /bin/ls ls.prof Displays annotated call-graph via 'gv' pprof --gv --focus=Mutex /bin/ls ls.prof @@ -242,9 +233,6 @@ pprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() pprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() - -pprof http://localhost:1234/ - Enters "interactive" mode pprof --text localhost:1234 Outputs one line per procedure for localhost:1234 pprof --raw localhost:1234 > ./local.raw @@ -304,12 +292,10 @@ sub Init() { $main::opt_disasm = ""; $main::opt_symbols = 0; $main::opt_gv = 0; - $main::opt_web = 0; $main::opt_dot = 0; $main::opt_ps = 0; $main::opt_pdf = 0; $main::opt_gif = 0; - $main::opt_svg = 0; $main::opt_raw = 0; $main::opt_nodecount = 80; @@ -344,16 +330,13 @@ sub Init() { # Are we using $SYMBOL_PAGE? $main::use_symbol_page = 0; - # Files returned by TempName. - %main::tempnames = (); - # Type of profile we are dealing with # Supported types: - # cpu - # heap - # growth - # contention - $main::profile_type = ''; # Empty type means "unknown" + # cpu + # heap + # growth + # contention + $main::profile_type = ''; # Empty type means "unknown" GetOptions("help!" => \$main::opt_help, "version!" => \$main::opt_version, @@ -372,11 +355,9 @@ sub Init() { "disasm=s" => \$main::opt_disasm, "symbols!" => \$main::opt_symbols, "gv!" => \$main::opt_gv, - "web!" => \$main::opt_web, "dot!" => \$main::opt_dot, "ps!" => \$main::opt_ps, "pdf!" => \$main::opt_pdf, - "svg!" => \$main::opt_svg, "gif!" => \$main::opt_gif, "raw!" => \$main::opt_raw, "interactive!" => \$main::opt_interactive, @@ -399,8 +380,8 @@ sub Init() { "tools=s" => \$main::opt_tools, "test!" => \$main::opt_test, "debug!" => \$main::opt_debug, - # Undocumented flags used only by unittests: - "test_stride=i" => \$main::opt_test_stride, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, ) || usage("Invalid option(s)"); # Deal with the standard --help and --version @@ -452,11 +433,9 @@ sub Init() { ($main::opt_disasm eq '' ? 0 : 1) + ($main::opt_symbols == 0 ? 
0 : 1) + $main::opt_gv + - $main::opt_web + $main::opt_dot + $main::opt_ps + $main::opt_pdf + - $main::opt_svg + $main::opt_gif + $main::opt_raw + $main::opt_interactive + @@ -531,6 +510,20 @@ sub Init() { ConfigureObjTools($main::prog) } + # Check what flags our commandline utilities support + if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) { + my @lines = <TFILE>; + if (grep(/unrecognized/, @lines) > 0) { + # grep found 'unrecognized' token from WGET, clear WGET flags + $WGET_FLAGS = ""; + } + close(TFILE); + } + # TODO(csilvers): check all the other binaries and objtools to see + # if they are installed and what flags they support, and store that + # in a data structure here, rather than scattering these tests about. + # Then, ideally, rewrite code to use wget OR curl OR GET or ... + # Break the opt_list_prefix into the prefix_list array @prefix_list = split (',', $main::opt_lib_prefix); @@ -641,24 +634,9 @@ sub Main() { } else { if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), ""); - } elsif ($main::opt_web) { - my $tmp = TempName($main::next_tmpfile, "svg"); - RunWeb($tmp); - # The command we run might hand the file name off - # to an already running browser instance and then exit. - # Normally, we'd remove $tmp on exit (right now), - # but fork a child to remove $tmp a little later, so that the - # browser has time to load it first. - delete $main::tempnames{$tmp}; - if (fork() == 0) { - sleep 5; - unlink($tmp); - exit(0); - } + RunGV(PsTempName($main::next_tmpfile), ""); } } else { - cleanup(); exit(1); } } @@ -689,7 +667,7 @@ sub ReadlineMightFail { sub RunGV { my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background + my $bg = shift; # "" or " &" if we should run in background if (!system("$GV --version >/dev/null 2>&1")) { # Options using double dash are supported by this gv version. # Also, turn on noantialias to better handle bug in gv for @@ -704,41 +682,6 @@ sub RunGV { } } -sub RunWeb { - my $fname = shift; - print STDERR "Loading web page file:///$fname\n"; - - if (`uname` =~ /Darwin/) { - # OS X: open will use standard preference for SVG files. - system("/usr/bin/open", $fname); - return; - } - - # Some kind of Unix; try generic symlinks, then specific browsers. - # (Stop once we find one.) - # Works best if the browser is already running. - my @alt = ( - "/etc/alternatives/gnome-www-browser", - "/etc/alternatives/x-www-browser", - "google-chrome", - "firefox", - ); - foreach my $b (@alt) { - if (system($b, $fname) == 0) { - return; - } - } - - print STDERR "Could not load web browser.\n"; -} - -sub RunKcachegrind { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n"; - system("$KCACHEGRIND " . $fname . $bg); -} - ##### Interactive helper routines ##### @@ -746,11 +689,10 @@ sub InteractiveMode { $| = 1; # Make output unbuffered for interactive mode my ($orig_profile, $symbols, $libs, $total) = @_; - print STDERR "Welcome to pprof! For help, type 'help'.\n"; + print "Welcome to pprof! For help, type 'help'.\n"; - # Use ReadLine if it's installed and input comes from a console. - if ( -t STDIN && - !ReadlineMightFail() && + # Use ReadLine if it's installed. 
+ if ( !ReadlineMightFail() && defined(eval {require Term::ReadLine}) ) { my $term = new Term::ReadLine 'pprof'; while ( defined ($_ = $term->readline('(pprof) '))) { @@ -761,7 +703,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print STDERR "(pprof) "; + print "(pprof) "; $_ = <STDIN>; last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -785,13 +727,13 @@ sub InteractiveCommand { my($orig_profile, $symbols, $libs, $total, $command) = @_; $_ = $command; # just to make future m//'s easier if (!defined($_)) { - print STDERR "\n"; + print "\n"; return 0; } - if (m/^\s*quit/) { + if (m/^ *quit/) { return 0; } - if (m/^\s*help/) { + if (m/^ *help/) { InteractiveHelpMessage(); return 1; } @@ -803,7 +745,7 @@ sub InteractiveCommand { $main::opt_gv = 0; $main::opt_cum = 0; - if (m/^\s*(text|top)(\d*)\s*(.*)/) { + if (m/^ *(text|top)(\d*) *(.*)/) { $main::opt_text = 1; my $line_limit = ($2 ne "") ? int($2) : 10; @@ -822,24 +764,7 @@ sub InteractiveCommand { PrintText($symbols, $flat, $cumulative, $total, $line_limit); return 1; } - if (m/^\s*callgrind\s*([^ \n]*)/) { - $main::opt_callgrind = 1; - - # Get derived profiles - my $calls = ExtractCalls($symbols, $orig_profile); - my $filename = $1; - if ( $1 eq '' ) { - $filename = TempName($main::next_tmpfile, "callgrind"); - } - PrintCallgrind($calls, $filename); - if ( $1 eq '' ) { - RunKcachegrind($filename, " & "); - $main::next_tmpfile++; - } - - return 1; - } - if (m/^\s*list\s*(.+)/) { + if (m/^ *list *(.+)/) { $main::opt_list = 1; my $routine; @@ -856,7 +781,7 @@ sub InteractiveCommand { PrintListing($libs, $flat, $cumulative, $routine); return 1; } - if (m/^\s*disasm\s*(.+)/) { + if (m/^ *disasm *(.+)/) { $main::opt_disasm = 1; my $routine; @@ -874,18 +799,12 @@ sub InteractiveCommand { PrintDisassembly($libs, $flat, $cumulative, $routine, $total); return 1; } - if (m/^\s*(gv|web)\s*(.*)/) { - $main::opt_gv = 0; - $main::opt_web = 0; - if ($1 eq "gv") { - $main::opt_gv = 1; - } elsif ($1 eq "web") { - $main::opt_web = 1; - } + if (m/^ *gv *(.*)/) { + $main::opt_gv = 1; my $focus; my $ignore; - ($focus, $ignore) = ParseInteractiveArgs($2); + ($focus, $ignore) = ParseInteractiveArgs($1); # Process current profile to account for various settings my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); @@ -896,19 +815,11 @@ sub InteractiveCommand { my $cumulative = CumulativeProfile($reduced); if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), " &"); - } elsif ($main::opt_web) { - RunWeb(TempName($main::next_tmpfile, "svg")); - } + RunGV(PsTempName($main::next_tmpfile), " &"); $main::next_tmpfile++; } return 1; } - if (m/^\s*$/) { - return 1; - } - print STDERR "Unknown command: try 'help'.\n"; return 1; } @@ -945,7 +856,7 @@ sub ProcessProfile { } sub InteractiveHelpMessage { - print STDERR <<ENDOFHELP; + print <<ENDOFHELP; Interactive pprof mode Commands: @@ -957,14 +868,6 @@ Commands: the "focus" regular expression matches a routine name on the stack trace. - web - web [focus] [-ignore1] [-ignore2] - Like GV, but displays profile in your web browser instead of using - Ghostview. Works best if your web browser is already running. - To change the browser that gets used: - On Linux, set the /etc/alternatives/gnome-www-browser symlink. - On OS X, change the Finder association for SVG files. 
- list [routine_regexp] [-ignore1] [-ignore2] Show source listing of routines whose names match "routine_regexp" @@ -979,10 +882,6 @@ Commands: Show disassembly of routines whose names match "routine_regexp", annotated with sample counts. - callgrind - callgrind [filename] - Generates callgrind file. If no filename is given, kcachegrind is called. - help - This listing quit or ^D - End pprof @@ -1014,19 +913,16 @@ sub ParseInteractiveArgs { } } if ($ignore ne "") { - print STDERR "Ignoring samples in call stacks that match '$ignore'\n"; + print "Ignoring samples in call stacks that match '$ignore'\n"; } return ($focus, $ignore); } ##### Output code ##### -sub TempName { +sub PsTempName { my $fnum = shift; - my $ext = shift; - my $file = "$main::tmpfile_ps.$fnum.$ext"; - $main::tempnames{$file} = 1; - return $file; + return "$main::tmpfile_ps" . "." . "$fnum" . ".ps"; } # Print profile data in packed binary format (64-bit) to standard out @@ -1149,15 +1045,7 @@ sub PrintText { # Print the call graph in a way that's suiteable for callgrind. sub PrintCallgrind { my $calls = shift; - my $filename; - if ($main::opt_interactive) { - $filename = shift; - print STDERR "Writing callgrind file to '$filename'.\n" - } else { - $filename = "&STDOUT"; - } - open(CG, ">".$filename ); - printf CG ("events: Hits\n\n"); + printf("events: Hits\n\n"); foreach my $call ( map { $_->[0] } sort { $a->[1] cmp $b ->[1] || $a->[2] <=> $b->[2] } @@ -1169,15 +1057,13 @@ sub PrintCallgrind { my ( $caller_file, $caller_line, $caller_function, $callee_file, $callee_line, $callee_function ) = ( $1, $2, $3, $5, $6, $7 ); - - - printf CG ("fl=$caller_file\nfn=$caller_function\n"); + printf("fl=$caller_file\nfn=$caller_function\n"); if (defined $6) { - printf CG ("cfl=$callee_file\n"); - printf CG ("cfn=$callee_function\n"); - printf CG ("calls=$count $callee_line\n"); + printf("cfl=$callee_file\n"); + printf("cfn=$callee_function\n"); + printf("calls=$count $callee_line\n"); } - printf CG ("$caller_line $count\n\n"); + printf("$caller_line $count\n\n"); } } @@ -1499,7 +1385,7 @@ sub SourceLine { return undef; } my $lines = []; - push(@{$lines}, ""); # So we can use 1-based line numbers as indices + push(@{$lines}, ""); # So we can use 1-based line numbers as indices while (<FILE>) { push(@{$lines}, $_); } @@ -1591,8 +1477,8 @@ sub PrintDisassembledFunction { # Find run of instructions for this range of source lines my $first_inst = $i; while (($i <= $#instructions) && - ($instructions[$i]->[2] >= $first_line) && - ($instructions[$i]->[2] <= $last_line)) { + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { $e = $instructions[$i]; $flat_sum{$e->[2]} += $flat_count[$i]; $cum_sum{$e->[2]} += $cum_count[$i]; @@ -1604,16 +1490,16 @@ sub PrintDisassembledFunction { for (my $l = $first_line; $l <= $last_line; $l++) { my $line = SourceLine($current_file, $l); if (!defined($line)) { - $line = "?\n"; + $line = "?\n"; next; } else { $line =~ s/^\s+//; } printf("%6s %6s %5d: %s", - UnparseAlt($flat_sum{$l}), - UnparseAlt($cum_sum{$l}), - $l, - $line); + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); } # Print disassembly @@ -1630,9 +1516,9 @@ sub PrintDisassembledFunction { while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments printf("%6s %6s %8s: %6s\n", - UnparseAlt($flat_count[$x]), - UnparseAlt($cum_count[$x]), - $address, + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + $address, $d); } } @@ -1656,7 +1542,7 @@ sub PrintDot { # Find nodes to 
include my @list = (sort { abs(GetEntry($cumulative, $b)) <=> abs(GetEntry($cumulative, $a)) - || $a cmp $b } + || $a cmp $b } keys(%{$cumulative})); my $last = $nodecount - 1; if ($last > $#list) { @@ -1668,6 +1554,7 @@ sub PrintDot { } if ($last < 0) { print STDERR "No nodes to print\n"; + cleanup(); return 0; } @@ -1680,14 +1567,11 @@ sub PrintDot { # Open DOT output file my $output; if ($main::opt_gv) { - $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); + $output = "| $DOT -Tps2 >" . PsTempName($main::next_tmpfile); } elsif ($main::opt_ps) { $output = "| $DOT -Tps2"; } elsif ($main::opt_pdf) { $output = "| $DOT -Tps2 | $PS2PDF - -"; - } elsif ($main::opt_web || $main::opt_svg) { - # We need to post-process the SVG, so write to a temporary file always. - $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg"); } elsif ($main::opt_gif) { $output = "| $DOT -Tgif"; } else { @@ -1798,10 +1682,7 @@ sub PrintDot { my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); if ($fraction > 1) { $fraction = 1; } my $w = $fraction * 2; - if ($w < 1 && ($main::opt_web || $main::opt_svg)) { - # SVG output treats line widths < 1 poorly. - $w = 1; - } + #if ($w < 1) { $w = 1; } # Dot sometimes segfaults if given edge weights that are too large, so # we cap the weights at a large value @@ -1825,312 +1706,11 @@ sub PrintDot { } print DOT ("}\n"); - close(DOT); - - if ($main::opt_web || $main::opt_svg) { - # Rewrite SVG to be more usable inside web browser. - RewriteSvg(TempName($main::next_tmpfile, "svg")); - } + close(DOT); return 1; } -sub RewriteSvg { - my $svgfile = shift; - - open(SVG, $svgfile) || die "open temp svg: $!"; - my @svg = <SVG>; - close(SVG); - unlink $svgfile; - my $svg = join('', @svg); - - # Dot's SVG output is - # - # <svg width="___" height="___" - # viewBox="___" xmlns=...> - # <g id="graph0" transform="..."> - # ... - # </g> - # </svg> - # - # Change it to - # - # <svg width="100%" height="100%" - # xmlns=...> - # $svg_javascript - # <g id="viewport" transform="translate(0,0)"> - # <g id="graph0" transform="..."> - # ... - # </g> - # </g> - # </svg> - - # Fix width, height; drop viewBox. - $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/; - - # Insert script, viewport <g> above first <g> - my $svg_javascript = SvgJavascript(); - my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n"; - $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/; - - # Insert final </g> above </svg>. - $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; - $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/; - - if ($main::opt_svg) { - # --svg: write to standard output. - print $svg; - } else { - # Write back to temporary file. - open(SVG, ">$svgfile") || die "open $svgfile: $!"; - print SVG $svg; - close(SVG); - } -} - -sub SvgJavascript { - return <<'EOF'; -<script type="text/ecmascript"><![CDATA[ -// SVGPan -// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/ -// Local modification: if(true || ...) below to force panning, never moving. 
- -/** - * SVGPan library 1.2 - * ==================== - * - * Given an unique existing element with id "viewport", including the - * the library into any SVG adds the following capabilities: - * - * - Mouse panning - * - Mouse zooming (using the wheel) - * - Object dargging - * - * Known issues: - * - * - Zooming (while panning) on Safari has still some issues - * - * Releases: - * - * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui - * Fixed a bug with browser mouse handler interaction - * - * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui - * Updated the zoom code to support the mouse wheel on Safari/Chrome - * - * 1.0, Andrea Leofreddi - * First release - * - * This code is licensed under the following BSD license: - * - * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * The views and conclusions contained in the software and documentation are those of the - * authors and should not be interpreted as representing official policies, either expressed - * or implied, of Andrea Leofreddi. - */ - -var root = document.documentElement; - -var state = 'none', stateTarget, stateOrigin, stateTf; - -setupHandlers(root); - -/** - * Register handlers - */ -function setupHandlers(root){ - setAttributes(root, { - "onmouseup" : "add(evt)", - "onmousedown" : "handleMouseDown(evt)", - "onmousemove" : "handleMouseMove(evt)", - "onmouseup" : "handleMouseUp(evt)", - //"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element - }); - - if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0) - window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari - else - window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others - - var g = svgDoc.getElementById("svg"); - g.width = "100%"; - g.height = "100%"; -} - -/** - * Instance an SVGPoint object with given event coordinates. - */ -function getEventPoint(evt) { - var p = root.createSVGPoint(); - - p.x = evt.clientX; - p.y = evt.clientY; - - return p; -} - -/** - * Sets the current transform matrix of an element. 
- */ -function setCTM(element, matrix) { - var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")"; - - element.setAttribute("transform", s); -} - -/** - * Dumps a matrix to a string (useful for debug). - */ -function dumpMatrix(matrix) { - var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n 0, 0, 1 ]"; - - return s; -} - -/** - * Sets attributes of an element. - */ -function setAttributes(element, attributes){ - for (i in attributes) - element.setAttributeNS(null, i, attributes[i]); -} - -/** - * Handle mouse move event. - */ -function handleMouseWheel(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var delta; - - if(evt.wheelDelta) - delta = evt.wheelDelta / 3600; // Chrome/Safari - else - delta = evt.detail / -90; // Mozilla - - var z = 1 + delta; // Zoom factor: 0.9/1.1 - - var g = svgDoc.getElementById("viewport"); - - var p = getEventPoint(evt); - - p = p.matrixTransform(g.getCTM().inverse()); - - // Compute new scale matrix in current mouse position - var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y); - - setCTM(g, g.getCTM().multiply(k)); - - stateTf = stateTf.multiply(k.inverse()); -} - -/** - * Handle mouse move event. - */ -function handleMouseMove(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var g = svgDoc.getElementById("viewport"); - - if(state == 'pan') { - // Pan mode - var p = getEventPoint(evt).matrixTransform(stateTf); - - setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y)); - } else if(state == 'move') { - // Move mode - var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse()); - - setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM())); - - stateOrigin = p; - } -} - -/** - * Handle click event. - */ -function handleMouseDown(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var g = svgDoc.getElementById("viewport"); - - if(true || evt.target.tagName == "svg") { - // Pan mode - state = 'pan'; - - stateTf = g.getCTM().inverse(); - - stateOrigin = getEventPoint(evt).matrixTransform(stateTf); - } else { - // Move mode - state = 'move'; - - stateTarget = evt.target; - - stateTf = g.getCTM().inverse(); - - stateOrigin = getEventPoint(evt).matrixTransform(stateTf); - } -} - -/** - * Handle mouse button release event. 
- */ -function handleMouseUp(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - if(state == 'pan' || state == 'move') { - // Quit pan mode - state = ''; - } -} - -]]></script> -EOF -} - # Translate a stack of addresses into a stack of symbols sub TranslateStack { my $symbols = shift; @@ -2226,7 +1806,7 @@ sub Unparse { } } } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds } else { return sprintf("%d", $num); } @@ -2367,42 +1947,42 @@ sub RemoveUninterestingFrames { 'malloc', 'free', 'memalign', - 'posix_memalign', + 'posix_memalign', 'pvalloc', 'valloc', 'realloc', - 'tc_calloc', + 'tc_calloc', 'tc_cfree', 'tc_malloc', 'tc_free', 'tc_memalign', - 'tc_posix_memalign', + 'tc_posix_memalign', 'tc_pvalloc', 'tc_valloc', 'tc_realloc', - 'tc_new', - 'tc_delete', - 'tc_newarray', - 'tc_deletearray', - 'tc_new_nothrow', - 'tc_newarray_nothrow', - 'do_malloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', '::do_malloc', # new name -- got moved to an unnamed ns '::do_malloc_or_cpp_alloc', 'DoSampledAllocation', - 'simple_alloc::allocate', - '__malloc_alloc_template::allocate', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', '__builtin_delete', '__builtin_new', '__builtin_vec_delete', '__builtin_vec_new', 'operator new', 'operator new[]', - # These mark the beginning/end of our custom sections - '__start_google_malloc', - '__stop_google_malloc', - '__start_malloc_hook', - '__stop_malloc_hook') { + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { $skip{$name} = 1; $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything } @@ -2419,11 +1999,11 @@ sub RemoveUninterestingFrames { # TODO(dpeng): this should not be necessary; it's taken # care of by the general 2nd-pc mechanism below. foreach my $name ('ProfileData::Add', # historical - 'ProfileData::prof_handler', # historical - 'CpuProfiler::prof_handler', + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', '__FRAME_END__', - '__pthread_sighandler', - '__restore') { + '__pthread_sighandler', + '__restore') { $skip{$name} = 1; } } else { @@ -2462,10 +2042,10 @@ sub RemoveUninterestingFrames { my @path = (); foreach my $a (@addrs) { if (exists($symbols->{$a})) { - my $func = $symbols->{$a}->[0]; - if ($skip{$func} || ($func =~ m/$skip_regexp/)) { - next; - } + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + next; + } } push(@path, $a); } @@ -2490,8 +2070,8 @@ sub ReduceProfile { # To avoid double-counting due to recursion, skip a stack-trace # entry if it has already been seen if (!$seen{$e}) { - $seen{$e} = 1; - push(@path, $e); + $seen{$e} = 1; + push(@path, $e); } } my $reduced_path = join("\n", @path); @@ -2685,11 +2265,28 @@ sub AddEntries { AddEntry($profile, (join "\n", @k), $count); } +sub IsSymbolizedProfileFile { + my $file_name = shift; + + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + # Check if the file contains a symbol-section marker. 
+ open(TFILE, "<$file_name"); + my @lines = <TFILE>; + my $result = grep(/^--- *$symbol_marker/, @lines); + close(TFILE); + return $result > 0; +} + ##### Code to profile a server dynamically ##### sub CheckSymbolPage { my $url = SymbolPageURL(); - open(SYMBOL, "$URL_FETCHER '$url' |"); + open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |"); my $line = <SYMBOL>; $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines close(SYMBOL); @@ -2708,45 +2305,33 @@ sub CheckSymbolPage { sub IsProfileURL { my $profile_name = shift; - if (-f $profile_name) { - printf STDERR "Using local file $profile_name.\n"; - return 0; - } - return 1; + my ($host, $port, $path) = ParseProfileURL($profile_name); + return defined($host) and defined($port) and defined($path); } sub ParseProfileURL { my $profile_name = shift; - - if (!defined($profile_name) || $profile_name eq "") { - return (); - } - - # Split profile URL - matches all non-empty strings, so no test. - $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; - - my $proto = $1 || "http://"; - my $hostport = $2; - my $prefix = $3; - my $profile = $4 || "/"; - - my $host = $hostport; - $host =~ s/:.*//; - - my $baseurl = "$proto$hostport$prefix"; - return ($host, $baseurl, $profile); + if (defined($profile_name) && + $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|.*($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) { + # $6 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after + # the hostname, as long as that everything is the empty string, + # a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc. + # So "$6 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "". + return ($2, $3, $6 || $5); + } + return (); } # We fetch symbols from the first profile argument. sub SymbolPageURL { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - return "$baseURL$SYMBOL_PAGE"; + my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); + return "http://$host:$port$SYMBOL_PAGE"; } sub FetchProgramName() { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - my $url = "$baseURL$PROGRAM_NAME_PAGE"; - my $command_line = "$URL_FETCHER '$url'"; + my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); + my $url = "http://$host:$port$PROGRAM_NAME_PAGE"; + my $command_line = "$WGET $WGET_FLAGS -qO- '$url'"; open(CMDLINE, "$command_line |") or error($command_line); my $cmdline = <CMDLINE>; $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2763,7 +2348,7 @@ sub FetchProgramName() { # curl. Redirection happens on borg hosts. sub ResolveRedirectionForCurl { my $url = shift; - my $command_line = "$URL_FETCHER --head '$url'"; + my $command_line = "$CURL -s --head '$url'"; open(CMDLINE, "$command_line |") or error($command_line); while (<CMDLINE>) { s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2775,20 +2360,6 @@ sub ResolveRedirectionForCurl { return $url; } -# Add a timeout flat to URL_FETCHER -sub AddFetchTimeout { - my $fetcher = shift; - my $timeout = shift; - if (defined($timeout)) { - if ($fetcher =~ m/\bcurl -s/) { - $fetcher .= sprintf(" --max-time %d", $timeout); - } elsif ($fetcher =~ m/\brpcget\b/) { - $fetcher .= sprintf(" --deadline=%d", $timeout); - } - } - return $fetcher; -} - # Reads a symbol map from the file handle name given as $1, returning # the resulting symbol map. Also processes variables relating to symbols. 
# Currently, the only variable processed is 'binary=<value>' which updates @@ -2833,6 +2404,7 @@ sub FetchSymbols { my $pcset = shift; my $symbol_map = shift; + my %seen = (); my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq @@ -2842,16 +2414,12 @@ sub FetchSymbols { open(POSTFILE, ">$main::tmpfile_sym"); print POSTFILE $post_data; close(POSTFILE); - + my $url = SymbolPageURL(); - - my $command_line; - if ($URL_FETCHER =~ m/\bcurl -s/) { - $url = ResolveRedirectionForCurl($url); - $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'"; - } else { - $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'"; - } + # Here we use curl for sending data via POST since old + # wget doesn't have --post-file option. + $url = ResolveRedirectionForCurl($url); + my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'"; # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. my $cppfilt = $obj_tool_map{"c++filt"}; open(SYMBOL, "$command_line | $cppfilt |") or error($command_line); @@ -2896,10 +2464,10 @@ sub BaseName { sub MakeProfileBaseName { my ($binary_name, $profile_name) = @_; - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my ($host, $port, $path) = ParseProfileURL($profile_name); my $binary_shortname = BaseName($binary_name); - return sprintf("%s.%s.%s", - $binary_shortname, $main::op_time, $host); + return sprintf("%s.%s.%s-port%s", + $binary_shortname, $main::op_time, $host, $port); } sub FetchDynamicProfile { @@ -2911,7 +2479,7 @@ sub FetchDynamicProfile { if (!IsProfileURL($profile_name)) { return $profile_name; } else { - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my ($host, $port, $path) = ParseProfileURL($profile_name); if ($path eq "" || $path eq "/") { # Missing type specifier defaults to cpu-profile $path = $PROFILE_PAGE; @@ -2919,28 +2487,37 @@ sub FetchDynamicProfile { my $profile_file = MakeProfileBaseName($binary_name, $profile_name); - my $url = "$baseURL$path"; - my $fetch_timeout = undef; - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { - if ($path =~ m/[?]/) { - $url .= "&"; + my $url; + my $wget_timeout; + if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) { + if ($path =~ m/$PROFILE_PAGE/) { + $url = sprintf("http://$host:$port$path?seconds=%d", + $main::opt_seconds); } else { - $url .= "?"; + if ($profile_name =~ m/[?]/) { + $profile_name .= "&" + } else { + $profile_name .= "?" + } + $url = sprintf("http://$profile_name" . "seconds=%d", + $main::opt_seconds); } - $url .= sprintf("seconds=%d", $main::opt_seconds); - $fetch_timeout = $main::opt_seconds * 1.01 + 60; + $wget_timeout = sprintf("--timeout=%d", + int($main::opt_seconds * 1.01 + 60)); } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; - $profile_file .= $suffix; + $profile_file .= "$suffix"; + $url = "http://$host:$port$path"; + $wget_timeout = ""; } my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof"); - if (! 
-d $profile_dir) { + if (!(-d $profile_dir)) { mkdir($profile_dir) - || die("Unable to create profile directory $profile_dir: $!\n"); + || die("Unable to create profile directory $profile_dir: $!\n"); } my $tmp_profile = "$profile_dir/.tmp.$profile_file"; my $real_profile = "$profile_dir/$profile_file"; @@ -2949,15 +2526,14 @@ sub FetchDynamicProfile { return $real_profile; } - my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout); - my $cmd = "$fetcher '$url' > '$tmp_profile'"; - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/){ + my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'"; + if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){ print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; if ($encourage_patience) { print STDERR "Be patient...\n"; } } else { - print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; + print STDERR "Fetching $path profile from $host:$port to\n ${real_profile}\n"; } (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); @@ -3004,7 +2580,6 @@ sub FetchDynamicProfilesRecurse { } else { $position = 1 | ($position << 1); TryCollectProfile($maxlevel, $level, $position); - cleanup(); exit(0); } } @@ -3028,69 +2603,22 @@ sub TryCollectProfile { # Provide a small streaming-read module to handle very large # cpu-profile files. Stream in chunks along a sliding window. -# Provides an interface to get one 'slot', correctly handling -# endian-ness differences. A slot is one 32-bit or 64-bit word -# (depending on the input profile). We tell endianness and bit-size -# for the profile by looking at the first 8 bytes: in cpu profiles, -# the second slot is always 3 (we'll accept anything that's not 0). BEGIN { package CpuProfileStream; sub new { - my ($class, $file, $fname) = @_; - my $self = { file => $file, - base => 0, - stride => 512 * 1024, # must be a multiple of bitsize/8 - slots => [], - unpack_code => "", # N for big-endian, V for little + my ($class, $file) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of |long| + slots => [] }; bless $self, $class; # Let unittests adjust the stride if ($main::opt_test_stride > 0) { $self->{stride} = $main::opt_test_stride; } - # Read the first two slots to figure out bitsize and endianness. - my $slots = $self->{slots}; - my $str; - read($self->{file}, $str, 8); - # Set the global $address_length based on what we see here. - # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). - $address_length = ($str eq (chr(0)x8)) ? 16 : 8; - if ($address_length == 8) { - if (substr($str, 6, 2) eq chr(0)x2) { - $self->{unpack_code} = 'V'; # Little-endian. - } elsif (substr($str, 4, 2) eq chr(0)x2) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**16\n"); - } - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # If we're a 64-bit profile, make sure we're a 64-bit-capable - # perl. Otherwise, each slot will be represented as a float - # instead of an int64, losing precision and making all the - # 64-bit addresses right. We *could* try to handle this with - # software emulation of 64-bit ints, but that's added complexity - # for no clear benefit (yet). We use 'Q' to test for 64-bit-ness; - # perl docs say it's only available on 64-bit perl systems. - my $has_q = 0; - eval { $has_q = pack("Q", "1") ? 
1 : 1; }; - if (!$has_q) { - ::error("$fname: need a 64-bit perl to process this 64-bit profile.\n"); - } - read($self->{file}, $str, 8); - if (substr($str, 4, 4) eq chr(0)x4) { - # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. - $self->{unpack_code} = 'V'; # Little-endian. - } elsif (substr($str, 0, 4) eq chr(0)x4) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**32\n"); - } - my @pair = unpack($self->{unpack_code} . "*", $str); - # Since we know one of the pair is 0, it's fine to just add them. - @$slots = (0, $pair[0] + $pair[1]); - } + $self->overflow(); return $self; } @@ -3101,25 +2629,7 @@ BEGIN { $self->{base} += $#$slots + 1; # skip over data we're replacing my $str; read($self->{file}, $str, $self->{stride}); - if ($address_length == 8) { # the 32-bit case - # This is the easy case: unpack provides 32-bit unpacking primitives. - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # We need to unpack 32 bits at a time and combine. - my @b32_values = unpack($self->{unpack_code} . "*", $str); - my @b64_values = (); - for (my $i = 0; $i < $#b32_values; $i += 2) { - # TODO(csilvers): if this is a 32-bit perl, the math below - # could end up in a too-large int, which perl will promote - # to a double, losing necessary precision. Deal with that. - if ($self->{unpack_code} eq 'V') { # little-endian - push(@b64_values, $b32_values[$i] + $b32_values[$i+1] * (2**32)); - } else { - push(@b64_values, $b32_values[$i] * (2**32) + $b32_values[$i+1]); - } - } - @$slots = @b64_values; - } + @$slots = unpack("L*", $str); } # Access the i-th long in the file (logically), or -1 at EOF. @@ -3128,16 +2638,16 @@ BEGIN { my $slots = $self->{slots}; while ($#$slots >= 0) { if ($idx < $self->{base}) { - # The only time we expect a reference to $slots[$i - something] - # after referencing $slots[$i] is reading the very first header. - # Since $stride > |header|, that shouldn't cause any lookback - # errors. And everything after the header is sequential. - print STDERR "Unexpected look-back reading CPU profile"; - return -1; # shrug, don't know what better to return + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return } elsif ($idx > $self->{base} + $#$slots) { - $self->overflow(); + $self->overflow(); } else { - return $slots->[$idx - $self->{base}]; + return $slots->[$idx - $self->{base}]; } } # If we get here, $slots is [], which means we've reached EOF @@ -3145,44 +2655,6 @@ BEGIN { } } -# Return the next line from the profile file, assuming it's a text -# line (which in this case means, doesn't start with a NUL byte). If -# it's not a text line, return "". At EOF, return undef, like perl does. -# Input file should be in binmode. 
-sub ReadProfileLine { - local *PROFILE = shift; - my $firstchar = ""; - my $line = ""; - read(PROFILE, $firstchar, 1); - seek(PROFILE, -1, 1); # unread the firstchar - if ($firstchar eq "\0") { - return ""; - } - $line = <PROFILE>; - if (defined($line)) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - } - return $line; -} - -sub IsSymbolizedProfileFile { - my $file_name = shift; - if (!(-e $file_name) || !(-r $file_name)) { - return 0; - } - # Check if the file contains a symbol-section marker. - open(TFILE, "<$file_name"); - binmode TFILE; - my $firstline = ReadProfileLine(*TFILE); - close(TFILE); - if (!$firstline) { - return 0; - } - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - return $firstline =~ /^--- *$symbol_marker/; -} - # Parse profile generated by common/profiler.cc and return a reference # to a map: # $result->{version} Version number of profile file @@ -3217,17 +2689,28 @@ sub ReadProfile { # whole firstline, since it may be gigabytes(!) of data. open(PROFILE, "<$fname") || error("$fname: $!\n"); binmode PROFILE; # New perls do UTF-8 processing - my $header = ReadProfileLine(*PROFILE); - if (!defined($header)) { # means "at EOF" - error("Profile is empty.\n"); + my $firstchar = ""; + my $header = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar ne "\0") { + $header = <PROFILE>; + $header =~ s/\r//g; # turn windows-looking lines into unix-looking lines } my $symbols; if ($header =~ m/^--- *$symbol_marker/o) { - # Read the symbol section of the symbolized profile file. + # read the symbol section of the symbolized profile file $symbols = ReadSymbols(*PROFILE{IO}); - # Read the next line to get the header for the remaining profile. - $header = ReadProfileLine(*PROFILE) || ""; + + # read the next line to get the header for the remaining profile + $header = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar ne "\0") { + $header = <PROFILE>; + $header =~ s/\r//g; + } } my $result; @@ -3269,33 +2752,6 @@ sub ReadProfile { return $result; } -# Subtract one from caller pc so we map back to call instr. -# However, don't do this if we're reading a symbolized profile -# file, in which case the subtract-one was done when the file -# was written. -# -# We apply the same logic to all readers, though ReadCPUProfile uses an -# independent implementation. -sub FixCallerAddresses { - my $stack = shift; - if ($main::use_symbolized_profile) { - return $stack; - } else { - $stack =~ /(\s)/; - my $delimiter = $1; - my @addrs = split(' ', $stack); - my @fixedaddrs; - $#fixedaddrs = $#addrs; - if ($#addrs >= 0) { - $fixedaddrs[0] = $addrs[0]; - } - for (my $i = 1; $i <= $#addrs; $i++) { - $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); - } - return join $delimiter, @fixedaddrs; - } -} - # CPU profile reader sub ReadCPUProfile { my $prog = shift; @@ -3307,7 +2763,10 @@ sub ReadCPUProfile { my $pcs = {}; # Parse string into array of slots. - my $slots = CpuProfileStream->new(*PROFILE, $fname); + # L! cannot be used because with a native 64-bit build, it will cause + # 1) a valid 64-bit profile to use the 32-bit codepath, and + # 2) a valid 32-bit profile to be unrecognized. + my $slots = CpuProfileStream->new(*PROFILE); # Read header. 
The current header version is a 5-element structure # containing: @@ -3316,50 +2775,108 @@ sub ReadCPUProfile { # 2: format version (0) # 3: sampling period (usec) # 4: unused padding (always 0) + # The header words are 32-bit or 64-bit depending on the ABI of the program + # that generated the profile. In the 64-bit case, since our x86-architecture + # machines are little-endian, the actual value of each of these elements is + # in the first 32-bit word, and the second is always zero. The @slots array + # above was read as a sequence of 32-bit words in both cases, so we need to + # explicitly check for both cases. A typical slot sequence for each is: + # 32-bit: 0 3 0 100 0 + # 64-bit: 0 0 3 0 0 0 100 0 0 0 + # if ($slots->get(0) != 0 ) { error("$fname: not a profile file, or old format profile file\n"); } - $i = 2 + $slots->get(1); - $version = $slots->get(2); - $period = $slots->get(3); - # Do some sanity checking on these header values. - if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { - error("$fname: not a profile file, or corrupted profile file\n"); - } - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $d = $slots->get($i++); - if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? - my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); - print STDERR "At index $i (address $addr):\n"; - error("$fname: stack trace depth >= 2**32\n"); - } - if ($slots->get($i) == 0) { - # End of profile data marker + if ($slots->get(1) >= 3) { + # Normal 32-bit header: + $version = $slots->get(2); + $period = $slots->get(3); + $i = 2 + $slots->get(1); + $address_length = 8; + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($slots->get($i) == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = sprintf("%08x", $slots->get($i+$j)); + $pcs->{$pc} = 1; + push @k, $pc; + } + + AddEntry($profile, (join "\n", @k), $n); $i += $d; - last; } - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pc = $slots->get($i+$j); - # Subtract one from caller pc so we map back to call instr. - # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. - if ($j > 0 && !$main::use_symbolized_profile) { - $pc--; + # Normal 64-bit header: All entries are doubled in size. The first + # word (little-endian) should contain the real value, the second should + # be zero. 
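The two header layouts spelled out in the comment above are easier to see outside the stream reader. Below is a rough C++ sketch of the same detection logic — DetectProfileWordSize is a hypothetical standalone helper, not part of this change, and it assumes the file has already been read as a sequence of 32-bit words in host order:

#include <cstdint>
#include <vector>

// Slots of a CPU profile, already split into 32-bit words.
// 32-bit header (5 slots):  0, hdr_words(>=3), version, period, 0
// 64-bit header: every slot doubled, with the high word always zero.
int DetectProfileWordSize(const std::vector<uint32_t>& s) {
  if (s.size() >= 5 && s[0] == 0 && s[1] >= 3)
    return 32;                       // normal 32-bit header
  if (s.size() >= 10 && s[0] == 0 && s[1] == 0 && s[2] >= 3 &&
      s[3] == 0 && s[5] == 0 && s[7] == 0)
    return 64;                       // doubled slots, zero high words
  return 0;                          // not a recognized CPU profile
}

This mirrors the Perl checks in the hunk: slot 1 holding a plausible header word count means a 32-bit profile, while a 64-bit profile must show zeros in the odd-numbered high-word slots.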
+ } elsif ($slots->get(1) != 0 || + $slots->get(2) < 3 || + $slots->get(3) != 0 || + $slots->get(5) != 0 || + $slots->get(7) != 0) { + error("$fname: not a profile file, or old format profile file\n"); + } else { + $version = $slots->get(4); + $period = $slots->get(6); + $i = 4 + 2 * $slots->get(2); + $address_length = 16; + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $nhi = $slots->get($i++); + # Huge counts may coerce to floating point, keeping scale, not precision + if ($nhi != 0) { $n += $nhi*(2**32); } + my $d = $slots->get($i++); + if ($slots->get($i++) != 0) { + my $addr = sprintf("%o", 4 * $i); + print STDERR "At index $i ($addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); } - $pc = sprintf("%0*x", $address_length, $pc); - $pcs->{$pc} = 1; - push @k, $pc; - } + if ($slots->get($i) == 0 && $slots->get($i+1) == 0) { + # End of profile data marker + $i += 2 * $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pclo = $slots->get($i++); + my $pchi = $slots->get($i++); + if ($pclo == -1 || $pchi == -1) { + error("$fname: Unexpected EOF when reading stack of depth $d\n"); + } + + # Subtract one from caller pc so we map back to call instr. + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. + if ($j > 0 && !$main::use_symbolized_profile) { + if ($pclo == 0) { + $pchi--; + $pclo = 0xffffffff; + } else { + $pclo--; + } + } - AddEntry($profile, (join "\n", @k), $n); - $i += $d; + my $pc = sprintf("%08x%08x", $pchi, $pclo); + $pcs->{$pc} = 1; + push @k, $pc; + } + AddEntry($profile, (join "\n", @k), $n); + } } # Parse map @@ -3430,18 +2947,18 @@ sub ReadHeapProfile { # found for profiles generated locally, and the others for # remote profiles. 
if (($type eq "heapprofile") || ($type !~ /heap/) ) { - # No need to adjust for the sampling rate with heap-profiler-derived data - $sampling_algorithm = 0; + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; } elsif ($type =~ /_v2/) { - $sampling_algorithm = 2; # version 2 sampling + $sampling_algorithm = 2; # version 2 sampling if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period); - } + $sample_adjustment = int($sample_period); + } } else { - $sampling_algorithm = 1; # version 1 sampling + $sampling_algorithm = 1; # version 1 sampling if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period)/2; - } + $sample_adjustment = int($sample_period)/2; + } } } else { # We detect whether or not this is a remote-heap profile by checking @@ -3453,7 +2970,7 @@ sub ReadHeapProfile { my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); if (($n1 == $n2) && ($s1 == $s2)) { # This is likely to be a remote-heap based sample profile - $sampling_algorithm = 1; + $sampling_algorithm = 1; } } } @@ -3467,7 +2984,7 @@ sub ReadHeapProfile { print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; } else { printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", - $sample_adjustment); + $sample_adjustment); } if ($sampling_algorithm > 1) { # We don't bother printing anything for the original version (version 1) @@ -3484,7 +3001,7 @@ sub ReadHeapProfile { if (/^MAPPED_LIBRARIES:/) { # Read the /proc/self/maps data while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines + s/\r//g; # turn windows-looking lines into unix-looking lines $map .= $_; } last; @@ -3494,7 +3011,7 @@ sub ReadHeapProfile { # Read /proc/self/maps data as formatted by DumpAddressMap() my $buildvar = ""; while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines + s/\r//g; # turn windows-looking lines into unix-looking lines # Parse "build=<dir>" specification if supplied if (m/^\s*build=(.*)\n/) { $buildvar = $1; @@ -3549,7 +3066,7 @@ sub ReadHeapProfile { } my @counts = ($n1, $s1, $n2, $s2); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + AddEntries($profile, $pcs, $stack, $counts[$index]); } } @@ -3569,7 +3086,7 @@ sub ReadSynchProfile { my $profile = {}; my $pcs = {}; my $sampling_period = 1; - my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $cyclespernanosec = 2.8; # Default assumption for old binaries my $seen_clockrate = 0; my $line; @@ -3595,7 +3112,7 @@ sub ReadSynchProfile { $count *= $sampling_period; my @values = ($cycles, $count, $cycles / $count); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); + AddEntries($profile, $pcs, $stack, $values[$index]); } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { @@ -3610,7 +3127,7 @@ sub ReadSynchProfile { # Adjust for sampling done by application $cycles *= $sampling_period; - AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); + AddEntries($profile, $pcs, $stack, $cycles); } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { my ($variable, $value) = ($1,$2); @@ -3791,8 +3308,8 @@ sub ParseTextSectionHeaderFromOtool { } elsif ($line =~ /segname (\w+)/) { $segname = $1; } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && - $sectname eq "__text" && - $segname eq "__TEXT")) { + $sectname eq "__text" && + $segname eq "__TEXT")) { next; } 
elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { $vma = $1; @@ -3852,7 +3369,7 @@ sub ParseLibraries { my $finish; my $offset; my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*)?)$/i) { # Full line from /proc/self/maps. Example: # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so $start = HexExtend($1); @@ -4158,7 +3675,7 @@ sub MapToSymbols { if ($debug) { print("---- $image ---\n"); } for (my $i = 0; $i <= $#{$pclist}; $i++) { # addr2line always reads hex addresses, and does not need '0x' prefix. - if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } + if ($debug) { printf("%s\n", $pclist->[$i]); } printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); if (defined($sep_address)) { printf ADDRESSES ("%s\n", $sep_address); @@ -4210,7 +3727,7 @@ sub MapToSymbols { $symbols->{$pcstr} = $sym; } unshift(@{$sym}, $function, $filelinenum, $fullfunction); - if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } if (!defined($sep_address)) { # Inlining is off, se this entry ends immediately $count++; @@ -4266,7 +3783,7 @@ sub MapSymbolsWithNM { } return 1; } - + sub ShortFunctionName { my $function = shift; while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types @@ -4313,8 +3830,6 @@ sub ConfigureObjTools { if ($file_type =~ /Mach-O/) { # OS X uses otool to examine Mach-O files, rather than objdump. $obj_tool_map{"otool"} = "otool"; - $obj_tool_map{"addr2line"} = "false"; # no addr2line - $obj_tool_map{"objdump"} = "false"; # no objdump } # Go fill in %obj_tool_map with the pathnames to use: @@ -4361,8 +3876,9 @@ sub ConfigureTool { sub cleanup { unlink($main::tmpfile_sym); - unlink(keys %main::tempnames); - + for (my $i = 0; $i < $main::next_tmpfile; $i++) { + unlink(PsTempName($i)); + } # We leave any collected profiles in $HOME/pprof in case the user wants # to look at them later. We print a message informing them of this. if ((scalar(@main::profile_files) > 0) && @@ -4405,7 +3921,7 @@ sub GetProcedureBoundariesViaNm { my $routine = ""; while (<NM>) { s/\r//g; # turn windows-looking lines into unix-looking lines - if (m/^\s*([0-9a-f]+) (.) (..*)/) { + if (m/^([0-9a-f]+) (.) (..*)/) { my $start_val = $1; my $type = $2; my $this_routine = $3; @@ -4426,12 +3942,12 @@ sub GetProcedureBoundariesViaNm { # we'll just go ahead and process the first entry (which never # got touched in the queue), and ignore the others. if ($start_val eq $last_start && $type =~ /t/i) { - # We are the 'T' symbol at this address, replace previous symbol. - $routine = $this_routine; - next; + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; } elsif ($start_val eq $last_start) { - # We're not the 'T' symbol at this address, so ignore us. - next; + # We're not the 'T' symbol at this address, so ignore us. + next; } if ($this_routine eq $sep_symbol) { @@ -4446,7 +3962,7 @@ sub GetProcedureBoundariesViaNm { if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($start_val)]; + HexExtend($start_val)]; } $last_start = $start_val; $routine = $this_routine; @@ -4465,8 +3981,9 @@ sub GetProcedureBoundariesViaNm { # TODO(csilvers): do better here. 
if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($last_start)]; + HexExtend($last_start)]; } + return $symbol_table; } @@ -4512,13 +4029,9 @@ sub GetProcedureBoundaries { # -D to at least get *exported* symbols. If we can't use --demangle, # we use c++filt instead, if it exists on this system. my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - "$nm -D -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - # 6nm is for Go binaries - "6nm $image 2>/dev/null | sort", - ); - + " $image 2>/dev/null $cppfilt_flag", + "$nm -D -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag"); # If the executable is an MS Windows PDB-format executable, we'll # have set up obj_tool_map("nm_pdb"). In this case, we actually # want to use both unix nm and windows-specific nm_pdb, since @@ -4750,3 +4263,4 @@ sub RunUnitTests { } exit ($error_count); } + diff --git a/third_party/tcmalloc/chromium/src/span.h b/third_party/tcmalloc/chromium/src/span.h index b3483ca..ab9a796 100644 --- a/third_party/tcmalloc/chromium/src/span.h +++ b/third_party/tcmalloc/chromium/src/span.h @@ -60,10 +60,6 @@ struct Span { int value[64]; #endif - void* start_ptr() { - return reinterpret_cast<void*>(start << kPageShift); - } - // What freelist the span is on: IN_USE if on none, or normal or returned enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST }; }; diff --git a/third_party/tcmalloc/chromium/src/stacktrace.cc b/third_party/tcmalloc/chromium/src/stacktrace.cc index 68cb865..d158eea 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace.cc +++ b/third_party/tcmalloc/chromium/src/stacktrace.cc @@ -57,45 +57,7 @@ #include "stacktrace_config.h" #if defined(STACKTRACE_INL_HEADER) - -#define IS_STACK_FRAMES 0 -#define IS_WITH_CONTEXT 0 -#define GET_STACK_TRACE_OR_FRAMES \ - GetStackTrace(void **result, int max_depth, int skip_count) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 1 -#define IS_WITH_CONTEXT 0 -#define GET_STACK_TRACE_OR_FRAMES \ - GetStackFrames(void **result, int *sizes, int max_depth, int skip_count) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 0 -#define IS_WITH_CONTEXT 1 -#define GET_STACK_TRACE_OR_FRAMES \ - GetStackTraceWithContext(void **result, int max_depth, \ - int skip_count, const void *ucp) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 1 -#define IS_WITH_CONTEXT 1 -#define GET_STACK_TRACE_OR_FRAMES \ - GetStackFramesWithContext(void **result, int *sizes, int max_depth, \ - int skip_count, const void *ucp) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - +# include STACKTRACE_INL_HEADER #elif 0 // This is for the benefit of code analysis tools that may have // trouble with the computed #include above. 
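The block removed from stacktrace.cc above drives one shared implementation file: the same -inl header is included four times with different IS_STACK_FRAMES / IS_WITH_CONTEXT / GET_STACK_TRACE_OR_FRAMES settings, so a single function body expands into all four entry points. A compressed single-file sketch of that pattern (names are illustrative; the real code re-includes a header rather than reusing a macro):

#include <cstdio>

// Shared body; tcmalloc keeps this in an -inl.h and #includes it
// repeatedly, with a macro selecting the variant-only code.
#define STACK_BODY {                                        \
    int n = 0;                                              \
    /* ... unwind loop would fill result[0..n) here ... */  \
    IF_FRAMES(sizes[n] = 0;)  /* only in GetStackFrames */  \
    (void)result; (void)max_depth; (void)skip_count;        \
    return n;                                               \
  }

#define IF_FRAMES(x)
int GetStackTrace(void** result, int max_depth, int skip_count) STACK_BODY
#undef IF_FRAMES

#define IF_FRAMES(x) x
int GetStackFrames(void** result, int* sizes, int max_depth,
                   int skip_count) STACK_BODY
#undef IF_FRAMES

int main() {
  void* pcs[8]; int sizes[8];
  std::printf("%d %d\n", GetStackTrace(pcs, 8, 0),
              GetStackFrames(pcs, sizes, 8, 0));
  return 0;
}

The revert trades this single-body scheme back for the duplicated GetStackTrace/GetStackFrames pairs whose rationale is repeated in the comments of each restored -inl.h below.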
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_config.h b/third_party/tcmalloc/chromium/src/stacktrace_config.h index 18f16ab..b58ab1d 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_config.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_config.h @@ -53,7 +53,6 @@ # define STACKTRACE_SKIP_CONTEXT_ROUTINES 1 # elif defined(HAVE_LIBUNWIND_H) // a proxy for having libunwind installed # define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h" -# define STACKTRACE_USES_LIBUNWIND 1 # elif defined(__linux) # error Cannnot calculate stack trace: need either libunwind or frame-pointers (see INSTALL file) # else diff --git a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h index 0e72ee7..490cd9d 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h @@ -34,32 +34,57 @@ // // Note: The glibc implementation may cause a call to malloc. // This can cause a deadlock in HeapProfiler. - -#ifndef BASE_STACKTRACE_GENERIC_INL_H_ -#define BASE_STACKTRACE_GENERIC_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - #include <execinfo.h> #include <string.h> #include "google/stacktrace.h" -#endif // BASE_STACKTRACE_GENERIC_INL_H_ -// Note: this part of the file is included several times. -// Do not put globals below. +// If you change this function, also change GetStackFrames below. +int GetStackTrace(void** result, int max_depth, int skip_count) { + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + + size = backtrace(stack, kStackLength); + skip_count++; // we want to skip the current frame as well + int result_count = size - skip_count; + if (result_count < 0) + result_count = 0; + if (result_count > max_depth) + result_count = max_depth; + for (int i = 0; i < result_count; i++) + result[i] = stack[i + skip_count]; + + return result_count; +} -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. // -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -int GET_STACK_TRACE_OR_FRAMES { +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". 
But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { static const int kStackLength = 64; void * stack[kStackLength]; int size; @@ -72,12 +97,10 @@ int GET_STACK_TRACE_OR_FRAMES { if (result_count > max_depth) result_count = max_depth; for (int i = 0; i < result_count; i++) - result[i] = stack[i + skip_count]; + pcs[i] = stack[i + skip_count]; -#if IS_STACK_FRAMES // No implementation for finding out the stack frame sizes yet. memset(sizes, 0, sizeof(*sizes) * result_count); -#endif return result_count; } diff --git a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h index a1d5249..d9d829a 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h @@ -32,11 +32,6 @@ // // Produce stack trace using libunwind -#ifndef BASE_STACKTRACE_LIBINWIND_INL_H_ -#define BASE_STACKTRACE_LIBINWIND_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - // We only need local unwinder. #define UNW_LOCAL_ONLY @@ -57,30 +52,12 @@ extern "C" { // cases, we return 0 to indicate the situation. static __thread int recursive; -#endif // BASE_STACKTRACE_LIBINWIND_INL_H_ - -// Note: this part of the file is included several times. -// Do not put globals below. - -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() -// -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -int GET_STACK_TRACE_OR_FRAMES { +// If you change this function, also change GetStackFrames below. 
+int GetStackTrace(void** result, int max_depth, int skip_count) { void *ip; int n = 0; unw_cursor_t cursor; unw_context_t uc; -#if IS_STACK_FRAMES - unw_word_t sp = 0, next_sp = 0; -#endif if (recursive) { return 0; @@ -90,39 +67,90 @@ int GET_STACK_TRACE_OR_FRAMES { unw_getcontext(&uc); int ret = unw_init_local(&cursor, &uc); assert(ret >= 0); - skip_count++; // Do not include current frame - - while (skip_count--) { - if (unw_step(&cursor) <= 0) { - goto out; - } -#if IS_STACK_FRAMES - if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { - goto out; - } -#endif - } + skip_count++; // Do not include the "GetStackTrace" frame while (n < max_depth) { if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { break; } -#if IS_STACK_FRAMES - sizes[n] = 0; -#endif - result[n++] = ip; + if (skip_count > 0) { + skip_count--; + } else { + result[n++] = ip; + } if (unw_step(&cursor) <= 0) { break; } -#if IS_STACK_FRAMES + } + --recursive; + return n; +} + +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. +// +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { + void *ip; + int n = 0; + unw_cursor_t cursor; + unw_context_t uc; + unw_word_t sp = 0, next_sp = 0; + + if (recursive) { + return 0; + } + ++recursive; + + unw_getcontext(&uc); + RAW_CHECK(unw_init_local(&cursor, &uc) >= 0, "unw_init_local failed"); + skip_count++; // Do not include the "GetStackFrames" frame + + while (skip_count--) { + if (unw_step(&cursor) <= 0 || + unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) { + goto out; + } + } + while (n < max_depth) { sp = next_sp; - if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) , 0) { + if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) + break; + if (unw_step(&cursor) <= 0 || + unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { + // We couldn't step any further (possibly because we reached _start). + // Provide the last good PC we've got, and get out. 
+ sizes[n] = 0; + pcs[n++] = ip; break; } - sizes[n - 1] = next_sp - sp; -#endif + sizes[n] = next_sp - sp; + pcs[n++] = ip; } -out: + out: --recursive; return n; } diff --git a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h index 9a07eea..5631e49 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h @@ -36,11 +36,6 @@ // http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK // Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882 -#ifndef BASE_STACKTRACE_POWERPC_INL_H_ -#define BASE_STACKTRACE_POWERPC_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - #include <stdint.h> // for uintptr_t #include <stdlib.h> // for NULL #include <google/stacktrace.h> @@ -76,23 +71,9 @@ static void **NextStackFrame(void **old_sp) { // This ensures that GetStackTrace stes up the Link Register properly. void StacktracePowerPCDummyFunction() __attribute__((noinline)); void StacktracePowerPCDummyFunction() { __asm__ volatile(""); } -#endif // BASE_STACKTRACE_POWERPC_INL_H_ - -// Note: this part of the file is included several times. -// Do not put globals below. -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() -// -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -int GET_STACK_TRACE_OR_FRAMES { +// If you change this function, also change GetStackFrames below. +int GetStackTrace(void** result, int max_depth, int skip_count) { void **sp; // Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther) // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a @@ -114,29 +95,11 @@ int GET_STACK_TRACE_OR_FRAMES { // This routine forces the compiler (at least gcc) to push it anyway. StacktracePowerPCDummyFunction(); -#if IS_STACK_FRAMES - // Note we do *not* increment skip_count here for the SYSV ABI. If - // we did, the list of stack frames wouldn't properly match up with - // the list of return addresses. Note this means the top pc entry - // is probably bogus for linux/ppc (and other SYSV-ABI systems). -#else // The LR save area is used by the callee, so the top entry is bogus. skip_count++; -#endif int n = 0; while (sp && n < max_depth) { -#if IS_STACK_FRAMES - // The GetStackFrames routine is called when we are in some - // informational context (the failure signal handler for example). - // Use the non-strict unwinding rules to produce a stack trace - // that is as complete as possible (even if it contains a few bogus - // entries in some rare cases). - void **next_sp = NextStackFrame<false>(sp); -#else - void **next_sp = NextStackFrame<true>(sp); -#endif - if (skip_count > 0) { skip_count--; } else { @@ -157,15 +120,85 @@ int GET_STACK_TRACE_OR_FRAMES { #else #error Need to specify the PPC ABI for your archiecture. #endif + } + // Use strict unwinding rules. 
+    sp = NextStackFrame<true>(sp);
+  }
+  return n;
+}
+
+// If you change this function, also change GetStackTrace above:
+//
+// This GetStackFrames routine shares a lot of code with GetStackTrace
+// above.  This code could have been refactored into a common routine,
+// and then both GetStackTrace/GetStackFrames could call that routine.
+// There are two problems with that:
+//
+// (1) The performance of the refactored code suffers substantially - the
+//     refactored routine needs to be able to record the stack trace when
+//     called from GetStackTrace, and both the stack trace and stack frame
+//     sizes when called from GetStackFrames - this introduces enough new
+//     conditionals that GetStackTrace performance can degrade by as much
+//     as 50%.
+//
+// (2) Whether the refactored routine gets inlined into GetStackTrace and
+//     GetStackFrames depends on the compiler, and we can't guarantee the
+//     behavior either way, even with "__attribute__ ((always_inline))"
+//     or "__attribute__ ((noinline))".  But we need this guarantee or the
+//     frame counts may be off by one.
+//
+// Both (1) and (2) can be addressed without this code duplication, by
+// clever use of template functions, and by defining GetStackTrace and
+// GetStackFrames as macros that expand to these template functions.
+// However, this approach comes with its own set of problems - namely,
+// macro and preprocessor trouble - for example, if GetStackTrace
+// and/or GetStackFrames is ever defined as a member function in some
+// class, we are in trouble.
+int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) {
+  void **sp;
+#ifdef __APPLE__
+  __asm__ volatile ("mr %0,r1" : "=r" (sp));
+#else
+  __asm__ volatile ("mr %0,1" : "=r" (sp));
+#endif
-#if IS_STACK_FRAMES
+  StacktracePowerPCDummyFunction();
+  // Note we do *not* increment skip_count here for the SYSV ABI.  If
+  // we did, the list of stack frames wouldn't properly match up with
+  // the list of return addresses.  Note this means the top pc entry
+  // is probably bogus for linux/ppc (and other SYSV-ABI systems).
+
+  int n = 0;
+  while (sp && n < max_depth) {
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<false>(sp);
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+#if defined(_CALL_AIX) || defined(_CALL_DARWIN)
+      pcs[n++] = *(sp+2);
+#elif defined(_CALL_SYSV)
+      pcs[n++] = *(sp+1);
+#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
+      // This check is in case the compiler doesn't define _CALL_AIX/etc.
+      pcs[n++] = *(sp+2);
+#elif defined(__linux)
+      // This check is in case the compiler doesn't define _CALL_SYSV.
+      pcs[n++] = *(sp+1);
+#else
+#error Need to specify the PPC ABI for your architecture.
+#endif
       if (next_sp > sp) {
         sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
       } else {
         // A frame-size of 0 is used to indicate unknown frame size.
         sizes[n] = 0;
       }
-#endif
+      n++;
     }
     sp = next_sp;
   }
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
index bbd4c43..892cd7c 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
@@ -49,11 +49,6 @@
 // This code is inspired by a patch from David Vitek:
 //   http://code.google.com/p/google-perftools/issues/detail?id=83
 
-#ifndef BASE_STACKTRACE_WIN32_INL_H_
-#define BASE_STACKTRACE_WIN32_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
 #include "config.h"
 #include <windows.h>    // for GetProcAddress and GetModuleHandle
 #include <assert.h>
@@ -87,5 +82,3 @@ PERFTOOLS_DLL_DECL int GetStackFrames(void** /* pcs */,
   assert(0 == "Not yet implemented");
   return 0;
 }
-
-#endif  // BASE_STACKTRACE_WIN32_INL_H_
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
index 6753fdb..05701e7 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
@@ -31,13 +31,17 @@
 // Author: Sanjay Ghemawat
 //
 // Produce stack trace
-
-#ifndef BASE_STACKTRACE_X86_INL_H_
-#define BASE_STACKTRACE_X86_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
+//
+// NOTE: there is code duplication between
+// GetStackTrace, GetStackTraceWithContext, GetStackFrames and
+// GetStackFramesWithContext.  If you update one, update them all.
+//
+// There is no easy way to avoid this, because inlining
+// interferes with skip_count, and there is no portable
+// way to turn inlining off, or force it always on.
 
 #include "config.h"
+
 #include <stdlib.h>   // for NULL
 #include <assert.h>
 #if defined(HAVE_SYS_UCONTEXT_H)
@@ -186,8 +190,8 @@ static void **NextStackFrame(void **old_sp, const void *uc) {
       const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
       // This kernel does not use frame pointer in its VDSO code,
       // and so %ebp is not suitable for unwinding.
-      void **const reg_ebp =
-          reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]);
+      const void **const reg_ebp =
+          reinterpret_cast<const void **>(ucv->uc_mcontext.gregs[REG_EBP]);
       const unsigned char *const reg_eip =
          reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]);
       if (new_sp == reg_ebp &&
@@ -265,24 +269,209 @@ static void **NextStackFrame(void **old_sp, const void *uc) {
   return new_sp;
 }
 
-#endif  // BASE_STACKTRACE_X86_INL_H_
+// If you change this function, see NOTE at the top of file.
+// Same as above, but with signal ucontext_t pointer.
+int GetStackTraceWithContext(void** result,
+                             int max_depth,
+                             int skip_count,
+                             const void *uc) {
+  void **sp;
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
+  // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
+  // It's always correct on llvm, and the techniques below aren't (in
+  // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
+  // so we also prefer __builtin_frame_address when running under llvm.
+  sp = reinterpret_cast<void**>(__builtin_frame_address(0));
+#elif defined(__i386__)
+  // Stack frame format:
+  //    sp[0]   pointer to previous frame
+  //    sp[1]   caller address
+  //    sp[2]   first argument
+  //    ...
+  // NOTE: This will break under llvm, since result is a copy and not in sp[2]
+  sp = (void **)&result - 2;
+#elif defined(__x86_64__)
+  unsigned long rbp;
+  // Move the value of the register %rbp into the local variable rbp.
+  // We need 'volatile' to prevent this instruction from getting moved
+  // around during optimization to before function prologue is done.
+  // An alternative way to achieve this
+  // would be (before this __asm__ instruction) to call Noop() defined as
+  //   static void Noop() __attribute__ ((noinline));  // prevent inlining
+  //   static void Noop() { asm(""); }  // prevent optimizing-away
+  __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
+  // Arguments are passed in registers on x86-64, so we can't just
+  // offset from &result
+  sp = (void **) rbp;
+#else
+# error Using stacktrace_x86-inl.h on a non x86 architecture!
+#endif
+
+  int n = 0;
+  while (sp && n < max_depth) {
+    if (*(sp+1) == reinterpret_cast<void *>(0)) {
+      // In 64-bit code, we often see a frame that
+      // points to itself and has a return address of 0.
+      break;
+    }
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+      result[n++] = *(sp+1);
+    }
+    // Use strict unwinding rules.
+    sp = NextStackFrame<true, true>(sp, uc);
+  }
+  return n;
+}
+
+int GetStackTrace(void** result, int max_depth, int skip_count) {
+  void **sp;
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
+  // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
+  // It's always correct on llvm, and the techniques below aren't (in
+  // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
+  // so we also prefer __builtin_frame_address when running under llvm.
+  sp = reinterpret_cast<void**>(__builtin_frame_address(0));
+#elif defined(__i386__)
+  // Stack frame format:
+  //    sp[0]   pointer to previous frame
+  //    sp[1]   caller address
+  //    sp[2]   first argument
+  //    ...
+  // NOTE: This will break under llvm, since result is a copy and not in sp[2]
+  sp = (void **)&result - 2;
+#elif defined(__x86_64__)
+  unsigned long rbp;
+  // Move the value of the register %rbp into the local variable rbp.
+  // We need 'volatile' to prevent this instruction from getting moved
+  // around during optimization to before function prologue is done.
+  // An alternative way to achieve this
+  // would be (before this __asm__ instruction) to call Noop() defined as
+  //   static void Noop() __attribute__ ((noinline));  // prevent inlining
+  //   static void Noop() { asm(""); }  // prevent optimizing-away
+  __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
+  // Arguments are passed in registers on x86-64, so we can't just
+  // offset from &result
+  sp = (void **) rbp;
+#else
+# error Using stacktrace_x86-inl.h on a non x86 architecture!
+#endif
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
+  int n = 0;
+  while (sp && n < max_depth) {
+    if (*(sp+1) == reinterpret_cast<void *>(0)) {
+      // In 64-bit code, we often see a frame that
+      // points to itself and has a return address of 0.
+      break;
+    }
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+      result[n++] = *(sp+1);
+    }
+    // Use strict unwinding rules.
+    sp = NextStackFrame<true, false>(sp, NULL);
+  }
+  return n;
+}
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
+// If you change this function, see NOTE at the top of file.
+//
+// This GetStackFrames routine shares a lot of code with GetStackTrace
+// above.  This code could have been refactored into a common routine,
+// and then both GetStackTrace/GetStackFrames could call that routine.
+// There are two problems with that:
 //
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+// (1) The performance of the refactored code suffers substantially - the
+//     refactored routine needs to be able to record the stack trace when
+//     called from GetStackTrace, and both the stack trace and stack frame
+//     sizes when called from GetStackFrames - this introduces enough new
+//     conditionals that GetStackTrace performance can degrade by as much
+//     as 50%.
+//
+// (2) Whether the refactored routine gets inlined into GetStackTrace and
+//     GetStackFrames depends on the compiler, and we can't guarantee the
+//     behavior either way, even with "__attribute__ ((always_inline))"
+//     or "__attribute__ ((noinline))".  But we need this guarantee or the
+//     frame counts may be off by one.
+//
+// Both (1) and (2) can be addressed without this code duplication, by
+// clever use of template functions, and by defining GetStackTrace and
+// GetStackFrames as macros that expand to these template functions.
+// However, this approach comes with its own set of problems - namely,
+// macro and preprocessor trouble - for example, if GetStackTrace
+// and/or GetStackFrames is ever defined as a member function in some
+// class, we are in trouble.
+int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
+  void **sp;
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
+  // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
+  // It's always correct on llvm, and the techniques below aren't (in
+  // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
+  // so we also prefer __builtin_frame_address when running under llvm.
+  sp = reinterpret_cast<void**>(__builtin_frame_address(0));
+#elif defined(__i386__)
+  // Stack frame format:
+  //    sp[0]   pointer to previous frame
+  //    sp[1]   caller address
+  //    sp[2]   first argument
+  //    ...
+  sp = (void **)&pcs - 2;
+#elif defined(__x86_64__)
+  unsigned long rbp;
+  // Move the value of the register %rbp into the local variable rbp.
+  // We need 'volatile' to prevent this instruction from getting moved
+  // around during optimization to before function prologue is done.
+  // An alternative way to achieve this
+  // would be (before this __asm__ instruction) to call Noop() defined as
+  //   static void Noop() __attribute__ ((noinline));  // prevent inlining
+  //   static void Noop() { asm(""); }  // prevent optimizing-away
+  __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
+  // Arguments are passed in registers on x86-64, so we can't just
+  // offset from &result
+  sp = (void **) rbp;
+#else
+# error Using stacktrace_x86-inl.h on a non x86 architecture!
+#endif
+
+  int n = 0;
+  while (sp && n < max_depth) {
+    if (*(sp+1) == reinterpret_cast<void *>(0)) {
+      // In 64-bit code, we often see a frame that
+      // points to itself and has a return address of 0.
+      break;
+    }
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<false, false>(sp, NULL);
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+      pcs[n] = *(sp+1);
+      if (next_sp > sp) {
+        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
+      } else {
+        // A frame-size of 0 is used to indicate unknown frame size.
+        sizes[n] = 0;
+      }
+      n++;
+    }
+    sp = next_sp;
+  }
+  return n;
+}
-int GET_STACK_TRACE_OR_FRAMES {
+// If you change this function, see NOTE at the top of file.
+// Same as above, but with signal ucontext_t pointer.
+int GetStackFramesWithContext(void** pcs,
+                              int* sizes,
+                              int max_depth,
+                              int skip_count,
+                              const void *uc) {
   void **sp;
 #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
   // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
@@ -322,22 +511,22 @@ int GET_STACK_TRACE_OR_FRAMES {
       // points to itself and has a return address of 0.
       break;
     }
-#if !IS_WITH_CONTEXT
-    const void *const ucp = NULL;
-#endif
-    void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<false, true>(sp, uc);
     if (skip_count > 0) {
       skip_count--;
     } else {
-      result[n] = *(sp+1);
-#if IS_STACK_FRAMES
+      pcs[n] = *(sp+1);
       if (next_sp > sp) {
         sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
       } else {
         // A frame-size of 0 is used to indicate unknown frame size.
         sizes[n] = 0;
       }
-#endif
       n++;
     }
     sp = next_sp;
diff --git a/third_party/tcmalloc/chromium/src/symbolize.cc b/third_party/tcmalloc/chromium/src/symbolize.cc
index ff45e3e..9dd890e 100644
--- a/third_party/tcmalloc/chromium/src/symbolize.cc
+++ b/third_party/tcmalloc/chromium/src/symbolize.cc
@@ -87,40 +87,16 @@ int SymbolTable::Symbolize() {
 #else
   // All this work is to do two-way communication.  ugh.
   extern char* program_invocation_name;  // gcc provides this
-  int *child_in = NULL;   // file descriptors
-  int *child_out = NULL;  // for now, we don't worry about child_err
-  int child_fds[5][2];    // socketpair may be called up to five times below
-
-  // The client program may close its stdin and/or stdout and/or stderr
-  // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
-  // In this case the communication between the forked processes may be broken
-  // if either the parent or the child tries to close or duplicate these
-  // descriptors. The loop below produces two pairs of file descriptors, each
-  // greater than 2 (stderr).
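An aside on the guard being deleted in this hunk: POSIX hands out the lowest free descriptor number, so a program that has closed stdin, stdout, or stderr can receive 0, 1, or 2 from socketpair(), and the later stdio redirection in the forked child would then clobber the communication channel. A minimal, self-contained sketch of that guard follows; the function name is illustrative and not part of this patch.

  #include <sys/socket.h>
  #include <unistd.h>

  // Returns true and fills fds[] with a socketpair whose descriptors are
  // both above 2, so they cannot collide with stdin/stdout/stderr.
  static bool MakeFdPairAbove2(int fds[2]) {
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == -1)
      return false;                 // out of descriptors or similar failure
    if (fds[0] > 2 && fds[1] > 2)
      return true;                  // safe: neither fd is a stdio descriptor
    close(fds[0]);                  // unlucky: got 0, 1 or 2; discard both
    close(fds[1]);                  // and let the caller retry (the deleted
    return false;                   // loop below retried up to five times)
  }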
-  for (int i = 0; i < 5; i++) {
-    if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
-      for (int j = 0; j < i; j++) {
-        close(child_fds[j][0]);
-        close(child_fds[j][1]);
-        return 0;
-      }
-    } else {
-      if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
-        if (child_in == NULL) {
-          child_in = child_fds[i];
-        } else {
-          child_out = child_fds[i];
-          for (int j = 0; j < i; j++) {
-            if (child_fds[j] == child_in) continue;
-            close(child_fds[j][0]);
-            close(child_fds[j][1]);
-          }
-          break;
-        }
-      }
-    }
-  }
+  int child_in[2];   // file descriptors
+  int child_out[2];  // for now, we don't worry about child_err
+  if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_in) == -1) {
+    return 0;
+  }
+  if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_out) == -1) {
+    close(child_in[0]);
+    close(child_in[1]);
+    return 0;
   }
-
   switch (fork()) {
     case -1: {  // error
       close(child_in[0]);
diff --git a/third_party/tcmalloc/chromium/src/system-alloc.cc b/third_party/tcmalloc/chromium/src/system-alloc.cc
index 29bed80..21d9b43 100644
--- a/third_party/tcmalloc/chromium/src/system-alloc.cc
+++ b/third_party/tcmalloc/chromium/src/system-alloc.cc
@@ -78,7 +78,7 @@ union MemoryAligner {
   void* p;
   double d;
   size_t s;
-} CACHELINE_ALIGNED;
+};
 
 static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
 
@@ -150,10 +150,6 @@ bool RegisterSystemAllocator(SysAllocator *a, int priority) {
 
 void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
                               size_t alignment) {
-#ifndef HAVE_SBRK
-  failed_ = true;
-  return NULL;
-#else
   // Check if we should use sbrk allocation.
   // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized
   // state) and eventually gets initialized to the specified value.  Note
@@ -168,16 +164,16 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
   // a strict check here
   if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL;
 
-  // This doesn't overflow because TCMalloc_SystemAlloc has already
-  // tested for overflow at the alignment boundary.
-  size = ((size + alignment - 1) / alignment) * alignment;
-
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
+  // could theoretically return the "extra" bytes here, but this
+  // is simple and correct.
   if (actual_size) {
     *actual_size = size;
   }
 
+  // This doesn't overflow because TCMalloc_SystemAlloc has already
+  // tested for overflow at the alignment boundary.
+  size = ((size + alignment - 1) / alignment) * alignment;
+
   // Check that we're not asking for so much more memory that we'd
   // wrap around the end of the virtual address space.  (This seems
   // like something sbrk() should check for us, and indeed opensolaris
@@ -220,7 +216,6 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
     ptr += alignment - (ptr & (alignment-1));
   }
   return reinterpret_cast<void*>(ptr);
-#endif  // HAVE_SBRK
 }
 
 void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) {
@@ -243,6 +238,12 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
     return NULL;
   }
 
+  // could theoretically return the "extra" bytes here, but this
+  // is simple and correct.
+  if (actual_size) {
+    *actual_size = size;
+  }
+
   // Enforce page alignment
   if (pagesize == 0) pagesize = getpagesize();
   if (alignment < pagesize) alignment = pagesize;
@@ -252,12 +253,6 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
   }
   size = aligned_size;
 
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
   // Ask for extra memory if alignment > pagesize
   size_t extra = 0;
   if (alignment > pagesize) {
@@ -333,6 +328,12 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
     initialized = true;
   }
 
+  // could theoretically return the "extra" bytes here, but this
+  // is simple and correct.
+  if (actual_size) {
+    *actual_size = size;
+  }
+
   // Enforce page alignment
   if (pagesize == 0) pagesize = getpagesize();
   if (alignment < pagesize) alignment = pagesize;
@@ -342,12 +343,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
   }
   size = aligned_size;
 
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
   // Ask for extra memory if alignment > pagesize
   size_t extra = 0;
   if (alignment > pagesize) {
diff --git a/third_party/tcmalloc/chromium/src/system-alloc.h b/third_party/tcmalloc/chromium/src/system-alloc.h
index 8d982ef..60affed 100644
--- a/third_party/tcmalloc/chromium/src/system-alloc.h
+++ b/third_party/tcmalloc/chromium/src/system-alloc.h
@@ -48,11 +48,7 @@
 // may optionally return more bytes than asked for (i.e. return an
 // entire "huge" page if a huge page allocator is in use).
 //
-// The returned pointer is a multiple of "alignment" if non-zero. The
-// returned pointer will always be aligned suitably for holding a
-// void*, double, or size_t. In addition, if this platform defines
-// CACHELINE_ALIGNED, the return pointer will always be cacheline
-// aligned.
+// The returned pointer is a multiple of "alignment" if non-zero.
 //
 // Returns NULL when out of memory.
 extern void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes,
diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
index 79825ce..6acead8 100644
--- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
+++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
@@ -228,9 +228,8 @@ extern "C" {
       ATTRIBUTE_SECTION(google_malloc);
   void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW
       ATTRIBUTE_SECTION(google_malloc);
-  // Surprisingly, standard C++ library implementations use a
-  // nothrow-delete internally.  See, eg:
-  //   http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
+  // Surprisingly, compilers use a nothrow-delete internally.  See, eg:
+  //   http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
   void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW
       ATTRIBUTE_SECTION(google_malloc);
   void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW
@@ -254,9 +253,9 @@ extern "C" {
 // NOTE: we make many of these symbols weak, but do so in the makefile
 //       (via objcopy -W) and not here.  That ends up being more portable.
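For context on the ALIAS macro used in the hunk below: GCC's alias attribute makes one symbol name resolve to another symbol's definition, which is how tcmalloc forwards operator new/delete to its tc_* entry points. A minimal standalone sketch, with hypothetical names that are not from this patch:

  // impl.cc - compile with gcc or clang on an ELF target.
  extern "C" int impl(int x) { return x + 1; }   // the real definition

  // "api" becomes another name for impl's code; no forwarding thunk
  // is emitted, so the alias has zero call overhead.
  extern "C" int api(int x) __attribute__((alias("impl")));

  // Callers of api(41) and impl(41) run the same machine code.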
 # define ALIAS(x) __attribute__ ((alias (x)))
-void* operator new(size_t size) throw (std::bad_alloc) ALIAS("tc_new");
+void* operator new(size_t size) ALIAS("tc_new");
 void operator delete(void* p) __THROW ALIAS("tc_delete");
-void* operator new[](size_t size) throw (std::bad_alloc) ALIAS("tc_newarray");
+void* operator new[](size_t size) ALIAS("tc_newarray");
 void operator delete[](void* p) __THROW ALIAS("tc_deletearray");
 void* operator new(size_t size, const std::nothrow_t&) __THROW
     ALIAS("tc_new_nothrow");
@@ -265,7 +264,7 @@ void* operator new[](size_t size, const std::nothrow_t&) __THROW
 void operator delete(void* size, const std::nothrow_t&) __THROW
     ALIAS("tc_delete_nothrow");
 void operator delete[](void* size, const std::nothrow_t&) __THROW
-    ALIAS("tc_deletearray_nothrow");
+  ALIAS("tc_deletearray_nothrow");
 extern "C" {
   void* malloc(size_t size) __THROW   ALIAS("tc_malloc");
   void  free(void* ptr) __THROW       ALIAS("tc_free");
@@ -805,17 +804,7 @@ TCMallocGuard::TCMallocGuard() {
     tc_free(tc_malloc(1));
     ThreadCache::InitTSD();
     tc_free(tc_malloc(1));
-    // Either we, or debugallocation.cc, or valgrind will control memory
-    // management.  We register our extension if we're the winner.
-#ifdef TCMALLOC_FOR_DEBUGALLOCATION
-    // Let debugallocation register its extension.
-#else
-    if (RunningOnValgrind()) {
-      // Let Valgrind use its own malloc (so don't register our extension).
-    } else {
-      MallocExtension::Register(new TCMallocImplementation);
-    }
-#endif
+    MallocExtension::Register(new TCMallocImplementation);
   }
 }
 
@@ -837,28 +826,7 @@ static TCMallocGuard module_enter_exit_hook;
 // Helpers for the exported routines below
 //-------------------------------------------------------------------
 
-static inline void* CheckedMallocResult(void *result) {
-  Span* fetched_span;
-  size_t cl;
-
-  if (result != NULL) {
-    ASSERT(Static::pageheap()->GetSizeClassOrSpan(result, &cl, &fetched_span));
-  }
-
-  return result;
-}
-
-static inline void* SpanToMallocResult(Span *span) {
-  Span* fetched_span = NULL;
-  size_t cl = 0;
-  ASSERT(Static::pageheap()->GetSizeClassOrSpan(span->start_ptr(),
-                                                &cl, &fetched_span));
-  ASSERT(cl == kLargeSizeClass);
-  ASSERT(span == fetched_span);
-  return span->start_ptr();
-}
-
-static void* DoSampledAllocation(size_t size) {
+static Span* DoSampledAllocation(size_t size) {
   // Grab the stack trace outside the heap lock
   StackTrace tmp;
   tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1);
@@ -866,8 +834,7 @@ static void* DoSampledAllocation(size_t size) {
 
   SpinLockHolder h(Static::pageheap_lock());
   // Allocate span
-  Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size),
-                                       kLargeSizeClass, kPageSize);
+  Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
   if (span == NULL) {
     return NULL;
   }
@@ -884,7 +851,26 @@ static void* DoSampledAllocation(size_t size) {
   span->objects = stack;
   tcmalloc::DLL_Prepend(Static::sampled_objects(), span);
 
-  return SpanToMallocResult(span);
+  return span;
+}
+
+static inline bool CheckCachedSizeClass(void *ptr) {
+  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
+  return cached_value == 0 ||
+      cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
+}
+
+static inline void* CheckedMallocResult(void *result)
+{
+  ASSERT(result == 0 || CheckCachedSizeClass(result));
+  return result;
+}
+
+static inline void* SpanToMallocResult(Span *span) {
+  Static::pageheap()->CacheSizeClass(span->start, 0);
+  return
+      CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
 }
 
 // Copy of FLAGS_tcmalloc_large_alloc_report_threshold with
@@ -930,39 +916,24 @@ inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) {
   return tc_new_mode ? cpp_memalign(align, size) : do_memalign(align, size);
 }
 
-// Must be called with the page lock held.
-inline bool should_report_large(Length num_pages) {
-  const int64 threshold = large_alloc_threshold;
-  if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
-    // Increase the threshold by 1/8 every time we generate a report.
-    // We cap the threshold at 8GB to avoid overflow problems.
-    large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
-                             ? threshold + threshold/8 : 8ll<<30);
-    return true;
-  }
-  return false;
-}
-
 // Helper for do_malloc().
-inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
-  void* result;
-  bool report_large;
-
-  Length num_pages = tcmalloc::pages(size);
-  size = num_pages << kPageShift;
-
-  if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
-    result = DoSampledAllocation(size);
-
-    SpinLockHolder h(Static::pageheap_lock());
-    report_large = should_report_large(num_pages);
-  } else {
+inline void* do_malloc_pages(Length num_pages) {
+  Span *span;
+  bool report_large = false;
+  {
     SpinLockHolder h(Static::pageheap_lock());
-    Span* span = Static::pageheap()->New(num_pages, kLargeSizeClass, kPageSize);
-    result = (span == NULL ? NULL : SpanToMallocResult(span));
-    report_large = should_report_large(num_pages);
+    span = Static::pageheap()->New(num_pages);
+    const int64 threshold = large_alloc_threshold;
+    if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
+      // Increase the threshold by 1/8 every time we generate a report.
+      // We cap the threshold at 8GB to avoid overflow problems.
+      large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
+                               ? threshold + threshold/8 : 8ll<<30);
+      report_large = true;
+    }
   }
 
+  void* result = (span == NULL ? NULL : SpanToMallocResult(span));
   if (report_large) {
     ReportLargeAlloc(num_pages, result);
   }
@@ -974,19 +945,17 @@ inline void* do_malloc(size_t size) {
   // The following call forces module initialization
   ThreadCache* heap = ThreadCache::GetCache();
 
-  if (size <= kMaxSize) {
-    size_t cl = Static::sizemap()->SizeClass(size);
-    size = Static::sizemap()->class_to_size(cl);
-
-    if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
-      ret = DoSampledAllocation(size);
-    } else {
-      // The common case, and also the simplest.  This just pops the
-      // size-appropriate freelist, after replenishing it if it's empty.
-      ret = CheckedMallocResult(heap->Allocate(size, cl));
+  if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
+    Span* span = DoSampledAllocation(size);
+    if (span != NULL) {
+      ret = SpanToMallocResult(span);
     }
+  } else if (size <= kMaxSize) {
+    // The common case, and also the simplest.  This just pops the
+    // size-appropriate freelist, after replenishing it if it's empty.
+    ret = CheckedMallocResult(heap->Allocate(size));
   } else {
-    ret = do_malloc_pages(heap, size);
+    ret = do_malloc_pages(tcmalloc::pages(size));
   }
   if (ret == NULL) errno = ENOMEM;
   return ret;
@@ -1014,22 +983,28 @@ static inline ThreadCache* GetCacheIfPresent() {
 inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
   if (ptr == NULL) return;
   ASSERT(Static::pageheap() != NULL);  // Should not call free() before malloc()
-  Span* span;
-  size_t cl;
-
-  if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
-    // result can be false because the pointer passed in is invalid
-    // (not something returned by malloc or friends), or because the
-    // pointer was allocated with some other allocator besides
-    // tcmalloc.  The latter can happen if tcmalloc is linked in via
-    // a dynamic library, but is not listed last on the link line.
-    // In that case, libraries after it on the link line will
-    // allocate with libc malloc, but free with tcmalloc's free.
-    (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
-    return;
+  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  Span* span = NULL;
+  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
+
+  if (cl == 0) {
+    span = Static::pageheap()->GetDescriptor(p);
+    if (!span) {
+      // span can be NULL because the pointer passed in is invalid
+      // (not something returned by malloc or friends), or because the
+      // pointer was allocated with some other allocator besides
+      // tcmalloc.  The latter can happen if tcmalloc is linked in via
+      // a dynamic library, but is not listed last on the link line.
+      // In that case, libraries after it on the link line will
+      // allocate with libc malloc, but free with tcmalloc's free.
+      (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
+      return;
+    }
+    cl = span->sizeclass;
+    Static::pageheap()->CacheSizeClass(p, cl);
   }
-
-  if (cl != kLargeSizeClass) {
+  if (cl != 0) {
+    ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
     ThreadCache* heap = GetCacheIfPresent();
     if (heap != NULL) {
       heap->Deallocate(ptr, cl);
@@ -1040,7 +1015,8 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
     }
   } else {
     SpinLockHolder h(Static::pageheap_lock());
-    ASSERT(span != NULL && ptr == span->start_ptr());
+    ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
+    ASSERT(span != NULL && span->start == p);
     if (span->sample) {
       tcmalloc::DLL_Remove(span);
       Static::stacktrace_allocator()->Delete(
@@ -1060,17 +1036,20 @@ inline size_t GetSizeWithCallback(void* ptr,
                                   size_t (*invalid_getsize_fn)(void*)) {
   if (ptr == NULL)
     return 0;
-
-  Span* span;
-  size_t cl;
-  if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
-    return (*invalid_getsize_fn)(ptr);
-  }
-
-  if (cl != kLargeSizeClass) {
+  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
+  if (cl != 0) {
     return Static::sizemap()->ByteSizeForClass(cl);
   } else {
-    return span->length << kPageShift;
+    Span *span = Static::pageheap()->GetDescriptor(p);
+    if (span == NULL) {  // means we do not own this memory
+      return (*invalid_getsize_fn)(ptr);
+    } else if (span->sizeclass != 0) {
+      Static::pageheap()->CacheSizeClass(p, span->sizeclass);
+      return Static::sizemap()->ByteSizeForClass(span->sizeclass);
+    } else {
+      return span->length << kPageShift;
+    }
   }
 }
 
@@ -1157,18 +1136,47 @@ void* do_memalign(size_t align, size_t size) {
     }
     if (cl < kNumClasses) {
       ThreadCache* heap = ThreadCache::GetCache();
-      size = Static::sizemap()->class_to_size(cl);
-      return CheckedMallocResult(heap->Allocate(size, cl));
+      return CheckedMallocResult(heap->Allocate(
+                                     Static::sizemap()->class_to_size(cl)));
     }
   }
 
   // We will allocate directly from the page heap
   SpinLockHolder h(Static::pageheap_lock());
 
-  // Any page-level allocation will be fine
-  Span* span = Static::pageheap()->New(tcmalloc::pages(size),
-                                       kLargeSizeClass, align);
-  return span == NULL ? NULL : SpanToMallocResult(span);
+  if (align <= kPageSize) {
+    // Any page-level allocation will be fine
+    // TODO: We could put the rest of this page in the appropriate
+    // TODO: cache but it does not seem worth it.
+    Span* span = Static::pageheap()->New(tcmalloc::pages(size));
+    return span == NULL ? NULL : SpanToMallocResult(span);
+  }
+
+  // Allocate extra pages and carve off an aligned portion
+  const Length alloc = tcmalloc::pages(size + align);
+  Span* span = Static::pageheap()->New(alloc);
+  if (span == NULL) return NULL;
+
+  // Skip starting portion so that we end up aligned
+  Length skip = 0;
+  while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
+    skip++;
+  }
+  ASSERT(skip < alloc);
+  if (skip > 0) {
+    Span* rest = Static::pageheap()->Split(span, skip);
+    Static::pageheap()->Delete(span);
+    span = rest;
+  }
+
+  // Skip trailing portion that we do not need to return
+  const Length needed = tcmalloc::pages(size);
+  ASSERT(span->length >= needed);
+  if (span->length > needed) {
+    Span* trailer = Static::pageheap()->Split(span, needed);
+    Static::pageheap()->Delete(trailer);
+  }
+  return SpanToMallocResult(span);
 }
 
 // Helpers for use by exported routines below:
@@ -1384,7 +1392,8 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
   return p;
 }
 
-extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(
    size_t size, const std::nothrow_t&) __THROW {
   void* p = cpp_alloc(size, true);
   MallocHook::InvokeNewHook(p, size);
   return p;
@@ -1395,10 +1404,10 @@ extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW {
   do_free(p);
 }
 
-// Standard C++ library implementations define and use this
-// (via ::operator delete(ptr, nothrow)).
+// Compilers define and use this (via ::operator delete(ptr, nothrow)).
 // But it's really the same as normal delete, so we just do the same thing.
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(
    void* p, const std::nothrow_t&) __THROW {
   MallocHook::InvokeDeleteHook(p);
   do_free(p);
 }
@@ -1414,8 +1423,8 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) {
   return p;
 }
 
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
-    __THROW {
+extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(
    size_t size, const std::nothrow_t&) __THROW {
   void* p = cpp_alloc(size, true);
   MallocHook::InvokeNewHook(p, size);
   return p;
@@ -1426,7 +1435,8 @@ extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW {
   do_free(p);
 }
 
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(
    void* p, const std::nothrow_t&) __THROW {
   MallocHook::InvokeDeleteHook(p);
   do_free(p);
 }
diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
index c482187..ca00e36 100644
--- a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
+++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
@@ -75,14 +75,7 @@ static int test_counter = 0;  // incremented every time the macro is called
 // This flag won't be compiled in in opt mode.
 DECLARE_int32(max_free_queue_size);
 
-// Test match as well as mismatch rules:
 TEST(DebugAllocationTest, DeallocMismatch) {
-  // malloc can be matched only by free
-  // new can be matched only by delete and delete(nothrow)
-  // new[] can be matched only by delete[] and delete[](nothrow)
-  // new(nothrow) can be matched only by delete and delete(nothrow)
-  // new(nothrow)[] can be matched only by delete[] and delete[](nothrow)
-
   // Allocate with malloc.
   {
     int* x = static_cast<int*>(malloc(sizeof(*x)));
@@ -95,41 +88,17 @@ TEST(DebugAllocationTest, DeallocMismatch) {
   // Allocate with new.
   {
     int* x = new int;
-    int* y = new int;
     IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
     IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]");
     delete x;
-    ::operator delete(y, std::nothrow);
   }
 
   // Allocate with new[].
   {
     int* x = new int[1];
-    int* y = new int[1];
-    IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
-    IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete");
-    delete [] x;
-    ::operator delete[](y, std::nothrow);
-  }
-
-  // Allocate with new(nothrow).
-  {
-    int* x = new(std::nothrow) int;
-    int* y = new(std::nothrow) int;
-    IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
-    IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]");
-    delete x;
-    ::operator delete(y, std::nothrow);
-  }
-
-  // Allocate with new(nothrow)[].
-  {
-    int* x = new(std::nothrow) int[1];
-    int* y = new(std::nothrow) int[1];
     IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
     IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete");
     delete [] x;
-    ::operator delete[](y, std::nothrow);
   }
 }
diff --git a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
index 4a83fc2..9f0c08c 100644
--- a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
+++ b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
@@ -139,13 +139,13 @@ EARLY_MSG="Starting tracking the heap$"
 
 Test 60 0 "$EARLY_MSG" "" \
   HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
-  PERFTOOLS_VERBOSE=10 || exit 5
+  PERFTOOLS_VERBOSE=1 || exit 5
 Test 60 0 "MemoryRegionMap Init$" "" \
   HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
-  PERFTOOLS_VERBOSE=11 || exit 6
+  PERFTOOLS_VERBOSE=2 || exit 6
 Test 60 0 "" "$EARLY_MSG" \
   HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
-  PERFTOOLS_VERBOSE=-11 || exit 7
+  PERFTOOLS_VERBOSE=-2 || exit 7
 
 # These invocations should fail with very high probability,
 # rather than return 0 or hang (1 == exit(1), 134 == abort(), 139 = SIGSEGV):
@@ -162,10 +162,10 @@ Test 60 1 "MakeALeak" "" \
 
 # Test that very early log messages are present and controllable:
 Test 60 1 "Starting tracking the heap$" "" \
-  HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=10 \
+  HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=1 \
   || exit 11
 Test 60 1 "" "Starting tracking the heap" \
-  HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-10 \
+  HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-1 \
  || exit 12
 
 cd /    # so we're not in TMPDIR when we delete it
diff --git a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
index fd444da..9120b78 100644
--- a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
+++ b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
@@ -26,7 +26,7 @@ static void TestPageHeap_Stats() {
   CheckStats(ph, 0, 0, 0);
 
   // Allocate a span 's1'
-  tcmalloc::Span* s1 = ph->New(256, kLargeSizeClass, kPageSize);
+  tcmalloc::Span* s1 = ph->New(256);
   CheckStats(ph, 256, 0, 0);
 
   // Split span 's1' into 's1', 's2'.  Delete 's2'
diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
index 19371b7..1908b03 100644
--- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
+++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
@@ -56,11 +56,12 @@ static void test_other_thread() {
 
   int i, m;
   char b[128];
-  MutexLock ml(&mutex);
   for (m = 0; m < 1000000; ++m) {          // run millions of times
     for (i = 0; i < g_iters; ++i ) {
+      MutexLock ml(&mutex);
       result ^= i;
     }
+    MutexLock ml(&mutex);
     snprintf(b, sizeof(b), "%d", result);  // get some libc action
   }
 #endif
@@ -69,11 +70,12 @@ static void test_other_thread() {
 static void test_main_thread() {
   int i, m;
   char b[128];
-  MutexLock ml(&mutex);
   for (m = 0; m < 1000000; ++m) {          // run millions of times
     for (i = 0; i < g_iters; ++i ) {
+      MutexLock ml(&mutex);
       result ^= i;
     }
+    MutexLock ml(&mutex);
     snprintf(b, sizeof(b), "%d", result);  // get some libc action
   }
 }
diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
index 4668fa7..5766f2e 100644
--- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
+++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
@@ -206,27 +206,28 @@ CPUPROFILE="$TMPDIR/p5" "$PROFILER2" 50 || RegisterFailure
 CPUPROFILE="$TMPDIR/p6" "$PROFILER2" 100 || RegisterFailure
 VerifySimilar p5 "$PROFILER2_REALNAME" p6 "$PROFILER2_REALNAME" 2
 
-CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 30 || RegisterFailure
-CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 60 || RegisterFailure
+# When we compile with threads, things take a lot longer even when we only use 1
+CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 10 || RegisterFailure
+CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 20 || RegisterFailure
 VerifySimilar p5b "$PROFILER3_REALNAME" p5c "$PROFILER3_REALNAME" 2
 
 # Now try what happens when we use threads
-"$PROFILER3" 30 2 "$TMPDIR/p7" || RegisterFailure
-"$PROFILER3" 60 2 "$TMPDIR/p8" || RegisterFailure
+"$PROFILER3" 5 2 "$TMPDIR/p7" || RegisterFailure
+"$PROFILER3" 10 2 "$TMPDIR/p8" || RegisterFailure
 VerifySimilar p7 "$PROFILER3_REALNAME" p8 "$PROFILER3_REALNAME" 2
 
-"$PROFILER4" 30 2 "$TMPDIR/p9" || RegisterFailure
-"$PROFILER4" 60 2 "$TMPDIR/p10" || RegisterFailure
+"$PROFILER4" 5 2 "$TMPDIR/p9" || RegisterFailure
+"$PROFILER4" 10 2 "$TMPDIR/p10" || RegisterFailure
 VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2
 
 # More threads!
-"$PROFILER4" 25 3 "$TMPDIR/p9" || RegisterFailure
-"$PROFILER4" 50 3 "$TMPDIR/p10" || RegisterFailure
+"$PROFILER4" 2 3 "$TMPDIR/p9" || RegisterFailure
+"$PROFILER4" 4 3 "$TMPDIR/p10" || RegisterFailure
 VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2
 
 # Compare how much time the main thread takes compared to the other threads
 # Recall the main thread runs twice as long as the other threads, by design.
-"$PROFILER4" 20 4 "$TMPDIR/p11" || RegisterFailure +"$PROFILER4" 2 4 "$TMPDIR/p11" || RegisterFailure VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 # Test symbol save and restore @@ -235,14 +236,14 @@ VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 >"$TMPDIR/p13" 2>/dev/null || RegisterFailure VerifyIdentical p12 "$PROFILER1_REALNAME" p13 "" || RegisterFailure -"$PROFILER3" 30 2 "$TMPDIR/p14" || RegisterFailure +"$PROFILER3" 5 2 "$TMPDIR/p14" || RegisterFailure "$PPROF" $PPROF_FLAGS "$PROFILER3_REALNAME" "$TMPDIR/p14" --raw \ >"$TMPDIR/p15" 2>/dev/null || RegisterFailure VerifyIdentical p14 "$PROFILER3_REALNAME" p15 "" || RegisterFailure # Test using ITIMER_REAL instead of ITIMER_PROF. -env CPUPROFILE_REALTIME=1 "$PROFILER3" 30 2 "$TMPDIR/p16" || RegisterFailure -env CPUPROFILE_REALTIME=1 "$PROFILER3" 60 2 "$TMPDIR/p17" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 5 2 "$TMPDIR/p16" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 10 2 "$TMPDIR/p17" || RegisterFailure VerifySimilar p16 "$PROFILER3_REALNAME" p17 "$PROFILER3_REALNAME" 2 diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc index 6b2ec26..25bfd6a 100644 --- a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc @@ -977,7 +977,7 @@ static int RunAllTests(int argc, char** argv) { } // This code stresses some of the memory allocation via STL. - // It may call operator delete(void*, nothrow_t). + // In particular, it calls operator delete(void*, nothrow_t). fprintf(LOGSTREAM, "Testing STL use\n"); { std::vector<int> v; diff --git a/third_party/tcmalloc/chromium/src/thread_cache.h b/third_party/tcmalloc/chromium/src/thread_cache.h index 1165447..4c6a233 100644 --- a/third_party/tcmalloc/chromium/src/thread_cache.h +++ b/third_party/tcmalloc/chromium/src/thread_cache.h @@ -79,9 +79,7 @@ class ThreadCache { // Total byte size in cache size_t Size() const { return size_; } - // Allocate an object of the given size and class. The size given - // must be the same as the size of the class in the size map. - void* Allocate(size_t size, size_t cl); + void* Allocate(size_t size); void Deallocate(void* ptr, size_t size_class); void Scavenge(); @@ -295,18 +293,15 @@ class ThreadCache { // across all ThreadCaches. Protected by Static::pageheap_lock. static ssize_t unclaimed_cache_space_; - // This class is laid out with the most frequently used fields - // first so that hot elements are placed on the same cache line. + // Warning: the offset of list_ affects performance. On general + // principles, we don't like list_[x] to span multiple L1 cache + // lines. However, merely placing list_ at offset 0 here seems to + // cause cache conflicts. size_t size_; // Combined size of data size_t max_size_; // size_ > max_size_ --> Scavenge() - - // We sample allocations, biased by the size of the allocation - Sampler sampler_; // A sampler - - FreeList list_[kNumClasses]; // Array indexed by size-class - pthread_t tid_; // Which thread owns it + FreeList list_[kNumClasses]; // Array indexed by size-class bool in_setspecific_; // In call to pthread_setspecific? // Allocate a new heap. REQUIRES: Static::pageheap_lock is held. @@ -318,10 +313,9 @@ class ThreadCache { static void DeleteCache(ThreadCache* heap); static void RecomputePerThreadCacheSize(); - // Ensure that this class is cacheline-aligned. 
-  // performance, as false sharing would negate many of the benefits
-  // of a per-thread cache.
-} CACHELINE_ALIGNED;
+  // We sample allocations, biased by the size of the allocation
+  Sampler sampler_;              // A sampler
+};
 
 // Allocator for thread heaps
 // This is logically part of the ThreadCache class, but MSVC, at
@@ -337,15 +331,15 @@ inline bool ThreadCache::SampleAllocation(size_t k) {
   return sampler_.SampleAllocation(k);
 }
 
-inline void* ThreadCache::Allocate(size_t size, size_t cl) {
+inline void* ThreadCache::Allocate(size_t size) {
   ASSERT(size <= kMaxSize);
-  ASSERT(size == Static::sizemap()->ByteSizeForClass(cl));
-
+  const size_t cl = Static::sizemap()->SizeClass(size);
+  const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl);
   FreeList* list = &list_[cl];
   if (list->empty()) {
-    return FetchFromCentralCache(cl, size);
+    return FetchFromCentralCache(cl, alloc_size);
   }
-  size_ -= size;
+  size_ -= alloc_size;
   return list->Pop();
 }
 
diff --git a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
index 5c65a03..97b614b 100644
--- a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
+++ b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
@@ -48,12 +48,6 @@
 #define SEARCH_CAP (1024*1024)
 #define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols"
 
-void usage() {
-  fprintf(stderr, "usage: "
-          "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n");
-  fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n");
-}
-
 int main(int argc, char *argv[]) {
   DWORD  error;
   HANDLE process;
@@ -80,11 +74,10 @@ int main(int argc, char *argv[]) {
       }
       filename = argv[i+1];
       i++;     /* to skip over filename too */
-    } else if (strcmp(argv[i], "--help") == 0) {
-      usage();
-      exit(0);
     } else {
-      usage();
+      fprintf(stderr, "usage: "
+              "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n");
+      fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n");
       exit(1);
     }
   }
diff --git a/third_party/tcmalloc/chromium/src/windows/config.h b/third_party/tcmalloc/chromium/src/windows/config.h
index b5d9bb6..99de82c 100644
--- a/third_party/tcmalloc/chromium/src/windows/config.h
+++ b/third_party/tcmalloc/chromium/src/windows/config.h
@@ -261,12 +261,10 @@
 // ---------------------------------------------------------------------
 // Extra stuff not found in config.h.in
 
-// This must be defined before the windows.h is included.  We need at
-// least 0x0400 for mutex.h to have access to TryLock, and at least
-// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx.
-// (This latter is an optimization we could take out if need be.)
+// This must be defined before the windows.h is included.  It's needed
+// for mutex.h, to give access to the TryLock method.
 #ifndef _WIN32_WINNT
-# define _WIN32_WINNT 0x0501
+# define _WIN32_WINNT 0x0400
 #endif
 
 // We want to make sure not to ever try to #include heap-checker.h
diff --git a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
index 663b7f9..4b97b15 100644
--- a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
+++ b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
@@ -61,8 +61,7 @@
 #endif
 
 #ifdef __cplusplus
-#include <new>        // for std::nothrow_t
-
+#include <new>        // for nothrow_t
 extern "C" {
 #endif
   // Returns a human-readable version string.  If major, minor,
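Between these two hunks, a quick note: the declarations that follow rely on std::nothrow_t from the <new> header included just above. A tiny usage sketch of the nothrow allocation forms these functions back (illustrative only, not from this patch):

  #include <new>       // for std::nothrow

  static bool TryAlloc() {
    int* p = new (std::nothrow) int[1024];   // returns NULL on failure
    if (p == 0) return false;                // handled without an exception
    delete [] p;
    return true;
  }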
@@ -93,15 +92,16 @@ extern "C" {
 #ifdef __cplusplus
   PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW;
   PERFTOOLS_DLL_DECL void* tc_new(size_t size);
-  PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size,
-                                          const std::nothrow_t&) __THROW;
   PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW;
-  PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p,
-                                            const std::nothrow_t&) __THROW;
   PERFTOOLS_DLL_DECL void* tc_newarray(size_t size);
+  PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
+
+  PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size,
+                                          const std::nothrow_t&) __THROW;
   PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size,
                                                const std::nothrow_t&) __THROW;
-  PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
+  PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p,
+                                            const std::nothrow_t&) __THROW;
   PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p,
                                                  const std::nothrow_t&) __THROW;
 }
diff --git a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
index 9beb21d..726d345 100644
--- a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
+++ b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
@@ -180,10 +180,6 @@ static void ShowSymbolInfo(HANDLE process, ULONG64 module_base) {
 #endif
 }
 
-void usage() {
-  fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n");
-}
-
 int main(int argc, char *argv[]) {
   DWORD  error;
   HANDLE process;
@@ -199,15 +195,12 @@ int main(int argc, char *argv[]) {
   for (i = 1; i < argc; i++) {
     if (strcmp(argv[i], "--demangle") == 0 || strcmp(argv[i], "-C") == 0) {
       symopts |= SYMOPT_UNDNAME;
-    } else if (strcmp(argv[i], "--help") == 0) {
-      usage();
-      exit(0);
     } else {
       break;
     }
   }
   if (i != argc - 1) {
-    usage();
+    fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n");
     exit(1);
   }
   filename = argv[i];
diff --git a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
index deb841b..c1ed37f 100644
--- a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
+++ b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
@@ -83,7 +83,6 @@
 #endif
 
 #include <windows.h>
-#include <stdio.h>
 #include <malloc.h>       // for _msize and _expand
 #include <Psapi.h>        // for EnumProcessModules, GetModuleInformation, etc.
 #include <set>
@@ -97,6 +96,8 @@
 
 // The maximum number of modules we allow to be in one executable
 const int kMaxModules = 8182;
+// The maximum size of a module's basename
+const int kMaxModuleNameSize = 256;
 
 // These are hard-coded, unfortunately. :-( They are also probably
 // compiler specific.  See get_mangled_names.cc, in this directory,
@@ -144,15 +145,13 @@ class LibcInfo {
   LibcInfo() {
     memset(this, 0, sizeof(*this));  // easiest way to initialize the array
   }
+  bool SameAs(const LibcInfo& that) const;
+  bool SameAsModuleEntry(const ModuleEntryCopy& module_entry) const;
+
+  bool patched() const { return is_valid() && module_name_[0] != '\0'; }
+  const char* module_name() const { return is_valid() ? module_name_ : ""; }
 
-  bool patched() const { return is_valid(); }
   void set_is_valid(bool b) { is_valid_ = b; }
-  // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx:
-  // "The load address of a module (lpBaseOfDll) is the same as the HMODULE
-  // value."
-  HMODULE hmodule() const {
-    return reinterpret_cast<HMODULE>(const_cast<void*>(module_base_address_));
-  }
 
   // Populates all the windows_fn_[] vars based on our module info.
  // Returns false if windows_fn_ is all NULL's, because there's
@@ -168,6 +167,7 @@ class LibcInfo {
     memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_));
     this->module_base_address_ = that.module_base_address_;
     this->module_base_size_ = that.module_base_size_;
+    memcpy(this->module_name_, that.module_name_, sizeof(module_name_));
   }
 
   enum {
@@ -207,6 +207,7 @@ class LibcInfo {
 
   const void *module_base_address_;
   size_t module_base_size_;
+  char module_name_[kMaxModuleNameSize];
 
  public:
   // These shouldn't have to be public, since only subclasses of
@@ -284,8 +285,10 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
 
 // This is a subset of MODULEENTRY32, that we need for patching.
 struct ModuleEntryCopy {
-  LPVOID modBaseAddr;     // the same as hmodule
+  LPVOID modBaseAddr;
   DWORD modBaseSize;
+  HMODULE hModule;
+  TCHAR szModule[kMaxModuleNameSize];
   // This is not part of MODULEENTRY32, but is needed to avoid making
   // windows syscalls while we're holding patch_all_modules_lock (see
   // lock-inversion comments at patch_all_modules_lock definition, below).
@@ -294,16 +297,26 @@ struct ModuleEntryCopy {
   ModuleEntryCopy() {
     modBaseAddr = NULL;
     modBaseSize = 0;
+    hModule = NULL;
+    strcpy(szModule, "<executable>");
     for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
       rgProcAddresses[i] = LibcInfo::static_fn(i);
   }
-  ModuleEntryCopy(const MODULEINFO& mi) {
+  ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) {
     this->modBaseAddr = mi.lpBaseOfDll;
     this->modBaseSize = mi.SizeOfImage;
+    this->hModule = hmodule;
+    // TODO(csilvers): we could make more efficient by calling these
+    // lazily (not until the vars are needed, which is often never).
+    // However, there's tricky business with calling windows functions
+    // inside the patch_all_modules_lock (see the lock inversion
+    // comments with the patch_all_modules_lock definition, below), so
+    // it's safest to do it all here, where no lock is needed.
+    ::GetModuleBaseNameA(hprocess, hmodule,
+                         this->szModule, sizeof(this->szModule));
     for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
-      rgProcAddresses[i] = (GenericFnPtr)::GetProcAddress(
-          reinterpret_cast<const HMODULE>(mi.lpBaseOfDll),
-          LibcInfo::function_name(i));
+      rgProcAddresses[i] =
+          (GenericFnPtr)::GetProcAddress(hModule, LibcInfo::function_name(i));
   }
 };
 
@@ -466,6 +479,18 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
   { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary },
 };
 
+bool LibcInfo::SameAs(const LibcInfo& that) const {
+  return (is_valid() &&
+          module_base_address_ == that.module_base_address_ &&
+          module_base_size_ == that.module_base_size_);
+}
+
+bool LibcInfo::SameAsModuleEntry(const ModuleEntryCopy& module_entry) const {
+  return (is_valid() &&
+          module_base_address_ == module_entry.modBaseAddr &&
+          module_base_size_ == module_entry.modBaseSize);
+}
+
 bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
   // First, store the location of the function to patch before
   // patching it.  If none of these functions are found in the module,
@@ -527,9 +552,10 @@ bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
   CHECK(windows_fn_[kFree]);
   CHECK(windows_fn_[kRealloc]);
 
-  // OK, we successfully populated.  Let's store our member information.
+  // OK, we successfully patched.  Let's store our member information.
module_base_address_ = module_entry.modBaseAddr; module_base_size_ = module_entry.modBaseSize; + strcpy(module_name_, module_entry.szModule); return true; } @@ -610,6 +636,14 @@ void WindowsInfo::Unpatch() { // You should hold the patch_all_modules_lock when calling this. void PatchOneModuleLocked(const LibcInfo& me_info) { + // Double-check we haven't seen this module before. + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (g_module_libcs[i]->SameAs(me_info)) { + fprintf(stderr, "%s:%d: FATAL PERFTOOLS ERROR: %s double-patched somehow.\n", + __FILE__, __LINE__, g_module_libcs[i]->module_name()); + CHECK(false); + } + } // If we don't already have info on this module, let's add it. This // is where we're sad that each libcX has a different type, so we // can't use an array; instead, we have to use a switch statement. @@ -652,70 +686,52 @@ void PatchMainExecutableLocked() { // patch_all_modules_lock, inside PatchAllModules(). static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); -// last_loaded: The set of modules that were loaded the last time -// PatchAllModules was called. This is an optimization for only -// looking at modules that were added or removed from the last call. -static std::set<HMODULE> *g_last_loaded; - // Iterates over all the modules currently loaded by the executable, -// according to windows, and makes sure they're all patched. Most -// modules will already be in loaded_modules, meaning we have already -// loaded and either patched them or determined they did not need to -// be patched. Others will not, which means we need to patch them -// (if necessary). Finally, we have to go through the existing -// g_module_libcs and see if any of those are *not* in the modules -// currently loaded by the executable. If so, we need to invalidate -// them. Returns true if we did any work (patching or invalidating), -// false if we were a noop. May update loaded_modules as well. -// NOTE: you must hold the patch_all_modules_lock to access loaded_modules. +// and makes sure they're all patched. For ones that aren't, we patch +// them in. We also check that every module we had patched in the +// past is still loaded, and update internal data structures if so. +// We return true if this PatchAllModules did any work, false else. bool PatchAllModules() { std::vector<ModuleEntryCopy> modules; bool made_changes = false; const HANDLE hCurrentProcess = GetCurrentProcess(); - DWORD num_modules = 0; + MODULEINFO mi; + DWORD cbNeeded = 0; HMODULE hModules[kMaxModules]; // max # of modules we support in one process - if (!::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), - &num_modules)) { - num_modules = 0; - } - // EnumProcessModules actually set the bytes written into hModules, - // so we need to divide to make num_modules actually be a module-count. 
- num_modules /= sizeof(*hModules); - if (num_modules >= kMaxModules) { - printf("PERFTOOLS ERROR: Too many modules in this executable to try" - " to patch them all (if you need to, raise kMaxModules in" - " patch_functions.cc).\n"); - num_modules = kMaxModules; + if (::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), + &cbNeeded)) { + for (int i = 0; i < cbNeeded / sizeof(*hModules); ++i) { + if (i >= kMaxModules) { + printf("PERFTOOLS ERROR: Too many modules in this executable to try" + " to patch them all (if you need to, raise kMaxModules in" + " patch_functions.cc).\n"); + break; + } + if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) + modules.push_back(ModuleEntryCopy(hCurrentProcess, hModules[i], mi)); + } } - // Now we handle the unpatching of modules we have in g_module_libcs - // but that were not found in EnumProcessModules. We need to - // invalidate them. To speed that up, we store the EnumProcessModules - // output in a set. - // At the same time, we prepare for the adding of new modules, by - // removing from hModules all the modules we know we've already - // patched (or decided don't need to be patched). At the end, - // hModules will hold only the modules that we need to consider patching. - std::set<HMODULE> currently_loaded_modules; + // Now do the actual patching and unpatching. { SpinLockHolder h(&patch_all_modules_lock); - if (!g_last_loaded) g_last_loaded = new std::set<HMODULE>; - // At the end of this loop, currently_loaded_modules contains the - // full list of EnumProcessModules, and hModules just the ones we - // haven't handled yet. - for (int i = 0; i < num_modules; ) { - currently_loaded_modules.insert(hModules[i]); - if (g_last_loaded->count(hModules[i]) > 0) { - hModules[i] = hModules[--num_modules]; // replace element i with tail - } else { - i++; // keep element i - } - } - // Now we do the unpatching/invalidation. for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { - if (g_module_libcs[i]->patched() && - currently_loaded_modules.count(g_module_libcs[i]->hmodule()) == 0) { + if (!g_module_libcs[i]->is_valid()) + continue; + bool still_loaded = false; + for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); + it != modules.end(); ++it) { + if (g_module_libcs[i]->SameAsModuleEntry(*it)) { + // Both g_module_libcs[i] and it are still valid. Mark it by + // removing it from the vector; mark g_module_libcs[i] by + // setting a bool. + modules.erase(it); + still_loaded = true; + break; + } + } + if (!still_loaded) { // Means g_module_libcs[i] is no longer loaded (no me32 matched). // We could call Unpatch() here, but why bother? The module // has gone away, so nobody is going to call into it anyway. @@ -723,28 +739,14 @@ bool PatchAllModules() { made_changes = true; } } - // Update the loaded module cache. - g_last_loaded->swap(currently_loaded_modules); - } - - // Now that we know what modules are new, let's get the info we'll - // need to patch them. Note this *cannot* be done while holding the - // lock, since it needs to make windows calls (see the lock-inversion - // comments before the definition of patch_all_modules_lock). - MODULEINFO mi; - for (int i = 0; i < num_modules; i++) { - if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) - modules.push_back(ModuleEntryCopy(mi)); - } - // Now we can do the patching of new modules. 
- { - SpinLockHolder h(&patch_all_modules_lock); - for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); + // We've handled all the g_module_libcs. Now let's handle the rest + // of the module-entries: those that haven't already been loaded. + for (std::vector<ModuleEntryCopy>::const_iterator it = modules.begin(); it != modules.end(); ++it) { LibcInfo libc_info; if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines - PatchOneModuleLocked(libc_info); + PatchOneModuleLocked(libc_info); // updates num_patched_modules made_changes = true; } } @@ -757,10 +759,6 @@ bool PatchAllModules() { made_changes = true; } } - // TODO(csilvers): for this to be reliable, we need to also take - // into account if we *would* have patched any modules had they not - // already been loaded. (That is, made_changes should ignore - // g_last_loaded.) return made_changes; } @@ -768,9 +766,59 @@ bool PatchAllModules() { } // end unnamed namespace // --------------------------------------------------------------------- -// Now that we've done all the patching machinery, let's actually -// define the functions we're patching in. Mostly these are -// simple wrappers around the do_* routines in tcmalloc.cc. +// PatchWindowsFunctions() +// This is the function that is exposed to the outside world. +// It should be called before the program becomes multi-threaded, +// since main_executable_windows.Patch() is not thread-safe. +// --------------------------------------------------------------------- + +void PatchWindowsFunctions() { + // This does the libc patching in every module, and the main executable. + PatchAllModules(); + main_executable_windows.Patch(); +} + +#if 0 +// It's possible to unpatch all the functions when we are exiting. + +// The idea is to handle properly windows-internal data that is +// allocated before PatchWindowsFunctions is called. If all +// destruction happened in reverse order from construction, then we +// could call UnpatchWindowsFunctions at just the right time, so that +// that early-allocated data would be freed using the windows +// allocation functions rather than tcmalloc. The problem is that +// windows allocates some structures lazily, so it would allocate them +// late (using tcmalloc) and then try to deallocate them late as well. +// So instead of unpatching, we just modify all the tcmalloc routines +// so they call through to the libc rountines if the memory in +// question doesn't seem to have been allocated with tcmalloc. I keep +// this unpatch code around for reference. + +void UnpatchWindowsFunctions() { + // We need to go back to the system malloc/etc at global destruct time, + // so objects that were constructed before tcmalloc, using the system + // malloc, can destroy themselves using the system free. This depends + // on DLLs unloading in the reverse order in which they load! + // + // We also go back to the default HeapAlloc/etc, just for consistency. + // Who knows, it may help avoid weird bugs in some situations. 
+ main_executable_windows.Unpatch(); + main_executable.Unpatch(); + if (libc1.is_valid()) libc1.Unpatch(); + if (libc2.is_valid()) libc2.Unpatch(); + if (libc3.is_valid()) libc3.Unpatch(); + if (libc4.is_valid()) libc4.Unpatch(); + if (libc5.is_valid()) libc5.Unpatch(); + if (libc6.is_valid()) libc6.Unpatch(); + if (libc7.is_valid()) libc7.Unpatch(); + if (libc8.is_valid()) libc8.Unpatch(); +} +#endif + +// --------------------------------------------------------------------- +// Now that we've done all the patching machinery, let's end the file +// by actually defining the functions we're patching in. Mostly these +// are simple wrappers around the do_* routines in tcmalloc.cc. // // In fact, we #include tcmalloc.cc to get at the tcmalloc internal // do_* functions, the better to write our own hook functions. @@ -981,107 +1029,19 @@ BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) { lpBaseAddress); } -// g_load_map holds a copy of windows' refcount for how many times -// each currently loaded module has been loaded and unloaded. We use -// it as an optimization when the same module is loaded more than -// once: as long as the refcount stays above 1, we don't need to worry -// about patching because it's already patched. Likewise, we don't -// need to unpatch until the refcount drops to 0. load_map is -// maintained in LoadLibraryExW and FreeLibrary, and only covers -// modules explicitly loaded/freed via those interfaces. -static std::map<HMODULE, int>* g_load_map = NULL; - HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, HANDLE hFile, DWORD dwFlags) { - HMODULE rv; - // Check to see if the modules is already loaded, flag 0 gets a - // reference if it was loaded. If it was loaded no need to call - // PatchAllModules, just increase the reference count to match - // what GetModuleHandleExW does internally inside windows. - if (::GetModuleHandleExW(0, lpFileName, &rv)) { - return rv; - } else { - // Not already loaded, so load it. - rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) - function_info_[kLoadLibraryExW].origstub_fn)( - lpFileName, hFile, dwFlags); - // This will patch any newly loaded libraries, if patching needs - // to be done. - PatchAllModules(); - - return rv; - } + HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) + function_info_[kLoadLibraryExW].origstub_fn)( + lpFileName, hFile, dwFlags); + PatchAllModules(); + return rv; } BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) { BOOL rv = ((BOOL (WINAPI *)(HMODULE)) function_info_[kFreeLibrary].origstub_fn)(hLibModule); - - // Check to see if the module is still loaded by passing the base - // address and seeing if it comes back with the same address. If it - // is the same address it's still loaded, so the FreeLibrary() call - // was a noop, and there's no need to redo the patching. - HMODULE owner = NULL; - BOOL result = ::GetModuleHandleExW( - (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | - GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT), - (LPCWSTR)hLibModule, - &owner); - if (result && owner == hLibModule) - return rv; - PatchAllModules(); // this will fix up the list of patched libraries return rv; } - - -// --------------------------------------------------------------------- -// PatchWindowsFunctions() -// This is the function that is exposed to the outside world. -// It should be called before the program becomes multi-threaded, -// since main_executable_windows.Patch() is not thread-safe. 
-// --------------------------------------------------------------------- - -void PatchWindowsFunctions() { - // This does the libc patching in every module, and the main executable. - PatchAllModules(); - main_executable_windows.Patch(); -} - -#if 0 -// It's possible to unpatch all the functions when we are exiting. - -// The idea is to handle properly windows-internal data that is -// allocated before PatchWindowsFunctions is called. If all -// destruction happened in reverse order from construction, then we -// could call UnpatchWindowsFunctions at just the right time, so that -// that early-allocated data would be freed using the windows -// allocation functions rather than tcmalloc. The problem is that -// windows allocates some structures lazily, so it would allocate them -// late (using tcmalloc) and then try to deallocate them late as well. -// So instead of unpatching, we just modify all the tcmalloc routines -// so they call through to the libc rountines if the memory in -// question doesn't seem to have been allocated with tcmalloc. I keep -// this unpatch code around for reference. - -void UnpatchWindowsFunctions() { - // We need to go back to the system malloc/etc at global destruct time, - // so objects that were constructed before tcmalloc, using the system - // malloc, can destroy themselves using the system free. This depends - // on DLLs unloading in the reverse order in which they load! - // - // We also go back to the default HeapAlloc/etc, just for consistency. - // Who knows, it may help avoid weird bugs in some situations. - main_executable_windows.Unpatch(); - main_executable.Unpatch(); - if (libc1.is_valid()) libc1.Unpatch(); - if (libc2.is_valid()) libc2.Unpatch(); - if (libc3.is_valid()) libc3.Unpatch(); - if (libc4.is_valid()) libc4.Unpatch(); - if (libc5.is_valid()) libc5.Unpatch(); - if (libc6.is_valid()) libc6.Unpatch(); - if (libc7.is_valid()) libc7.Unpatch(); - if (libc8.is_valid()) libc8.Unpatch(); -} -#endif |
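
For readers tracing the header hunk at the top of this section: the tc_*_nothrow entry points mirror C++'s operator new(std::nothrow), reporting allocation failure by returning NULL instead of throwing std::bad_alloc. A minimal sketch of the usual forwarding pattern, not the actual tcmalloc implementation; do_malloc_sketch and do_free_sketch are hypothetical stand-ins for tcmalloc's internal do_* routines:

#include <stddef.h>
#include <stdlib.h>
#include <new>

// Hypothetical stand-ins for tcmalloc's internal allocator routines.
static void* do_malloc_sketch(size_t size) { return malloc(size); }
static void  do_free_sketch(void* p)       { free(p); }

// Throwing variant: the shape of tc_new / operator new(size_t).
void* tc_new_sketch(size_t size) {
  void* p = do_malloc_sketch(size);
  if (p == NULL) throw std::bad_alloc();   // out of memory
  return p;
}

// Nothrow variant: the shape of tc_new_nothrow; failure is a NULL return.
void* tc_new_nothrow_sketch(size_t size, const std::nothrow_t&) throw() {
  return do_malloc_sketch(size);           // NULL on failure, never throws
}

void tc_delete_sketch(void* p) throw() { do_free_sketch(p); }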
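
The new ModuleEntryCopy constructor above deliberately snapshots each module's base name and proc addresses eagerly, so no windows call happens while patch_all_modules_lock is held. A self-contained sketch of the same enumeration pattern (Windows-only; link against psapi.lib; the bound and buffer sizes here are ours, not from the patch):

#include <windows.h>
#include <psapi.h>
#include <stdio.h>

// Enumerate the current process's modules and print the per-module data
// that ModuleEntryCopy captures: base address, image size, and basename.
int main() {
  const int kMaxLocalModules = 256;        // hypothetical local bound
  HMODULE mods[kMaxLocalModules];
  DWORD bytes_needed = 0;
  HANDLE process = GetCurrentProcess();
  if (!EnumProcessModules(process, mods, sizeof(mods), &bytes_needed))
    return 1;
  // EnumProcessModules reports bytes written, not a module count.
  DWORD count = bytes_needed / sizeof(*mods);
  if (count > kMaxLocalModules) count = kMaxLocalModules;
  for (DWORD i = 0; i < count; ++i) {
    MODULEINFO mi;
    char name[256];
    if (GetModuleInformation(process, mods[i], &mi, sizeof(mi)) &&
        GetModuleBaseNameA(process, mods[i], name, sizeof(name))) {
      printf("%-30s base=%p size=%lu\n",
             name, mi.lpBaseOfDll, (unsigned long)mi.SizeOfImage);
    }
  }
  return 0;
}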
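
The deleted g_last_loaded logic was an optimization: cache the set of HMODULEs seen on the previous PatchAllModules pass, and only do patch/unpatch work for modules that appeared or vanished since then. A standalone sketch of that set-diff bookkeeping, under the assumption that ProcessNewModule and InvalidateModule (left as comments here) stand in for the real patching work:

#include <windows.h>
#include <set>
#include <vector>

// Snapshot of the modules seen on the previous pass; allocated lazily,
// like the g_last_loaded pointer this revert removes.
static std::set<HMODULE>* g_last_seen = NULL;

// Returns true if any module appeared or disappeared since last time.
bool DiffModules(const std::vector<HMODULE>& now_loaded) {
  if (g_last_seen == NULL) g_last_seen = new std::set<HMODULE>;
  std::set<HMODULE> current(now_loaded.begin(), now_loaded.end());
  bool changed = false;
  // Newly appeared modules: in `current` but not in the last snapshot.
  for (std::set<HMODULE>::iterator it = current.begin();
       it != current.end(); ++it) {
    if (g_last_seen->count(*it) == 0) {
      /* ProcessNewModule(*it); */         // hypothetical patch hook
      changed = true;
    }
  }
  // Vanished modules: in the last snapshot but no longer loaded.
  for (std::set<HMODULE>::iterator it = g_last_seen->begin();
       it != g_last_seen->end(); ++it) {
    if (current.count(*it) == 0) {
      /* InvalidateModule(*it); */         // hypothetical unpatch hook
      changed = true;
    }
  }
  g_last_seen->swap(current);              // keep the snapshot for next time
  return changed;
}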
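
Similarly, the deleted Perftools_LoadLibraryExW body short-circuited repeated loads: GetModuleHandleExW with dwFlags == 0 both tests whether the module is already loaded and bumps its reference count, matching what a real LoadLibrary would have done, so a full re-patch pass can be skipped. A sketch of that shortcut outside the interception machinery, where RepatchEverything is a hypothetical stand-in for PatchAllModules:

#include <windows.h>

static void RepatchEverything() { /* stand-in for PatchAllModules() */ }

HMODULE LoadAndMaybePatch(LPCWSTR file_name, HANDLE file, DWORD flags) {
  HMODULE handle = NULL;
  // dwFlags == 0: returns the handle and increments the refcount if the
  // module is already mapped into the process.
  if (::GetModuleHandleExW(0, file_name, &handle))
    return handle;          // already loaded; nothing new to patch
  handle = ::LoadLibraryExW(file_name, file, flags);
  if (handle != NULL)
    RepatchEverything();    // a new module may carry its own libc to patch
  return handle;
}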
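
The companion check deleted from Perftools_FreeLibrary asked windows whether the module is still mapped after the free: if GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS resolves the old base address back to the same HMODULE, the FreeLibrary only dropped a reference count and the patch bookkeeping is still valid. A sketch of the same test, with FixupPatches as a hypothetical stand-in for the PatchAllModules cleanup pass:

#include <windows.h>

static void FixupPatches() { /* stand-in for PatchAllModules() */ }

BOOL FreeAndMaybeUnpatch(HMODULE module) {
  BOOL rv = ::FreeLibrary(module);
  HMODULE owner = NULL;
  BOOL still_loaded = ::GetModuleHandleExW(
      GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
      GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
      (LPCWSTR)module,      // treated as an address because of FROM_ADDRESS
      &owner);
  if (!(still_loaded && owner == module))
    FixupPatches();         // the module really went away; clean up bookkeeping
  return rv;
}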