| author | glider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-20 12:22:51 +0000 |
|---|---|---|
| committer | glider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-20 12:22:51 +0000 |
| commit | db3fb1cb119f862b96ca3de8f74ff647fef6f94e (patch) | |
| tree | d547159ec152ac05ed793f9cac867b500a42a8f7 /third_party/tcmalloc | |
| parent | fa82f93da256dede111ee4143c340e55a195d7e3 (diff) | |
| download | chromium_src-db3fb1cb119f862b96ca3de8f74ff647fef6f94e.zip chromium_src-db3fb1cb119f862b96ca3de8f74ff647fef6f94e.tar.gz chromium_src-db3fb1cb119f862b96ca3de8f74ff647fef6f94e.tar.bz2 | |
The newer version of tcmalloc should fix the problems seen when running tcmalloc under Valgrind.
Review URL: http://codereview.chromium.org/1735024
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@47789 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/tcmalloc')
54 files changed, 6226 insertions, 1748 deletions
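The substance of this update is the rewritten dynamic-annotations layer shown in the diff below: the implementation moves from C++ (dynamic_annotations.cc) to pure C (dynamic_annotations.c), the annotation macros are gated on DYNAMIC_ANNOTATIONS_ENABLED rather than NDEBUG, and RunningOnValgrind() becomes a plain-C entry point. The following minimal sketch (illustrative only, not part of the commit) shows how client code might consume the revised API; it assumes a build against this tree with -DDYNAMIC_ANNOTATIONS_ENABLED=1 and dynamic_annotations.c linked in.

```cpp
// Illustrative client of the revised annotation API (not from the patch).
#include <cstdio>

#include "base/dynamic_annotations.h"

// A deliberately unsynchronized counter: the new sized benign-race
// annotation tells ThreadSanitizer/Helgrind/DRD to stay quiet about it.
static int g_stats_counter = 0;
ANNOTATE_BENIGN_RACE_STATIC(g_stats_counter, "approximate stats counter")

int main() {
  // Non-zero under a Valgrind-based tool; it can also be forced with the
  // RUNNING_ON_VALGRIND=1 environment variable, which helps with tools
  // (e.g. callgrind) that do not intercept functions.
  if (RunningOnValgrind()) {
    std::printf("running under Valgrind: annotations are live\n");
  }
  ++g_stats_counter;  // racy by design; suppressed by the annotation above
  return 0;
}
```

The same RunningOnValgrind() check is what the REGISTER_MODULE_INITIALIZER block in debugallocation.cc now performs before calling MallocExtension::Register(), so that Valgrind's replacement malloc wins whenever a Valgrind tool is present.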
diff --git a/third_party/tcmalloc/README.chromium b/third_party/tcmalloc/README.chromium index a8352a9..1ceda91 100644 --- a/third_party/tcmalloc/README.chromium +++ b/third_party/tcmalloc/README.chromium @@ -22,9 +22,8 @@ Contents: The current revision is: - Last Changed Rev: 77 - Last Changed Date: 2009-10-27 10:30:52 -0700 (Tue, 27 Oct 2009) - + Last Changed Rev: 94 + Last Changed Date: 2010-05-08 01:53:24 +0400 (Sat, 08 May 2010) HOWTOs: diff --git a/third_party/tcmalloc/chromium/src/base/basictypes.h b/third_party/tcmalloc/chromium/src/base/basictypes.h index 9991413..ab9cdabc 100644 --- a/third_party/tcmalloc/chromium/src/base/basictypes.h +++ b/third_party/tcmalloc/chromium/src/base/basictypes.h @@ -308,6 +308,14 @@ class AssignAttributeStartEnd { #endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__ +#if defined(HAVE___ATTRIBUTE__) && (defined(__i386__) || defined(__x86_64__)) +# define CACHELINE_SIZE 64 +# define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE))) +#else +# define CACHELINE_ALIGNED +#endif // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__) + + // The following enum should be used only as a constructor argument to indicate // that the variable has static storage class, and that the constructor should // do nothing to its state. It indicates to the reader that it is legal to diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c new file mode 100644 index 0000000..cdefaa7 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c @@ -0,0 +1,148 @@ +/* Copyright (c) 2008-2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Kostya Serebryany + */ + +#ifdef __cplusplus +# error "This file should be built as pure C to avoid name mangling" +#endif + +#include <stdlib.h> +#include <string.h> + +#include "base/dynamic_annotations.h" + +#ifdef __GNUC__ +/* valgrind.h uses gcc extensions so it won't build with other compilers */ +# ifdef HAVE_VALGRIND_H /* prefer the user's copy if they have it */ +# include <valgrind.h> +# else /* otherwise just use the copy that we have */ +# include "third_party/valgrind.h" +# endif +#endif + +/* Each function is empty and called (via a macro) only in debug mode. + The arguments are captured by dynamic tools at runtime. */ + +#if DYNAMIC_ANNOTATIONS_ENABLED == 1 + +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed) {} +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier) {} + +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock){} +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv){} +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv){} +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq){} +void AnnotateNewMemory(const char *file, int line, + const volatile void *mem, + long size){} +void AnnotateExpectRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void AnnotateBenignRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void AnnotateBenignRaceSized(const char *file, int line, + const volatile void *mem, + long size, + const char *description) {} +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu){} +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg){} +void AnnotateThreadName(const char *file, int line, + const char *name){} +void AnnotateIgnoreReadsBegin(const char *file, int line){} +void AnnotateIgnoreReadsEnd(const char *file, int line){} +void AnnotateIgnoreWritesBegin(const char *file, int line){} +void AnnotateIgnoreWritesEnd(const char *file, int line){} +void AnnotateEnableRaceDetection(const char *file, int line, int enable){} +void AnnotateNoOp(const char *file, int line, + const volatile void *arg){} +void AnnotateFlushState(const char *file, int line){} + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 1 */ + +static int 
GetRunningOnValgrind(void) { +#ifdef RUNNING_ON_VALGRIND + if (RUNNING_ON_VALGRIND) return 1; +#endif + // TODO(csilvers): use GetenvBeforeMain() instead? Will need to + // change it to be extern "C". + char *running_on_valgrind_str = getenv("RUNNING_ON_VALGRIND"); + if (running_on_valgrind_str) { + return strcmp(running_on_valgrind_str, "0") != 0; + } + return 0; +} + +/* See the comments in dynamic_annotations.h */ +int RunningOnValgrind(void) { + static volatile int running_on_valgrind = -1; + /* C doesn't have thread-safe initialization of statics, and we + don't want to depend on pthread_once here, so hack it. */ + int local_running_on_valgrind = running_on_valgrind; + if (local_running_on_valgrind == -1) + running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind(); + return local_running_on_valgrind; +} diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc deleted file mode 100644 index c8bbcd7..0000000 --- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2008, Google Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * --- - * Author: Kostya Serebryany - */ - -#include <config.h> -#include <stdlib.h> -#include <string.h> - -#include "base/dynamic_annotations.h" -#include "base/sysinfo.h" - -// Each function is empty and called (via a macro) only in debug mode. -// The arguments are captured by dynamic tools at runtime. 
- -extern "C" void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock){} -extern "C" void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock){} -extern "C" void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w){} -extern "C" void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w){} -extern "C" void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock){} -extern "C" void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv){} -extern "C" void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv){} -extern "C" void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, - long size){} -extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line, - const volatile void *address, - long size){} -extern "C" void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq){} -extern "C" void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq){} -extern "C" void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq){} -extern "C" void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq){} -extern "C" void AnnotateNewMemory(const char *file, int line, - const volatile void *mem, - long size){} -extern "C" void AnnotateExpectRace(const char *file, int line, - const volatile void *mem, - const char *description){} -extern "C" void AnnotateBenignRace(const char *file, int line, - const volatile void *mem, - const char *description){} -extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu){} -extern "C" void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg){} -extern "C" void AnnotateThreadName(const char *file, int line, - const char *name){} -extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line){} -extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line){} -extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line){} -extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line){} -extern "C" void AnnotateNoOp(const char *file, int line, - const volatile void *arg){} - -static int GetRunningOnValgrind() { - const char *running_on_valgrind_str = GetenvBeforeMain("RUNNING_ON_VALGRIND"); - if (running_on_valgrind_str) { - return strcmp(running_on_valgrind_str, "0") != 0; - } - return 0; -} - -// When running under valgrind, this function will be intercepted -// and a non-zero value will be returned. -// Some valgrind-based tools (e.g. callgrind) do not intercept functions, -// so we also read environment variable. -extern "C" int RunningOnValgrind() { - static int running_on_valgrind = GetRunningOnValgrind(); - return running_on_valgrind; -} diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h index a2a268f..dae1a14 100644 --- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h +++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h @@ -1,10 +1,10 @@ /* Copyright (c) 2008, Google Inc. * All rights reserved. 
- * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * + * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above @@ -14,7 +14,7 @@ * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -31,445 +31,471 @@ * Author: Kostya Serebryany */ -// This file defines dynamic annotations for use with dynamic analysis -// tool such as valgrind, PIN, etc. -// -// Dynamic annotation is a source code annotation that affects -// the generated code (that is, the annotation is not a comment). -// Each such annotation is attached to a particular -// instruction and/or to a particular object (address) in the program. -// -// The annotations that should be used by users are macros in all upper-case -// (e.g., ANNOTATE_NEW_MEMORY). -// -// Actual implementation of these macros may differ depending on the -// dynamic analysis tool being used. -// -// This file supports the following dynamic analysis tools: -// - None (NDEBUG is defined). -// Macros are defined empty. -// - Helgrind (NDEBUG is not defined). -// Macros are defined as calls to non-inlinable empty functions -// that are intercepted by helgrind. -// +/* This file defines dynamic annotations for use with dynamic analysis + tool such as valgrind, PIN, etc. + + Dynamic annotation is a source code annotation that affects + the generated code (that is, the annotation is not a comment). + Each such annotation is attached to a particular + instruction and/or to a particular object (address) in the program. + + The annotations that should be used by users are macros in all upper-case + (e.g., ANNOTATE_NEW_MEMORY). + + Actual implementation of these macros may differ depending on the + dynamic analysis tool being used. + + See http://code.google.com/p/data-race-test/ for more information. + + This file supports the following dynamic analysis tools: + - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero). + Macros are defined empty. + - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1). + Macros are defined as calls to non-inlinable empty functions + that are intercepted by Valgrind. */ + #ifndef BASE_DYNAMIC_ANNOTATIONS_H_ #define BASE_DYNAMIC_ANNOTATIONS_H_ -#include "base/thread_annotations.h" - -// All the annotation macros are in effect only in debug mode. -#ifndef NDEBUG - - // ------------------------------------------------------------- - // Annotations useful when implementing condition variables such as CondVar, - // using conditional critical sections (Await/LockWhen) and when constructing - // user-defined synchronization mechanisms. - // - // The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can - // be used to define happens-before arcs in user-defined synchronization - // mechanisms: the race detector will infer an arc from the former to the - // latter when they share the same argument pointer. 
- // - // Example 1 (reference counting): - // - // void Unref() { - // ANNOTATE_HAPPENS_BEFORE(&refcount_); - // if (AtomicDecrementByOne(&refcount_) == 0) { - // ANNOTATE_HAPPENS_AFTER(&refcount_); - // delete this; - // } - // } - // - // Example 2 (message queue): - // - // void MyQueue::Put(Type *e) { - // MutexLock lock(&mu_); - // ANNOTATE_HAPPENS_BEFORE(e); - // PutElementIntoMyQueue(e); - // } - // - // Type *MyQueue::Get() { - // MutexLock lock(&mu_); - // Type *e = GetElementFromMyQueue(); - // ANNOTATE_HAPPENS_AFTER(e); - // return e; - // } - // - // Note: when possible, please use the existing reference counting and message - // queue implementations instead of inventing new ones. - - // Report that wait on the condition variable at address "cv" has succeeded - // and the lock at address "lock" is held. +#ifndef DYNAMIC_ANNOTATIONS_ENABLED +# define DYNAMIC_ANNOTATIONS_ENABLED 0 +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 + + /* ------------------------------------------------------------- + Annotations useful when implementing condition variables such as CondVar, + using conditional critical sections (Await/LockWhen) and when constructing + user-defined synchronization mechanisms. + + The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can + be used to define happens-before arcs in user-defined synchronization + mechanisms: the race detector will infer an arc from the former to the + latter when they share the same argument pointer. + + Example 1 (reference counting): + + void Unref() { + ANNOTATE_HAPPENS_BEFORE(&refcount_); + if (AtomicDecrementByOne(&refcount_) == 0) { + ANNOTATE_HAPPENS_AFTER(&refcount_); + delete this; + } + } + + Example 2 (message queue): + + void MyQueue::Put(Type *e) { + MutexLock lock(&mu_); + ANNOTATE_HAPPENS_BEFORE(e); + PutElementIntoMyQueue(e); + } + + Type *MyQueue::Get() { + MutexLock lock(&mu_); + Type *e = GetElementFromMyQueue(); + ANNOTATE_HAPPENS_AFTER(e); + return e; + } + + Note: when possible, please use the existing reference counting and message + queue implementations instead of inventing new ones. */ + + /* Report that wait on the condition variable at address "cv" has succeeded + and the lock at address "lock" is held. */ #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) - // Report that wait on the condition variable at "cv" has succeeded. Variant - // w/o lock. + /* Report that wait on the condition variable at "cv" has succeeded. Variant + w/o lock. */ #define ANNOTATE_CONDVAR_WAIT(cv) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) - // Report that we are about to signal on the condition variable at address - // "cv". + /* Report that we are about to signal on the condition variable at address + "cv". */ #define ANNOTATE_CONDVAR_SIGNAL(cv) \ AnnotateCondVarSignal(__FILE__, __LINE__, cv) - // Report that we are about to signal_all on the condition variable at "cv". + /* Report that we are about to signal_all on the condition variable at "cv". */ #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) - // Annotations for user-defined synchronization mechanisms. + /* Annotations for user-defined synchronization mechanisms. */ #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) - // Report that the bytes in the range [pointer, pointer+size) are about - // to be published safely. 
The race checker will create a happens-before - // arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to - // subsequent accesses to this memory. + /* Report that the bytes in the range [pointer, pointer+size) are about + to be published safely. The race checker will create a happens-before + arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + subsequent accesses to this memory. + Note: this annotation may not work properly if the race detector uses + sampling, i.e. does not observe all memory accesses. + */ #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) - // Report that the bytes in the range [pointer, pointer+size) are not shared - // between threads any more and can be safely used by the current thread w/o - // synchronization. The race checker will create a happens-before arc from - // all previous accesses to this memory to this call. - // - // This annotation could be applied to complex objects, such as STL - // containers, with one condition: the accesses to the object itself - // and its internal data should not be separated with any synchronization. - // - // Example that works: - // - // map<int, int> the_map; - // void Thread1() { - // MutexLock lock(&mu); - // // Ok: accesses to the_map and its internal data is not separated by - // // synchronization. - // the_map[1]++; - // } - // void Thread2() { - // { - // MutexLock lock(&mu); - // ... - // // because of some reason we know that the_map will not be used by - // // other threads any more - // ANNOTATE_UNPUBLISH_MEMORY_RANGE(&the_map, sizeof(the_map)); - // } - // the_map->DoSomething(); - // } - // - // Example that does not work (due to the way happens-before arcs are - // represented in some race detectors): - // - // void Thread1() { - // MutexLock lock(&mu); - // int *guts_of_the_map = &(*the_map)[1]; - // // we have some synchronization between access to 'c' and its guts. - // // This will make ANNOTATE_UNPUBLISH_MEMORY_RANGE in Thread2 useless. - // some_other_lock_or_other_synchronization_utility.Lock(); - // (*guts_of_the_map)++; - // ... - // } - // - // void Thread1() { // same as above... + /* DEPRECATED. Don't use it. */ #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) - // This annotation should be used to annotate thread-safe swapping of - // containers. Required only when using hybrid (i.e. not pure happens-before) - // detectors. - // - // This annotation has the same limitation as ANNOTATE_UNPUBLISH_MEMORY_RANGE - // (see above). - // - // Example: - // map<int, int> the_map; - // void Thread1() { - // MutexLock lock(&mu); - // the_map[1]++; - // } - // void Thread2() { - // map<int,int> tmp; - // { - // MutexLock lock(&mu); - // the_map.swap(tmp); - // ANNOTATE_SWAP_MEMORY_RANGE(&the_map, sizeof(the_map)); - // } - // tmp->DoSomething(); - // } + /* DEPRECATED. Don't use it. */ #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ do { \ ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ } while (0) - // Instruct the tool to create a happens-before arc between mu->Unlock() and - // mu->Lock(). This annotation may slow down the race detector and hide real - // races. Normally it is used only when it would be difficult to annotate each - // of the mutex's critical sections individually using the annotations above. - // This annotation makes sense only for hybrid race detectors. 
For pure - // happens-before detectors this is a no-op. For more details see - // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . + /* Instruct the tool to create a happens-before arc between mu->Unlock() and + mu->Lock(). This annotation may slow down the race detector and hide real + races. Normally it is used only when it would be difficult to annotate each + of the mutex's critical sections individually using the annotations above. + This annotation makes sense only for hybrid race detectors. For pure + happens-before detectors this is a no-op. For more details see + http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */ #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. + /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */ #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - // ------------------------------------------------------------- - // Annotations useful when defining memory allocators, or when memory that - // was protected in one way starts to be protected in another. + /* ------------------------------------------------------------- + Annotations useful when defining memory allocators, or when memory that + was protected in one way starts to be protected in another. */ - // Report that a new memory at "address" of size "size" has been allocated. - // This might be used when the memory has been retrieved from a free list and - // is about to be reused, or when a the locking discipline for a variable - // changes. + /* Report that a new memory at "address" of size "size" has been allocated. + This might be used when the memory has been retrieved from a free list and + is about to be reused, or when a the locking discipline for a variable + changes. */ #define ANNOTATE_NEW_MEMORY(address, size) \ AnnotateNewMemory(__FILE__, __LINE__, address, size) - // ------------------------------------------------------------- - // Annotations useful when defining FIFO queues that transfer data between - // threads. + /* ------------------------------------------------------------- + Annotations useful when defining FIFO queues that transfer data between + threads. */ - // Report that the producer-consumer queue (such as ProducerConsumerQueue) at - // address "pcq" has been created. The ANNOTATE_PCQ_* annotations - // should be used only for FIFO queues. For non-FIFO queues use - // ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). + /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at + address "pcq" has been created. The ANNOTATE_PCQ_* annotations + should be used only for FIFO queues. For non-FIFO queues use + ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */ #define ANNOTATE_PCQ_CREATE(pcq) \ AnnotatePCQCreate(__FILE__, __LINE__, pcq) - // Report that the queue at address "pcq" is about to be destroyed. + /* Report that the queue at address "pcq" is about to be destroyed. */ #define ANNOTATE_PCQ_DESTROY(pcq) \ AnnotatePCQDestroy(__FILE__, __LINE__, pcq) - // Report that we are about to put an element into a FIFO queue at address - // "pcq". + /* Report that we are about to put an element into a FIFO queue at address + "pcq". */ #define ANNOTATE_PCQ_PUT(pcq) \ AnnotatePCQPut(__FILE__, __LINE__, pcq) - // Report that we've just got an element from a FIFO queue at address "pcq". 
+ /* Report that we've just got an element from a FIFO queue at address "pcq". */ #define ANNOTATE_PCQ_GET(pcq) \ AnnotatePCQGet(__FILE__, __LINE__, pcq) - // ------------------------------------------------------------- - // Annotations that suppress errors. It is usually better to express the - // program's synchronization using the other annotations, but these can - // be used when all else fails. - - // Report that we may have a benign race on at "address". - // Insert at the point where "address" has been allocated, preferably close - // to the point where the race happens. - // See also ANNOTATE_BENIGN_RACE_STATIC. - #define ANNOTATE_BENIGN_RACE(address, description) \ - AnnotateBenignRace(__FILE__, __LINE__, address, description) - - // Request the analysis tool to ignore all reads in the current thread - // until ANNOTATE_IGNORE_READS_END is called. - // Useful to ignore intentional racey reads, while still checking - // other reads and all writes. - // See also ANNOTATE_UNPROTECTED_READ. + /* ------------------------------------------------------------- + Annotations that suppress errors. It is usually better to express the + program's synchronization using the other annotations, but these can + be used when all else fails. */ + + /* Report that we may have a benign race at "pointer", with size + "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the + point where "pointer" has been allocated, preferably close to the point + where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */ + #define ANNOTATE_BENIGN_RACE(pointer, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ + sizeof(*(pointer)), description) + + /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to + the memory range [address, address+size). */ + #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) + + /* Request the analysis tool to ignore all reads in the current thread + until ANNOTATE_IGNORE_READS_END is called. + Useful to ignore intentional racey reads, while still checking + other reads and all writes. + See also ANNOTATE_UNPROTECTED_READ. */ #define ANNOTATE_IGNORE_READS_BEGIN() \ AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - // Stop ignoring reads. + /* Stop ignoring reads. */ #define ANNOTATE_IGNORE_READS_END() \ AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - // Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. + /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ #define ANNOTATE_IGNORE_WRITES_BEGIN() \ AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - // Stop ignoring writes. + /* Stop ignoring writes. */ #define ANNOTATE_IGNORE_WRITES_END() \ AnnotateIgnoreWritesEnd(__FILE__, __LINE__) - // Start ignoring all memory accesses (reads and writes). + /* Start ignoring all memory accesses (reads and writes). */ #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ do {\ ANNOTATE_IGNORE_READS_BEGIN();\ ANNOTATE_IGNORE_WRITES_BEGIN();\ }while(0)\ - // Stop ignoring all memory accesses. + /* Stop ignoring all memory accesses. */ #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ do {\ ANNOTATE_IGNORE_WRITES_END();\ ANNOTATE_IGNORE_READS_END();\ }while(0)\ - // ------------------------------------------------------------- - // Annotations useful for debugging. + /* Enable (enable!=0) or disable (enable==0) race detection for all threads. 
+ This annotation could be useful if you want to skip expensive race analysis + during some period of program execution, e.g. during initialization. */ + #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ + AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) - // Request to trace every access to "address". + /* ------------------------------------------------------------- + Annotations useful for debugging. */ + + /* Request to trace every access to "address". */ #define ANNOTATE_TRACE_MEMORY(address) \ AnnotateTraceMemory(__FILE__, __LINE__, address) - // Report the current thread name to a race detector. + /* Report the current thread name to a race detector. */ #define ANNOTATE_THREAD_NAME(name) \ AnnotateThreadName(__FILE__, __LINE__, name) - // ------------------------------------------------------------- - // Annotations useful when implementing locks. They are not - // normally needed by modules that merely use locks. - // The "lock" argument is a pointer to the lock object. + /* ------------------------------------------------------------- + Annotations useful when implementing locks. They are not + normally needed by modules that merely use locks. + The "lock" argument is a pointer to the lock object. */ - // Report that a lock has been created at address "lock". + /* Report that a lock has been created at address "lock". */ #define ANNOTATE_RWLOCK_CREATE(lock) \ AnnotateRWLockCreate(__FILE__, __LINE__, lock) - // Report that the lock at address "lock" is about to be destroyed. + /* Report that the lock at address "lock" is about to be destroyed. */ #define ANNOTATE_RWLOCK_DESTROY(lock) \ AnnotateRWLockDestroy(__FILE__, __LINE__, lock) - // Report that the lock at address "lock" has been acquired. - // is_w=1 for writer lock, is_w=0 for reader lock. + /* Report that the lock at address "lock" has been acquired. + is_w=1 for writer lock, is_w=0 for reader lock. */ #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) - // Report that the lock at address "lock" is about to be released. + /* Report that the lock at address "lock" is about to be released. */ #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) - // ------------------------------------------------------------- - // Annotations useful for testing race detectors. + /* ------------------------------------------------------------- + Annotations useful when implementing barriers. They are not + normally needed by modules that merely use barriers. + The "barrier" argument is a pointer to the barrier object. */ + + /* Report that the "barrier" has been initialized with initial "count". + If 'reinitialization_allowed' is true, initialization is allowed to happen + multiple times w/o calling barrier_destroy() */ + #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ + AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ + reinitialization_allowed) + + /* Report that we are about to enter barrier_wait("barrier"). */ + #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ + AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) + + /* Report that we just exited barrier_wait("barrier"). */ + #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ + AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) + + /* Report that the "barrier" has been destroyed. 
*/ + #define ANNOTATE_BARRIER_DESTROY(barrier) \ + AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) + + /* ------------------------------------------------------------- + Annotations useful for testing race detectors. */ - // Report that we expect a race on the variable at "address". - // Use only in unit tests for a race detector. + /* Report that we expect a race on the variable at "address". + Use only in unit tests for a race detector. */ #define ANNOTATE_EXPECT_RACE(address, description) \ AnnotateExpectRace(__FILE__, __LINE__, address, description) - // A no-op. Insert where you like to test the interceptors. + /* A no-op. Insert where you like to test the interceptors. */ #define ANNOTATE_NO_OP(arg) \ AnnotateNoOp(__FILE__, __LINE__, arg) -#else // NDEBUG is defined - - #define ANNOTATE_RWLOCK_CREATE(lock) // empty - #define ANNOTATE_RWLOCK_DESTROY(lock) // empty - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) // empty - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) // empty - #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) // empty - #define ANNOTATE_CONDVAR_WAIT(cv) // empty - #define ANNOTATE_CONDVAR_SIGNAL(cv) // empty - #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) // empty - #define ANNOTATE_HAPPENS_BEFORE(obj) // empty - #define ANNOTATE_HAPPENS_AFTER(obj) // empty - #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) // empty - #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) // empty - #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) // empty - #define ANNOTATE_PCQ_CREATE(pcq) // empty - #define ANNOTATE_PCQ_DESTROY(pcq) // empty - #define ANNOTATE_PCQ_PUT(pcq) // empty - #define ANNOTATE_PCQ_GET(pcq) // empty - #define ANNOTATE_NEW_MEMORY(address, size) // empty - #define ANNOTATE_EXPECT_RACE(address, description) // empty - #define ANNOTATE_BENIGN_RACE(address, description) // empty - #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty - #define ANNOTATE_TRACE_MEMORY(arg) // empty - #define ANNOTATE_THREAD_NAME(name) // empty - #define ANNOTATE_IGNORE_READS_BEGIN() // empty - #define ANNOTATE_IGNORE_READS_END() // empty - #define ANNOTATE_IGNORE_WRITES_BEGIN() // empty - #define ANNOTATE_IGNORE_WRITES_END() // empty - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() // empty - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() // empty - #define ANNOTATE_NO_OP(arg) // empty - -#endif // NDEBUG - -// Use the macros above rather than using these functions directly. 
-extern "C" void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock); -extern "C" void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock); -extern "C" void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w); -extern "C" void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w); -extern "C" void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock); -extern "C" void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv); -extern "C" void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv); -extern "C" void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, - long size); -extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line, - const volatile void *address, - long size); -extern "C" void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq); -extern "C" void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq); -extern "C" void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq); -extern "C" void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq); -extern "C" void AnnotateNewMemory(const char *file, int line, + /* Force the race detector to flush its state. The actual effect depends on + * the implementation of the detector. */ + #define ANNOTATE_FLUSH_STATE() \ + AnnotateFlushState(__FILE__, __LINE__) + + +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ + + #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ + #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ + #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ + #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ + #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ + #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ + #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ + #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ + #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ + #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ + #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ + #define ANNOTATE_PCQ_CREATE(pcq) /* empty */ + #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ + #define ANNOTATE_PCQ_PUT(pcq) /* empty */ + #define ANNOTATE_PCQ_GET(pcq) /* empty */ + #define ANNOTATE_NEW_MEMORY(address, size) /* empty */ + #define ANNOTATE_EXPECT_RACE(address, description) /* empty */ + #define ANNOTATE_BENIGN_RACE(address, description) /* empty */ + #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ + #define ANNOTATE_TRACE_MEMORY(arg) /* empty */ + #define ANNOTATE_THREAD_NAME(name) /* empty */ + #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_READS_END() /* empty */ + #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ + #define ANNOTATE_IGNORE_WRITES_END() /* empty */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* 
empty */ + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ + #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ + #define ANNOTATE_NO_OP(arg) /* empty */ + #define ANNOTATE_FLUSH_STATE() /* empty */ + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ + +/* Use the macros above rather than using these functions directly. */ +#ifdef __cplusplus +extern "C" { +#endif +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed); +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier); +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock); +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv); +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv); +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +void AnnotateUnpublishMemoryRange(const char *file, int line, const volatile void *address, long size); -extern "C" void AnnotateExpectRace(const char *file, int line, - const volatile void *address, - const char *description); -extern "C" void AnnotateBenignRace(const char *file, int line, - const volatile void *address, - const char *description); -extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu); -extern "C" void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg); -extern "C" void AnnotateThreadName(const char *file, int line, - const char *name); -extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line); -extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line); -extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line); -extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line); -extern "C" void AnnotateNoOp(const char *file, int line, - const volatile void *arg); - -#ifndef NDEBUG - - // ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. - // - // Instead of doing - // ANNOTATE_IGNORE_READS_BEGIN(); - // ... = x; - // ANNOTATE_IGNORE_READS_END(); - // one can use - // ... 
= ANNOTATE_UNPROTECTED_READ(x); +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq); +void AnnotateNewMemory(const char *file, int line, + const volatile void *address, + long size); +void AnnotateExpectRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRaceSized(const char *file, int line, + const volatile void *address, + long size, + const char *description); +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu); +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg); +void AnnotateThreadName(const char *file, int line, + const char *name); +void AnnotateIgnoreReadsBegin(const char *file, int line); +void AnnotateIgnoreReadsEnd(const char *file, int line); +void AnnotateIgnoreWritesBegin(const char *file, int line); +void AnnotateIgnoreWritesEnd(const char *file, int line); +void AnnotateEnableRaceDetection(const char *file, int line, int enable); +void AnnotateNoOp(const char *file, int line, + const volatile void *arg); +void AnnotateFlushState(const char *file, int line); + +/* Return non-zero value if running under valgrind. + + If "valgrind.h" is included into dynamic_annotations.c, + the regular valgrind mechanism will be used. + See http://valgrind.org/docs/manual/manual-core-adv.html about + RUNNING_ON_VALGRIND and other valgrind "client requests". + The file "valgrind.h" may be obtained by doing + svn co svn://svn.valgrind.org/valgrind/trunk/include + + If for some reason you can't use "valgrind.h" or want to fake valgrind, + there are two ways to make this function return non-zero: + - Use environment variable: export RUNNING_ON_VALGRIND=1 + - Make your tool intercept the function RunningOnValgrind() and + change its return value. + */ +int RunningOnValgrind(void); + +#ifdef __cplusplus +} +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus) + + /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. + + Instead of doing + ANNOTATE_IGNORE_READS_BEGIN(); + ... = x; + ANNOTATE_IGNORE_READS_END(); + one can use + ... = ANNOTATE_UNPROTECTED_READ(x); */ template <class T> - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) - NO_THREAD_SAFETY_ANALYSIS { + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) { ANNOTATE_IGNORE_READS_BEGIN(); T res = x; ANNOTATE_IGNORE_READS_END(); return res; } - - // Apply ANNOTATE_BENIGN_RACE to a static variable. + /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */ #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ namespace { \ class static_var ## _annotator { \ public: \ static_var ## _annotator() { \ - ANNOTATE_BENIGN_RACE(&static_var, \ + ANNOTATE_BENIGN_RACE_SIZED(&static_var, \ + sizeof(static_var), \ # static_var ": " description); \ } \ }; \ static static_var ## _annotator the ## static_var ## _annotator;\ } -#else // !NDEBUG +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ #define ANNOTATE_UNPROTECTED_READ(x) (x) - #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) // empty - -#endif // !NDEBUG - -// Return non-zero value if running under valgrind. 
-extern "C" int RunningOnValgrind(); + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ -#endif // BASE_DYNAMIC_ANNOTATIONS_H_ +#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */ diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc index 2bbce54..7ca3953a 100644 --- a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc +++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc @@ -210,8 +210,9 @@ static const intptr_t kMagicUnallocated = ~kMagicAllocated; namespace { class ArenaLock { public: - explicit ArenaLock(LowLevelAlloc::Arena *arena) : - left_(false), mask_valid_(false), arena_(arena) { + explicit ArenaLock(LowLevelAlloc::Arena *arena) + EXCLUSIVE_LOCK_FUNCTION(arena->mu) + : left_(false), mask_valid_(false), arena_(arena) { if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { // We've decided not to support async-signal-safe arena use until // there a demonstrated need. Here's how one could do it though @@ -228,7 +229,7 @@ namespace { this->arena_->mu.Lock(); } ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); } - void Leave() { + void Leave() UNLOCK_FUNCTION(arena_->mu) { this->arena_->mu.Unlock(); #if 0 if (this->mask_valid_) { diff --git a/third_party/tcmalloc/chromium/src/base/vdso_support.cc b/third_party/tcmalloc/chromium/src/base/vdso_support.cc index ddaca37..fce7c2c 100644 --- a/third_party/tcmalloc/chromium/src/base/vdso_support.cc +++ b/third_party/tcmalloc/chromium/src/base/vdso_support.cc @@ -42,8 +42,8 @@ #include <fcntl.h> #include "base/atomicops.h" // for MemoryBarrier -#include "base/logging.h" #include "base/linux_syscall_support.h" +#include "base/logging.h" #include "base/dynamic_annotations.h" #include "base/basictypes.h" // for COMPILE_ASSERT diff --git a/third_party/tcmalloc/chromium/src/central_freelist.cc b/third_party/tcmalloc/chromium/src/central_freelist.cc index 674ff9b..5b7dfbb 100644 --- a/third_party/tcmalloc/chromium/src/central_freelist.cc +++ b/third_party/tcmalloc/chromium/src/central_freelist.cc @@ -266,8 +266,7 @@ void CentralFreeList::Populate() { Span* span; { SpinLockHolder h(Static::pageheap_lock()); - span = Static::pageheap()->New(npages); - if (span) Static::pageheap()->RegisterSizeClass(span, size_class_); + span = Static::pageheap()->New(npages, size_class_, kPageSize); } if (span == NULL) { MESSAGE("tcmalloc: allocation failed", npages << kPageShift); @@ -275,12 +274,6 @@ void CentralFreeList::Populate() { return; } ASSERT(span->length == npages); - // Cache sizeclass info eagerly. Locking is not necessary. - // (Instead of being eager, we could just replace any stale info - // about this span, but that seems to be no better in practice.) - for (int i = 0; i < npages; i++) { - Static::pageheap()->CacheSizeClass(span->start + i, size_class_); - } // Split the block into pieces and add to the free-list // TODO: coloring of objects to avoid cache conflicts? 
diff --git a/third_party/tcmalloc/chromium/src/common.h b/third_party/tcmalloc/chromium/src/common.h index 53a0a0b..f9557c9 100644 --- a/third_party/tcmalloc/chromium/src/common.h +++ b/third_party/tcmalloc/chromium/src/common.h @@ -62,6 +62,7 @@ static const size_t kPageSize = 1 << kPageShift; static const size_t kMaxSize = 8u * kPageSize; static const size_t kAlignment = 8; static const size_t kNumClasses = 61; +static const size_t kLargeSizeClass = 0; // Maximum length we allow a per-thread free-list to have before we // move objects from it into the corresponding central free-list. We diff --git a/third_party/tcmalloc/chromium/src/config.h.in b/third_party/tcmalloc/chromium/src/config.h.in index 1ad2642..49bbf0d 100644 --- a/third_party/tcmalloc/chromium/src/config.h.in +++ b/third_party/tcmalloc/chromium/src/config.h.in @@ -132,7 +132,7 @@ /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H -/* Define to 1 if you have the <sys/ucontext.h> header file. */ +/* <sys/ucontext.h> is broken on redhat 7 */ #undef HAVE_SYS_UCONTEXT_H /* Define to 1 if you have the <sys/wait.h> header file. */ @@ -150,6 +150,9 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H +/* Define to 1 if you have the <valgrind.h> header file. */ +#undef HAVE_VALGRIND_H + /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ diff --git a/third_party/tcmalloc/chromium/src/config_linux.h b/third_party/tcmalloc/chromium/src/config_linux.h index 398f303..9786b3e 100644 --- a/third_party/tcmalloc/chromium/src/config_linux.h +++ b/third_party/tcmalloc/chromium/src/config_linux.h @@ -136,7 +136,7 @@ /* Define to 1 if compiler supports __thread */ #define HAVE_TLS 1 -/* Define to 1 if you have the <ucontext.h> header file. */ +/* <sys/ucontext.h> is broken on redhat 7 */ #define HAVE_UCONTEXT_H 1 /* Define to 1 if you have the <unistd.h> header file. */ @@ -145,6 +145,9 @@ /* Define to 1 if you have the <unwind.h> header file. */ #define HAVE_UNWIND_H 1 +/* Define to 1 if you have the <valgrind.h> header file. */ +#undef HAVE_VALGRIND_H + /* define if your compiler has __attribute__ */ #define HAVE___ATTRIBUTE__ 1 diff --git a/third_party/tcmalloc/chromium/src/config_win.h b/third_party/tcmalloc/chromium/src/config_win.h index 30daf4f..236bd6b 100644 --- a/third_party/tcmalloc/chromium/src/config_win.h +++ b/third_party/tcmalloc/chromium/src/config_win.h @@ -255,10 +255,12 @@ // --------------------------------------------------------------------- // Extra stuff not found in config.h.in -// This must be defined before the windows.h is included. It's needed -// for mutex.h, to give access to the TryLock method. +// This must be defined before the windows.h is included. We need at +// least 0x0400 for mutex.h to have access to TryLock, and at least +// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. +// (This latter is an optimization we could take out if need be.) 
#ifndef _WIN32_WINNT -# define _WIN32_WINNT 0x0400 +# define _WIN32_WINNT 0x0501 #endif // We want to make sure not to ever try to #include heap-checker.h diff --git a/third_party/tcmalloc/chromium/src/debugallocation.cc b/third_party/tcmalloc/chromium/src/debugallocation.cc index 1a9ddcb..949fbe9 100644 --- a/third_party/tcmalloc/chromium/src/debugallocation.cc +++ b/third_party/tcmalloc/chromium/src/debugallocation.cc @@ -1010,7 +1010,7 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) __THROW ATTRIBUTE_SECTION(google_malloc); -void* operator new(size_t size) +void* operator new(size_t size) throw (std::bad_alloc) ATTRIBUTE_SECTION(google_malloc); void* operator new(size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1018,7 +1018,7 @@ void operator delete(void* p) __THROW ATTRIBUTE_SECTION(google_malloc); void operator delete(void* p, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); -void* operator new[](size_t size) +void* operator new[](size_t size) throw (std::bad_alloc) ATTRIBUTE_SECTION(google_malloc); void* operator new[](size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1176,12 +1176,12 @@ extern "C" void* pvalloc(size_t size) __THROW { return p; } -extern "C" int mallopt(int cmd, int value) { +extern "C" int mallopt(int cmd, int value) __THROW { return BASE_MALLOPT(cmd, value); } #ifdef HAVE_STRUCT_MALLINFO -extern "C" struct mallinfo mallinfo(void) { +extern "C" struct mallinfo mallinfo(void) __THROW { return BASE_MALLINFO(); } #endif @@ -1239,7 +1239,7 @@ inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) { } } -void* operator new(size_t size) { +void* operator new(size_t size) throw (std::bad_alloc) { void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, false); MallocHook::InvokeNewHook(ptr, size); if (ptr == NULL) { @@ -1259,7 +1259,8 @@ void operator delete(void* ptr) __THROW { DebugDeallocate(ptr, MallocBlock::kNewType); } -// Compilers use this, though I can't see how it differs from normal delete. +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). void operator delete(void* ptr, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kNewType); @@ -1269,7 +1270,7 @@ void operator delete(void* ptr, const std::nothrow_t&) __THROW { // Alloc/free stuff for debug operator new[] & friends -void* operator new[](size_t size) { +void* operator new[](size_t size) throw (std::bad_alloc) { void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, false); MallocHook::InvokeNewHook(ptr, size); if (ptr == NULL) { @@ -1289,7 +1290,8 @@ void operator delete[](void* ptr) __THROW { DebugDeallocate(ptr, MallocBlock::kArrayNewType); } -// Compilers use this, though I can't see how it differs from normal delete. +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). void operator delete[](void* ptr, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kArrayNewType); @@ -1359,17 +1361,22 @@ class DebugMallocImplementation : public ParentImplementation { static DebugMallocImplementation debug_malloc_implementation; REGISTER_MODULE_INITIALIZER(debugallocation, { - MallocExtension::Register(&debug_malloc_implementation); - - // When the program exits, check all blocks still in the free queue for - // corruption. 
- atexit(DanglingWriteChecker); + // Either we or valgrind will control memory management. We + // register our extension if we're the winner. + if (RunningOnValgrind()) { + // Let Valgrind uses its own malloc (so don't register our extension). + } else { + MallocExtension::Register(&debug_malloc_implementation); + // When the program exits, check all blocks still in the free + // queue for corruption. + atexit(DanglingWriteChecker); + } }); #ifdef TCMALLOC_FOR_DEBUGALLOCATION // Redefine malloc_stats to use tcmalloc's implementation: -extern "C" void malloc_stats(void) { +extern "C" void malloc_stats(void) __THROW { do_malloc_stats(); } diff --git a/third_party/tcmalloc/chromium/src/google/heap-profiler.h b/third_party/tcmalloc/chromium/src/google/heap-profiler.h index 5efaf64..57cb97a 100644 --- a/third_party/tcmalloc/chromium/src/google/heap-profiler.h +++ b/third_party/tcmalloc/chromium/src/google/heap-profiler.h @@ -71,12 +71,13 @@ extern "C" { */ PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix); -/* Returns true if we are currently profiling the heap. This is true +/* Returns non-zero if we are currently profiling the heap. (Returns + * an int rather than a bool so it's usable from C.) This is true * between calls to HeapProfilerStart() and HeapProfilerStop(), and * also if the program has been run with HEAPPROFILER, or some other * way to turn on whole-program profiling. */ -bool IsHeapProfilerRunning(); +int IsHeapProfilerRunning(); /* Stop heap profiling. Can be restarted again with HeapProfilerStart(), * but the currently accumulated profiling information will be cleared. diff --git a/third_party/tcmalloc/chromium/src/google/profiler.h b/third_party/tcmalloc/chromium/src/google/profiler.h index 74b936f..a6883f4 100644 --- a/third_party/tcmalloc/chromium/src/google/profiler.h +++ b/third_party/tcmalloc/chromium/src/google/profiler.h @@ -108,13 +108,15 @@ struct ProfilerOptions { void *filter_in_thread_arg; }; -/* Start profiling and write profile info into fname. +/* Start profiling and write profile info into fname, discarding any + * existing profiling data in that file. * * This is equivalent to calling ProfilerStartWithOptions(fname, NULL). */ PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname); -/* Start profiling and write profile into fname. +/* Start profiling and write profile into fname, discarding any + * existing profiling data in that file. * * The profiler is configured using the options given by 'options'. * Options which are not specified are given default values. diff --git a/third_party/tcmalloc/chromium/src/google/stacktrace.h b/third_party/tcmalloc/chromium/src/google/stacktrace.h index 8188ce3..fd186d6 100644 --- a/third_party/tcmalloc/chromium/src/google/stacktrace.h +++ b/third_party/tcmalloc/chromium/src/google/stacktrace.h @@ -49,23 +49,23 @@ // Skips the most recent "skip_count" stack frames (also skips the // frame generated for the "GetStackFrames" routine itself), and then // records the pc values for up to the next "max_depth" frames in -// "pcs", and the corresponding stack frame sizes in "sizes". Returns -// the number of values recorded in "pcs"/"sizes". +// "result", and the corresponding stack frame sizes in "sizes". +// Returns the number of values recorded in "result"/"sizes". 
// // Example: // main() { foo(); } // foo() { bar(); } // bar() { -// void* pcs[10]; +// void* result[10]; // int sizes[10]; -// int depth = GetStackFrames(pcs, sizes, 10, 1); +// int depth = GetStackFrames(result, sizes, 10, 1); // } // // The GetStackFrames call will skip the frame for "bar". It will // return 2 and will produce pc values that map to the following // procedures: -// pcs[0] foo -// pcs[1] main +// result[0] foo +// result[1] main // (Actually, there may be a few more entries after "main" to account for // startup procedures.) // And corresponding stack frame sizes will also be recorded: @@ -76,15 +76,15 @@ // be identified. // // This routine may return fewer stack frame entries than are -// available. Also note that "pcs" and "sizes" must both be non-NULL. -extern PERFTOOLS_DLL_DECL int GetStackFrames(void** pcs, int* sizes, int max_depth, +// available. Also note that "result" and "sizes" must both be non-NULL. +extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, int skip_count); // Same as above, but to be used from a signal handler. The "uc" parameter // should be the pointer to ucontext_t which was passed as the 3rd parameter // to sa_sigaction signal handler. It may help the unwinder to get a // better stack trace under certain conditions. The "uc" may safely be NULL. -extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** pcs, int* sizes, int max_depth, +extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, int skip_count, const void *uc); // This is similar to the GetStackFrames routine, except that it returns diff --git a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in index e5c873d..fbb70ab 100644 --- a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in +++ b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in @@ -60,7 +60,8 @@ #endif #ifdef __cplusplus -#include <new> // for nothrow_t +#include <new> // for std::nothrow_t + extern "C" { #endif // Returns a human-readable version string. 
If major, minor, @@ -91,16 +92,15 @@ extern "C" { #ifdef __cplusplus PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); - PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; - PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); - PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; - PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW; - PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, - const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW; }
diff --git a/third_party/tcmalloc/chromium/src/heap-checker.cc b/third_party/tcmalloc/chromium/src/heap-checker.cc index 82a7adb..2779c97 100644 --- a/third_party/tcmalloc/chromium/src/heap-checker.cc +++ b/third_party/tcmalloc/chromium/src/heap-checker.cc @@ -159,6 +159,23 @@ DEFINE_bool(heap_check_test_pointer_alignment, "Set to true to check if the found leak can be due to " "use of unaligned pointers"); +// Alignment at which all pointers in memory are supposed to be located; +// use 1 if any alignment is ok. +// The heap_check_test_pointer_alignment flag controls whether we also try +// the value 1. +// The larger it can be, the smaller the chance of missing real leaks. +// +// sizeof(void*) is correct. However gold (the new linker) has a bug where it +// sometimes places global pointers on 4-byte boundaries, even when pointers +// are 8 bytes long. While we are fixing the linker, degrade to 4-byte +// alignment on all targets. http://b/1226481 +// +static const size_t kPointerSourceAlignment = sizeof(void*); +DEFINE_int32(heap_check_pointer_source_alignment, + EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT", + kPointerSourceAlignment), + "Alignment at which all pointers in memory are supposed to be " + "located. Use 1 if any alignment is ok."); + // A reasonable default to handle pointers inside of typical class objects: // Too low and we won't be able to traverse pointers to normally-used // nested objects and base parts of multiple-inherited objects. @@ -245,13 +262,6 @@ static bool constructor_heap_profiling = false; static const int heap_checker_info_level = 0; //---------------------------------------------------------------------- - -// Alignment at which all pointers in memory are supposed to be located; -// use 1 if any alignment is ok. -// heap_check_test_pointer_alignment flag guides if we try the value of 1. -// The larger it can be, the lesser is the chance of missing real leaks. -static const size_t kPointerSourceAlignment = sizeof(void*); - // Cancel our InitialMallocHook_* if present. static void CancelInitialMallocHooks(); // defined below @@ -484,7 +494,7 @@ HeapLeakChecker::Disabler::Disabler() { // in a thread-safe manner.
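For context on the Disabler whose logging is retuned in the hunks that follow: it is an RAII guard, where the constructor bumps the per-thread disable counter and the destructor drops it, so allocations made while one is in scope are ignored by the leak checker. A minimal usage sketch, assuming only the public heap-checker.h API (the caller name is a stand-in):

  #include <google/heap-checker.h>

  void WarmUpCaches() {  // hypothetical caller
    HeapLeakChecker::Disabler disabler;
    // This allocation is intentionally never freed; the Disabler in
    // scope keeps the heap checker from reporting it as a leak.
    static char* scratch = new char[1024];
    (void) scratch;
  }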
int counter = get_thread_disable_counter(); set_thread_disable_counter(counter + 1); - RAW_VLOG(1, "Increasing thread disable counter to %d", counter + 1); + RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1); } HeapLeakChecker::Disabler::~Disabler() { @@ -492,7 +502,7 @@ HeapLeakChecker::Disabler::~Disabler() { RAW_DCHECK(counter > 0, ""); if (counter > 0) { set_thread_disable_counter(counter - 1); - RAW_VLOG(1, "Decreasing thread disable counter to %d", counter); + RAW_VLOG(10, "Decreasing thread disable counter to %d", counter); } else { RAW_VLOG(0, "Thread disable counter underflow : %d", counter); } @@ -525,7 +535,7 @@ static void NewHook(const void* ptr, size_t size) { if (ptr != NULL) { const int counter = get_thread_disable_counter(); const bool ignore = (counter > 0); - RAW_VLOG(7, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, + RAW_VLOG(16, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, int(counter)); { SpinLockHolder l(&heap_checker_lock); if (size > max_heap_object_size) max_heap_object_size = size; @@ -540,17 +550,17 @@ static void NewHook(const void* ptr, size_t size) { } } } - RAW_VLOG(8, "Alloc Recorded: %p of %"PRIuS"", ptr, size); + RAW_VLOG(17, "Alloc Recorded: %p of %"PRIuS"", ptr, size); } } static void DeleteHook(const void* ptr) { if (ptr != NULL) { - RAW_VLOG(7, "Recording Free %p", ptr); + RAW_VLOG(16, "Recording Free %p", ptr); { SpinLockHolder l(&heap_checker_lock); if (heap_checker_on) heap_profile->RecordFree(ptr); } - RAW_VLOG(8, "Free Recorded: %p", ptr); + RAW_VLOG(17, "Free Recorded: %p", ptr); } } @@ -584,7 +594,7 @@ static StackDirection stack_direction = UNKNOWN_DIRECTION; static void RegisterStackLocked(const void* top_ptr) { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - RAW_VLOG(1, "Thread stack at %p", top_ptr); + RAW_VLOG(10, "Thread stack at %p", top_ptr); uintptr_t top = AsInt(top_ptr); stack_tops->insert(top); // add for later use @@ -598,12 +608,12 @@ static void RegisterStackLocked(const void* top_ptr) { if (MemoryRegionMap::FindAndMarkStackRegion(top, ®ion)) { // Make the proper portion of the stack live: if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { - RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", top_ptr, region.end_addr - top); live_objects->push_back(AllocObject(top_ptr, region.end_addr - top, THREAD_DATA)); } else { // GROWS_TOWARDS_HIGH_ADDRESSES - RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", AsPtr(region.start_addr), top - region.start_addr); live_objects->push_back(AllocObject(AsPtr(region.start_addr), @@ -619,7 +629,7 @@ static void RegisterStackLocked(const void* top_ptr) { uintptr_t start = AsInt(span->ptr); uintptr_t end = start + span->size; if (start <= top && top < end) { - RAW_VLOG(2, "Stack at %p is inside /proc/self/maps chunk %p..%p", + RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p", top_ptr, AsPtr(start), AsPtr(end)); // Shrink start..end region by chopping away the memory regions in // MemoryRegionMap that land in it to undo merging of regions @@ -640,17 +650,17 @@ static void RegisterStackLocked(const void* top_ptr) { } } if (stack_start != start || stack_end != end) { - RAW_VLOG(2, "Stack at %p is actually inside memory chunk %p..%p", + RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p", top_ptr, AsPtr(stack_start), AsPtr(stack_end)); } // Make the proper portion of the stack live: if 
(stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { - RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", top_ptr, stack_end - top); live_objects->push_back( AllocObject(top_ptr, stack_end - top, THREAD_DATA)); } else { // GROWS_TOWARDS_HIGH_ADDRESSES - RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", AsPtr(stack_start), top - stack_start); live_objects->push_back( AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA)); @@ -723,14 +733,14 @@ static void MakeDisabledLiveCallbackLocked( // and the rest of the region where the stack lives can well // contain outdated stack variables which are not live anymore, // hence should not be treated as such. - RAW_VLOG(2, "Not %s-disabling %"PRIuS" bytes at %p" + RAW_VLOG(11, "Not %s-disabling %"PRIuS" bytes at %p" ": have stack inside: %p", (stack_disable ? "stack" : "range"), info.object_size, ptr, AsPtr(*iter)); return; } } - RAW_VLOG(2, "%s-disabling %"PRIuS" bytes at %p", + RAW_VLOG(11, "%s-disabling %"PRIuS" bytes at %p", (stack_disable ? "Stack" : "Range"), info.object_size, ptr); live_objects->push_back(AllocObject(ptr, info.object_size, MUST_BE_ON_HEAP)); @@ -755,7 +765,7 @@ static void RecordGlobalDataLocked(uintptr_t start_address, // Ignore non-writeable regions. if (strchr(permissions, 'w') == NULL) return; if (filename == NULL || *filename == '\0') filename = "UNNAMED"; - RAW_VLOG(2, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, + RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, filename, start_address, end_address); (*library_live_objects)[filename]. push_back(AllocObject(AsPtr(start_address), @@ -814,12 +824,12 @@ void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, // does not call user code. 
} if (depth) { - RAW_VLOG(1, "Disabling allocations from %s at depth %d:", library, depth); + RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth); DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth); if (IsLibraryNamed(library, "/libpthread") || IsLibraryNamed(library, "/libdl") || IsLibraryNamed(library, "/ld")) { - RAW_VLOG(1, "Global memory regions made by %s will be live data", + RAW_VLOG(10, "Global memory regions made by %s will be live data", library); if (global_region_caller_ranges == NULL) { global_region_caller_ranges = @@ -936,7 +946,7 @@ static enum { va_list /*ap*/) { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); thread_listing_status = CALLBACK_STARTED; - RAW_VLOG(2, "Found %d threads (from pid %d)", num_threads, getpid()); + RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid()); if (FLAGS_heap_check_ignore_global_live) { UseProcMapsLocked(RECORD_GLOBAL_DATA); @@ -951,7 +961,7 @@ static enum { // the leak checking thread itself is handled // specially via self_thread_stack, not here: if (thread_pids[i] == self_thread_pid) continue; - RAW_VLOG(2, "Handling thread with pid %d", thread_pids[i]); + RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]); #if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER) i386_regs thread_regs; #define sys_ptrace(r, p, a, d) syscall(SYS_ptrace, (r), (p), (a), (d)) @@ -967,7 +977,7 @@ static enum { // register pointers still being in the registers and not on the stack): for (void** p = reinterpret_cast<void**>(&thread_regs); p < reinterpret_cast<void**>(&thread_regs + 1); ++p) { - RAW_VLOG(3, "Thread register %p", *p); + RAW_VLOG(12, "Thread register %p", *p); thread_registers.push_back(*p); } } else { @@ -982,7 +992,7 @@ static enum { if (thread_registers.size()) { // Make thread registers be live heap data sources. 
// we rely here on the fact that vector is in one memory chunk: - RAW_VLOG(2, "Live registers at %p of %"PRIuS" bytes", + RAW_VLOG(11, "Live registers at %p of %"PRIuS" bytes", &thread_registers[0], thread_registers.size() * sizeof(void*)); live_objects->push_back(AllocObject(&thread_registers[0], thread_registers.size() * sizeof(void*), @@ -1005,7 +1015,7 @@ static const void* self_thread_stack_top; void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - RAW_VLOG(2, "Handling self thread with pid %d", self_thread_pid); + RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid); // Register our own stack: // Important that all stack ranges (including the one here) @@ -1019,7 +1029,7 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin(); object != ignored_objects->end(); ++object) { const void* ptr = AsPtr(object->first); - RAW_VLOG(2, "Ignored live object at %p of %"PRIuS" bytes", + RAW_VLOG(11, "Ignored live object at %p of %"PRIuS" bytes", ptr, object->second); live_objects-> push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP)); @@ -1132,10 +1142,10 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { } } // Now get and use live_objects from the final version of l->second: - if (VLOG_IS_ON(2)) { + if (VLOG_IS_ON(11)) { for (LiveObjectsStack::const_iterator i = l->second.begin(); i != l->second.end(); ++i) { - RAW_VLOG(2, "Library live region at %p of %"PRIuPTR" bytes", + RAW_VLOG(11, "Library live region at %p of %"PRIuPTR" bytes", i->ptr, i->size); } } @@ -1240,7 +1250,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { RAW_LOG(ERROR, "Thread stacks not found for %d threads. " "Will likely report false leak positives.", r); } else { - RAW_VLOG(2, "Thread stacks appear to be found for all threads"); + RAW_VLOG(11, "Thread stacks appear to be found for all threads"); } } else { RAW_LOG(WARNING, "Not looking for thread stacks; " @@ -1256,7 +1266,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { IgnoreNonThreadLiveObjectsLocked(); } if (live_objects_total) { - RAW_VLOG(1, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", + RAW_VLOG(10, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", live_objects_total, live_bytes_total); } // Free these: we made them here and heap_profile never saw them @@ -1266,7 +1276,8 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { } // Alignment at which we should consider pointer positions -// in IgnoreLiveObjectsLocked. Use 1 if any alignment is ok. +// in IgnoreLiveObjectsLocked. Will normally use the value of +// FLAGS_heap_check_pointer_source_alignment. static size_t pointer_source_alignment = kPointerSourceAlignment; // Global lock for HeapLeakChecker::DoNoLeaks // to protect pointer_source_alignment. 
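Because the flag's default is read through EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT", ...) above, the alignment can be tuned without recompiling, for example (the binary name is a stand-in, and HEAPCHECK=normal is the usual perftools switch for whole-program leak checking):

  HEAP_CHECK_POINTER_SOURCE_ALIGNMENT=4 HEAPCHECK=normal ./my_binary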
@@ -1314,7 +1325,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); live_object_count += 1; live_byte_count += size; } - RAW_VLOG(4, "Looking for heap pointers in %p of %"PRIuS" bytes", + RAW_VLOG(13, "Looking for heap pointers in %p of %"PRIuS" bytes", object, size); const char* const whole_object = object; size_t const whole_size = size; @@ -1351,7 +1362,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); if (can_be_on_heap) { const void* ptr = reinterpret_cast<const void*>(addr); // Too expensive (inner loop): manually uncomment when debugging: - // RAW_VLOG(8, "Trying pointer to %p at %p", ptr, object); + // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object); size_t object_size; if (HaveOnHeapLocked(&ptr, &object_size) && heap_profile->MarkAsLive(ptr)) { @@ -1360,15 +1371,15 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); // a heap object which is in fact leaked. // I.e. in very rare and probably not repeatable/lasting cases // we might miss some real heap memory leaks. - RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p " + RAW_VLOG(14, "Found pointer to %p of %"PRIuS" bytes at %p " "inside %p of size %"PRIuS"", ptr, object_size, object, whole_object, whole_size); - if (VLOG_IS_ON(6)) { + if (VLOG_IS_ON(15)) { // log call stacks to help debug how come something is not a leak HeapProfileTable::AllocInfo alloc; - bool r = heap_profile->FindAllocDetails(ptr, &alloc); - r = r; // suppress compiler warning in non-debug mode - RAW_DCHECK(r, ""); // sanity + if (!heap_profile->FindAllocDetails(ptr, &alloc)) { + RAW_LOG(FATAL, "FindAllocDetails failed on ptr %p", ptr); + } RAW_LOG(INFO, "New live %p object's alloc stack:", ptr); for (int i = 0; i < alloc.stack_depth; ++i) { RAW_LOG(INFO, " @ %p", alloc.call_stack[i]); @@ -1386,7 +1397,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); live_objects_total += live_object_count; live_bytes_total += live_byte_count; if (live_object_count) { - RAW_VLOG(1, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", + RAW_VLOG(10, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", live_object_count, live_byte_count, name, name2); } } @@ -1408,7 +1419,7 @@ void HeapLeakChecker::IgnoreObject(const void* ptr) { if (!HaveOnHeapLocked(&ptr, &object_size)) { RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr); } else { - RAW_VLOG(1, "Going to ignore live object at %p of %"PRIuS" bytes", + RAW_VLOG(10, "Going to ignore live object at %p of %"PRIuS" bytes", ptr, object_size); if (ignored_objects == NULL) { ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap))) @@ -1434,7 +1445,7 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) { if (object != ignored_objects->end() && object_size == object->second) { ignored_objects->erase(object); found = true; - RAW_VLOG(1, "Now not going to ignore live object " + RAW_VLOG(10, "Now not going to ignore live object " "at %p of %"PRIuS" bytes", ptr, object_size); } } @@ -1483,7 +1494,7 @@ void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) { const HeapProfileTable::Stats& t = heap_profile->total(); const size_t start_inuse_bytes = t.alloc_size - t.free_size; const size_t start_inuse_allocs = t.allocs - t.frees; - RAW_VLOG(1, "Start check \"%s\" profile: %"PRIuS" bytes " + RAW_VLOG(10, "Start check \"%s\" profile: %"PRIuS" bytes " "in %"PRIuS" objects", name_, start_inuse_bytes, start_inuse_allocs); } else { @@ -1612,7 +1623,7 @@ bool 
HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { { // Heap activity in other threads is paused during this function // (i.e. until we got all profile difference info). - SpinLockHolder l(&heap_checker_lock); + SpinLockHolder hl(&heap_checker_lock); if (heap_checker_on == false) { if (name_ != NULL) { // leak checking enabled when created the checker RAW_LOG(WARNING, "Heap leak checker got turned off after checker " @@ -1649,6 +1660,8 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { // Make the heap profile, other threads are locked out. HeapProfileTable::Snapshot* base = reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_); + RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); + pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; IgnoreAllLiveObjectsLocked(&a_local_var); leaks = heap_profile->NonLiveSnapshot(base); @@ -1668,23 +1681,28 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { initial_allocs, Allocator::alloc_count()); } } else if (FLAGS_heap_check_test_pointer_alignment) { - // Try with reduced pointer aligment - pointer_source_alignment = 1; - IgnoreAllLiveObjectsLocked(&a_local_var); - HeapProfileTable::Snapshot* leaks_wo_align = - heap_profile->NonLiveSnapshot(base); - pointer_source_alignment = kPointerSourceAlignment; - if (leaks_wo_align->Empty()) { - RAW_LOG(WARNING, "Found no leaks without pointer alignment: " - "something might be placing pointers at " - "unaligned addresses! This needs to be fixed."); + if (pointer_source_alignment == 1) { + RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: " + "--heap_check_pointer_source_alignment was already set to 1"); } else { - RAW_LOG(INFO, "Found leaks without pointer alignment as well: " - "unaligned pointers must not be the cause of leaks."); - RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " - "to diagnose the leaks."); + // Try with reduced pointer aligment + pointer_source_alignment = 1; + IgnoreAllLiveObjectsLocked(&a_local_var); + HeapProfileTable::Snapshot* leaks_wo_align = + heap_profile->NonLiveSnapshot(base); + pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; + if (leaks_wo_align->Empty()) { + RAW_LOG(WARNING, "Found no leaks without pointer alignment: " + "something might be placing pointers at " + "unaligned addresses! This needs to be fixed."); + } else { + RAW_LOG(INFO, "Found leaks without pointer alignment as well: " + "unaligned pointers must not be the cause of leaks."); + RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " + "to diagnose the leaks."); + } + heap_profile->ReleaseSnapshot(leaks_wo_align); } - heap_profile->ReleaseSnapshot(leaks_wo_align); } if (leaks != NULL) { @@ -1741,7 +1759,7 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { SuggestPprofCommand(pprof_file); { - SpinLockHolder l(&heap_checker_lock); + SpinLockHolder hl(&heap_checker_lock); heap_profile->ReleaseSnapshot(leaks); Allocator::Free(pprof_file); } @@ -1874,6 +1892,7 @@ static bool internal_init_start_has_run = false; } // Set all flags + RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); if (FLAGS_heap_check == "minimal") { // The least we can check. 
FLAGS_heap_check_before_constructors = false; // from after main @@ -2043,7 +2062,7 @@ bool HeapLeakChecker::NoGlobalLeaks() { // we never delete or change main_heap_checker once it's set: HeapLeakChecker* main_hc = GlobalChecker(); if (main_hc) { - RAW_VLOG(1, "Checking for whole-program memory leaks"); + RAW_VLOG(10, "Checking for whole-program memory leaks"); // The program is over, so it's safe to symbolize addresses (which // requires a fork) because no serious work is expected to be done // after this. Symbolizing is really useful -- knowing what @@ -2165,7 +2184,7 @@ void HeapLeakChecker::BeforeConstructorsLocked() { RAW_CHECK(heap_profile == NULL, ""); heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable))) HeapProfileTable(&Allocator::Allocate, &Allocator::Free); - RAW_VLOG(1, "Starting tracking the heap"); + RAW_VLOG(10, "Starting tracking the heap"); heap_checker_on = true; } @@ -2329,7 +2348,7 @@ void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address, value.start_address = AsInt(start_address); value.max_depth = max_depth; if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) { - RAW_VLOG(1, "Disabling leak checking in stack traces " + RAW_VLOG(10, "Disabling leak checking in stack traces " "under frame addresses between %p..%p", start_address, end_address); } else { // check that this is just a verbatim repetition @@ -2352,7 +2371,7 @@ inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr, const uintptr_t addr = AsInt(*ptr); if (heap_profile->FindInsideAlloc( *ptr, max_heap_object_size, ptr, object_size)) { - RAW_VLOG(7, "Got pointer into %p at +%"PRIuPTR" offset", + RAW_VLOG(16, "Got pointer into %p at +%"PRIuPTR" offset", *ptr, addr - AsInt(*ptr)); return true; }
diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.cc b/third_party/tcmalloc/chromium/src/heap-profile-table.cc index 66e4f20..ecaf75f 100644 --- a/third_party/tcmalloc/chromium/src/heap-profile-table.cc +++ b/third_party/tcmalloc/chromium/src/heap-profile-table.cc @@ -99,7 +99,7 @@ const char HeapProfileTable::kFileExt[] = ".heap"; //---------------------------------------------------------------------- static const int kHashTableSize = 179999; // Size for table_. -/*static*/ const int HeapProfileTable::kMaxStackDepth = 32; +/*static*/ const int HeapProfileTable::kMaxStackDepth; //----------------------------------------------------------------------
diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.h b/third_party/tcmalloc/chromium/src/heap-profile-table.h index 5403257..c9bee15 100644 --- a/third_party/tcmalloc/chromium/src/heap-profile-table.h +++ b/third_party/tcmalloc/chromium/src/heap-profile-table.h @@ -52,8 +52,8 @@ class HeapProfileTable { // Extension to be used for heap profile files. static const char kFileExt[]; - // Longest stack trace we record. Defined in the .cc file. - static const int kMaxStackDepth; + // Longest stack trace we record.
+ static const int kMaxStackDepth = 32; // data types ---------------------------- diff --git a/third_party/tcmalloc/chromium/src/heap-profiler.cc b/third_party/tcmalloc/chromium/src/heap-profiler.cc index a1c643a9..3055f4ce 100644 --- a/third_party/tcmalloc/chromium/src/heap-profiler.cc +++ b/third_party/tcmalloc/chromium/src/heap-profiler.cc @@ -524,9 +524,9 @@ extern "C" void HeapProfilerStart(const char* prefix) { filename_prefix[prefix_length] = '\0'; } -extern "C" bool IsHeapProfilerRunning() { +extern "C" int IsHeapProfilerRunning() { SpinLockHolder l(&heap_lock); - return is_on; + return is_on ? 1 : 0; // return an int, because C code doesn't have bool } extern "C" void HeapProfilerStop() { diff --git a/third_party/tcmalloc/chromium/src/internal_logging.h b/third_party/tcmalloc/chromium/src/internal_logging.h index 0cb9ba2..731b2d9 100644 --- a/third_party/tcmalloc/chromium/src/internal_logging.h +++ b/third_party/tcmalloc/chromium/src/internal_logging.h @@ -119,7 +119,9 @@ do { \ #ifndef NDEBUG #define ASSERT(cond) CHECK_CONDITION(cond) #else -#define ASSERT(cond) ((void) 0) +#define ASSERT(cond) \ + do { \ + } while (0 && (cond)) #endif // Print into buffer diff --git a/third_party/tcmalloc/chromium/src/malloc_extension.cc b/third_party/tcmalloc/chromium/src/malloc_extension.cc index 4ce262f..c2f8b54 100644 --- a/third_party/tcmalloc/chromium/src/malloc_extension.cc +++ b/third_party/tcmalloc/chromium/src/malloc_extension.cc @@ -187,7 +187,10 @@ MallocExtension* MallocExtension::instance() { void MallocExtension::Register(MallocExtension* implementation) { perftools_pthread_once(&module_init, InitModule); // When running under valgrind, our custom malloc is replaced with - // valgrind's one and malloc extensions will not work. + // valgrind's one and malloc extensions will not work. (Note: + // callers should be responsible for checking that they are the + // malloc that is really being run, before calling Register. This + // is just here as an extra sanity check.) 
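Callers that want tcmalloc-specific behavior go through MallocExtension::instance(), which returns the registered implementation or a no-op base version (for example under Valgrind, per the guard above). A minimal sketch using a numeric property documented by perftools (the helper name is a stand-in):

  #include <google/malloc_extension.h>

  size_t CurrentAllocatedBytes() {  // hypothetical helper
    size_t n = 0;
    // Returns false (leaving n at 0) if the property is unknown or if
    // only the no-op base implementation is registered.
    MallocExtension::instance()->GetNumericProperty(
        "generic.current_allocated_bytes", &n);
    return n;
  }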
if (!RunningOnValgrind()) { current_instance = implementation; } diff --git a/third_party/tcmalloc/chromium/src/malloc_hook.cc b/third_party/tcmalloc/chromium/src/malloc_hook.cc index 2a7f542..4315b86 100644 --- a/third_party/tcmalloc/chromium/src/malloc_hook.cc +++ b/third_party/tcmalloc/chromium/src/malloc_hook.cc @@ -326,8 +326,8 @@ extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth, return 0; for (int i = 0; i < depth; ++i) { // stack[0] is our immediate caller if (InHookCaller(stack[i])) { - RAW_VLOG(4, "Found hooked allocator at %d: %p <- %p", - i, stack[i], stack[i+1]); + RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p", + i, stack[i], stack[i+1]); i += 1; // skip hook caller frame depth -= i; // correct depth if (depth > max_depth) depth = max_depth; diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.cc b/third_party/tcmalloc/chromium/src/memory_region_map.cc index 05fdc06..f6bed45 100644 --- a/third_party/tcmalloc/chromium/src/memory_region_map.cc +++ b/third_party/tcmalloc/chromium/src/memory_region_map.cc @@ -181,7 +181,7 @@ static MemoryRegionMap::RegionSetRep regions_rep; static bool recursive_insert = false; void MemoryRegionMap::Init(int max_stack_depth) { - RAW_VLOG(2, "MemoryRegionMap Init"); + RAW_VLOG(10, "MemoryRegionMap Init"); RAW_CHECK(max_stack_depth >= 0, ""); // Make sure we don't overflow the memory in region stacks: RAW_CHECK(max_stack_depth <= kMaxStackDepth, @@ -192,7 +192,7 @@ void MemoryRegionMap::Init(int max_stack_depth) { if (client_count_ > 1) { // not first client: already did initialization-proper Unlock(); - RAW_VLOG(2, "MemoryRegionMap Init increment done"); + RAW_VLOG(10, "MemoryRegionMap Init increment done"); return; } // Set our hooks and make sure no other hooks existed: @@ -217,17 +217,17 @@ void MemoryRegionMap::Init(int max_stack_depth) { // recursive_insert = false; as InsertRegionLocked will also construct // regions_ on demand for us. 
Unlock(); - RAW_VLOG(2, "MemoryRegionMap Init done"); + RAW_VLOG(10, "MemoryRegionMap Init done"); } bool MemoryRegionMap::Shutdown() { - RAW_VLOG(2, "MemoryRegionMap Shutdown"); + RAW_VLOG(10, "MemoryRegionMap Shutdown"); Lock(); RAW_CHECK(client_count_ > 0, ""); client_count_ -= 1; if (client_count_ != 0) { // not last client; need not really shutdown Unlock(); - RAW_VLOG(2, "MemoryRegionMap Shutdown decrement done"); + RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done"); return true; } CheckMallocHooks(); // we assume no other hooks @@ -244,7 +244,7 @@ bool MemoryRegionMap::Shutdown() { RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used"); } Unlock(); - RAW_VLOG(2, "MemoryRegionMap Shutdown done"); + RAW_VLOG(10, "MemoryRegionMap Shutdown done"); return deleted_arena; } @@ -336,7 +336,7 @@ bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top, Lock(); const Region* region = DoFindRegionLocked(stack_top); if (region != NULL) { - RAW_VLOG(2, "Stack at %p is inside region %p..%p", + RAW_VLOG(10, "Stack at %p is inside region %p..%p", reinterpret_cast<void*>(stack_top), reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); @@ -361,7 +361,7 @@ MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { } inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { - RAW_VLOG(4, "Inserting region %p..%p from %p", + RAW_VLOG(12, "Inserting region %p..%p from %p", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr), reinterpret_cast<void*>(region.caller())); @@ -385,10 +385,10 @@ inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { // This inserts and allocates permanent storage for region // and its call stack data: it's safe to do it now: regions_->insert(region); - RAW_VLOG(4, "Inserted region %p..%p :", + RAW_VLOG(12, "Inserted region %p..%p :", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr)); - if (VLOG_IS_ON(4)) LogAllLocked(); + if (VLOG_IS_ON(12)) LogAllLocked(); } // These variables are local to MemoryRegionMap::InsertRegionLocked() @@ -425,7 +425,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { // and taken into account when the recursion unwinds. 
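The recursive_insert/saved_regions machinery mentioned above exists because inserting into regions_ can itself allocate, which re-enters the allocation hooks. A simplified model of the guard pattern, with illustrative names rather than the actual MemoryRegionMap members:

  struct Region { /* start, end, call stack... */ };  // stand-in type
  static bool recursing = false;
  static Region parked[20];   // fixed buffer: parking needs no allocation
  static int parked_count = 0;
  static void InsertIntoSet(const Region& r);  // allocating insert (stand-in)

  static void Insert(const Region& r) {
    if (recursing) {               // hook re-entered while inserting
      parked[parked_count++] = r;  // park it for later
      return;
    }
    recursing = true;
    InsertIntoSet(r);              // may allocate, hence may re-enter Insert()
    while (parked_count > 0)       // replay whatever the recursion parked
      InsertIntoSet(parked[--parked_count]);
    recursing = false;
  }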
// Do the insert: if (recursive_insert) { // recursion: save in saved_regions - RAW_VLOG(4, "Saving recursive insert of region %p..%p from %p", + RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr), reinterpret_cast<void*>(region.caller())); @@ -436,7 +436,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { saved_regions[saved_regions_count++] = region; } else { // not a recursive call if (regions_ == NULL) { // init regions_ - RAW_VLOG(4, "Initializing region set"); + RAW_VLOG(12, "Initializing region set"); regions_ = regions_rep.region_set(); recursive_insert = true; new(regions_) RegionSet(); @@ -470,7 +470,7 @@ void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) { max_stack_depth_, kStripFrames + 1) : 0; region.set_call_stack_depth(depth); // record stack info fully - RAW_VLOG(2, "New global region %p..%p from %p", + RAW_VLOG(10, "New global region %p..%p from %p", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr), reinterpret_cast<void*>(region.caller())); @@ -499,7 +499,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { // An exact match, so it's safe to remove. --saved_regions_count; --put_pos; - RAW_VLOG(2, ("Insta-Removing saved region %p..%p; " + RAW_VLOG(10, ("Insta-Removing saved region %p..%p; " "now have %d saved regions"), reinterpret_cast<void*>(start_addr), reinterpret_cast<void*>(end_addr), @@ -523,7 +523,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { uintptr_t start_addr = reinterpret_cast<uintptr_t>(start); uintptr_t end_addr = start_addr + size; // subtract start_addr, end_addr from all the regions - RAW_VLOG(2, "Removing global region %p..%p; have %"PRIuS" regions", + RAW_VLOG(10, "Removing global region %p..%p; have %"PRIuS" regions", reinterpret_cast<void*>(start_addr), reinterpret_cast<void*>(end_addr), regions_->size()); @@ -533,12 +533,12 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { for (RegionSet::iterator region = regions_->lower_bound(sample); region != regions_->end() && region->start_addr < end_addr; /*noop*/) { - RAW_VLOG(5, "Looking at region %p..%p", + RAW_VLOG(13, "Looking at region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); if (start_addr <= region->start_addr && region->end_addr <= end_addr) { // full deletion - RAW_VLOG(4, "Deleting region %p..%p", + RAW_VLOG(12, "Deleting region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); RegionSet::iterator d = region; @@ -547,7 +547,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { continue; } else if (region->start_addr < start_addr && end_addr < region->end_addr) { // cutting-out split - RAW_VLOG(4, "Splitting region %p..%p in two", + RAW_VLOG(12, "Splitting region %p..%p in two", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); // Make another region for the start portion: @@ -560,13 +560,13 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { const_cast<Region&>(*region).set_start_addr(end_addr); } else if (end_addr > region->start_addr && start_addr <= region->start_addr) { // cut from start - RAW_VLOG(4, "Start-chopping region %p..%p", + RAW_VLOG(12, "Start-chopping region %p..%p", reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr)); const_cast<Region&>(*region).set_start_addr(end_addr); } else if (start_addr > region->start_addr && start_addr < region->end_addr) { // cut from end - RAW_VLOG(4, "End-chopping region %p..%p", + RAW_VLOG(12, "End-chopping region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); // Can't just modify region->end_addr (it's the sorting key): @@ -582,11 +582,11 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { } ++region; } - RAW_VLOG(4, "Removed region %p..%p; have %"PRIuS" regions", + RAW_VLOG(12, "Removed region %p..%p; have %"PRIuS" regions", reinterpret_cast<void*>(start_addr), reinterpret_cast<void*>(end_addr), regions_->size()); - if (VLOG_IS_ON(4)) LogAllLocked(); + if (VLOG_IS_ON(12)) LogAllLocked(); Unlock(); } @@ -596,7 +596,7 @@ void MemoryRegionMap::MmapHook(const void* result, int fd, off_t offset) { // TODO(maxim): replace all 0x%"PRIxS" by %p when RAW_VLOG uses a safe // snprintf reimplementation that does not malloc to pretty-print NULL - RAW_VLOG(2, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " + RAW_VLOG(10, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " "prot %d flags %d fd %d offs %lld", reinterpret_cast<uintptr_t>(result), size, reinterpret_cast<uint64>(start), prot, flags, fd, @@ -607,7 +607,7 @@ void MemoryRegionMap::MmapHook(const void* result, } void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) { - RAW_VLOG(2, "MUnmap of %p %"PRIuS"", ptr, size); + RAW_VLOG(10, "MUnmap of %p %"PRIuS"", ptr, size); if (size != 0) { RecordRegionRemoval(ptr, size); } @@ -617,7 +617,7 @@ void MemoryRegionMap::MremapHook(const void* result, const void* old_addr, size_t old_size, size_t new_size, int flags, const void* new_addr) { - RAW_VLOG(2, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " + RAW_VLOG(10, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " "to %"PRIuS" flags %d new_addr=0x%"PRIxPTR, (uintptr_t)result, (uintptr_t)old_addr, old_size, new_size, flags, @@ -631,7 +631,7 @@ void MemoryRegionMap::MremapHook(const void* result, extern "C" void* __sbrk(ptrdiff_t increment); // defined in libc void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) { - RAW_VLOG(2, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); + RAW_VLOG(10, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); if (result != reinterpret_cast<void*>(-1)) { if (increment > 0) { void* new_end = sbrk(0); diff --git a/third_party/tcmalloc/chromium/src/page_heap.cc b/third_party/tcmalloc/chromium/src/page_heap.cc index 31130e9..a256b64 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.cc +++ b/third_party/tcmalloc/chromium/src/page_heap.cc @@ -61,50 +61,65 @@ PageHeap::PageHeap() } } -Span* PageHeap::New(Length n) { +// Returns the minimum number of pages necessary to ensure that an +// allocation of size n can be aligned to the given alignment. +static Length AlignedAllocationSize(Length n, size_t alignment) { + ASSERT(alignment >= kPageSize); + return n + tcmalloc::pages(alignment - kPageSize); +} + +Span* PageHeap::New(Length n, size_t sc, size_t align) { ASSERT(Check()); ASSERT(n > 0); + if (align < kPageSize) { + align = kPageSize; + } + + Length aligned_size = AlignedAllocationSize(n, align); + // Find first size >= n that has a non-empty list - for (Length s = n; s < kMaxPages; s++) { + for (Length s = aligned_size; s < kMaxPages; s++) { Span* ll = &free_[s].normal; // If we're lucky, ll is non-empty, meaning it has a suitable span. 
if (!DLL_IsEmpty(ll)) { ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST); - return Carve(ll->next, n); + return Carve(ll->next, n, sc, align); } // Alternatively, maybe there's a usable returned span. ll = &free_[s].returned; if (!DLL_IsEmpty(ll)) { ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST); - return Carve(ll->next, n); + return Carve(ll->next, n, sc, align); } // Still no luck, so keep looking in larger classes. } - Span* result = AllocLarge(n); + Span* result = AllocLarge(n, sc, align); if (result != NULL) return result; // Grow the heap and try again - if (!GrowHeap(n)) { + if (!GrowHeap(aligned_size)) { ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); ASSERT(Check()); return NULL; } - return AllocLarge(n); + return AllocLarge(n, sc, align); } -Span* PageHeap::AllocLarge(Length n) { - // find the best span (closest to n in size). +Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) { + // Find the best span (closest to n in size). // The following loop implements address-ordered best-fit. Span *best = NULL; + Length aligned_size = AlignedAllocationSize(n, align); + // Search through normal list for (Span* span = large_.normal.next; span != &large_.normal; span = span->next) { - if (span->length >= n) { + if (span->length >= aligned_size) { if ((best == NULL) || (span->length < best->length) || ((span->length == best->length) && (span->start < best->start))) { @@ -118,7 +133,7 @@ Span* PageHeap::AllocLarge(Length n) { for (Span* span = large_.returned.next; span != &large_.returned; span = span->next) { - if (span->length >= n) { + if (span->length >= aligned_size) { if ((best == NULL) || (span->length < best->length) || ((span->length == best->length) && (span->start < best->start))) { @@ -128,19 +143,18 @@ } } - return best == NULL ? NULL : Carve(best, n); + return best == NULL ? NULL : Carve(best, n, sc, align); } Span* PageHeap::Split(Span* span, Length n) { ASSERT(0 < n); ASSERT(n < span->length); - ASSERT(span->location == Span::IN_USE); - ASSERT(span->sizeclass == 0); + ASSERT((span->location != Span::IN_USE) || span->sizeclass == 0); Event(span, 'T', n); const int extra = span->length - n; Span* leftover = NewSpan(span->start + n, extra); - ASSERT(leftover->location == Span::IN_USE); + leftover->location = span->location; Event(leftover, 'U', extra); RecordSpan(leftover); pagemap_.set(span->start + n - 1, span); // Update map from pageid to span @@ -161,43 +175,71 @@ void PageHeap::DecommitSpan(Span* span) { stats_.committed_bytes -= span->length << kPageShift; } -Span* PageHeap::Carve(Span* span, Length n) { +Span* PageHeap::Carve(Span* span, Length n, size_t sc, size_t align) { ASSERT(n > 0); ASSERT(span->location != Span::IN_USE); + ASSERT(align >= kPageSize); const int old_location = span->location; + + Length align_pages = align >> kPageShift; RemoveFromFreeList(span); - span->location = Span::IN_USE; - Event(span, 'A', n); + + if (span->start & (align_pages - 1)) { + Length skip_for_alignment = align_pages - (span->start & (align_pages - 1)); + Span* aligned = Split(span, skip_for_alignment); + + // The next span of |span| was just split -- no need to + // coalesce them. The previous span of |span| was not previously coalesced + // with |span|, i.e. is NULL or has location other than |old_location|.
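The arithmetic behind AlignedAllocationSize() and the Carve() changes above is the classic over-allocate-then-trim trick. Modeled in isolation, in page-count units with align_pages a power of two (names here are stand-ins, not the page_heap identifiers):

  #include <cstdint>
  typedef uintptr_t Length;  // page counts, as in page_heap

  // Ask for enough pages that some sub-run of n pages starts aligned:
  // any run of n + align_pages - 1 pages contains an aligned start,
  // which is exactly what n + pages(alignment - kPageSize) evaluates to.
  Length AlignedSize(Length n, Length align_pages) {
    return n + align_pages - 1;
  }

  // Pages to split off the front so the remainder starts aligned, as in
  // the skip_for_alignment computation in Carve() (precondition: start
  // is not already aligned, matching the guard above).
  Length SkipForAlignment(Length start, Length align_pages) {
    return align_pages - (start & (align_pages - 1));
  }

For example, for n = 3 pages at 8-page alignment, 10 pages are requested; any 10-page run contains an 8-page-aligned start with at least 3 pages after it.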
+ const PageID p = span->start; + const Length n = span->length; + Span* prev = GetDescriptor(p-1); + ASSERT(prev == NULL || + prev->location == Span::IN_USE || + prev->location != old_location); + PrependToFreeList(span); // Skip coalescing - no candidates possible + span = aligned; + } const int extra = span->length - n; ASSERT(extra >= 0); if (extra > 0) { - Span* leftover = NewSpan(span->start + n, extra); - leftover->location = old_location; - Event(leftover, 'S', extra); - RecordSpan(leftover); - + Span* leftover = Split(span, n); // The previous span of |leftover| was just split -- no need to // coalesce them. The next span of |leftover| was not previously coalesced - // with |span|, i.e. is NULL or has got location other than |old_location|. + // with |span|, i.e. is NULL or has location other than |old_location|. const PageID p = leftover->start; const Length len = leftover->length; Span* next = GetDescriptor(p+len); ASSERT (next == NULL || next->location == Span::IN_USE || next->location != leftover->location); - - PrependToFreeList(leftover); // Skip coalescing - no candidates possible - span->length = n; - pagemap_.set(span->start + n - 1, span); + PrependToFreeList(leftover); } + + ASSERT(Check()); if (old_location == Span::ON_RETURNED_FREELIST) { // We need to recommit this address space. CommitSpan(span); } - ASSERT(span->location == Span::IN_USE); - ASSERT(span->length == n); + + span->location = Span::IN_USE; + span->sizeclass = sc; + Event(span, 'A', n); + + // Cache sizeclass info eagerly. Locking is not necessary. + // (Instead of being eager, we could just replace any stale info + // about this span, but that seems to be no better in practice.) + CacheSizeClass(span->start, sc); + + if (sc != kLargeSizeClass) { + for (Length i = 1; i < n; i++) { + pagemap_.set(span->start + i, span); + CacheSizeClass(span->start + i, sc); + } + } + ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); return span; } @@ -379,18 +421,6 @@ Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { return released_pages; } -void PageHeap::RegisterSizeClass(Span* span, size_t sc) { - // Associate span object with all interior pages as well - ASSERT(span->location == Span::IN_USE); - ASSERT(GetDescriptor(span->start) == span); - ASSERT(GetDescriptor(span->start+span->length-1) == span); - Event(span, 'C', sc); - span->sizeclass = sc; - for (Length i = 1; i < span->length-1; i++) { - pagemap_.set(span->start+i, span); - } -} - static double MB(uint64_t bytes) { return bytes / 1048576.0; }
diff --git a/third_party/tcmalloc/chromium/src/page_heap.h b/third_party/tcmalloc/chromium/src/page_heap.h index 52acedb..63f21b2 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.h +++ b/third_party/tcmalloc/chromium/src/page_heap.h @@ -101,21 +101,49 @@ class PERFTOOLS_DLL_DECL PageHeap { public: PageHeap(); - // Allocate a run of "n" pages. Returns zero if out of memory. - // Caller should not pass "n == 0" -- instead, n should have - // been rounded up already. - Span* New(Length n); + // Allocate a run of "n" pages. Returns NULL if out of memory. + // Caller should not pass "n == 0" -- instead, n should have been + // rounded up already. The span will be used for allocating objects + // with the specified sizeclass sc (sc must be zero for large + // objects). The first page of the span will be aligned to the value + // specified by align, which must be a power of two. + Span* New(Length n, size_t sc, size_t align); // Delete the span "[p, p+n-1]".
// REQUIRES: span was returned by earlier call to New() and // has not yet been deleted. void Delete(Span* span); - // Mark an allocated span as being used for small objects of the - // specified size-class. - // REQUIRES: span was returned by an earlier call to New() - // and has not yet been deleted. - void RegisterSizeClass(Span* span, size_t sc); + // Gets either the size class of addr, if it is a small object, or its span. + // Return: + // if addr is invalid: + // leave *out_sc and *out_span unchanged and return false; + // if addr is valid and has a small size class: + // *out_sc = the size class + // *out_span = <undefined> + // return true + // if addr is valid and has a large size class: + // *out_sc = kLargeSizeClass + // *out_span = the span pointer + // return true + bool GetSizeClassOrSpan(void* addr, size_t* out_sc, Span** out_span) { + const PageID p = reinterpret_cast<uintptr_t>(addr) >> kPageShift; + size_t cl = GetSizeClassIfCached(p); + Span* span = NULL; + + if (cl != kLargeSizeClass) { + ASSERT(cl == GetDescriptor(p)->sizeclass); + } else { + span = GetDescriptor(p); + if (!span) { + return false; + } + cl = span->sizeclass; + } + *out_span = span; + *out_sc = cl; + return true; + } // Split an allocated span into two spans: one of length "n" pages // followed by another span of length "span->length - n" pages. @@ -123,14 +151,29 @@ class PERFTOOLS_DLL_DECL PageHeap { // Returns a pointer to the second span. // // REQUIRES: "0 < n < span->length" - // REQUIRES: span->location == IN_USE - // REQUIRES: span->sizeclass == 0 + // REQUIRES: a) the span is free or b) sizeclass == 0 Span* Split(Span* span, Length n); // Return the descriptor for the specified page. Returns NULL if // this PageID was not allocated previously. inline Span* GetDescriptor(PageID p) const { - return reinterpret_cast<Span*>(pagemap_.get(p)); + Span* ret = reinterpret_cast<Span*>(pagemap_.get(p)); +#ifndef NDEBUG + if (ret != NULL && ret->location == Span::IN_USE) { + size_t cl = GetSizeClassIfCached(p); + // Three cases: + // - The object is not cached + // - The object is cached correctly + // - It is a large object and we're not looking at the first + // page. This happens in coalescing. + ASSERT(cl == kLargeSizeClass || cl == ret->sizeclass || + (ret->start != p && ret->sizeclass == kLargeSizeClass)); + // If the object is sampled, it must be kLargeSizeClass + ASSERT(ret->sizeclass == kLargeSizeClass || !ret->sample); + } +#endif + + return ret; } // Dump state to stderr @@ -234,7 +277,7 @@ class PERFTOOLS_DLL_DECL PageHeap { // length exactly "n" and mark it as non-free so it can be returned // to the client. After all that, decrease free_pages_ by n and // return span. - Span* Carve(Span* span, Length n, size_t sc, size_t align); void RecordSpan(Span* span) { pagemap_.set(span->start, span); @@ -245,7 +288,7 @@ class PERFTOOLS_DLL_DECL PageHeap { // Allocate a large span of length == n. If successful, returns a // span of exactly the specified length. Else, returns NULL. - Span* AllocLarge(Length n); + Span* AllocLarge(Length n, size_t sc, size_t align); // Coalesce span with neighboring spans if possible, prepend to // appropriate free list, and adjust stats.
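A hypothetical caller of the new accessor, following the contract spelled out above (heap and ptr are stand-ins):

  void Classify(PageHeap* heap, void* ptr) {  // illustrative only
    size_t cl;
    Span* span;
    if (!heap->GetSizeClassOrSpan(ptr, &cl, &span)) {
      return;  // ptr is not a live heap address
    }
    if (cl != kLargeSizeClass) {
      // Small object: cl is its size class; span is undefined here.
    } else {
      // Large object: cl == kLargeSizeClass and span is its descriptor.
    }
  }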
diff --git a/third_party/tcmalloc/chromium/src/page_heap_allocator.h b/third_party/tcmalloc/chromium/src/page_heap_allocator.h index 20e1ab1..3f75939 100644 --- a/third_party/tcmalloc/chromium/src/page_heap_allocator.h +++ b/third_party/tcmalloc/chromium/src/page_heap_allocator.h @@ -44,7 +44,7 @@ class PageHeapAllocator { // allocated and their constructors might not have run by the time some // other static variable tries to allocate memory. void Init() { - ASSERT(kAlignedSize <= kAllocIncrement); + ASSERT(sizeof(T) <= kAllocIncrement); inuse_ = 0; free_area_ = NULL; free_avail_ = 0; @@ -60,8 +60,9 @@ class PageHeapAllocator { result = free_list_; free_list_ = *(reinterpret_cast<void**>(result)); } else { - if (free_avail_ < kAlignedSize) { - // Need more room + if (free_avail_ < sizeof(T)) { + // Need more room. We assume that MetaDataAlloc returns + // suitably aligned memory. free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); if (free_area_ == NULL) { CRASH("FATAL ERROR: Out of memory trying to allocate internal " @@ -71,8 +72,8 @@ class PageHeapAllocator { free_avail_ = kAllocIncrement; } result = free_area_; - free_area_ += kAlignedSize; - free_avail_ -= kAlignedSize; + free_area_ += sizeof(T); + free_avail_ -= sizeof(T); } inuse_++; return reinterpret_cast<T*>(result); @@ -90,10 +91,6 @@ class PageHeapAllocator { // How much to allocate from system at a time static const int kAllocIncrement = 128 << 10; - // Aligned size of T - static const size_t kAlignedSize - = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); - // Free area from which to carve new objects char* free_area_; size_t free_avail_; diff --git a/third_party/tcmalloc/chromium/src/pprof b/third_party/tcmalloc/chromium/src/pprof index fec0c9e..8aff380 100755 --- a/third_party/tcmalloc/chromium/src/pprof +++ b/third_party/tcmalloc/chromium/src/pprof @@ -89,11 +89,10 @@ my %obj_tool_map = ( ); my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local my $GV = "gv"; +my $KCACHEGRIND = "kcachegrind"; my $PS2PDF = "ps2pdf"; # These are used for dynamic profiles -my $WGET = "wget"; -my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets -my $CURL = "curl"; +my $URL_FETCHER = "curl -s"; # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -107,6 +106,12 @@ my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; +# These are the web pages that can be named on the command line. +# All the alternatives must begin with /. +my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . + "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . 
+ "$FILTEREDPROFILE_PAGE)"; + # default binary name my $UNKNOWN_BINARY = "(unknown)"; @@ -175,12 +180,14 @@ Output type: --text Generate text report --callgrind Generate callgrind format to stdout --gv Generate Postscript and display + --web Generate SVG and display --list=<regexp> Generate source listing of matching routines --disasm=<regexp> Generate disassembly of matching routines --symbols Print demangled symbol names found at given addresses --dot Generate DOT file to stdout --ps Generate Postcript to stdout --pdf Generate PDF to stdout + --svg Generate SVG to stdout --gif Generate GIF to stdout --raw Generate symbolized pprof data (useful with remote fetch) @@ -223,6 +230,8 @@ pprof /bin/ls ls.prof Enters "interactive" mode pprof --text /bin/ls ls.prof Outputs one line per procedure +pprof --web /bin/ls ls.prof + Displays annotated call-graph in web browser pprof --gv /bin/ls ls.prof Displays annotated call-graph via 'gv' pprof --gv --focus=Mutex /bin/ls ls.prof @@ -233,6 +242,9 @@ pprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() pprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() + +pprof http://localhost:1234/ + Enters "interactive" mode pprof --text localhost:1234 Outputs one line per procedure for localhost:1234 pprof --raw localhost:1234 > ./local.raw @@ -292,10 +304,12 @@ sub Init() { $main::opt_disasm = ""; $main::opt_symbols = 0; $main::opt_gv = 0; + $main::opt_web = 0; $main::opt_dot = 0; $main::opt_ps = 0; $main::opt_pdf = 0; $main::opt_gif = 0; + $main::opt_svg = 0; $main::opt_raw = 0; $main::opt_nodecount = 80; @@ -330,13 +344,16 @@ sub Init() { # Are we using $SYMBOL_PAGE? $main::use_symbol_page = 0; + # Files returned by TempName. + %main::tempnames = (); + # Type of profile we are dealing with # Supported types: - # cpu - # heap - # growth - # contention - $main::profile_type = ''; # Empty type means "unknown" + # cpu + # heap + # growth + # contention + $main::profile_type = ''; # Empty type means "unknown" GetOptions("help!" => \$main::opt_help, "version!" => \$main::opt_version, @@ -355,9 +372,11 @@ sub Init() { "disasm=s" => \$main::opt_disasm, "symbols!" => \$main::opt_symbols, "gv!" => \$main::opt_gv, + "web!" => \$main::opt_web, "dot!" => \$main::opt_dot, "ps!" => \$main::opt_ps, "pdf!" => \$main::opt_pdf, + "svg!" => \$main::opt_svg, "gif!" => \$main::opt_gif, "raw!" => \$main::opt_raw, "interactive!" => \$main::opt_interactive, @@ -380,8 +399,8 @@ sub Init() { "tools=s" => \$main::opt_tools, "test!" => \$main::opt_test, "debug!" => \$main::opt_debug, - # Undocumented flags used only by unittests: - "test_stride=i" => \$main::opt_test_stride, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, ) || usage("Invalid option(s)"); # Deal with the standard --help and --version @@ -433,9 +452,11 @@ sub Init() { ($main::opt_disasm eq '' ? 0 : 1) + ($main::opt_symbols == 0 ? 
0 : 1) + $main::opt_gv + + $main::opt_web + $main::opt_dot + $main::opt_ps + $main::opt_pdf + + $main::opt_svg + $main::opt_gif + $main::opt_raw + $main::opt_interactive + @@ -510,20 +531,6 @@ sub Init() { ConfigureObjTools($main::prog) } - # Check what flags our commandline utilities support - if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) { - my @lines = <TFILE>; - if (grep(/unrecognized/, @lines) > 0) { - # grep found 'unrecognized' token from WGET, clear WGET flags - $WGET_FLAGS = ""; - } - close(TFILE); - } - # TODO(csilvers): check all the other binaries and objtools to see - # if they are installed and what flags they support, and store that - # in a data structure here, rather than scattering these tests about. - # Then, ideally, rewrite code to use wget OR curl OR GET or ... - # Break the opt_list_prefix into the prefix_list array @prefix_list = split (',', $main::opt_lib_prefix); @@ -634,9 +641,24 @@ sub Main() { } else { if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { if ($main::opt_gv) { - RunGV(PsTempName($main::next_tmpfile), ""); + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. + delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } } } else { + cleanup(); exit(1); } } @@ -667,7 +689,7 @@ sub ReadlineMightFail { sub RunGV { my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background + my $bg = shift; # "" or " &" if we should run in background if (!system("$GV --version >/dev/null 2>&1")) { # Options using double dash are supported by this gv version. # Also, turn on noantialias to better handle bug in gv for @@ -682,6 +704,41 @@ sub RunGV { } } +sub RunWeb { + my $fname = shift; + print STDERR "Loading web page file:///$fname\n"; + + if (`uname` =~ /Darwin/) { + # OS X: open will use standard preference for SVG files. + system("/usr/bin/open", $fname); + return; + } + + # Some kind of Unix; try generic symlinks, then specific browsers. + # (Stop once we find one.) + # Works best if the browser is already running. + my @alt = ( + "/etc/alternatives/gnome-www-browser", + "/etc/alternatives/x-www-browser", + "google-chrome", + "firefox", + ); + foreach my $b (@alt) { + if (system($b, $fname) == 0) { + return; + } + } + + print STDERR "Could not load web browser.\n"; +} + +sub RunKcachegrind { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n"; + system("$KCACHEGRIND " . $fname . $bg); +} + ##### Interactive helper routines ##### @@ -689,10 +746,11 @@ sub InteractiveMode { $| = 1; # Make output unbuffered for interactive mode my ($orig_profile, $symbols, $libs, $total) = @_; - print "Welcome to pprof! For help, type 'help'.\n"; + print STDERR "Welcome to pprof! For help, type 'help'.\n"; - # Use ReadLine if it's installed. - if ( !ReadlineMightFail() && + # Use ReadLine if it's installed and input comes from a console. 
+ if ( -t STDIN && + !ReadlineMightFail() && defined(eval {require Term::ReadLine}) ) { my $term = new Term::ReadLine 'pprof'; while ( defined ($_ = $term->readline('(pprof) '))) { @@ -703,7 +761,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print "(pprof) "; + print STDERR "(pprof) "; $_ = <STDIN>; last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -727,13 +785,13 @@ sub InteractiveCommand { my($orig_profile, $symbols, $libs, $total, $command) = @_; $_ = $command; # just to make future m//'s easier if (!defined($_)) { - print "\n"; + print STDERR "\n"; return 0; } - if (m/^ *quit/) { + if (m/^\s*quit/) { return 0; } - if (m/^ *help/) { + if (m/^\s*help/) { InteractiveHelpMessage(); return 1; } @@ -745,7 +803,7 @@ sub InteractiveCommand { $main::opt_gv = 0; $main::opt_cum = 0; - if (m/^ *(text|top)(\d*) *(.*)/) { + if (m/^\s*(text|top)(\d*)\s*(.*)/) { $main::opt_text = 1; my $line_limit = ($2 ne "") ? int($2) : 10; @@ -764,7 +822,24 @@ sub InteractiveCommand { PrintText($symbols, $flat, $cumulative, $total, $line_limit); return 1; } - if (m/^ *list *(.+)/) { + if (m/^\s*callgrind\s*([^ \n]*)/) { + $main::opt_callgrind = 1; + + # Get derived profiles + my $calls = ExtractCalls($symbols, $orig_profile); + my $filename = $1; + if ( $1 eq '' ) { + $filename = TempName($main::next_tmpfile, "callgrind"); + } + PrintCallgrind($calls, $filename); + if ( $1 eq '' ) { + RunKcachegrind($filename, " & "); + $main::next_tmpfile++; + } + + return 1; + } + if (m/^\s*list\s*(.+)/) { $main::opt_list = 1; my $routine; @@ -781,7 +856,7 @@ sub InteractiveCommand { PrintListing($libs, $flat, $cumulative, $routine); return 1; } - if (m/^ *disasm *(.+)/) { + if (m/^\s*disasm\s*(.+)/) { $main::opt_disasm = 1; my $routine; @@ -799,12 +874,18 @@ sub InteractiveCommand { PrintDisassembly($libs, $flat, $cumulative, $routine, $total); return 1; } - if (m/^ *gv *(.*)/) { - $main::opt_gv = 1; + if (m/^\s*(gv|web)\s*(.*)/) { + $main::opt_gv = 0; + $main::opt_web = 0; + if ($1 eq "gv") { + $main::opt_gv = 1; + } elsif ($1 eq "web") { + $main::opt_web = 1; + } my $focus; my $ignore; - ($focus, $ignore) = ParseInteractiveArgs($1); + ($focus, $ignore) = ParseInteractiveArgs($2); # Process current profile to account for various settings my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); @@ -815,11 +896,19 @@ sub InteractiveCommand { my $cumulative = CumulativeProfile($reduced); if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - RunGV(PsTempName($main::next_tmpfile), " &"); + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), " &"); + } elsif ($main::opt_web) { + RunWeb(TempName($main::next_tmpfile, "svg")); + } $main::next_tmpfile++; } return 1; } + if (m/^\s*$/) { + return 1; + } + print STDERR "Unknown command: try 'help'.\n"; return 1; } @@ -856,7 +945,7 @@ sub ProcessProfile { } sub InteractiveHelpMessage { - print <<ENDOFHELP; + print STDERR <<ENDOFHELP; Interactive pprof mode Commands: @@ -868,6 +957,14 @@ Commands: the "focus" regular expression matches a routine name on the stack trace. + web + web [focus] [-ignore1] [-ignore2] + Like GV, but displays profile in your web browser instead of using + Ghostview. Works best if your web browser is already running. + To change the browser that gets used: + On Linux, set the /etc/alternatives/gnome-www-browser symlink. + On OS X, change the Finder association for SVG files. 
+ list [routine_regexp] [-ignore1] [-ignore2] Show source listing of routines whose names match "routine_regexp" @@ -882,6 +979,10 @@ Commands: Show disassembly of routines whose names match "routine_regexp", annotated with sample counts. + callgrind + callgrind [filename] + Generates a callgrind file. If no filename is given, kcachegrind is called. + help - This listing quit or ^D - End pprof @@ -913,16 +1014,19 @@ sub ParseInteractiveArgs { } } if ($ignore ne "") { - print "Ignoring samples in call stacks that match '$ignore'\n"; + print STDERR "Ignoring samples in call stacks that match '$ignore'\n"; } return ($focus, $ignore); } ##### Output code ##### -sub PsTempName { +sub TempName { my $fnum = shift; - return "$main::tmpfile_ps" . "." . "$fnum" . ".ps"; + my $ext = shift; + my $file = "$main::tmpfile_ps.$fnum.$ext"; + $main::tempnames{$file} = 1; + return $file; } # Print profile data in packed binary format (64-bit) to standard out @@ -1045,7 +1149,15 @@ sub PrintText { # Print the call graph in a way that's suitable for callgrind. sub PrintCallgrind { my $calls = shift; - printf("events: Hits\n\n"); + my $filename; + if ($main::opt_interactive) { + $filename = shift; + print STDERR "Writing callgrind file to '$filename'.\n" + } else { + $filename = "&STDOUT"; + } + open(CG, ">".$filename ); + printf CG ("events: Hits\n\n"); foreach my $call ( map { $_->[0] } sort { $a->[1] cmp $b ->[1] || $a->[2] <=> $b->[2] } @@ -1057,13 +1169,15 @@ sub PrintCallgrind { my ( $caller_file, $caller_line, $caller_function, $callee_file, $callee_line, $callee_function ) = ( $1, $2, $3, $5, $6, $7 ); - printf("fl=$caller_file\nfn=$caller_function\n"); + + + printf CG ("fl=$caller_file\nfn=$caller_function\n"); if (defined $6) { - printf("cfl=$callee_file\n"); - printf("cfn=$callee_function\n"); - printf("calls=$count $callee_line\n"); + printf CG ("cfl=$callee_file\n"); + printf CG ("cfn=$callee_function\n"); + printf CG ("calls=$count $callee_line\n"); } - printf("$caller_line $count\n\n"); + printf CG ("$caller_line $count\n\n"); } } @@ -1385,7 +1499,7 @@ sub SourceLine { return undef; } my $lines = []; - push(@{$lines}, ""); # So we can use 1-based line numbers as indices + push(@{$lines}, ""); # So we can use 1-based line numbers as indices while (<FILE>) { push(@{$lines}, $_); } @@ -1477,8 +1591,8 @@ sub PrintDisassembledFunction { # Find run of instructions for this range of source lines my $first_inst = $i; while (($i <= $#instructions) && - ($instructions[$i]->[2] >= $first_line) && - ($instructions[$i]->[2] <= $last_line)) { + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { $e = $instructions[$i]; $flat_sum{$e->[2]} += $flat_count[$i]; $cum_sum{$e->[2]} += $cum_count[$i]; @@ -1490,16 +1604,16 @@ for (my $l = $first_line; $l <= $last_line; $l++) { my $line = SourceLine($current_file, $l); if (!defined($line)) { - $line = "?\n"; + $line = "?\n"; next; } else { $line =~ s/^\s+//; } printf("%6s %6s %5d: %s", - UnparseAlt($flat_sum{$l}), - UnparseAlt($cum_sum{$l}), - $l, - $line); + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); } # Print disassembly @@ -1516,9 +1630,9 @@ while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments printf("%6s %6s %8s: %6s\n", - UnparseAlt($flat_count[$x]), - UnparseAlt($cum_count[$x]), - $address, + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + $address, $d); } } @@ -1542,7 +1656,7 @@ sub PrintDot { # Find nodes to
include my @list = (sort { abs(GetEntry($cumulative, $b)) <=> abs(GetEntry($cumulative, $a)) - || $a cmp $b } + || $a cmp $b } keys(%{$cumulative})); my $last = $nodecount - 1; if ($last > $#list) { @@ -1554,7 +1668,6 @@ sub PrintDot { } if ($last < 0) { print STDERR "No nodes to print\n"; - cleanup(); return 0; } @@ -1567,11 +1680,14 @@ sub PrintDot { # Open DOT output file my $output; if ($main::opt_gv) { - $output = "| $DOT -Tps2 >" . PsTempName($main::next_tmpfile); + $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); } elsif ($main::opt_ps) { $output = "| $DOT -Tps2"; } elsif ($main::opt_pdf) { $output = "| $DOT -Tps2 | $PS2PDF - -"; + } elsif ($main::opt_web || $main::opt_svg) { + # We need to post-process the SVG, so write to a temporary file always. + $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg"); } elsif ($main::opt_gif) { $output = "| $DOT -Tgif"; } else { @@ -1682,7 +1798,10 @@ sub PrintDot { my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); if ($fraction > 1) { $fraction = 1; } my $w = $fraction * 2; - #if ($w < 1) { $w = 1; } + if ($w < 1 && ($main::opt_web || $main::opt_svg)) { + # SVG output treats line widths < 1 poorly. + $w = 1; + } # Dot sometimes segfaults if given edge weights that are too large, so # we cap the weights at a large value @@ -1706,11 +1825,312 @@ sub PrintDot { } print DOT ("}\n"); - close(DOT); + + if ($main::opt_web || $main::opt_svg) { + # Rewrite SVG to be more usable inside web browser. + RewriteSvg(TempName($main::next_tmpfile, "svg")); + } + return 1; } +sub RewriteSvg { + my $svgfile = shift; + + open(SVG, $svgfile) || die "open temp svg: $!"; + my @svg = <SVG>; + close(SVG); + unlink $svgfile; + my $svg = join('', @svg); + + # Dot's SVG output is + # + # <svg width="___" height="___" + # viewBox="___" xmlns=...> + # <g id="graph0" transform="..."> + # ... + # </g> + # </svg> + # + # Change it to + # + # <svg width="100%" height="100%" + # xmlns=...> + # $svg_javascript + # <g id="viewport" transform="translate(0,0)"> + # <g id="graph0" transform="..."> + # ... + # </g> + # </g> + # </svg> + + # Fix width, height; drop viewBox. + $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/; + + # Insert script, viewport <g> above first <g> + my $svg_javascript = SvgJavascript(); + my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n"; + $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/; + + # Insert final </g> above </svg>. + $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; + $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/; + + if ($main::opt_svg) { + # --svg: write to standard output. + print $svg; + } else { + # Write back to temporary file. + open(SVG, ">$svgfile") || die "open $svgfile: $!"; + print SVG $svg; + close(SVG); + } +} + +sub SvgJavascript { + return <<'EOF'; +<script type="text/ecmascript"><![CDATA[ +// SVGPan +// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/ +// Local modification: if(true || ...) below to force panning, never moving. 
+ +/** + * SVGPan library 1.2 + * ==================== + * + * Given a unique existing element with id "viewport", including + * the library into any SVG adds the following capabilities: + * + * - Mouse panning + * - Mouse zooming (using the wheel) + * - Object dragging + * + * Known issues: + * + * - Zooming (while panning) on Safari still has some issues + * + * Releases: + * + * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui + * Fixed a bug with browser mouse handler interaction + * + * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui + * Updated the zoom code to support the mouse wheel on Safari/Chrome + * + * 1.0, Andrea Leofreddi + * First release + * + * This code is licensed under the following BSD license: + * + * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are those of the + * authors and should not be interpreted as representing official policies, either expressed + * or implied, of Andrea Leofreddi. + */ + +var root = document.documentElement; + +var state = 'none', stateTarget, stateOrigin, stateTf; + +setupHandlers(root); + +/** + * Register handlers + */ +function setupHandlers(root){ + setAttributes(root, { + "onmouseup" : "add(evt)", + "onmousedown" : "handleMouseDown(evt)", + "onmousemove" : "handleMouseMove(evt)", + "onmouseup" : "handleMouseUp(evt)", + //"onmouseout" : "handleMouseUp(evt)", // Uncomment this to stop the pan functionality when dragging out of the SVG element + }); + + if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0) + window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari + else + window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others + + var g = svgDoc.getElementById("svg"); + g.width = "100%"; + g.height = "100%"; +} + +/** + * Instantiate an SVGPoint object with given event coordinates. + */ +function getEventPoint(evt) { + var p = root.createSVGPoint(); + + p.x = evt.clientX; + p.y = evt.clientY; + + return p; +} + +/** + * Sets the current transform matrix of an element.
+ */ +function setCTM(element, matrix) { + var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")"; + + element.setAttribute("transform", s); +} + +/** + * Dumps a matrix to a string (useful for debug). + */ +function dumpMatrix(matrix) { + var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n 0, 0, 1 ]"; + + return s; +} + +/** + * Sets attributes of an element. + */ +function setAttributes(element, attributes){ + for (i in attributes) + element.setAttributeNS(null, i, attributes[i]); +} + +/** + * Handle mouse wheel event. + */ +function handleMouseWheel(evt) { + if(evt.preventDefault) + evt.preventDefault(); + + evt.returnValue = false; + + var svgDoc = evt.target.ownerDocument; + + var delta; + + if(evt.wheelDelta) + delta = evt.wheelDelta / 3600; // Chrome/Safari + else + delta = evt.detail / -90; // Mozilla + + var z = 1 + delta; // Zoom factor: 0.9/1.1 + + var g = svgDoc.getElementById("viewport"); + + var p = getEventPoint(evt); + + p = p.matrixTransform(g.getCTM().inverse()); + + // Compute new scale matrix in current mouse position + var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y); + + setCTM(g, g.getCTM().multiply(k)); + + stateTf = stateTf.multiply(k.inverse()); +} + +/** + * Handle mouse move event. + */ +function handleMouseMove(evt) { + if(evt.preventDefault) + evt.preventDefault(); + + evt.returnValue = false; + + var svgDoc = evt.target.ownerDocument; + + var g = svgDoc.getElementById("viewport"); + + if(state == 'pan') { + // Pan mode + var p = getEventPoint(evt).matrixTransform(stateTf); + + setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y)); + } else if(state == 'move') { + // Move mode + var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse()); + + setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM())); + + stateOrigin = p; + } +} + +/** + * Handle click event. + */ +function handleMouseDown(evt) { + if(evt.preventDefault) + evt.preventDefault(); + + evt.returnValue = false; + + var svgDoc = evt.target.ownerDocument; + + var g = svgDoc.getElementById("viewport"); + + if(true || evt.target.tagName == "svg") { + // Pan mode + state = 'pan'; + + stateTf = g.getCTM().inverse(); + + stateOrigin = getEventPoint(evt).matrixTransform(stateTf); + } else { + // Move mode + state = 'move'; + + stateTarget = evt.target; + + stateTf = g.getCTM().inverse(); + + stateOrigin = getEventPoint(evt).matrixTransform(stateTf); + } +} + +/** + * Handle mouse button release event.
+ */ +function handleMouseUp(evt) { + if(evt.preventDefault) + evt.preventDefault(); + + evt.returnValue = false; + + var svgDoc = evt.target.ownerDocument; + + if(state == 'pan' || state == 'move') { + // Quit pan mode + state = ''; + } +} + +]]></script> +EOF +} + # Translate a stack of addresses into a stack of symbols sub TranslateStack { my $symbols = shift; @@ -1806,7 +2226,7 @@ sub Unparse { } } } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds } else { return sprintf("%d", $num); } @@ -1947,42 +2367,42 @@ sub RemoveUninterestingFrames { 'malloc', 'free', 'memalign', - 'posix_memalign', + 'posix_memalign', 'pvalloc', 'valloc', 'realloc', - 'tc_calloc', + 'tc_calloc', 'tc_cfree', 'tc_malloc', 'tc_free', 'tc_memalign', - 'tc_posix_memalign', + 'tc_posix_memalign', 'tc_pvalloc', 'tc_valloc', 'tc_realloc', - 'tc_new', - 'tc_delete', - 'tc_newarray', - 'tc_deletearray', - 'tc_new_nothrow', - 'tc_newarray_nothrow', - 'do_malloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', '::do_malloc', # new name -- got moved to an unnamed ns '::do_malloc_or_cpp_alloc', 'DoSampledAllocation', - 'simple_alloc::allocate', - '__malloc_alloc_template::allocate', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', '__builtin_delete', '__builtin_new', '__builtin_vec_delete', '__builtin_vec_new', 'operator new', 'operator new[]', - # These mark the beginning/end of our custom sections - '__start_google_malloc', - '__stop_google_malloc', - '__start_malloc_hook', - '__stop_malloc_hook') { + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { $skip{$name} = 1; $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything } @@ -1999,11 +2419,11 @@ sub RemoveUninterestingFrames { # TODO(dpeng): this should not be necessary; it's taken # care of by the general 2nd-pc mechanism below. foreach my $name ('ProfileData::Add', # historical - 'ProfileData::prof_handler', # historical - 'CpuProfiler::prof_handler', + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', '__FRAME_END__', - '__pthread_sighandler', - '__restore') { + '__pthread_sighandler', + '__restore') { $skip{$name} = 1; } } else { @@ -2042,10 +2462,10 @@ sub RemoveUninterestingFrames { my @path = (); foreach my $a (@addrs) { if (exists($symbols->{$a})) { - my $func = $symbols->{$a}->[0]; - if ($skip{$func} || ($func =~ m/$skip_regexp/)) { - next; - } + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + next; + } } push(@path, $a); } @@ -2070,8 +2490,8 @@ sub ReduceProfile { # To avoid double-counting due to recursion, skip a stack-trace # entry if it has already been seen if (!$seen{$e}) { - $seen{$e} = 1; - push(@path, $e); + $seen{$e} = 1; + push(@path, $e); } } my $reduced_path = join("\n", @path); @@ -2265,28 +2685,11 @@ sub AddEntries { AddEntry($profile, (join "\n", @k), $count); } -sub IsSymbolizedProfileFile { - my $file_name = shift; - - if (!(-e $file_name) || !(-r $file_name)) { - return 0; - } - - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - # Check if the file contains a symbol-section marker. 
- open(TFILE, "<$file_name"); - my @lines = <TFILE>; - my $result = grep(/^--- *$symbol_marker/, @lines); - close(TFILE); - return $result > 0; -} - ##### Code to profile a server dynamically ##### sub CheckSymbolPage { my $url = SymbolPageURL(); - open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |"); + open(SYMBOL, "$URL_FETCHER '$url' |"); my $line = <SYMBOL>; $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines close(SYMBOL); @@ -2305,33 +2708,45 @@ sub CheckSymbolPage { sub IsProfileURL { my $profile_name = shift; - my ($host, $port, $path) = ParseProfileURL($profile_name); - return defined($host) and defined($port) and defined($path); + if (-f $profile_name) { + printf STDERR "Using local file $profile_name.\n"; + return 0; + } + return 1; } sub ParseProfileURL { my $profile_name = shift; - if (defined($profile_name) && - $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|.*($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) { - # $6 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after - # the hostname, as long as that everything is the empty string, - # a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc. - # So "$6 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "". - return ($2, $3, $6 || $5); - } - return (); + + if (!defined($profile_name) || $profile_name eq "") { + return (); + } + + # Split profile URL - matches all non-empty strings, so no test. + $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; + + my $proto = $1 || "http://"; + my $hostport = $2; + my $prefix = $3; + my $profile = $4 || "/"; + + my $host = $hostport; + $host =~ s/:.*//; + + my $baseurl = "$proto$hostport$prefix"; + return ($host, $baseurl, $profile); } # We fetch symbols from the first profile argument. sub SymbolPageURL { - my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); - return "http://$host:$port$SYMBOL_PAGE"; + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + return "$baseURL$SYMBOL_PAGE"; } sub FetchProgramName() { - my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); - my $url = "http://$host:$port$PROGRAM_NAME_PAGE"; - my $command_line = "$WGET $WGET_FLAGS -qO- '$url'"; + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + my $url = "$baseURL$PROGRAM_NAME_PAGE"; + my $command_line = "$URL_FETCHER '$url'"; open(CMDLINE, "$command_line |") or error($command_line); my $cmdline = <CMDLINE>; $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2348,7 +2763,7 @@ sub FetchProgramName() { # curl. Redirection happens on borg hosts. sub ResolveRedirectionForCurl { my $url = shift; - my $command_line = "$CURL -s --head '$url'"; + my $command_line = "$URL_FETCHER --head '$url'"; open(CMDLINE, "$command_line |") or error($command_line); while (<CMDLINE>) { s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2360,6 +2775,20 @@ sub ResolveRedirectionForCurl { return $url; } +# Add a timeout flag to URL_FETCHER +sub AddFetchTimeout { + my $fetcher = shift; + my $timeout = shift; + if (defined($timeout)) { + if ($fetcher =~ m/\bcurl -s/) { + $fetcher .= sprintf(" --max-time %d", $timeout); + } elsif ($fetcher =~ m/\brpcget\b/) { + $fetcher .= sprintf(" --deadline=%d", $timeout); + } + } + return $fetcher; +} +
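AddFetchTimeout() above only knows how to extend the fetchers it recognizes; a couple of illustrative calls (values made up, not from the patch) show the intended behavior:

    # Illustrative only: exercise AddFetchTimeout() as defined above.
    my $fetcher    = AddFetchTimeout("curl -s", 30);     # "curl -s --max-time 30"
    my $rpcfetcher = AddFetchTimeout("rpcget", 30);      # "rpcget --deadline=30"
    my $untouched  = AddFetchTimeout("curl -s", undef);  # no timeout given, unchanged

# Reads a symbol map from the file handle name given as $1, returning # the resulting symbol map. Also processes variables relating to symbols.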
# Currently, the only variable processed is 'binary=<value>' which updates @@ -2404,7 +2833,6 @@ sub FetchSymbols { my $pcset = shift; my $symbol_map = shift; - my %seen = (); my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq @@ -2414,12 +2842,16 @@ sub FetchSymbols { open(POSTFILE, ">$main::tmpfile_sym"); print POSTFILE $post_data; close(POSTFILE); - + my $url = SymbolPageURL(); - # Here we use curl for sending data via POST since old - # wget doesn't have --post-file option. - $url = ResolveRedirectionForCurl($url); - my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'"; + + my $command_line; + if ($URL_FETCHER =~ m/\bcurl -s/) { + $url = ResolveRedirectionForCurl($url); + $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'"; + } else { + $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'"; + } # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. my $cppfilt = $obj_tool_map{"c++filt"}; open(SYMBOL, "$command_line | $cppfilt |") or error($command_line); @@ -2464,10 +2896,10 @@ sub BaseName { sub MakeProfileBaseName { my ($binary_name, $profile_name) = @_; - my ($host, $port, $path) = ParseProfileURL($profile_name); + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); my $binary_shortname = BaseName($binary_name); - return sprintf("%s.%s.%s-port%s", - $binary_shortname, $main::op_time, $host, $port); + return sprintf("%s.%s.%s", + $binary_shortname, $main::op_time, $host); } sub FetchDynamicProfile { @@ -2479,7 +2911,7 @@ sub FetchDynamicProfile { if (!IsProfileURL($profile_name)) { return $profile_name; } else { - my ($host, $port, $path) = ParseProfileURL($profile_name); + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); if ($path eq "" || $path eq "/") { # Missing type specifier defaults to cpu-profile $path = $PROFILE_PAGE; @@ -2487,37 +2919,28 @@ sub FetchDynamicProfile { my $profile_file = MakeProfileBaseName($binary_name, $profile_name); - my $url; - my $wget_timeout; - if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) { - if ($path =~ m/$PROFILE_PAGE/) { - $url = sprintf("http://$host:$port$path?seconds=%d", - $main::opt_seconds); + my $url = "$baseURL$path"; + my $fetch_timeout = undef; + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { + if ($path =~ m/[?]/) { + $url .= "&"; } else { - if ($profile_name =~ m/[?]/) { - $profile_name .= "&" - } else { - $profile_name .= "?" - } - $url = sprintf("http://$profile_name" . "seconds=%d", - $main::opt_seconds); + $url .= "?"; } - $wget_timeout = sprintf("--timeout=%d", - int($main::opt_seconds * 1.01 + 60)); + $url .= sprintf("seconds=%d", $main::opt_seconds); + $fetch_timeout = $main::opt_seconds * 1.01 + 60; } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; - $profile_file .= "$suffix"; - $url = "http://$host:$port$path"; - $wget_timeout = ""; + $profile_file .= $suffix; } my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof"); - if (!(-d $profile_dir)) { + if (! 
-d $profile_dir) { mkdir($profile_dir) - || die("Unable to create profile directory $profile_dir: $!\n"); + || die("Unable to create profile directory $profile_dir: $!\n"); } my $tmp_profile = "$profile_dir/.tmp.$profile_file"; my $real_profile = "$profile_dir/$profile_file"; @@ -2526,14 +2949,15 @@ sub FetchDynamicProfile { return $real_profile; } - my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'"; - if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){ + my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout); + my $cmd = "$fetcher '$url' > '$tmp_profile'"; + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/){ print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; if ($encourage_patience) { print STDERR "Be patient...\n"; } } else { - print STDERR "Fetching $path profile from $host:$port to\n ${real_profile}\n"; + print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; } (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); @@ -2580,6 +3004,7 @@ sub FetchDynamicProfilesRecurse { } else { $position = 1 | ($position << 1); TryCollectProfile($maxlevel, $level, $position); + cleanup(); exit(0); } } @@ -2603,22 +3028,69 @@ sub TryCollectProfile { # Provide a small streaming-read module to handle very large # cpu-profile files. Stream in chunks along a sliding window. +# Provides an interface to get one 'slot', correctly handling +# endian-ness differences. A slot is one 32-bit or 64-bit word +# (depending on the input profile). We tell endianness and bit-size +# for the profile by looking at the first 8 bytes: in cpu profiles, +# the second slot is always 3 (we'll accept anything that's not 0). BEGIN { package CpuProfileStream; sub new { - my ($class, $file) = @_; - my $self = { file => $file, - base => 0, - stride => 512 * 1024, # must be a multiple of |long| - slots => [] + my ($class, $file, $fname) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of bitsize/8 + slots => [], + unpack_code => "", # N for big-endian, V for little }; bless $self, $class; # Let unittests adjust the stride if ($main::opt_test_stride > 0) { $self->{stride} = $main::opt_test_stride; } - $self->overflow(); + # Read the first two slots to figure out bitsize and endianness. + my $slots = $self->{slots}; + my $str; + read($self->{file}, $str, 8); + # Set the global $address_length based on what we see here. + # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). + $address_length = ($str eq (chr(0)x8)) ? 16 : 8; + if ($address_length == 8) { + if (substr($str, 6, 2) eq chr(0)x2) { + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 4, 2) eq chr(0)x2) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**16\n"); + } + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # If we're a 64-bit profile, make sure we're a 64-bit-capable + # perl. Otherwise, each slot will be represented as a float + # instead of an int64, losing precision and making all the + # 64-bit addresses wrong. We *could* try to handle this with + # software emulation of 64-bit ints, but that's added complexity + # for no clear benefit (yet). We use 'Q' to test for 64-bit-ness; + # perl docs say it's only available on 64-bit perl systems. + my $has_q = 0; + eval { $has_q = pack("Q", "1") ?
1 : 1; }; + if (!$has_q) { + ::error("$fname: need a 64-bit perl to process this 64-bit profile.\n"); + } + read($self->{file}, $str, 8); + if (substr($str, 4, 4) eq chr(0)x4) { + # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 0, 4) eq chr(0)x4) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**32\n"); + } + my @pair = unpack($self->{unpack_code} . "*", $str); + # Since we know one of the pair is 0, it's fine to just add them. + @$slots = (0, $pair[0] + $pair[1]); + } return $self; } @@ -2629,7 +3101,25 @@ BEGIN { $self->{base} += $#$slots + 1; # skip over data we're replacing my $str; read($self->{file}, $str, $self->{stride}); - @$slots = unpack("L*", $str); + if ($address_length == 8) { # the 32-bit case + # This is the easy case: unpack provides 32-bit unpacking primitives. + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # We need to unpack 32 bits at a time and combine. + my @b32_values = unpack($self->{unpack_code} . "*", $str); + my @b64_values = (); + for (my $i = 0; $i < $#b32_values; $i += 2) { + # TODO(csilvers): if this is a 32-bit perl, the math below + # could end up in a too-large int, which perl will promote + # to a double, losing necessary precision. Deal with that. + if ($self->{unpack_code} eq 'V') { # little-endian + push(@b64_values, $b32_values[$i] + $b32_values[$i+1] * (2**32)); + } else { + push(@b64_values, $b32_values[$i] * (2**32) + $b32_values[$i+1]); + } + } + @$slots = @b64_values; + } } # Access the i-th long in the file (logically), or -1 at EOF. @@ -2638,16 +3128,16 @@ my $slots = $self->{slots}; while ($#$slots >= 0) { if ($idx < $self->{base}) { - # The only time we expect a reference to $slots[$i - something] - # after referencing $slots[$i] is reading the very first header. - # Since $stride > |header|, that shouldn't cause any lookback - # errors. And everything after the header is sequential. - print STDERR "Unexpected look-back reading CPU profile"; - return -1; # shrug, don't know what better to return + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return } elsif ($idx > $self->{base} + $#$slots) { - $self->overflow(); + $self->overflow(); } else { - return $slots->[$idx - $self->{base}]; + return $slots->[$idx - $self->{base}]; } } # If we get here, $slots is [], which means we've reached EOF
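The header sniff described above (first slot always 0, second slot small, normally 3) is what lets a single reader handle 32-bit and 64-bit profiles of either byte order. A hedged, standalone sketch of just that decision, with an illustrative helper name:

    # Sketch only: decide bitness and unpack code from the first 8 bytes,
    # mirroring CpuProfileStream::new() above.
    sub sniff_profile_header {
      my $first8 = shift;                  # first 8 bytes of the profile
      if ($first8 eq (chr(0) x 8)) {
        return (64, undef);                # 64-bit; endianness needs 8 more bytes
      }
      if (substr($first8, 6, 2) eq (chr(0) x 2)) {
        return (32, 'V');                  # 32-bit little-endian
      }
      if (substr($first8, 4, 2) eq (chr(0) x 2)) {
        return (32, 'N');                  # 32-bit big-endian
      }
      die "not a CPU profile header\n";
    }
    # e.g. sniff_profile_header(pack("V2", 0, 3)) returns (32, 'V')

@@ -2655,6 +3145,44 @@ +# Return the next line from the profile file, assuming it's a text +# line (which in this case means, doesn't start with a NUL byte). If +# it's not a text line, return "". At EOF, return undef, like perl does. +# Input file should be in binmode.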
+sub ReadProfileLine { + local *PROFILE = shift; + my $firstchar = ""; + my $line = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar eq "\0") { + return ""; + } + $line = <PROFILE>; + if (defined($line)) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + } + return $line; +} + +sub IsSymbolizedProfileFile { + my $file_name = shift; + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + # Check if the file contains a symbol-section marker. + open(TFILE, "<$file_name"); + binmode TFILE; + my $firstline = ReadProfileLine(*TFILE); + close(TFILE); + if (!$firstline) { + return 0; + } + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + return $firstline =~ /^--- *$symbol_marker/; +} + # Parse profile generated by common/profiler.cc and return a reference # to a map: # $result->{version} Version number of profile file @@ -2689,28 +3217,17 @@ sub ReadProfile { # whole firstline, since it may be gigabytes(!) of data. open(PROFILE, "<$fname") || error("$fname: $!\n"); binmode PROFILE; # New perls do UTF-8 processing - my $firstchar = ""; - my $header = ""; - read(PROFILE, $firstchar, 1); - seek(PROFILE, -1, 1); # unread the firstchar - if ($firstchar ne "\0") { - $header = <PROFILE>; - $header =~ s/\r//g; # turn windows-looking lines into unix-looking lines + my $header = ReadProfileLine(*PROFILE); + if (!defined($header)) { # means "at EOF" + error("Profile is empty.\n"); } my $symbols; if ($header =~ m/^--- *$symbol_marker/o) { - # read the symbol section of the symbolized profile file + # Read the symbol section of the symbolized profile file. $symbols = ReadSymbols(*PROFILE{IO}); - - # read the next line to get the header for the remaining profile - $header = ""; - read(PROFILE, $firstchar, 1); - seek(PROFILE, -1, 1); # unread the firstchar - if ($firstchar ne "\0") { - $header = <PROFILE>; - $header =~ s/\r//g; - } + # Read the next line to get the header for the remaining profile. + $header = ReadProfileLine(*PROFILE) || ""; } my $result; @@ -2752,6 +3269,33 @@ sub ReadProfile { return $result; } +# Subtract one from caller pc so we map back to call instr. +# However, don't do this if we're reading a symbolized profile +# file, in which case the subtract-one was done when the file +# was written. +# +# We apply the same logic to all readers, though ReadCPUProfile uses an +# independent implementation. +sub FixCallerAddresses { + my $stack = shift; + if ($main::use_symbolized_profile) { + return $stack; + } else { + $stack =~ /(\s)/; + my $delimiter = $1; + my @addrs = split(' ', $stack); + my @fixedaddrs; + $#fixedaddrs = $#addrs; + if ($#addrs >= 0) { + $fixedaddrs[0] = $addrs[0]; + } + for (my $i = 1; $i <= $#addrs; $i++) { + $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); + } + return join $delimiter, @fixedaddrs; + } +} + # CPU profile reader sub ReadCPUProfile { my $prog = shift; @@ -2763,10 +3307,7 @@ sub ReadCPUProfile { my $pcs = {}; # Parse string into array of slots. - # L! cannot be used because with a native 64-bit build, it will cause - # 1) a valid 64-bit profile to use the 32-bit codepath, and - # 2) a valid 32-bit profile to be unrecognized. - my $slots = CpuProfileStream->new(*PROFILE); + my $slots = CpuProfileStream->new(*PROFILE, $fname); # Read header. 
The current header version is a 5-element structure # containing: @@ -2775,108 +3316,50 @@ sub ReadCPUProfile { # 2: format version (0) # 3: sampling period (usec) # 4: unused padding (always 0) - # The header words are 32-bit or 64-bit depending on the ABI of the program - # that generated the profile. In the 64-bit case, since our x86-architecture - # machines are little-endian, the actual value of each of these elements is - # in the first 32-bit word, and the second is always zero. The @slots array - # above was read as a sequence of 32-bit words in both cases, so we need to - # explicitly check for both cases. A typical slot sequence for each is: - # 32-bit: 0 3 0 100 0 - # 64-bit: 0 0 3 0 0 0 100 0 0 0 - # if ($slots->get(0) != 0 ) { error("$fname: not a profile file, or old format profile file\n"); } - if ($slots->get(1) >= 3) { - # Normal 32-bit header: - $version = $slots->get(2); - $period = $slots->get(3); - $i = 2 + $slots->get(1); - $address_length = 8; - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $d = $slots->get($i++); - if ($slots->get($i) == 0) { - # End of profile data marker - $i += $d; - last; - } - - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pc = sprintf("%08x", $slots->get($i+$j)); - $pcs->{$pc} = 1; - push @k, $pc; - } - - AddEntry($profile, (join "\n", @k), $n); + $i = 2 + $slots->get(1); + $version = $slots->get(2); + $period = $slots->get(3); + # Do some sanity checking on these header values. + if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { + error("$fname: not a profile file, or corrupted profile file\n"); + } + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? + my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); + print STDERR "At index $i (address $addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); + } + if ($slots->get($i) == 0) { + # End of profile data marker $i += $d; + last; } - # Normal 64-bit header: All entries are doubled in size. The first - # word (little-endian) should contain the real value, the second should - # be zero. - } elsif ($slots->get(1) != 0 || - $slots->get(2) < 3 || - $slots->get(3) != 0 || - $slots->get(5) != 0 || - $slots->get(7) != 0) { - error("$fname: not a profile file, or old format profile file\n"); - } else { - $version = $slots->get(4); - $period = $slots->get(6); - $i = 4 + 2 * $slots->get(2); - $address_length = 16; - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $nhi = $slots->get($i++); - # Huge counts may coerce to floating point, keeping scale, not precision - if ($nhi != 0) { $n += $nhi*(2**32); } - my $d = $slots->get($i++); - if ($slots->get($i++) != 0) { - my $addr = sprintf("%o", 4 * $i); - print STDERR "At index $i ($addr):\n"; - error("$fname: stack trace depth >= 2**32\n"); + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = $slots->get($i+$j); + # Subtract one from caller pc so we map back to call instr. + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. 
+ if ($j > 0 && !$main::use_symbolized_profile) { + $pc--; } - if ($slots->get($i) == 0 && $slots->get($i+1) == 0) { - # End of profile data marker - $i += 2 * $d; - last; - } - - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pclo = $slots->get($i++); - my $pchi = $slots->get($i++); - if ($pclo == -1 || $pchi == -1) { - error("$fname: Unexpected EOF when reading stack of depth $d\n"); - } - - # Subtract one from caller pc so we map back to call instr. - # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. - if ($j > 0 && !$main::use_symbolized_profile) { - if ($pclo == 0) { - $pchi--; - $pclo = 0xffffffff; - } else { - $pclo--; - } - } - - my $pc = sprintf("%08x%08x", $pchi, $pclo); - $pcs->{$pc} = 1; - push @k, $pc; - } - AddEntry($profile, (join "\n", @k), $n); + $pc = sprintf("%0*x", $address_length, $pc); + $pcs->{$pc} = 1; + push @k, $pc; } + + AddEntry($profile, (join "\n", @k), $n); + $i += $d; } # Parse map @@ -2947,18 +3430,18 @@ sub ReadHeapProfile { # found for profiles generated locally, and the others for # remote profiles. if (($type eq "heapprofile") || ($type !~ /heap/) ) { - # No need to adjust for the sampling rate with heap-profiler-derived data - $sampling_algorithm = 0; + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; } elsif ($type =~ /_v2/) { - $sampling_algorithm = 2; # version 2 sampling + $sampling_algorithm = 2; # version 2 sampling if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period); - } + $sample_adjustment = int($sample_period); + } } else { - $sampling_algorithm = 1; # version 1 sampling + $sampling_algorithm = 1; # version 1 sampling if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period)/2; - } + $sample_adjustment = int($sample_period)/2; + } } } else { # We detect whether or not this is a remote-heap profile by checking @@ -2970,7 +3453,7 @@ sub ReadHeapProfile { my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); if (($n1 == $n2) && ($s1 == $s2)) { # This is likely to be a remote-heap based sample profile - $sampling_algorithm = 1; + $sampling_algorithm = 1; } } } @@ -2984,7 +3467,7 @@ sub ReadHeapProfile { print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; } else { printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", - $sample_adjustment); + $sample_adjustment); } if ($sampling_algorithm > 1) { # We don't bother printing anything for the original version (version 1) @@ -3001,7 +3484,7 @@ sub ReadHeapProfile { if (/^MAPPED_LIBRARIES:/) { # Read the /proc/self/maps data while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines + s/\r//g; # turn windows-looking lines into unix-looking lines $map .= $_; } last; @@ -3011,7 +3494,7 @@ sub ReadHeapProfile { # Read /proc/self/maps data as formatted by DumpAddressMap() my $buildvar = ""; while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines + s/\r//g; # turn windows-looking lines into unix-looking lines # Parse "build=<dir>" specification if supplied if (m/^\s*build=(.*)\n/) { $buildvar = $1; @@ -3066,7 +3549,7 @@ sub ReadHeapProfile { } my @counts = ($n1, $s1, $n2, $s2); - AddEntries($profile, $pcs, $stack, $counts[$index]); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); } } @@ -3086,7 +3569,7 @@ sub ReadSynchProfile { my 
$profile = {}; my $pcs = {}; my $sampling_period = 1; - my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $cyclespernanosec = 2.8; # Default assumption for old binaries my $seen_clockrate = 0; my $line; @@ -3112,7 +3595,7 @@ $count *= $sampling_period; my @values = ($cycles, $count, $cycles / $count); - AddEntries($profile, $pcs, $stack, $values[$index]); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { @@ -3127,7 +3610,7 @@ # Adjust for sampling done by application $cycles *= $sampling_period; - AddEntries($profile, $pcs, $stack, $cycles); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { my ($variable, $value) = ($1,$2); @@ -3308,8 +3791,8 @@ } elsif ($line =~ /segname (\w+)/) { $segname = $1; } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && - $sectname eq "__text" && - $segname eq "__TEXT")) { + $sectname eq "__text" && + $segname eq "__TEXT")) { next; } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { $vma = $1; @@ -3369,7 +3852,7 @@ my $finish; my $offset; my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*)?)$/i) { + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { # Full line from /proc/self/maps. Example: # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so $start = HexExtend($1); @@ -3675,7 +4158,7 @@ sub MapToSymbols { if ($debug) { print("---- $image ---\n"); } for (my $i = 0; $i <= $#{$pclist}; $i++) { # addr2line always reads hex addresses, and does not need '0x' prefix. - if ($debug) { printf("%s\n", $pclist->[$i]); } + if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); if (defined($sep_address)) { printf ADDRESSES ("%s\n", $sep_address); @@ -3727,7 +4210,7 @@ $symbols->{$pcstr} = $sym; } unshift(@{$sym}, $function, $filelinenum, $fullfunction); - if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } if (!defined($sep_address)) { # Inlining is off, so this entry ends immediately $count++; @@ -3783,7 +4266,7 @@ } return 1; } - + sub ShortFunctionName { my $function = shift; while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types @@ -3830,6 +4313,8 @@ sub ConfigureObjTools { if ($file_type =~ /Mach-O/) { # OS X uses otool to examine Mach-O files, rather than objdump. $obj_tool_map{"otool"} = "otool"; + $obj_tool_map{"addr2line"} = "false"; # no addr2line + $obj_tool_map{"objdump"} = "false"; # no objdump } # Go fill in %obj_tool_map with the pathnames to use: @@ -3876,9 +4361,8 @@ sub ConfigureTool { sub cleanup { unlink($main::tmpfile_sym); - for (my $i = 0; $i < $main::next_tmpfile; $i++) { - unlink(PsTempName($i)); - } + unlink(keys %main::tempnames); + # We leave any collected profiles in $HOME/pprof in case the user wants # to look at them later. We print a message informing them of this. if ((scalar(@main::profile_files) > 0) && @@ -3921,7 +4405,7 @@ sub GetProcedureBoundariesViaNm { my $routine = ""; while (<NM>) { s/\r//g; # turn windows-looking lines into unix-looking lines - if (m/^([0-9a-f]+) (.)
(..*)/) { + if (m/^\s*([0-9a-f]+) (.) (..*)/) { my $start_val = $1; my $type = $2; my $this_routine = $3; @@ -3942,12 +4426,12 @@ sub GetProcedureBoundariesViaNm { # we'll just go ahead and process the first entry (which never # got touched in the queue), and ignore the others. if ($start_val eq $last_start && $type =~ /t/i) { - # We are the 'T' symbol at this address, replace previous symbol. - $routine = $this_routine; - next; + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; } elsif ($start_val eq $last_start) { - # We're not the 'T' symbol at this address, so ignore us. - next; + # We're not the 'T' symbol at this address, so ignore us. + next; } if ($this_routine eq $sep_symbol) { @@ -3962,7 +4446,7 @@ sub GetProcedureBoundariesViaNm { if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($start_val)]; + HexExtend($start_val)]; } $last_start = $start_val; $routine = $this_routine; @@ -3981,9 +4465,8 @@ sub GetProcedureBoundariesViaNm { # TODO(csilvers): do better here. if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($last_start)]; + HexExtend($last_start)]; } - return $symbol_table; } @@ -4029,9 +4512,13 @@ sub GetProcedureBoundaries { # -D to at least get *exported* symbols. If we can't use --demangle, # we use c++filt instead, if it exists on this system. my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - "$nm -D -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag"); + " $image 2>/dev/null $cppfilt_flag", + "$nm -D -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag", + # 6nm is for Go binaries + "6nm $image 2>/dev/null | sort", + ); + # If the executable is an MS Windows PDB-format executable, we'll # have set up obj_tool_map("nm_pdb"). 
In this case, we actually want to use both unix nm and windows-specific nm_pdb, since @@ -4263,4 +4750,3 @@ sub RunUnitTests { } exit ($error_count); } - diff --git a/third_party/tcmalloc/chromium/src/span.h b/third_party/tcmalloc/chromium/src/span.h index ab9a796..b3483ca 100644 --- a/third_party/tcmalloc/chromium/src/span.h +++ b/third_party/tcmalloc/chromium/src/span.h @@ -60,6 +60,10 @@ struct Span { int value[64]; #endif + void* start_ptr() { + return reinterpret_cast<void*>(start << kPageShift); + } + // What freelist the span is on: IN_USE if on none, or normal or returned enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST }; }; diff --git a/third_party/tcmalloc/chromium/src/stacktrace.cc b/third_party/tcmalloc/chromium/src/stacktrace.cc index d158eea..68cb865 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace.cc +++ b/third_party/tcmalloc/chromium/src/stacktrace.cc @@ -57,7 +57,45 @@ #include "stacktrace_config.h" #if defined(STACKTRACE_INL_HEADER) -# include STACKTRACE_INL_HEADER + +#define IS_STACK_FRAMES 0 +#define IS_WITH_CONTEXT 0 +#define GET_STACK_TRACE_OR_FRAMES \ + GetStackTrace(void **result, int max_depth, int skip_count) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 1 +#define IS_WITH_CONTEXT 0 +#define GET_STACK_TRACE_OR_FRAMES \ + GetStackFrames(void **result, int *sizes, int max_depth, int skip_count) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 0 +#define IS_WITH_CONTEXT 1 +#define GET_STACK_TRACE_OR_FRAMES \ + GetStackTraceWithContext(void **result, int max_depth, \ + int skip_count, const void *ucp) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + +#define IS_STACK_FRAMES 1 +#define IS_WITH_CONTEXT 1 +#define GET_STACK_TRACE_OR_FRAMES \ + GetStackFramesWithContext(void **result, int *sizes, int max_depth, \ + int skip_count, const void *ucp) +#include STACKTRACE_INL_HEADER +#undef IS_STACK_FRAMES +#undef IS_WITH_CONTEXT +#undef GET_STACK_TRACE_OR_FRAMES + #elif 0 // This is for the benefit of code analysis tools that may have // trouble with the computed #include above. diff --git a/third_party/tcmalloc/chromium/src/stacktrace_config.h b/third_party/tcmalloc/chromium/src/stacktrace_config.h index b58ab1d..18f16ab 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_config.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_config.h @@ -53,6 +53,7 @@ # define STACKTRACE_SKIP_CONTEXT_ROUTINES 1 # elif defined(HAVE_LIBUNWIND_H) // a proxy for having libunwind installed # define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h" +# define STACKTRACE_USES_LIBUNWIND 1 # elif defined(__linux) # error Cannot calculate stack trace: need either libunwind or frame-pointers (see INSTALL file) # else diff --git a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h index 490cd9d..0e72ee7 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h @@ -34,57 +34,32 @@ // // Note: The glibc implementation may cause a call to malloc. // This can cause a deadlock in HeapProfiler. + +#ifndef BASE_STACKTRACE_GENERIC_INL_H_ +#define BASE_STACKTRACE_GENERIC_INL_H_ +// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here: + #include <execinfo.h> #include <string.h> #include "google/stacktrace.h" +#endif // BASE_STACKTRACE_GENERIC_INL_H_ -// If you change this function, also change GetStackFrames below. -int GetStackTrace(void** result, int max_depth, int skip_count) { - static const int kStackLength = 64; - void * stack[kStackLength]; - int size; - - size = backtrace(stack, kStackLength); - skip_count++; // we want to skip the current frame as well - int result_count = size - skip_count; - if (result_count < 0) - result_count = 0; - if (result_count > max_depth) - result_count = max_depth; - for (int i = 0; i < result_count; i++) - result[i] = stack[i + skip_count]; - - return result_count; -} +// Note: this part of the file is included several times. +// Do not put globals below. -// If you change this function, also change GetStackTrace above: -// -// This GetStackFrames routine shares a lot of code with GetStackTrace -// above. This code could have been refactored into a common routine, -// and then both GetStackTrace/GetStackFrames could call that routine. -// There are two problems with that: -// -// (1) The performance of the refactored-code suffers substantially - the -// refactored needs to be able to record the stack trace when called -// from GetStackTrace, and both the stack trace and stack frame sizes, -// when called from GetStackFrames - this introduces enough new -// conditionals that GetStackTrace performance can degrade by as much -// as 50%. +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() // -// (2) Whether the refactored routine gets inlined into GetStackTrace and -// GetStackFrames depends on the compiler, and we can't guarantee the -// behavior either-way, even with "__attribute__ ((always_inline))" -// or "__attribute__ ((noinline))". But we need this guarantee or the -// frame counts may be off by one. -// -// Both (1) and (2) can be addressed without this code duplication, by -// clever use of template functions, and by defining GetStackTrace and -// GetStackFrames as macros that expand to these template functions. -// However, this approach comes with its own set of problems - namely, -// macros and preprocessor trouble - for example, if GetStackTrace -// and/or GetStackFrames is ever defined as a member functions in some -// class, we are in trouble. -int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) +int GET_STACK_TRACE_OR_FRAMES { static const int kStackLength = 64; void * stack[kStackLength]; int size; @@ -97,10 +72,12 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { if (result_count > max_depth) result_count = max_depth; for (int i = 0; i < result_count; i++) - pcs[i] = stack[i + skip_count]; + result[i] = stack[i + skip_count]; +#if IS_STACK_FRAMES // No implementation for finding out the stack frame sizes yet. 
memset(sizes, 0, sizeof(*sizes) * result_count); +#endif return result_count; } diff --git a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h index d9d829a..a1d5249 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h @@ -32,6 +32,11 @@ // // Produce stack trace using libunwind +#ifndef BASE_STACKTRACE_LIBINWIND_INL_H_ +#define BASE_STACKTRACE_LIBINWIND_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: + // We only need local unwinder. #define UNW_LOCAL_ONLY @@ -52,73 +57,30 @@ extern "C" { // cases, we return 0 to indicate the situation. static __thread int recursive; -// If you change this function, also change GetStackFrames below. -int GetStackTrace(void** result, int max_depth, int skip_count) { - void *ip; - int n = 0; - unw_cursor_t cursor; - unw_context_t uc; +#endif // BASE_STACKTRACE_LIBINWIND_INL_H_ - if (recursive) { - return 0; - } - ++recursive; - - unw_getcontext(&uc); - int ret = unw_init_local(&cursor, &uc); - assert(ret >= 0); - skip_count++; // Do not include the "GetStackTrace" frame - - while (n < max_depth) { - if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { - break; - } - if (skip_count > 0) { - skip_count--; - } else { - result[n++] = ip; - } - if (unw_step(&cursor) <= 0) { - break; - } - } - --recursive; - return n; -} +// Note: this part of the file is included several times. +// Do not put globals below. -// If you change this function, also change GetStackTrace above: -// -// This GetStackFrames routine shares a lot of code with GetStackTrace -// above. This code could have been refactored into a common routine, -// and then both GetStackTrace/GetStackFrames could call that routine. -// There are two problems with that: +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() // -// (1) The performance of the refactored-code suffers substantially - the -// refactored needs to be able to record the stack trace when called -// from GetStackTrace, and both the stack trace and stack frame sizes, -// when called from GetStackFrames - this introduces enough new -// conditionals that GetStackTrace performance can degrade by as much -// as 50%. -// -// (2) Whether the refactored routine gets inlined into GetStackTrace and -// GetStackFrames depends on the compiler, and we can't guarantee the -// behavior either-way, even with "__attribute__ ((always_inline))" -// or "__attribute__ ((noinline))". But we need this guarantee or the -// frame counts may be off by one. -// -// Both (1) and (2) can be addressed without this code duplication, by -// clever use of template functions, and by defining GetStackTrace and -// GetStackFrames as macros that expand to these template functions. -// However, this approach comes with its own set of problems - namely, -// macros and preprocessor trouble - for example, if GetStackTrace -// and/or GetStackFrames is ever defined as a member functions in some -// class, we are in trouble. 
-int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+//   (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+int GET_STACK_TRACE_OR_FRAMES {
  void *ip;
  int n = 0;
  unw_cursor_t cursor;
  unw_context_t uc;
+#if IS_STACK_FRAMES
  unw_word_t sp = 0, next_sp = 0;
+#endif

  if (recursive) {
    return 0;
@@ -126,31 +88,41 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
  ++recursive;

  unw_getcontext(&uc);
-  RAW_CHECK(unw_init_local(&cursor, &uc) >= 0, "unw_init_local failed");
-  skip_count++; // Do not include the "GetStackFrames" frame
+  int ret = unw_init_local(&cursor, &uc);
+  assert(ret >= 0);
+  skip_count++; // Do not include current frame

  while (skip_count--) {
-    if (unw_step(&cursor) <= 0 ||
-        unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) {
+    if (unw_step(&cursor) <= 0) {
      goto out;
    }
+#if IS_STACK_FRAMES
+    if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) {
+      goto out;
+    }
+#endif
  }
+
  while (n < max_depth) {
-    sp = next_sp;
-    if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0)
+    if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
      break;
-    if (unw_step(&cursor) <= 0 ||
-        unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) {
-      // We couldn't step any further (possibly because we reached _start).
-      // Provide the last good PC we've got, and get out.
-      sizes[n] = 0;
-      pcs[n++] = ip;
+    }
+#if IS_STACK_FRAMES
+    sizes[n] = 0;
+#endif
+    result[n++] = ip;
+    if (unw_step(&cursor) <= 0) {
+      break;
+    }
+#if IS_STACK_FRAMES
+    sp = next_sp;
+    if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) {
      break;
    }
-    sizes[n] = next_sp - sp;
-    pcs[n++] = ip;
+    sizes[n - 1] = next_sp - sp;
+#endif
  }
- out:
+out:
  --recursive;
  return n;
}
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h
index 5631e49..9a07eea 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h
@@ -36,6 +36,11 @@
// http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK
// Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882
+#ifndef BASE_STACKTRACE_POWERPC_INL_H_
+#define BASE_STACKTRACE_POWERPC_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
#include <stdint.h>   // for uintptr_t
#include <stdlib.h>   // for NULL
#include <google/stacktrace.h>
@@ -71,9 +76,23 @@ static void **NextStackFrame(void **old_sp) {
// This ensures that GetStackTrace sets up the Link Register properly.
void StacktracePowerPCDummyFunction() __attribute__((noinline));
void StacktracePowerPCDummyFunction() { __asm__ volatile(""); }
+#endif  // BASE_STACKTRACE_POWERPC_INL_H_
+
+// Note: this part of the file is included several times.
+// Do not put globals below.

-// If you change this function, also change GetStackFrames below.
-int GetStackTrace(void** result, int max_depth, int skip_count) {
+// The following 4 functions are generated from the code below:
+// GetStack{Trace,Frames}()
+// GetStack{Trace,Frames}WithContext()
+//
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+//   (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+int GET_STACK_TRACE_OR_FRAMES {
  void **sp;
  // Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther)
  // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a
@@ -95,11 +114,29 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
  // This routine forces the compiler (at least gcc) to push it anyway.
  StacktracePowerPCDummyFunction();
+#if IS_STACK_FRAMES
+  // Note we do *not* increment skip_count here for the SYSV ABI. If
+  // we did, the list of stack frames wouldn't properly match up with
+  // the list of return addresses. Note this means the top pc entry
+  // is probably bogus for linux/ppc (and other SYSV-ABI systems).
+#else
  // The LR save area is used by the callee, so the top entry is bogus.
  skip_count++;
+#endif

  int n = 0;
  while (sp && n < max_depth) {
+#if IS_STACK_FRAMES
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<false>(sp);
+#else
+    void **next_sp = NextStackFrame<true>(sp);
+#endif
+
    if (skip_count > 0) {
      skip_count--;
    } else {
@@ -120,85 +157,15 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
#else
#error Need to specify the PPC ABI for your architecture.
#endif
-    }
-    // Use strict unwinding rules.
-    sp = NextStackFrame<true>(sp);
-  }
-  return n;
-}
-
-// If you change this function, also change GetStackTrace above:
-//
-// This GetStackFrames routine shares a lot of code with GetStackTrace
-// above. This code could have been refactored into a common routine,
-// and then both GetStackTrace/GetStackFrames could call that routine.
-// There are two problems with that:
-//
-// (1) The performance of the refactored-code suffers substantially - the
-// refactored needs to be able to record the stack trace when called
-// from GetStackTrace, and both the stack trace and stack frame sizes,
-// when called from GetStackFrames - this introduces enough new
-// conditionals that GetStackTrace performance can degrade by as much
-// as 50%.
-//
-// (2) Whether the refactored routine gets inlined into GetStackTrace and
-// GetStackFrames depends on the compiler, and we can't guarantee the
-// behavior either-way, even with "__attribute__ ((always_inline))"
-// or "__attribute__ ((noinline))". But we need this guarantee or the
-// frame counts may be off by one.
-//
-// Both (1) and (2) can be addressed without this code duplication, by
-// clever use of template functions, and by defining GetStackTrace and
-// GetStackFrames as macros that expand to these template functions.
-// However, this approach comes with its own set of problems - namely,
-// macros and preprocessor trouble - for example, if GetStackTrace
-// and/or GetStackFrames is ever defined as a member function in some
-// class, we are in trouble.
-int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) {
-  void **sp;
-#ifdef __APPLE__
-  __asm__ volatile ("mr %0,r1" : "=r" (sp));
-#else
-  __asm__ volatile ("mr %0,1" : "=r" (sp));
-#endif
-  StacktracePowerPCDummyFunction();
-  // Note we do *not* increment skip_count here for the SYSV ABI. If
-  // we did, the list of stack frames wouldn't properly match up with
-  // the list of return addresses. Note this means the top pc entry
-  // is probably bogus for linux/ppc (and other SYSV-ABI systems).
-
-  int n = 0;
-  while (sp && n < max_depth) {
-    // The GetStackFrames routine is called when we are in some
-    // informational context (the failure signal handler for example).
-    // Use the non-strict unwinding rules to produce a stack trace
-    // that is as complete as possible (even if it contains a few bogus
-    // entries in some rare cases).
-    void **next_sp = NextStackFrame<false>(sp);
-    if (skip_count > 0) {
-      skip_count--;
-    } else {
-#if defined(_CALL_AIX) || defined(_CALL_DARWIN)
-      pcs[n++] = *(sp+2);
-#elif defined(_CALL_SYSV)
-      pcs[n++] = *(sp+1);
-#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
-      // This check is in case the compiler doesn't define _CALL_AIX/etc.
-      pcs[n++] = *(sp+2);
-#elif defined(__linux)
-      // This check is in case the compiler doesn't define _CALL_SYSV.
-      pcs[n++] = *(sp+1);
-#else
-#error Need to specify the PPC ABI for your archiecture.
-#endif
+#if IS_STACK_FRAMES
      if (next_sp > sp) {
        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
      } else {
        // A frame-size of 0 is used to indicate unknown frame size.
        sizes[n] = 0;
      }
-      n++;
+#endif
    }
    sp = next_sp;
  }
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
index 892cd7c..bbd4c43 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
@@ -49,6 +49,11 @@
// This code is inspired by a patch from David Vitek:
// http://code.google.com/p/google-perftools/issues/detail?id=83
+#ifndef BASE_STACKTRACE_WIN32_INL_H_
+#define BASE_STACKTRACE_WIN32_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
#include "config.h"
#include <windows.h>    // for GetProcAddress and GetModuleHandle
#include <assert.h>
@@ -82,3 +87,5 @@ PERFTOOLS_DLL_DECL int GetStackFrames(void** /* pcs */,
  assert(0 == "Not yet implemented");
  return 0;
}
+
+#endif  // BASE_STACKTRACE_WIN32_INL_H_
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
index 05701e7..6753fdb 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
@@ -31,17 +31,13 @@
// Author: Sanjay Ghemawat
//
// Produce stack trace
-//
-// NOTE: there is code duplication between
-// GetStackTrace, GetStackTraceWithContext, GetStackFrames and
-// GetStackFramesWithContext. If you update one, update them all.
-//
-// There is no easy way to avoid this, because inlining
-// interferes with skip_count, and there is no portable
-// way to turn inlining off, or force it always on.
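That removed NOTE describes the duplication this change eliminates: each stacktrace_*-inl.h now keeps its once-only definitions (includes, helpers, the include guard itself) at the top, and leaves a single parameterized body below it, which stacktrace.cc compiles several times under different macro bindings. A sketch of the driver side of the pattern, assuming stacktrace.cc binds the macros along these lines (the macro names are the ones in this diff; the reduced driver is illustrative, not the actual stacktrace.cc contents):

// Instantiate GetStackTrace() from the shared body.
#define IS_STACK_FRAMES 0
#define IS_WITH_CONTEXT 0
#define GET_STACK_TRACE_OR_FRAMES \
  GetStackTrace(void** result, int max_depth, int skip_count)
#include "stacktrace_x86-inl.h"
#undef IS_STACK_FRAMES
#undef IS_WITH_CONTEXT
#undef GET_STACK_TRACE_OR_FRAMES

// Instantiate GetStackFrames(), which additionally fills in sizes[].
#define IS_STACK_FRAMES 1
#define IS_WITH_CONTEXT 0
#define GET_STACK_TRACE_OR_FRAMES \
  GetStackFrames(void** result, int* sizes, int max_depth, int skip_count)
#include "stacktrace_x86-inl.h"
#undef IS_STACK_FRAMES
#undef IS_WITH_CONTEXT
#undef GET_STACK_TRACE_OR_FRAMES

// ...and likewise for the two *WithContext() variants, with
// IS_WITH_CONTEXT defined to 1 and a trailing "const void *ucp" argument.

Since each entry point becomes a real, separately emitted function rather than a wrapper that may or may not be inlined, the skip_count frame arithmetic stays exact, and the #if IS_STACK_FRAMES / #if IS_WITH_CONTEXT sections of the shared body compile away in the variants that do not need them.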
-#include "config.h" +#ifndef BASE_STACKTRACE_X86_INL_H_ +#define BASE_STACKTRACE_X86_INL_H_ +// Note: this file is included into stacktrace.cc more than once. +// Anything that should only be defined once should be here: +#include "config.h" #include <stdlib.h> // for NULL #include <assert.h> #if defined(HAVE_SYS_UCONTEXT_H) @@ -190,8 +186,8 @@ static void **NextStackFrame(void **old_sp, const void *uc) { const ucontext_t *ucv = static_cast<const ucontext_t *>(uc); // This kernel does not use frame pointer in its VDSO code, // and so %ebp is not suitable for unwinding. - const void **const reg_ebp = - reinterpret_cast<const void **>(ucv->uc_mcontext.gregs[REG_EBP]); + void **const reg_ebp = + reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]); const unsigned char *const reg_eip = reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]); if (new_sp == reg_ebp && @@ -269,209 +265,24 @@ static void **NextStackFrame(void **old_sp, const void *uc) { return new_sp; } -// If you change this function, see NOTE at the top of file. -// Same as above, but with signal ucontext_t pointer. -int GetStackTraceWithContext(void** result, - int max_depth, - int skip_count, - const void *uc) { - void **sp; -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ - // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. - // It's always correct on llvm, and the techniques below aren't (in - // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), - // so we also prefer __builtin_frame_address when running under llvm. - sp = reinterpret_cast<void**>(__builtin_frame_address(0)); -#elif defined(__i386__) - // Stack frame format: - // sp[0] pointer to previous frame - // sp[1] caller address - // sp[2] first argument - // ... - // NOTE: This will break under llvm, since result is a copy and not in sp[2] - sp = (void **)&result - 2; -#elif defined(__x86_64__) - unsigned long rbp; - // Move the value of the register %rbp into the local variable rbp. - // We need 'volatile' to prevent this instruction from getting moved - // around during optimization to before function prologue is done. - // An alternative way to achieve this - // would be (before this __asm__ instruction) to call Noop() defined as - // static void Noop() __attribute__ ((noinline)); // prevent inlining - // static void Noop() { asm(""); } // prevent optimizing-away - __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); - // Arguments are passed in registers on x86-64, so we can't just - // offset from &result - sp = (void **) rbp; -#else -# error Using stacktrace_x86-inl.h on a non x86 architecture! -#endif - - int n = 0; - while (sp && n < max_depth) { - if (*(sp+1) == reinterpret_cast<void *>(0)) { - // In 64-bit code, we often see a frame that - // points to itself and has a return address of 0. - break; - } - if (skip_count > 0) { - skip_count--; - } else { - result[n++] = *(sp+1); - } - // Use strict unwinding rules. - sp = NextStackFrame<true, true>(sp, uc); - } - return n; -} - -int GetStackTrace(void** result, int max_depth, int skip_count) { - void **sp; -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ - // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. - // It's always correct on llvm, and the techniques below aren't (in - // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), - // so we also prefer __builtin_frame_address when running under llvm. 
- sp = reinterpret_cast<void**>(__builtin_frame_address(0)); -#elif defined(__i386__) - // Stack frame format: - // sp[0] pointer to previous frame - // sp[1] caller address - // sp[2] first argument - // ... - // NOTE: This will break under llvm, since result is a copy and not in sp[2] - sp = (void **)&result - 2; -#elif defined(__x86_64__) - unsigned long rbp; - // Move the value of the register %rbp into the local variable rbp. - // We need 'volatile' to prevent this instruction from getting moved - // around during optimization to before function prologue is done. - // An alternative way to achieve this - // would be (before this __asm__ instruction) to call Noop() defined as - // static void Noop() __attribute__ ((noinline)); // prevent inlining - // static void Noop() { asm(""); } // prevent optimizing-away - __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); - // Arguments are passed in registers on x86-64, so we can't just - // offset from &result - sp = (void **) rbp; -#else -# error Using stacktrace_x86-inl.h on a non x86 architecture! -#endif +#endif // BASE_STACKTRACE_X86_INL_H_ - int n = 0; - while (sp && n < max_depth) { - if (*(sp+1) == reinterpret_cast<void *>(0)) { - // In 64-bit code, we often see a frame that - // points to itself and has a return address of 0. - break; - } - if (skip_count > 0) { - skip_count--; - } else { - result[n++] = *(sp+1); - } - // Use strict unwinding rules. - sp = NextStackFrame<true, false>(sp, NULL); - } - return n; -} +// Note: this part of the file is included several times. +// Do not put globals below. -// If you change this function, see NOTE at the top of file. -// -// This GetStackFrames routine shares a lot of code with GetStackTrace -// above. This code could have been refactored into a common routine, -// and then both GetStackTrace/GetStackFrames could call that routine. -// There are two problems with that: +// The following 4 functions are generated from the code below: +// GetStack{Trace,Frames}() +// GetStack{Trace,Frames}WithContext() // -// (1) The performance of the refactored-code suffers substantially - the -// refactored needs to be able to record the stack trace when called -// from GetStackTrace, and both the stack trace and stack frame sizes, -// when called from GetStackFrames - this introduces enough new -// conditionals that GetStackTrace performance can degrade by as much -// as 50%. -// -// (2) Whether the refactored routine gets inlined into GetStackTrace and -// GetStackFrames depends on the compiler, and we can't guarantee the -// behavior either-way, even with "__attribute__ ((always_inline))" -// or "__attribute__ ((noinline))". But we need this guarantee or the -// frame counts may be off by one. -// -// Both (1) and (2) can be addressed without this code duplication, by -// clever use of template functions, and by defining GetStackTrace and -// GetStackFrames as macros that expand to these template functions. -// However, this approach comes with its own set of problems - namely, -// macros and preprocessor trouble - for example, if GetStackTrace -// and/or GetStackFrames is ever defined as a member functions in some -// class, we are in trouble. -int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { - void **sp; -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ - // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. 
- // It's always correct on llvm, and the techniques below aren't (in - // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), - // so we also prefer __builtin_frame_address when running under llvm. - sp = reinterpret_cast<void**>(__builtin_frame_address(0)); -#elif defined(__i386__) - // Stack frame format: - // sp[0] pointer to previous frame - // sp[1] caller address - // sp[2] first argument - // ... - sp = (void **)&pcs - 2; -#elif defined(__x86_64__) - unsigned long rbp; - // Move the value of the register %rbp into the local variable rbp. - // We need 'volatile' to prevent this instruction from getting moved - // around during optimization to before function prologue is done. - // An alternative way to achieve this - // would be (before this __asm__ instruction) to call Noop() defined as - // static void Noop() __attribute__ ((noinline)); // prevent inlining - // static void Noop() { asm(""); } // prevent optimizing-away - __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); - // Arguments are passed in registers on x86-64, so we can't just - // offset from &result - sp = (void **) rbp; -#else -# error Using stacktrace_x86-inl.h on a non x86 architecture! -#endif - - int n = 0; - while (sp && n < max_depth) { - if (*(sp+1) == reinterpret_cast<void *>(0)) { - // In 64-bit code, we often see a frame that - // points to itself and has a return address of 0. - break; - } - // The GetStackFrames routine is called when we are in some - // informational context (the failure signal handler for example). - // Use the non-strict unwinding rules to produce a stack trace - // that is as complete as possible (even if it contains a few bogus - // entries in some rare cases). - void **next_sp = NextStackFrame<false, false>(sp, NULL); - if (skip_count > 0) { - skip_count--; - } else { - pcs[n] = *(sp+1); - if (next_sp > sp) { - sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; - } else { - // A frame-size of 0 is used to indicate unknown frame size. - sizes[n] = 0; - } - n++; - } - sp = next_sp; - } - return n; -} +// These functions take the following args: +// void** result: the stack-trace, as an array +// int* sizes: the size of each stack frame, as an array +// (GetStackFrames* only) +// int max_depth: the size of the result (and sizes) array(s) +// int skip_count: how many stack pointers to skip before storing in result +// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -// If you change this function, see NOTE at the top of file. -// Same as above, but with signal ucontext_t pointer. -int GetStackFramesWithContext(void** pcs, - int* sizes, - int max_depth, - int skip_count, - const void *uc) { +int GET_STACK_TRACE_OR_FRAMES { void **sp; #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. @@ -511,22 +322,22 @@ int GetStackFramesWithContext(void** pcs, // points to itself and has a return address of 0. break; } - // The GetStackFrames routine is called when we are in some - // informational context (the failure signal handler for example). - // Use the non-strict unwinding rules to produce a stack trace - // that is as complete as possible (even if it contains a few bogus - // entries in some rare cases). 
-    void **next_sp = NextStackFrame<false, true>(sp, uc);
+#if !IS_WITH_CONTEXT
+    const void *const ucp = NULL;
+#endif
+    void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
    if (skip_count > 0) {
      skip_count--;
    } else {
-      pcs[n] = *(sp+1);
+      result[n] = *(sp+1);
+#if IS_STACK_FRAMES
      if (next_sp > sp) {
        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
      } else {
        // A frame-size of 0 is used to indicate unknown frame size.
        sizes[n] = 0;
      }
+#endif
      n++;
    }
    sp = next_sp;
diff --git a/third_party/tcmalloc/chromium/src/symbolize.cc b/third_party/tcmalloc/chromium/src/symbolize.cc
index 9dd890e..ff45e3e 100644
--- a/third_party/tcmalloc/chromium/src/symbolize.cc
+++ b/third_party/tcmalloc/chromium/src/symbolize.cc
@@ -87,16 +87,40 @@ int SymbolTable::Symbolize() {
#else
  // All this work is to do two-way communication.  ugh.
  extern char* program_invocation_name;  // gcc provides this
-  int child_in[2];   // file descriptors
-  int child_out[2];  // for now, we don't worry about child_err
-  if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_in) == -1) {
-    return 0;
-  }
-  if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_out) == -1) {
-    close(child_in[0]);
-    close(child_in[1]);
-    return 0;
+  int *child_in = NULL;   // file descriptors
+  int *child_out = NULL;  // for now, we don't worry about child_err
+  int child_fds[5][2];    // socketpair may be called up to five times below
+
+  // The client program may close its stdin and/or stdout and/or stderr
+  // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
+  // In this case the communication between the forked processes may be broken
+  // if either the parent or the child tries to close or duplicate these
+  // descriptors. The loop below produces two pairs of file descriptors, each
+  // greater than 2 (stderr).
+  for (int i = 0; i < 5; i++) {
+    if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
+      for (int j = 0; j < i; j++) {
+        close(child_fds[j][0]);
+        close(child_fds[j][1]);
+      }
+      return 0;
+    } else {
+      if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
+        if (child_in == NULL) {
+          child_in = child_fds[i];
+        } else {
+          child_out = child_fds[i];
+          for (int j = 0; j < i; j++) {
+            if (child_fds[j] == child_in) continue;
+            close(child_fds[j][0]);
+            close(child_fds[j][1]);
+          }
+          break;
+        }
+      }
+    }
  }
+
  switch (fork()) {
    case -1: {  // error
      close(child_in[0]);
diff --git a/third_party/tcmalloc/chromium/src/system-alloc.cc b/third_party/tcmalloc/chromium/src/system-alloc.cc
index 21d9b43..29bed80 100644
--- a/third_party/tcmalloc/chromium/src/system-alloc.cc
+++ b/third_party/tcmalloc/chromium/src/system-alloc.cc
@@ -78,7 +78,7 @@ union MemoryAligner {
  void* p;
  double d;
  size_t s;
-};
+} CACHELINE_ALIGNED;

static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
@@ -150,6 +150,10 @@ bool RegisterSystemAllocator(SysAllocator *a, int priority) {

void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
                              size_t alignment) {
+#ifndef HAVE_SBRK
+  failed_ = true;
+  return NULL;
+#else
  // Check if we should use sbrk allocation.
  // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized
  // state) and eventually gets initialized to the specified value.  Note
@@ -164,16 +168,16 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
  // a strict check here
  if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL;

-  // could theoretically return the "extra" bytes here, but this
-  // is simple and correct.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
  // This doesn't overflow because TCMalloc_SystemAlloc has already
  // tested for overflow at the alignment boundary.
  size = ((size + alignment - 1) / alignment) * alignment;

+  // "actual_size" indicates that the bytes from the returned pointer
+  // p up to and including (p + actual_size - 1) have been allocated.
+  if (actual_size) {
+    *actual_size = size;
+  }
+
  // Check that we're not asking for so much more memory that we'd
  // wrap around the end of the virtual address space.  (This seems
  // like something sbrk() should check for us, and indeed opensolaris
@@ -216,6 +220,7 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
    ptr += alignment - (ptr & (alignment-1));
  }
  return reinterpret_cast<void*>(ptr);
+#endif  // HAVE_SBRK
}

void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) {
@@ -238,12 +243,6 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
    return NULL;
  }

-  // could theoretically return the "extra" bytes here, but this
-  // is simple and correct.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
  // Enforce page alignment
  if (pagesize == 0) pagesize = getpagesize();
  if (alignment < pagesize) alignment = pagesize;
@@ -253,6 +252,12 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
  }
  size = aligned_size;

+  // "actual_size" indicates that the bytes from the returned pointer
+  // p up to and including (p + actual_size - 1) have been allocated.
+  if (actual_size) {
+    *actual_size = size;
+  }
+
  // Ask for extra memory if alignment > pagesize
  size_t extra = 0;
  if (alignment > pagesize) {
@@ -328,12 +333,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
    initialized = true;
  }

-  // could theoretically return the "extra" bytes here, but this
-  // is simple and correct.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
  // Enforce page alignment
  if (pagesize == 0) pagesize = getpagesize();
  if (alignment < pagesize) alignment = pagesize;
@@ -343,6 +342,12 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
  }
  size = aligned_size;

+  // "actual_size" indicates that the bytes from the returned pointer
+  // p up to and including (p + actual_size - 1) have been allocated.
+  if (actual_size) {
+    *actual_size = size;
+  }
+
  // Ask for extra memory if alignment > pagesize
  size_t extra = 0;
  if (alignment > pagesize) {
diff --git a/third_party/tcmalloc/chromium/src/system-alloc.h b/third_party/tcmalloc/chromium/src/system-alloc.h
index 60affed..8d982ef 100644
--- a/third_party/tcmalloc/chromium/src/system-alloc.h
+++ b/third_party/tcmalloc/chromium/src/system-alloc.h
@@ -48,7 +48,11 @@
// may optionally return more bytes than asked for (i.e. return an
// entire "huge" page if a huge page allocator is in use).
//
-// The returned pointer is a multiple of "alignment" if non-zero.
+// The returned pointer is a multiple of "alignment" if non-zero. The
+// returned pointer will always be aligned suitably for holding a
+// void*, double, or size_t. In addition, if this platform defines
+// CACHELINE_ALIGNED, the returned pointer will always be cacheline
+// aligned.
//
// Returns NULL when out of memory.
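//
// For example (an illustrative caller, not part of this header), the
// allocator may round the request up, and every reported byte is usable:
//
//   size_t actual = 0;
//   void* p = TCMalloc_SystemAlloc(bytes, &actual, alignment);
//   if (p != NULL) {
//     // All of [p, p + actual) may be used, and actual >= bytes.
//   }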
extern void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes,
diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
index 6acead8..79825ce 100644
--- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
+++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
@@ -228,8 +228,9 @@ extern "C" {
      ATTRIBUTE_SECTION(google_malloc);
  void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW
      ATTRIBUTE_SECTION(google_malloc);
-  // Surprisingly, compilers use a nothrow-delete internally.  See, eg:
-  //   http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
+  // Surprisingly, standard C++ library implementations use a
+  // nothrow-delete internally.  See, eg:
+  //   http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
  void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW
      ATTRIBUTE_SECTION(google_malloc);
  void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW
@@ -253,9 +254,9 @@ extern "C" {
// NOTE: we make many of these symbols weak, but do so in the makefile
// (via objcopy -W) and not here.  That ends up being more portable.
# define ALIAS(x) __attribute__ ((alias (x)))
-void* operator new(size_t size) ALIAS("tc_new");
+void* operator new(size_t size) throw (std::bad_alloc) ALIAS("tc_new");
void operator delete(void* p) __THROW ALIAS("tc_delete");
-void* operator new[](size_t size) ALIAS("tc_newarray");
+void* operator new[](size_t size) throw (std::bad_alloc) ALIAS("tc_newarray");
void operator delete[](void* p) __THROW ALIAS("tc_deletearray");
void* operator new(size_t size, const std::nothrow_t&) __THROW
    ALIAS("tc_new_nothrow");
@@ -264,7 +265,7 @@ void* operator new[](size_t size, const std::nothrow_t&) __THROW
void operator delete(void* size, const std::nothrow_t&) __THROW
    ALIAS("tc_delete_nothrow");
void operator delete[](void* size, const std::nothrow_t&) __THROW
-  ALIAS("tc_deletearray_nothrow");
+    ALIAS("tc_deletearray_nothrow");
extern "C" {
  void* malloc(size_t size) __THROW ALIAS("tc_malloc");
  void free(void* ptr) __THROW ALIAS("tc_free");
@@ -804,7 +805,17 @@ TCMallocGuard::TCMallocGuard() {
    tc_free(tc_malloc(1));
    ThreadCache::InitTSD();
    tc_free(tc_malloc(1));
-    MallocExtension::Register(new TCMallocImplementation);
+    // Either we, or debugallocation.cc, or valgrind will control memory
+    // management.  We register our extension if we're the winner.
+#ifdef TCMALLOC_FOR_DEBUGALLOCATION
+    // Let debugallocation register its extension.
+#else
+    if (RunningOnValgrind()) {
+      // Let Valgrind use its own malloc (so don't register our extension).
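+      // (An aside on cost: if RunningOnValgrind() is built on the
+      // client-request machinery in src/third_party/valgrind.h, added
+      // later in this change, then by that header's own comments the
+      // probe costs only a handful of instructions when the program is
+      // not running under Valgrind, so checking here at startup is
+      // essentially free.)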
+ } else { + MallocExtension::Register(new TCMallocImplementation); + } +#endif } } @@ -826,7 +837,28 @@ static TCMallocGuard module_enter_exit_hook; // Helpers for the exported routines below //------------------------------------------------------------------- -static Span* DoSampledAllocation(size_t size) { +static inline void* CheckedMallocResult(void *result) { + Span* fetched_span; + size_t cl; + + if (result != NULL) { + ASSERT(Static::pageheap()->GetSizeClassOrSpan(result, &cl, &fetched_span)); + } + + return result; +} + +static inline void* SpanToMallocResult(Span *span) { + Span* fetched_span = NULL; + size_t cl = 0; + ASSERT(Static::pageheap()->GetSizeClassOrSpan(span->start_ptr(), + &cl, &fetched_span)); + ASSERT(cl == kLargeSizeClass); + ASSERT(span == fetched_span); + return span->start_ptr(); +} + +static void* DoSampledAllocation(size_t size) { // Grab the stack trace outside the heap lock StackTrace tmp; tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1); @@ -834,7 +866,8 @@ static Span* DoSampledAllocation(size_t size) { SpinLockHolder h(Static::pageheap_lock()); // Allocate span - Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size)); + Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size), + kLargeSizeClass, kPageSize); if (span == NULL) { return NULL; } @@ -851,26 +884,7 @@ static Span* DoSampledAllocation(size_t size) { span->objects = stack; tcmalloc::DLL_Prepend(Static::sampled_objects(), span); - return span; -} - -static inline bool CheckCachedSizeClass(void *ptr) { - PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; - size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p); - return cached_value == 0 || - cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass; -} - -static inline void* CheckedMallocResult(void *result) -{ - ASSERT(result == 0 || CheckCachedSizeClass(result)); - return result; -} - -static inline void* SpanToMallocResult(Span *span) { - Static::pageheap()->CacheSizeClass(span->start, 0); - return - CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); + return SpanToMallocResult(span); } // Copy of FLAGS_tcmalloc_large_alloc_report_threshold with @@ -916,24 +930,39 @@ inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) { return tc_new_mode ? cpp_memalign(align, size) : do_memalign(align, size); } +// Must be called with the page lock held. +inline bool should_report_large(Length num_pages) { + const int64 threshold = large_alloc_threshold; + if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { + // Increase the threshold by 1/8 every time we generate a report. + // We cap the threshold at 8GB to avoid overflow problems. + large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 + ? threshold + threshold/8 : 8ll<<30); + return true; + } + return false; +} + // Helper for do_malloc(). 
-inline void* do_malloc_pages(Length num_pages) { - Span *span; - bool report_large = false; - { +inline void* do_malloc_pages(ThreadCache* heap, size_t size) { + void* result; + bool report_large; + + Length num_pages = tcmalloc::pages(size); + size = num_pages << kPageShift; + + if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { + result = DoSampledAllocation(size); + SpinLockHolder h(Static::pageheap_lock()); - span = Static::pageheap()->New(num_pages); - const int64 threshold = large_alloc_threshold; - if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { - // Increase the threshold by 1/8 every time we generate a report. - // We cap the threshold at 8GB to avoid overflow problems. - large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 - ? threshold + threshold/8 : 8ll<<30); - report_large = true; - } + report_large = should_report_large(num_pages); + } else { + SpinLockHolder h(Static::pageheap_lock()); + Span* span = Static::pageheap()->New(num_pages, kLargeSizeClass, kPageSize); + result = (span == NULL ? NULL : SpanToMallocResult(span)); + report_large = should_report_large(num_pages); } - void* result = (span == NULL ? NULL : SpanToMallocResult(span)); if (report_large) { ReportLargeAlloc(num_pages, result); } @@ -945,17 +974,19 @@ inline void* do_malloc(size_t size) { // The following call forces module initialization ThreadCache* heap = ThreadCache::GetCache(); - if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { - Span* span = DoSampledAllocation(size); - if (span != NULL) { - ret = SpanToMallocResult(span); + if (size <= kMaxSize) { + size_t cl = Static::sizemap()->SizeClass(size); + size = Static::sizemap()->class_to_size(cl); + + if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { + ret = DoSampledAllocation(size); + } else { + // The common case, and also the simplest. This just pops the + // size-appropriate freelist, after replenishing it if it's empty. + ret = CheckedMallocResult(heap->Allocate(size, cl)); } - } else if (size <= kMaxSize) { - // The common case, and also the simplest. This just pops the - // size-appropriate freelist, after replenishing it if it's empty. - ret = CheckedMallocResult(heap->Allocate(size)); } else { - ret = do_malloc_pages(tcmalloc::pages(size)); + ret = do_malloc_pages(heap, size); } if (ret == NULL) errno = ENOMEM; return ret; @@ -983,28 +1014,22 @@ static inline ThreadCache* GetCacheIfPresent() { inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) { if (ptr == NULL) return; ASSERT(Static::pageheap() != NULL); // Should not call free() before malloc() - const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; - Span* span = NULL; - size_t cl = Static::pageheap()->GetSizeClassIfCached(p); - - if (cl == 0) { - span = Static::pageheap()->GetDescriptor(p); - if (!span) { - // span can be NULL because the pointer passed in is invalid - // (not something returned by malloc or friends), or because the - // pointer was allocated with some other allocator besides - // tcmalloc. The latter can happen if tcmalloc is linked in via - // a dynamic library, but is not listed last on the link line. - // In that case, libraries after it on the link line will - // allocate with libc malloc, but free with tcmalloc's free. 
- (*invalid_free_fn)(ptr); // Decide how to handle the bad free request - return; - } - cl = span->sizeclass; - Static::pageheap()->CacheSizeClass(p, cl); + Span* span; + size_t cl; + + if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) { + // result can be false because the pointer passed in is invalid + // (not something returned by malloc or friends), or because the + // pointer was allocated with some other allocator besides + // tcmalloc. The latter can happen if tcmalloc is linked in via + // a dynamic library, but is not listed last on the link line. + // In that case, libraries after it on the link line will + // allocate with libc malloc, but free with tcmalloc's free. + (*invalid_free_fn)(ptr); // Decide how to handle the bad free request + return; } - if (cl != 0) { - ASSERT(!Static::pageheap()->GetDescriptor(p)->sample); + + if (cl != kLargeSizeClass) { ThreadCache* heap = GetCacheIfPresent(); if (heap != NULL) { heap->Deallocate(ptr, cl); @@ -1015,8 +1040,7 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) { } } else { SpinLockHolder h(Static::pageheap_lock()); - ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); - ASSERT(span != NULL && span->start == p); + ASSERT(span != NULL && ptr == span->start_ptr()); if (span->sample) { tcmalloc::DLL_Remove(span); Static::stacktrace_allocator()->Delete( @@ -1036,20 +1060,17 @@ inline size_t GetSizeWithCallback(void* ptr, size_t (*invalid_getsize_fn)(void*)) { if (ptr == NULL) return 0; - const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; - size_t cl = Static::pageheap()->GetSizeClassIfCached(p); - if (cl != 0) { + + Span* span; + size_t cl; + if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) { + return (*invalid_getsize_fn)(ptr); + } + + if (cl != kLargeSizeClass) { return Static::sizemap()->ByteSizeForClass(cl); } else { - Span *span = Static::pageheap()->GetDescriptor(p); - if (span == NULL) { // means we do not own this memory - return (*invalid_getsize_fn)(ptr); - } else if (span->sizeclass != 0) { - Static::pageheap()->CacheSizeClass(p, span->sizeclass); - return Static::sizemap()->ByteSizeForClass(span->sizeclass); - } else { - return span->length << kPageShift; - } + return span->length << kPageShift; } } @@ -1136,47 +1157,18 @@ void* do_memalign(size_t align, size_t size) { } if (cl < kNumClasses) { ThreadCache* heap = ThreadCache::GetCache(); - return CheckedMallocResult(heap->Allocate( - Static::sizemap()->class_to_size(cl))); + size = Static::sizemap()->class_to_size(cl); + return CheckedMallocResult(heap->Allocate(size, cl)); } } // We will allocate directly from the page heap SpinLockHolder h(Static::pageheap_lock()); - if (align <= kPageSize) { - // Any page-level allocation will be fine - // TODO: We could put the rest of this page in the appropriate - // TODO: cache but it does not seem worth it. - Span* span = Static::pageheap()->New(tcmalloc::pages(size)); - return span == NULL ? 
NULL : SpanToMallocResult(span); - } - - // Allocate extra pages and carve off an aligned portion - const Length alloc = tcmalloc::pages(size + align); - Span* span = Static::pageheap()->New(alloc); - if (span == NULL) return NULL; - - // Skip starting portion so that we end up aligned - Length skip = 0; - while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { - skip++; - } - ASSERT(skip < alloc); - if (skip > 0) { - Span* rest = Static::pageheap()->Split(span, skip); - Static::pageheap()->Delete(span); - span = rest; - } - - // Skip trailing portion that we do not need to return - const Length needed = tcmalloc::pages(size); - ASSERT(span->length >= needed); - if (span->length > needed) { - Span* trailer = Static::pageheap()->Split(span, needed); - Static::pageheap()->Delete(trailer); - } - return SpanToMallocResult(span); + // Any page-level allocation will be fine + Span* span = Static::pageheap()->New(tcmalloc::pages(size), + kLargeSizeClass, align); + return span == NULL ? NULL : SpanToMallocResult(span); } // Helpers for use by exported routines below: @@ -1392,8 +1384,7 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { return p; } -extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow( - size_t size, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW { void* p = cpp_alloc(size, true); MallocHook::InvokeNewHook(p, size); return p; @@ -1404,10 +1395,10 @@ extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW { do_free(p); } -// Compilers define and use this (via ::operator delete(ptr, nothrow)). +// Standard C++ library implementations define and use this +// (via ::operator delete(ptr, nothrow)). // But it's really the same as normal delete, so we just do the same thing. -extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow( - void* p, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(p); do_free(p); } @@ -1423,8 +1414,8 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { return p; } -extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow( - size_t size, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) + __THROW { void* p = cpp_alloc(size, true); MallocHook::InvokeNewHook(p, size); return p; @@ -1435,8 +1426,7 @@ extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW { do_free(p); } -extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow( - void* p, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(p); do_free(p); } diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc index ca00e36..c482187 100644 --- a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc @@ -75,7 +75,14 @@ static int test_counter = 0; // incremented every time the macro is called // This flag won't be compiled in in opt mode. 
DECLARE_int32(max_free_queue_size); +// Test match as well as mismatch rules: TEST(DebugAllocationTest, DeallocMismatch) { + // malloc can be matched only by free + // new can be matched only by delete and delete(nothrow) + // new[] can be matched only by delete[] and delete[](nothrow) + // new(nothrow) can be matched only by delete and delete(nothrow) + // new(nothrow)[] can be matched only by delete[] and delete[](nothrow) + // Allocate with malloc. { int* x = static_cast<int*>(malloc(sizeof(*x))); @@ -88,17 +95,41 @@ TEST(DebugAllocationTest, DeallocMismatch) { // Allocate with new. { int* x = new int; + int* y = new int; IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]"); delete x; + ::operator delete(y, std::nothrow); } // Allocate with new[]. { int* x = new int[1]; + int* y = new int[1]; + IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); + IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete"); + delete [] x; + ::operator delete[](y, std::nothrow); + } + + // Allocate with new(nothrow). + { + int* x = new(std::nothrow) int; + int* y = new(std::nothrow) int; + IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); + IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]"); + delete x; + ::operator delete(y, std::nothrow); + } + + // Allocate with new(nothrow)[]. + { + int* x = new(std::nothrow) int[1]; + int* y = new(std::nothrow) int[1]; IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete"); delete [] x; + ::operator delete[](y, std::nothrow); } } diff --git a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh index 9f0c08c..4a83fc2 100644 --- a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh +++ b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh @@ -139,13 +139,13 @@ EARLY_MSG="Starting tracking the heap$" Test 60 0 "$EARLY_MSG" "" \ HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ - PERFTOOLS_VERBOSE=1 || exit 5 + PERFTOOLS_VERBOSE=10 || exit 5 Test 60 0 "MemoryRegionMap Init$" "" \ HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ - PERFTOOLS_VERBOSE=2 || exit 6 + PERFTOOLS_VERBOSE=11 || exit 6 Test 60 0 "" "$EARLY_MSG" \ HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ - PERFTOOLS_VERBOSE=-2 || exit 7 + PERFTOOLS_VERBOSE=-11 || exit 7 # These invocations should fail with very high probability, # rather than return 0 or hang (1 == exit(1), 134 == abort(), 139 = SIGSEGV): @@ -162,10 +162,10 @@ Test 60 1 "MakeALeak" "" \ # Test that very early log messages are present and controllable: Test 60 1 "Starting tracking the heap$" "" \ - HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=1 \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=10 \ || exit 11 Test 60 1 "" "Starting tracking the heap" \ - HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-1 \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-10 \ || exit 12 cd / # so we're not in TMPDIR when we delete it diff --git a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc index 9120b78..fd444da 100644 --- 
a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc @@ -26,7 +26,7 @@ static void TestPageHeap_Stats() { CheckStats(ph, 0, 0, 0); // Allocate a span 's1' - tcmalloc::Span* s1 = ph->New(256); + tcmalloc::Span* s1 = ph->New(256, kLargeSizeClass, kPageSize); CheckStats(ph, 256, 0, 0); // Split span 's1' into 's1', 's2'. Delete 's2' diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc index 1908b03..19371b7 100644 --- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc @@ -56,12 +56,11 @@ static void test_other_thread() { int i, m; char b[128]; + MutexLock ml(&mutex); for (m = 0; m < 1000000; ++m) { // run millions of times for (i = 0; i < g_iters; ++i ) { - MutexLock ml(&mutex); result ^= i; } - MutexLock ml(&mutex); snprintf(b, sizeof(b), "%d", result); // get some libc action } #endif @@ -70,12 +69,11 @@ static void test_other_thread() { static void test_main_thread() { int i, m; char b[128]; + MutexLock ml(&mutex); for (m = 0; m < 1000000; ++m) { // run millions of times for (i = 0; i < g_iters; ++i ) { - MutexLock ml(&mutex); result ^= i; } - MutexLock ml(&mutex); snprintf(b, sizeof(b), "%d", result); // get some libc action } } diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh index 5766f2e..4668fa7 100644 --- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh +++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh @@ -206,28 +206,27 @@ CPUPROFILE="$TMPDIR/p5" "$PROFILER2" 50 || RegisterFailure CPUPROFILE="$TMPDIR/p6" "$PROFILER2" 100 || RegisterFailure VerifySimilar p5 "$PROFILER2_REALNAME" p6 "$PROFILER2_REALNAME" 2 -# When we compile with threads, things take a lot longer even when we only use 1 -CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 10 || RegisterFailure -CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 20 || RegisterFailure +CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 30 || RegisterFailure +CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 60 || RegisterFailure VerifySimilar p5b "$PROFILER3_REALNAME" p5c "$PROFILER3_REALNAME" 2 # Now try what happens when we use threads -"$PROFILER3" 5 2 "$TMPDIR/p7" || RegisterFailure -"$PROFILER3" 10 2 "$TMPDIR/p8" || RegisterFailure +"$PROFILER3" 30 2 "$TMPDIR/p7" || RegisterFailure +"$PROFILER3" 60 2 "$TMPDIR/p8" || RegisterFailure VerifySimilar p7 "$PROFILER3_REALNAME" p8 "$PROFILER3_REALNAME" 2 -"$PROFILER4" 5 2 "$TMPDIR/p9" || RegisterFailure -"$PROFILER4" 10 2 "$TMPDIR/p10" || RegisterFailure +"$PROFILER4" 30 2 "$TMPDIR/p9" || RegisterFailure +"$PROFILER4" 60 2 "$TMPDIR/p10" || RegisterFailure VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2 # More threads! -"$PROFILER4" 2 3 "$TMPDIR/p9" || RegisterFailure -"$PROFILER4" 4 3 "$TMPDIR/p10" || RegisterFailure +"$PROFILER4" 25 3 "$TMPDIR/p9" || RegisterFailure +"$PROFILER4" 50 3 "$TMPDIR/p10" || RegisterFailure VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2 # Compare how much time the main thread takes compared to the other threads # Recall the main thread runs twice as long as the other threads, by design. 
-"$PROFILER4" 2 4 "$TMPDIR/p11" || RegisterFailure +"$PROFILER4" 20 4 "$TMPDIR/p11" || RegisterFailure VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 # Test symbol save and restore @@ -236,14 +235,14 @@ VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 >"$TMPDIR/p13" 2>/dev/null || RegisterFailure VerifyIdentical p12 "$PROFILER1_REALNAME" p13 "" || RegisterFailure -"$PROFILER3" 5 2 "$TMPDIR/p14" || RegisterFailure +"$PROFILER3" 30 2 "$TMPDIR/p14" || RegisterFailure "$PPROF" $PPROF_FLAGS "$PROFILER3_REALNAME" "$TMPDIR/p14" --raw \ >"$TMPDIR/p15" 2>/dev/null || RegisterFailure VerifyIdentical p14 "$PROFILER3_REALNAME" p15 "" || RegisterFailure # Test using ITIMER_REAL instead of ITIMER_PROF. -env CPUPROFILE_REALTIME=1 "$PROFILER3" 5 2 "$TMPDIR/p16" || RegisterFailure -env CPUPROFILE_REALTIME=1 "$PROFILER3" 10 2 "$TMPDIR/p17" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 30 2 "$TMPDIR/p16" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 60 2 "$TMPDIR/p17" || RegisterFailure VerifySimilar p16 "$PROFILER3_REALNAME" p17 "$PROFILER3_REALNAME" 2 diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc index 25bfd6a..6b2ec26 100644 --- a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc @@ -977,7 +977,7 @@ static int RunAllTests(int argc, char** argv) { } // This code stresses some of the memory allocation via STL. - // In particular, it calls operator delete(void*, nothrow_t). + // It may call operator delete(void*, nothrow_t). fprintf(LOGSTREAM, "Testing STL use\n"); { std::vector<int> v; diff --git a/third_party/tcmalloc/chromium/src/third_party/valgrind.h b/third_party/tcmalloc/chromium/src/third_party/valgrind.h new file mode 100644 index 0000000..577c59a --- /dev/null +++ b/third_party/tcmalloc/chromium/src/third_party/valgrind.h @@ -0,0 +1,3924 @@ +/* -*- c -*- + ---------------------------------------------------------------- + + Notice that the following BSD-style license applies to this one + file (valgrind.h) only. The rest of Valgrind is licensed under the + terms of the GNU General Public License, version 2, unless + otherwise indicated. See the COPYING file in the source + distribution for details. + + ---------------------------------------------------------------- + + This file is part of Valgrind, a dynamic binary instrumentation + framework. + + Copyright (C) 2000-2008 Julian Seward. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS + OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE + GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------- + + Notice that the above BSD-style license applies to this one file + (valgrind.h) only. The entire rest of Valgrind is licensed under + the terms of the GNU General Public License, version 2. See the + COPYING file in the source distribution for details. + + ---------------------------------------------------------------- +*/ + + +/* This file is for inclusion into client (your!) code. + + You can use these macros to manipulate and query Valgrind's + execution inside your own programs. + + The resulting executables will still run without Valgrind, just a + little bit more slowly than they otherwise would, but otherwise + unchanged. When not running on valgrind, each client request + consumes very few (eg. 7) instructions, so the resulting performance + loss is negligible unless you plan to execute client requests + millions of times per second. Nevertheless, if that is still a + problem, you can compile with the NVALGRIND symbol defined (gcc + -DNVALGRIND) so that client requests are not even compiled in. */ + +#ifndef __VALGRIND_H +#define __VALGRIND_H + +#include <stdarg.h> + +/* Nb: this file might be included in a file compiled with -ansi. So + we can't use C++ style "//" comments nor the "asm" keyword (instead + use "__asm__"). */ + +/* Derive some tags indicating what the target platform is. Note + that in this file we're using the compiler's CPP symbols for + identifying architectures, which are different to the ones we use + within the rest of Valgrind. Note, __powerpc__ is active for both + 32 and 64-bit PPC, whereas __powerpc64__ is only active for the + latter (on Linux, that is). */ +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_ppc32_aix5 +#undef PLAT_ppc64_aix5 + +#if !defined(_AIX) && defined(__i386__) +# define PLAT_x86_linux 1 +#elif !defined(_AIX) && defined(__x86_64__) +# define PLAT_amd64_linux 1 +#elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__) +# define PLAT_ppc32_linux 1 +#elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__) +# define PLAT_ppc64_linux 1 +#elif defined(_AIX) && defined(__64BIT__) +# define PLAT_ppc64_aix5 1 +#elif defined(_AIX) && !defined(__64BIT__) +# define PLAT_ppc32_aix5 1 +#endif + + +/* If we're not compiling for our target platform, don't generate + any inline asms. */ +#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \ + && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \ + && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5) +# if !defined(NVALGRIND) +# define NVALGRIND 1 +# endif +#endif + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. 
There is nothing */ +/* in here of use to end-users -- skip to the next section. */ +/* ------------------------------------------------------------------ */ + +#if defined(NVALGRIND) + +/* Define NVALGRIND to completely remove the Valgrind magic sequence + from the compiled code (analogous to NDEBUG's effects on + assert()) */ +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { \ + (_zzq_rlval) = (_zzq_default); \ + } + +#else /* ! NVALGRIND */ + +/* The following defines the magic code sequences which the JITter + spots and handles magically. Don't look too closely at them as + they will rot your brain. + + The assembly code sequences for all architectures is in this one + file. This is because this file must be stand-alone, and we don't + want to have multiple files. + + For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default + value gets put in the return slot, so that everything works when + this is executed not under Valgrind. Args are passed in a memory + block, and so there's no intrinsic limit to the number that could + be passed, but it's currently five. + + The macro args are: + _zzq_rlval result lvalue + _zzq_default default value (result returned when running on real CPU) + _zzq_request request code + _zzq_arg1..5 request params + + The other two macros are used to support function wrapping, and are + a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the + guest's NRADDR pseudo-register and whatever other information is + needed to safely run the call original from the wrapper: on + ppc64-linux, the R2 value at the divert point is also needed. This + information is abstracted into a user-visible type, OrigFn. + + VALGRIND_CALL_NOREDIR_* behaves the same as the following on the + guest, but guarantees that the branch instruction will not be + redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64: + branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a + complete inline asm, since it needs to be combined with more magic + inline asm stuff to be useful. +*/ + +/* ------------------------- x86-linux ------------------------- */ + +#if defined(PLAT_x86_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "roll $3, %%edi ; roll $13, %%edi\n\t" \ + "roll $29, %%edi ; roll $19, %%edi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned int _zzq_args[6]; \ + volatile unsigned int _zzq_result; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EDX = client_request ( %EAX ) */ \ + "xchgl %%ebx,%%ebx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %EAX = guest_NRADDR */ \ + "xchgl %%ecx,%%ecx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_EAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%EAX */ \ + "xchgl %%edx,%%edx\n\t" +#endif /* PLAT_x86_linux */ + +/* ------------------------ amd64-linux ------------------------ */ + +#if defined(PLAT_amd64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \ + "rolq $61, %%rdi ; rolq $51, %%rdi\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + { volatile unsigned long long int _zzq_args[6]; \ + volatile unsigned long long int _zzq_result; \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RDX = client_request ( %RAX ) */ \ + "xchgq %%rbx,%%rbx" \ + : "=d" (_zzq_result) \ + : "a" (&_zzq_args[0]), "0" (_zzq_default) \ + : "cc", "memory" \ + ); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + volatile unsigned long long int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %RAX = guest_NRADDR */ \ + "xchgq %%rcx,%%rcx" \ + : "=a" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_CALL_NOREDIR_RAX \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* call-noredir *%RAX */ \ + "xchgq %%rdx,%%rdx\n\t" +#endif /* PLAT_amd64_linux */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +typedef + struct { + unsigned int nraddr; /* where's the code? 
*/ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[6]; \ + unsigned int _zzq_result; \ + unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 3,%1\n\t" /*default*/ \ + "mr 4,%2\n\t" /*ptr*/ \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" /*result*/ \ + : "=b" (_zzq_result) \ + : "b" (_zzq_default), "b" (_zzq_ptr) \ + : "cc", "memory", "r3", "r4"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "cc", "memory", "r3" \ + ); \ + _zzq_orig->nraddr = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \ + "rotldi 0,0,61 ; rotldi 0,0,51\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned long long int _zzq_args[6]; \ + register unsigned long long int _zzq_result __asm__("r3"); \ + register unsigned long long int* _zzq_ptr __asm__("r4"); \ + _zzq_args[0] = (unsigned long long int)(_zzq_request); \ + _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1" \ + : "=r" (_zzq_result) \ + : "0" (_zzq_default), "r" (_zzq_ptr) \ + : "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned long long int __addr __asm__("r3"); \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4" \ + : "=r" (__addr) \ + : \ + : "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc64_linux */ + +/* 
------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +typedef + struct { + unsigned int nraddr; /* where's the code? */ + unsigned int r2; /* what tocptr do we need? */ + } + OrigFn; + +#define __SPECIAL_INSTRUCTION_PREAMBLE \ + "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \ + "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t" + +#define VALGRIND_DO_CLIENT_REQUEST( \ + _zzq_rlval, _zzq_default, _zzq_request, \ + _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \ + \ + { unsigned int _zzq_args[7]; \ + register unsigned int _zzq_result; \ + register unsigned int* _zzq_ptr; \ + _zzq_args[0] = (unsigned int)(_zzq_request); \ + _zzq_args[1] = (unsigned int)(_zzq_arg1); \ + _zzq_args[2] = (unsigned int)(_zzq_arg2); \ + _zzq_args[3] = (unsigned int)(_zzq_arg3); \ + _zzq_args[4] = (unsigned int)(_zzq_arg4); \ + _zzq_args[5] = (unsigned int)(_zzq_arg5); \ + _zzq_args[6] = (unsigned int)(_zzq_default); \ + _zzq_ptr = _zzq_args; \ + __asm__ volatile("mr 4,%1\n\t" \ + "lwz 3, 24(4)\n\t" \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = client_request ( %R4 ) */ \ + "or 1,1,1\n\t" \ + "mr %0,3" \ + : "=b" (_zzq_result) \ + : "b" (_zzq_ptr) \ + : "r3", "r4", "cc", "memory"); \ + _zzq_rlval = _zzq_result; \ + } + +#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \ + { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \ + register unsigned int __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR */ \ + "or 2,2,2\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->nraddr = __addr; \ + __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \ + /* %R3 = guest_NRADDR_GPR2 */ \ + "or 4,4,4\n\t" \ + "mr %0,3" \ + : "=b" (__addr) \ + : \ + : "r3", "cc", "memory" \ + ); \ + _zzq_orig->r2 = __addr; \ + } + +#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + __SPECIAL_INSTRUCTION_PREAMBLE \ + /* branch-and-link-to-noredir *%R11 */ \ + "or 3,3,3\n\t" + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +typedef + struct { + unsigned long long int nraddr; /* where's the code? */ + unsigned long long int r2; /* what tocptr do we need? 
*/
+ }
+ OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \
+ "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned long long int _zzq_args[7]; \
+ register unsigned long long int _zzq_result; \
+ register unsigned long long int* _zzq_ptr; \
+ _zzq_args[0] = (unsigned long long int)(_zzq_request); \
+ _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \
+ _zzq_args[6] = (unsigned long long int)(_zzq_default); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile("mr 4,%1\n\t" \
+ "ld 3, 48(4)\n\t" \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1\n\t" \
+ "mr %0,3" \
+ : "=b" (_zzq_result) \
+ : "b" (_zzq_ptr) \
+ : "r3", "r4", "cc", "memory"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ register unsigned long long int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR_GPR2 */ \
+ "or 4,4,4\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->r2 = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc64_aix5 */
+
+/* Insert assembly code for other platforms here... */
+
+#endif /* NVALGRIND */
+
+
+/* ------------------------------------------------------------------ */
+/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */
+/* ugly. It's the least-worst tradeoff I can think of. */
+/* ------------------------------------------------------------------ */
+
+/* This section defines magic (a.k.a. appalling-hack) macros for doing
+ guaranteed-no-redirection calls, so as to get from function
+ wrappers to the functions they are wrapping. The whole point is to
+ construct standard call sequences, but to do the call itself with a
+ special no-redirect call pseudo-instruction that the JIT
+ understands and handles specially. This section is long and
+ repetitious, and I can't see a way to make it shorter.
+
+ The naming scheme is as follows:
+
+ CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc}
+
+ 'W' stands for "word" and 'v' for "void". Hence there are
+ different macros for calling functions of arity 0, 1, 2, 3, 4, etc.,
+ and for each, the possibility of returning a word-typed result, or
+ no result.
+*/
+
+/* Use these to write the name of your wrapper. NOTE: duplicates
+ VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
+
+#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \
+ _vgwZU_##soname##_##fnname
+
+#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \
+ _vgwZZ_##soname##_##fnname
+
+/* Use this macro from within a wrapper function to collect the
+ context (address and possibly other info) of the original function.
+ Once you have that you can then use it in one of the CALL_FN_
+ macros. The type of the argument _lval is OrigFn.
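+
+ As an illustrative sketch (patterned on the wrapping example in the
+ Valgrind manual; the soname and function here are hypothetical), a
+ wrapper for "int foo(int,int)" exported by "libm.so*" could look
+ like this:
+
+ #include <stdio.h>
+ #include "valgrind.h"
+
+ int I_WRAP_SONAME_FNNAME_ZU(libmZdsoZa,foo) ( int x, int y )
+ {
+ int result;
+ OrigFn fn;
+ VALGRIND_GET_ORIG_FN(fn);
+ printf("foo's wrapper: args %d %d\n", x, y);
+ CALL_FN_W_WW(result, fn, x, y);
+ printf("foo's wrapper: result %d\n", result);
+ return result;
+ }
+
+ "libmZdsoZa" is the Z-encoded soname: Zd encodes '.' and Za
+ encodes '*', so the wrapper matches any "libm.so*".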
*/ +#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval) + +/* Derivatives of the main macros below, for calling functions + returning void. */ + +#define CALL_FN_v_v(fnptr) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_v(_junk,fnptr); } while (0) + +#define CALL_FN_v_W(fnptr, arg1) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_W(_junk,fnptr,arg1); } while (0) + +#define CALL_FN_v_WW(fnptr, arg1,arg2) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0) + +#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \ + do { volatile unsigned long _junk; \ + CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0) + +/* ------------------------- x86-linux ------------------------- */ + +#if defined(PLAT_x86_linux) + +/* These regs are trashed by the hidden call. No need to mention eax + as gcc can already see that, plus causes gcc to bomb. */ +#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx" + +/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned + long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $4, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $8, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $12, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + 
volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $16, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + __asm__ volatile( \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $20, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + __asm__ volatile( \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $24, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + __asm__ volatile( \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $28, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ 
+ volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $32, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $36, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "pushl 40(%%eax)\n\t" \ + "pushl 36(%%eax)\n\t" \ + "pushl 32(%%eax)\n\t" \ + "pushl 28(%%eax)\n\t" \ + "pushl 24(%%eax)\n\t" \ + "pushl 20(%%eax)\n\t" \ + "pushl 16(%%eax)\n\t" \ + "pushl 12(%%eax)\n\t" \ + "pushl 8(%%eax)\n\t" \ + "pushl 4(%%eax)\n\t" \ + "movl (%%eax), %%eax\n\t" /* target->%eax */ \ + VALGRIND_CALL_NOREDIR_EAX \ + "addl $40, %%esp\n" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \ + arg6,arg7,arg8,arg9,arg10, \ + arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ 
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ __asm__ volatile( \
+ "pushl 44(%%eax)\n\t" \
+ "pushl 40(%%eax)\n\t" \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $44, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
+ arg6,arg7,arg8,arg9,arg10, \
+ arg11,arg12) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[13]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ _argvec[12] = (unsigned long)(arg12); \
+ __asm__ volatile( \
+ "pushl 48(%%eax)\n\t" \
+ "pushl 44(%%eax)\n\t" \
+ "pushl 40(%%eax)\n\t" \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $48, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#endif /* PLAT_x86_linux */
+
+/* ------------------------ amd64-linux ------------------------ */
+
+#if defined(PLAT_amd64_linux)
+
+/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", \
+ "rdi", "r8", "r9", "r10", "r11"
+
+/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
+ long) == 8. */
+
+/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_
+ macros. In order not to trash the stack redzone, we need to drop
+ %rsp by 128 before the hidden call, and restore afterwards. The
+ nastiness is that it is only by luck that the stack still appears
+ to be unwindable during the hidden call - since then the behaviour
+ of any routine using this macro does not match what the CFI data
+ says. Sigh.
+
+ Why is this important? Imagine that a wrapper has a stack-allocated
+ local, and passes a pointer to it to the hidden call. Because gcc
+ does not know about the hidden call, it may allocate that local in
+ the redzone. Unfortunately the hidden call may then trash it
+ before it comes to use it.
So we must step clear of the + redzone, for the duration of the hidden call, to make it safe. + + Probably the same problem afflicts the other redzone-style ABIs too + (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is + self describing (none of this CFI nonsense) so at least messing + with the stack pointer doesn't give a danger of non-unwindable + stack. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX 
\
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[6]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[7]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[8]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $8, %%rsp\n" \
+ "addq $128,%%rsp\n\t" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[9]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned
long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $16, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $24, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $32, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + 
_argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $40, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)(arg1); \ + _argvec[2] = (unsigned long)(arg2); \ + _argvec[3] = (unsigned long)(arg3); \ + _argvec[4] = (unsigned long)(arg4); \ + _argvec[5] = (unsigned long)(arg5); \ + _argvec[6] = (unsigned long)(arg6); \ + _argvec[7] = (unsigned long)(arg7); \ + _argvec[8] = (unsigned long)(arg8); \ + _argvec[9] = (unsigned long)(arg9); \ + _argvec[10] = (unsigned long)(arg10); \ + _argvec[11] = (unsigned long)(arg11); \ + _argvec[12] = (unsigned long)(arg12); \ + __asm__ volatile( \ + "subq $128,%%rsp\n\t" \ + "pushq 96(%%rax)\n\t" \ + "pushq 88(%%rax)\n\t" \ + "pushq 80(%%rax)\n\t" \ + "pushq 72(%%rax)\n\t" \ + "pushq 64(%%rax)\n\t" \ + "pushq 56(%%rax)\n\t" \ + "movq 48(%%rax), %%r9\n\t" \ + "movq 40(%%rax), %%r8\n\t" \ + "movq 32(%%rax), %%rcx\n\t" \ + "movq 24(%%rax), %%rdx\n\t" \ + "movq 16(%%rax), %%rsi\n\t" \ + "movq 8(%%rax), %%rdi\n\t" \ + "movq (%%rax), %%rax\n\t" /* target->%rax */ \ + VALGRIND_CALL_NOREDIR_RAX \ + "addq $48, %%rsp\n" \ + "addq $128,%%rsp\n\t" \ + : /*out*/ "=a" (_res) \ + : /*in*/ "a" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_amd64_linux */ + +/* ------------------------ ppc32-linux ------------------------ */ + +#if defined(PLAT_ppc32_linux) + +/* This is useful for finding out about the on-stack stuff: + + extern int f9 ( int,int,int,int,int,int,int,int,int ); + extern int f10 ( int,int,int,int,int,int,int,int,int,int ); + extern int f11 ( int,int,int,int,int,int,int,int,int,int,int ); + extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int ); + + int g9 ( void ) { + return f9(11,22,33,44,55,66,77,88,99); + } + int g10 ( void ) { + return f10(11,22,33,44,55,66,77,88,99,110); + } + int g11 ( void ) { + return f11(11,22,33,44,55,66,77,88,99,110,121); + } + int g12 ( void ) { + return f12(11,22,33,44,55,66,77,88,99,110,121,132); + } +*/ + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. 
*/ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc32-linux, + sizeof(unsigned long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[1]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[2]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[4]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[5]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } 
while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[6]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[7]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[8]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[9]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 
8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[10]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[11]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-16\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,16\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[12]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned 
long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[13]; \ + volatile unsigned long _res; \ + _argvec[0] = (unsigned long)_orig.nraddr; \ + _argvec[1] = (unsigned long)arg1; \ + _argvec[2] = (unsigned long)arg2; \ + _argvec[3] = (unsigned long)arg3; \ + _argvec[4] = (unsigned long)arg4; \ + _argvec[5] = (unsigned long)arg5; \ + _argvec[6] = (unsigned long)arg6; \ + _argvec[7] = (unsigned long)arg7; \ + _argvec[8] = (unsigned long)arg8; \ + _argvec[9] = (unsigned long)arg9; \ + _argvec[10] = (unsigned long)arg10; \ + _argvec[11] = (unsigned long)arg11; \ + _argvec[12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "addi 1,1,-32\n\t" \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,20(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,16(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,12(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,8(1)\n\t" \ + /* args1-8 */ \ + "lwz 3,4(11)\n\t" /* arg1->r3 */ \ + "lwz 4,8(11)\n\t" \ + "lwz 5,12(11)\n\t" \ + "lwz 6,16(11)\n\t" /* arg4->r6 */ \ + "lwz 7,20(11)\n\t" \ + "lwz 8,24(11)\n\t" \ + "lwz 9,28(11)\n\t" \ + "lwz 10,32(11)\n\t" /* arg8->r10 */ \ + "lwz 11,0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "addi 1,1,32\n\t" \ + "mr %0,3" \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[0]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_linux */ + +/* ------------------------ ppc64-linux ------------------------ */ + +#if defined(PLAT_ppc64_linux) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned + long) == 8. 
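+
+ A client could check that assumption at compile time with a
+ (hypothetical) negative-array-size guard along these lines:
+
+ typedef char vg_assert_ulong_is_8_bytes[sizeof(unsigned long) == 8 ? 1 : -1];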
*/ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define 
CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, 
arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)" /* restore tocptr */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* 
use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-128\n\t" /* expand stack frame */ \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,128" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand 
stack frame */ \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "addi 1,1,-144\n\t" /* expand stack frame */ \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + "addi 1,1,144" /* restore frame */ \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_linux */ + +/* ------------------------ ppc32-aix5 ------------------------- */ + +#if defined(PLAT_ppc32_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. 
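
   To make the frame trick concrete: VG_EXPAND_FRAME_BY_trashes_r3(64)
   (defined just below) expands to the sequence

      addi 1,1,-64       drop the stack pointer by 64 bytes
      lwz  3,64(1)       fetch the back-chain word from the old frame base
      stw  3,0(1)        store it at the base of the new frame

   so the back chain stays intact and a stack unwinder can still walk
   past the expanded frame.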
*/ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "lwz 3," #_n_fr "(1)\n\t" \ + "stw 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned + long) == 4. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + 
"stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; 
\ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" 
(_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(64) \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(64) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : 
/*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "stw 2,-8(11)\n\t" /* save tocptr */ \ + "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(72) \ + /* arg12 */ \ + "lwz 3,48(11)\n\t" \ + "stw 3,68(1)\n\t" \ + /* arg11 */ \ + "lwz 3,44(11)\n\t" \ + "stw 3,64(1)\n\t" \ + /* arg10 */ \ + "lwz 3,40(11)\n\t" \ + "stw 3,60(1)\n\t" \ + /* arg9 */ \ + "lwz 3,36(11)\n\t" \ + "stw 3,56(1)\n\t" \ + /* args1-8 */ \ + "lwz 3, 4(11)\n\t" /* arg1->r3 */ \ + "lwz 4, 8(11)\n\t" /* arg2->r4 */ \ + "lwz 5, 12(11)\n\t" /* arg3->r5 */ \ + "lwz 6, 16(11)\n\t" /* arg4->r6 */ \ + "lwz 7, 20(11)\n\t" /* arg5->r7 */ \ + "lwz 8, 
24(11)\n\t" /* arg6->r8 */ \ + "lwz 9, 28(11)\n\t" /* arg7->r9 */ \ + "lwz 10, 32(11)\n\t" /* arg8->r10 */ \ + "lwz 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "lwz 2,-8(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(72) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc32_aix5 */ + +/* ------------------------ ppc64-aix5 ------------------------- */ + +#if defined(PLAT_ppc64_aix5) + +/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */ + +/* These regs are trashed by the hidden call. */ +#define __CALLER_SAVED_REGS \ + "lr", "ctr", "xer", \ + "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \ + "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ + "r11", "r12", "r13" + +/* Expand the stack frame, copying enough info that unwinding + still works. Trashes r3. */ + +#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \ + "addi 1,1,-" #_n_fr "\n\t" \ + "ld 3," #_n_fr "(1)\n\t" \ + "std 3,0(1)\n\t" + +#define VG_CONTRACT_FRAME_BY(_n_fr) \ + "addi 1,1," #_n_fr "\n\t" + +/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned + long) == 8. */ + +#define CALL_FN_W_v(lval, orig) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+0]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_W(lval, orig, arg1) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+1]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WW(lval, orig, arg1,arg2) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+2]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) 
\ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+3]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+4]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+5]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 
24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+6]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+7]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+8]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned 
long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+9]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+10]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + 
_argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(128) \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(128) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+11]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = (unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \ + arg7,arg8,arg9,arg10,arg11,arg12) \ + do { \ + volatile OrigFn _orig = (orig); \ + volatile unsigned long _argvec[3+12]; \ + volatile unsigned long _res; \ + /* _argvec[0] holds current r2 across the call */ \ + _argvec[1] = (unsigned long)_orig.r2; \ + _argvec[2] = (unsigned long)_orig.nraddr; \ + _argvec[2+1] = (unsigned long)arg1; \ + _argvec[2+2] = (unsigned long)arg2; \ + _argvec[2+3] = (unsigned long)arg3; \ + _argvec[2+4] = 
(unsigned long)arg4; \ + _argvec[2+5] = (unsigned long)arg5; \ + _argvec[2+6] = (unsigned long)arg6; \ + _argvec[2+7] = (unsigned long)arg7; \ + _argvec[2+8] = (unsigned long)arg8; \ + _argvec[2+9] = (unsigned long)arg9; \ + _argvec[2+10] = (unsigned long)arg10; \ + _argvec[2+11] = (unsigned long)arg11; \ + _argvec[2+12] = (unsigned long)arg12; \ + __asm__ volatile( \ + "mr 11,%1\n\t" \ + VG_EXPAND_FRAME_BY_trashes_r3(512) \ + "std 2,-16(11)\n\t" /* save tocptr */ \ + "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \ + VG_EXPAND_FRAME_BY_trashes_r3(144) \ + /* arg12 */ \ + "ld 3,96(11)\n\t" \ + "std 3,136(1)\n\t" \ + /* arg11 */ \ + "ld 3,88(11)\n\t" \ + "std 3,128(1)\n\t" \ + /* arg10 */ \ + "ld 3,80(11)\n\t" \ + "std 3,120(1)\n\t" \ + /* arg9 */ \ + "ld 3,72(11)\n\t" \ + "std 3,112(1)\n\t" \ + /* args1-8 */ \ + "ld 3, 8(11)\n\t" /* arg1->r3 */ \ + "ld 4, 16(11)\n\t" /* arg2->r4 */ \ + "ld 5, 24(11)\n\t" /* arg3->r5 */ \ + "ld 6, 32(11)\n\t" /* arg4->r6 */ \ + "ld 7, 40(11)\n\t" /* arg5->r7 */ \ + "ld 8, 48(11)\n\t" /* arg6->r8 */ \ + "ld 9, 56(11)\n\t" /* arg7->r9 */ \ + "ld 10, 64(11)\n\t" /* arg8->r10 */ \ + "ld 11, 0(11)\n\t" /* target->r11 */ \ + VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \ + "mr 11,%1\n\t" \ + "mr %0,3\n\t" \ + "ld 2,-16(11)\n\t" /* restore tocptr */ \ + VG_CONTRACT_FRAME_BY(144) \ + VG_CONTRACT_FRAME_BY(512) \ + : /*out*/ "=r" (_res) \ + : /*in*/ "r" (&_argvec[2]) \ + : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \ + ); \ + lval = (__typeof__(lval)) _res; \ + } while (0) + +#endif /* PLAT_ppc64_aix5 */ + + +/* ------------------------------------------------------------------ */ +/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */ +/* */ +/* ------------------------------------------------------------------ */ + +/* Some request codes. There are many more of these, but most are not + exposed to end-user view. These are the public ones, all of the + form 0x1000 + small_number. + + Core ones are in the range 0x00000000--0x0000ffff. The non-public + ones start at 0x2000. +*/ + +/* These macros are used by tools -- they must be public, but don't + embed them into other programs. */ +#define VG_USERREQ_TOOL_BASE(a,b) \ + ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) +#define VG_IS_TOOL_USERREQ(a, b, v) \ + (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) + +/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !! + This enum comprises an ABI exported by Valgrind to programs + which use client requests. DO NOT CHANGE THE ORDER OF THESE + ENTRIES, NOR DELETE ANY -- add new ones at the end. */ +typedef + enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, + VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, + + /* These allow any function to be called from the simulated + CPU but run on the real CPU. Nb: the first arg passed to + the function is always the ThreadId of the running + thread! So CLIENT_CALL0 actually requires a 1 arg + function, etc. */ + VG_USERREQ__CLIENT_CALL0 = 0x1101, + VG_USERREQ__CLIENT_CALL1 = 0x1102, + VG_USERREQ__CLIENT_CALL2 = 0x1103, + VG_USERREQ__CLIENT_CALL3 = 0x1104, + + /* Can be useful in regression testing suites -- eg. can + send Valgrind's output to /dev/null and still count + errors. */ + VG_USERREQ__COUNT_ERRORS = 0x1201, + + /* These are useful and can be interpreted by any tool that + tracks malloc() et al, by using vg_replace_malloc.c. */ + VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301, + VG_USERREQ__FREELIKE_BLOCK = 0x1302, + /* Memory pool support. 
*/
+      VG_USERREQ__CREATE_MEMPOOL   = 0x1303,
+      VG_USERREQ__DESTROY_MEMPOOL  = 0x1304,
+      VG_USERREQ__MEMPOOL_ALLOC    = 0x1305,
+      VG_USERREQ__MEMPOOL_FREE     = 0x1306,
+      VG_USERREQ__MEMPOOL_TRIM     = 0x1307,
+      VG_USERREQ__MOVE_MEMPOOL     = 0x1308,
+      VG_USERREQ__MEMPOOL_CHANGE   = 0x1309,
+      VG_USERREQ__MEMPOOL_EXISTS   = 0x130a,
+
+      /* Allow printfs to the Valgrind log. */
+      VG_USERREQ__PRINTF           = 0x1401,
+      VG_USERREQ__PRINTF_BACKTRACE = 0x1402,
+
+      /* Stack support. */
+      VG_USERREQ__STACK_REGISTER   = 0x1501,
+      VG_USERREQ__STACK_DEREGISTER = 0x1502,
+      VG_USERREQ__STACK_CHANGE     = 0x1503
+   } Vg_ClientRequest;
+
+#if !defined(__GNUC__)
+#  define __extension__ /* */
+#endif
+
+/* Returns the number of Valgrinds this code is running under.  That
+   is, 0 if running natively, 1 if running under Valgrind, 2 if
+   running under Valgrind which is running under another Valgrind,
+   etc. */
+#define RUNNING_ON_VALGRIND  __extension__                        \
+   ({unsigned int _qzz_res;                                       \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */,          \
+                               VG_USERREQ__RUNNING_ON_VALGRIND,   \
+                               0, 0, 0, 0, 0);                    \
+    _qzz_res;                                                     \
+   })
+
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+   _qzz_len - 1].  Useful if you are debugging a JITter or some such,
+   since it provides a way to make sure valgrind will retranslate the
+   invalidated area.  Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len)         \
+   {unsigned int _qzz_res;                                        \
+    VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0,                       \
+                               VG_USERREQ__DISCARD_TRANSLATIONS,  \
+                               _qzz_addr, _qzz_len, 0, 0, 0);     \
+   }
+
+
+/* These requests are for getting Valgrind itself to print something.
+   Possibly with a backtrace.  This is a really ugly hack. */
+
+#if defined(NVALGRIND)
+
+# define VALGRIND_PRINTF(...)
+# define VALGRIND_PRINTF_BACKTRACE(...)
+
+#else /* NVALGRIND */
+
+/* Modern GCC will optimize the static routine out if unused,
+   and the unused attribute will suppress warnings about it. */
+static int VALGRIND_PRINTF(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF(const char *format, ...)
+{
+   unsigned long _qzz_res;
+   va_list vargs;
+   va_start(vargs, format);
+   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF,
+                              (unsigned long)format, (unsigned long)vargs,
+                              0, 0, 0);
+   va_end(vargs);
+   return (int)_qzz_res;
+}
+
+static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+   __attribute__((format(__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+{
+   unsigned long _qzz_res;
+   va_list vargs;
+   va_start(vargs, format);
+   VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE,
+                              (unsigned long)format, (unsigned long)vargs,
+                              0, 0, 0);
+   va_end(vargs);
+   return (int)_qzz_res;
+}
+
+#endif /* NVALGRIND */
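
By way of illustration, a minimal sketch of using the two requests above
together from client code, assuming valgrind.h is on the include path;
both collapse to no-ops in a native run:

   #include "valgrind.h"

   static void log_valgrind_depth(void)
   {
      /* RUNNING_ON_VALGRIND evaluates to 0 when running natively, so
         the message below only appears in the Valgrind log. */
      unsigned int depth = RUNNING_ON_VALGRIND;
      if (depth > 0)
         VALGRIND_PRINTF("running under %u level(s) of Valgrind\n", depth);
   }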
+
+
+/* These requests allow control to move from the simulated CPU to the
+   real CPU, calling an arbitrary function.
+
+   Note that the current ThreadId is inserted as the first argument.
+   So this call:
+
+     VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
+
+   requires f to have this signature:
+
+     Word f(Word tid, Word arg1, Word arg2)
+
+   where "Word" is a word-sized type.
+
+   Note that these client requests are not entirely reliable.  For example,
+   if you call a function with them that subsequently calls printf(),
+   there's a high chance Valgrind will crash.  Generally, these are more
+   likely to work if the called function does not refer to any global
+   variables, and does not refer to any libc or other functions (printf
+   et al).  Any kind of entanglement with libc or dynamic linking is
+   likely to have a bad outcome, for tricky reasons which we've grappled
+   with a lot in the past.
+*/
+#define VALGRIND_NON_SIMD_CALL0(_qyy_fn)                          \
+   __extension__                                                  \
+   ({unsigned long _qyy_res;                                      \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__CLIENT_CALL0,          \
+                               _qyy_fn,                           \
+                               0, 0, 0, 0);                       \
+    _qyy_res;                                                     \
+   })
+
+#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1)               \
+   __extension__                                                  \
+   ({unsigned long _qyy_res;                                      \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__CLIENT_CALL1,          \
+                               _qyy_fn,                           \
+                               _qyy_arg1, 0, 0, 0);               \
+    _qyy_res;                                                     \
+   })
+
+#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2)    \
+   __extension__                                                  \
+   ({unsigned long _qyy_res;                                      \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__CLIENT_CALL2,          \
+                               _qyy_fn,                           \
+                               _qyy_arg1, _qyy_arg2, 0, 0);       \
+    _qyy_res;                                                     \
+   })
+
+#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
+   __extension__                                                  \
+   ({unsigned long _qyy_res;                                      \
+    VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */,  \
+                               VG_USERREQ__CLIENT_CALL3,          \
+                               _qyy_fn,                           \
+                               _qyy_arg1, _qyy_arg2,              \
+                               _qyy_arg3, 0);                     \
+    _qyy_res;                                                     \
+   })
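
A minimal sketch of the shape these macros expect, assuming Word is a
word-sized integer type such as unsigned long; the names here are
illustrative only:

   /* Runs on the real CPU when invoked through VALGRIND_NON_SIMD_CALL2. */
   static unsigned long sum2(unsigned long tid,
                             unsigned long a, unsigned long b)
   {
      (void)tid;      /* Valgrind always passes the ThreadId first */
      return a + b;   /* deliberately avoids libc, per the caveat above */
   }

   static unsigned long demo(void)
   {
      /* In simulated code: runs sum2(tid, 40, 2) on the real CPU. */
      return VALGRIND_NON_SIMD_CALL2(sum2, 40, 2);
   }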
+/* Create a memory pool. */ +#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__CREATE_MEMPOOL, \ + pool, rzB, is_zeroed, 0, 0); \ + } + +/* Destroy a memory pool. */ +#define VALGRIND_DESTROY_MEMPOOL(pool) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__DESTROY_MEMPOOL, \ + pool, 0, 0, 0, 0); \ + } + +/* Associate a piece of memory with a memory pool. */ +#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_ALLOC, \ + pool, addr, size, 0, 0); \ + } + +/* Disassociate a piece of memory from a memory pool. */ +#define VALGRIND_MEMPOOL_FREE(pool, addr) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_FREE, \ + pool, addr, 0, 0, 0); \ + } + +/* Disassociate any pieces outside a particular range. */ +#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_TRIM, \ + pool, addr, size, 0, 0); \ + } + +/* Move a memory pool from anchor address poolA to poolB. */ +#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MOVE_MEMPOOL, \ + poolA, poolB, 0, 0, 0); \ + } + +/* Resize and/or move a piece associated with a memory pool. */ +#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_CHANGE, \ + pool, addrA, addrB, size, 0); \ + } + +/* Return 1 if a mempool exists, else 0. */ +#define VALGRIND_MEMPOOL_EXISTS(pool) \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__MEMPOOL_EXISTS, \ + pool, 0, 0, 0, 0); \ + _qzz_res; \ + }) + +/* Mark a piece of memory as being a stack. Returns a stack id. */ +#define VALGRIND_STACK_REGISTER(start, end) \ + ({unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_REGISTER, \ + start, end, 0, 0, 0); \ + _qzz_res; \ + }) + +/* Unmark the piece of memory associated with a stack id as being a + stack. */ +#define VALGRIND_STACK_DEREGISTER(id) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_DEREGISTER, \ + id, 0, 0, 0, 0); \ + } + +/* Change the start and end address of the stack id. */ +#define VALGRIND_STACK_CHANGE(id, start, end) \ + {unsigned int _qzz_res; \ + VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \ + VG_USERREQ__STACK_CHANGE, \ + id, start, end, 0, 0); \ + } + + +#undef PLAT_x86_linux +#undef PLAT_amd64_linux +#undef PLAT_ppc32_linux +#undef PLAT_ppc64_linux +#undef PLAT_ppc32_aix5 +#undef PLAT_ppc64_aix5 + +#endif /* __VALGRIND_H */ diff --git a/third_party/tcmalloc/chromium/src/thread_cache.h b/third_party/tcmalloc/chromium/src/thread_cache.h index 4c6a233..1165447 100644 --- a/third_party/tcmalloc/chromium/src/thread_cache.h +++ b/third_party/tcmalloc/chromium/src/thread_cache.h @@ -79,7 +79,9 @@ class ThreadCache { // Total byte size in cache size_t Size() const { return size_; } - void* Allocate(size_t size); + // Allocate an object of the given size and class.
The size given + // must be the same as the size of the class in the size map. + void* Allocate(size_t size, size_t cl); void Deallocate(void* ptr, size_t size_class); void Scavenge(); @@ -293,15 +295,18 @@ class ThreadCache { // across all ThreadCaches. Protected by Static::pageheap_lock. static ssize_t unclaimed_cache_space_; - // Warning: the offset of list_ affects performance. On general - // principles, we don't like list_[x] to span multiple L1 cache - // lines. However, merely placing list_ at offset 0 here seems to - // cause cache conflicts. + // This class is laid out with the most frequently used fields + // first so that hot elements are placed on the same cache line. size_t size_; // Combined size of data size_t max_size_; // size_ > max_size_ --> Scavenge() - pthread_t tid_; // Which thread owns it + + // We sample allocations, biased by the size of the allocation + Sampler sampler_; // A sampler + FreeList list_[kNumClasses]; // Array indexed by size-class + + pthread_t tid_; // Which thread owns it bool in_setspecific_; // In call to pthread_setspecific? // Allocate a new heap. REQUIRES: Static::pageheap_lock is held. @@ -313,9 +318,10 @@ class ThreadCache { static void DeleteCache(ThreadCache* heap); static void RecomputePerThreadCacheSize(); - // We sample allocations, biased by the size of the allocation - Sampler sampler_; // A sampler -}; + // Ensure that this class is cacheline-aligned. This is critical for + // performance, as false sharing would negate many of the benefits + // of a per-thread cache. +} CACHELINE_ALIGNED; // Allocator for thread heaps // This is logically part of the ThreadCache class, but MSVC, at @@ -331,15 +337,15 @@ inline bool ThreadCache::SampleAllocation(size_t k) { return sampler_.SampleAllocation(k); } -inline void* ThreadCache::Allocate(size_t size) { +inline void* ThreadCache::Allocate(size_t size, size_t cl) { ASSERT(size <= kMaxSize); - const size_t cl = Static::sizemap()->SizeClass(size); - const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl); + ASSERT(size == Static::sizemap()->ByteSizeForClass(cl)); + FreeList* list = &list_[cl]; if (list->empty()) { - return FetchFromCentralCache(cl, alloc_size); + return FetchFromCentralCache(cl, size); } - size_ -= alloc_size; + size_ -= size; return list->Pop(); } diff --git a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c index 97b614b..5c65a03 100644 --- a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c +++ b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c @@ -48,6 +48,12 @@ #define SEARCH_CAP (1024*1024) #define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols" +void usage() { + fprintf(stderr, "usage: " + "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n"); + fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n"); +} + int main(int argc, char *argv[]) { DWORD error; HANDLE process; @@ -74,10 +80,11 @@ int main(int argc, char *argv[]) { } filename = argv[i+1]; i++; /* to skip over filename too */ + } else if (strcmp(argv[i], "--help") == 0) { + usage(); + exit(0); } else { - fprintf(stderr, "usage: " - "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n"); - fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n"); + usage(); exit(1); } } diff --git a/third_party/tcmalloc/chromium/src/windows/config.h b/third_party/tcmalloc/chromium/src/windows/config.h index 99de82c..b5d9bb6 100644 --- 
a/third_party/tcmalloc/chromium/src/windows/config.h +++ b/third_party/tcmalloc/chromium/src/windows/config.h @@ -261,10 +261,12 @@ // --------------------------------------------------------------------- // Extra stuff not found in config.h.in -// This must be defined before the windows.h is included. It's needed -// for mutex.h, to give access to the TryLock method. +// This must be defined before the windows.h is included. We need at +// least 0x0400 for mutex.h to have access to TryLock, and at least +// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. +// (This latter is an optimization we could take out if need be.) #ifndef _WIN32_WINNT -# define _WIN32_WINNT 0x0400 +# define _WIN32_WINNT 0x0501 #endif // We want to make sure not to ever try to #include heap-checker.h diff --git a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h index 4b97b15..663b7f9 100644 --- a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h +++ b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h @@ -61,7 +61,8 @@ #endif #ifdef __cplusplus -#include <new> // for nothrow_t +#include <new> // for std::nothrow_t + extern "C" { #endif // Returns a human-readable version string. If major, minor, @@ -92,16 +93,15 @@ extern "C" { #ifdef __cplusplus PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); - PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; - PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); - PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; - PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW; - PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, - const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW; } diff --git a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c index 726d345..9beb21d 100644 --- a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c +++ b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c @@ -180,6 +180,10 @@ static void ShowSymbolInfo(HANDLE process, ULONG64 module_base) { #endif } +void usage() { + fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n"); +} + int main(int argc, char *argv[]) { DWORD error; HANDLE process; @@ -195,12 +199,15 @@ int main(int argc, char *argv[]) { for (i = 1; i < argc; i++) { if (strcmp(argv[i], "--demangle") == 0 || strcmp(argv[i], "-C") == 0) { symopts |= SYMOPT_UNDNAME; + } else if (strcmp(argv[i], "--help") == 0) { + usage(); + exit(0); } else { break; } } if (i != argc - 1) { - fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n"); + usage(); exit(1); } filename = argv[i]; diff --git a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc index c1ed37f..deb841b 100644 --- a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc +++ b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc @@ -83,6 +83,7 @@ #endif #include <windows.h> +#include <stdio.h> #include <malloc.h> // 
for _msize and _expand #include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc. #include <set> @@ -96,8 +97,6 @@ // The maximum number of modules we allow to be in one executable const int kMaxModules = 8182; -// The maximum size of a module's basename -const int kMaxModuleNameSize = 256; // These are hard-coded, unfortunately. :-( They are also probably // compiler specific. See get_mangled_names.cc, in this directory, @@ -145,13 +144,15 @@ class LibcInfo { LibcInfo() { memset(this, 0, sizeof(*this)); // easiest way to initialize the array } - bool SameAs(const LibcInfo& that) const; - bool SameAsModuleEntry(const ModuleEntryCopy& module_entry) const; - - bool patched() const { return is_valid() && module_name_[0] != '\0'; } - const char* module_name() const { return is_valid() ? module_name_ : ""; } + bool patched() const { return is_valid(); } void set_is_valid(bool b) { is_valid_ = b; } + // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx: + // "The load address of a module (lpBaseOfDll) is the same as the HMODULE + // value." + HMODULE hmodule() const { + return reinterpret_cast<HMODULE>(const_cast<void*>(module_base_address_)); + } // Populates all the windows_fn_[] vars based on our module info. // Returns false if windows_fn_ is all NULL's, because there's @@ -167,7 +168,6 @@ memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_)); this->module_base_address_ = that.module_base_address_; this->module_base_size_ = that.module_base_size_; - memcpy(this->module_name_, that.module_name_, sizeof(module_name_)); } enum { @@ -207,7 +207,6 @@ const void *module_base_address_; size_t module_base_size_; - char module_name_[kMaxModuleNameSize]; public: // These shouldn't have to be public, since only subclasses of @@ -285,10 +284,8 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo { // This is a subset of MODULEENTRY32 that we need for patching. struct ModuleEntryCopy { - LPVOID modBaseAddr; + LPVOID modBaseAddr; // the same as hmodule DWORD modBaseSize; - HMODULE hModule; - TCHAR szModule[kMaxModuleNameSize]; // This is not part of MODULEENTRY32, but is needed to avoid making // windows syscalls while we're holding patch_all_modules_lock (see // lock-inversion comments at patch_all_modules_lock definition, below). @@ -297,26 +294,16 @@ struct ModuleEntryCopy { ModuleEntryCopy() { modBaseAddr = NULL; modBaseSize = 0; - hModule = NULL; - strcpy(szModule, "<executable>"); for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) rgProcAddresses[i] = LibcInfo::static_fn(i); } - ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) { + ModuleEntryCopy(const MODULEINFO& mi) { this->modBaseAddr = mi.lpBaseOfDll; this->modBaseSize = mi.SizeOfImage; - this->hModule = hmodule; - // TODO(csilvers): we could make more efficient by calling these - // lazily (not until the vars are needed, which is often never). - // However, there's tricky business with calling windows functions - // inside the patch_all_modules_lock (see the lock inversion - // comments with the patch_all_modules_lock definition, below), so - // it's safest to do it all here, where no lock is needed. - ::GetModuleBaseNameA(hprocess, hmodule, - this->szModule, sizeof(this->szModule)); for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) - rgProcAddresses[i] = - (GenericFnPtr)::GetProcAddress(hModule, LibcInfo::function_name(i)); + rgProcAddresses[i] = (GenericFnPtr)::GetProcAddress( + reinterpret_cast<const HMODULE>(mi.lpBaseOfDll), + LibcInfo::function_name(i)); } };
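[Editor's note: the design point ModuleEntryCopy embodies, snapshotting everything needed from a module up front so that no Windows calls happen later while the spinlock is held, can be shown with a small hypothetical sketch. ResolvedModule and Resolve are invented names.]

```cpp
// Hypothetical sketch of the snapshot-outside-the-lock pattern.
#include <windows.h>
#include <psapi.h>

struct ResolvedModule {
  void*   base;       // the same value as the HMODULE itself (see above)
  size_t  size;
  FARPROC malloc_fn;  // the real code snapshots a whole table of these
};

static bool Resolve(HMODULE h, ResolvedModule* out) {
  MODULEINFO mi;
  if (!::GetModuleInformation(::GetCurrentProcess(), h, &mi, sizeof(mi)))
    return false;
  out->base = mi.lpBaseOfDll;
  out->size = mi.SizeOfImage;
  out->malloc_fn = ::GetProcAddress(h, "malloc");  // may be NULL
  // From here on the struct is plain data: safe to read under a spinlock.
  return true;
}
```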
@@ -479,18 +466,6 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = { { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary }, }; -bool LibcInfo::SameAs(const LibcInfo& that) const { - return (is_valid() && - module_base_address_ == that.module_base_address_ && - module_base_size_ == that.module_base_size_); -} - -bool LibcInfo::SameAsModuleEntry(const ModuleEntryCopy& module_entry) const { - return (is_valid() && - module_base_address_ == module_entry.modBaseAddr && - module_base_size_ == module_entry.modBaseSize); -} - bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) { // First, store the location of the function to patch before // patching it. If none of these functions are found in the module, @@ -552,10 +527,9 @@ bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) { CHECK(windows_fn_[kFree]); CHECK(windows_fn_[kRealloc]); - // OK, we successfully patched. Let's store our member information. + // OK, we successfully populated. Let's store our member information. module_base_address_ = module_entry.modBaseAddr; module_base_size_ = module_entry.modBaseSize; - strcpy(module_name_, module_entry.szModule); return true; } @@ -636,14 +610,6 @@ void WindowsInfo::Unpatch() { // You should hold the patch_all_modules_lock when calling this. void PatchOneModuleLocked(const LibcInfo& me_info) { - // Double-check we haven't seen this module before. - for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { - if (g_module_libcs[i]->SameAs(me_info)) { - fprintf(stderr, "%s:%d: FATAL PERFTOOLS ERROR: %s double-patched somehow.\n", - __FILE__, __LINE__, g_module_libcs[i]->module_name()); - CHECK(false); - } - } // If we don't already have info on this module, let's add it. This // is where we're sad that each libcX has a different type, so we // can't use an array; instead, we have to use a switch statement. @@ -686,52 +652,70 @@ void PatchMainExecutableLocked() { // patch_all_modules_lock, inside PatchAllModules(). static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); +// g_last_loaded: The set of modules that were loaded the last time +// PatchAllModules was called. This is an optimization so that we only +// look at modules that were added or removed since the last call. +static std::set<HMODULE> *g_last_loaded; +
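[Editor's note: the way this cache is used below amounts to a set difference in both directions, and a small hypothetical sketch may make that easier to follow. DiffModules and its parameters are invented names.]

```cpp
// Hypothetical sketch of the add/remove bookkeeping PatchAllModules does.
#include <set>
#include <vector>
#include <windows.h>

static void DiffModules(const std::vector<HMODULE>& now,
                        std::set<HMODULE>* last_loaded,
                        std::vector<HMODULE>* newly_loaded) {
  std::set<HMODULE> current;
  for (size_t i = 0; i < now.size(); ++i) {
    current.insert(now[i]);
    if (last_loaded->count(now[i]) == 0)
      newly_loaded->push_back(now[i]);   // candidate for patching
  }
  // Anything left in last_loaded but absent from current was unloaded;
  // the real code invalidates the matching g_module_libcs entries.
  last_loaded->swap(current);            // becomes the cache for next time
}
```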
// Iterates over all the modules currently loaded by the executable, -// and makes sure they're all patched. For ones that aren't, we patch -// them in. We also check that every module we had patched in the -// past is still loaded, and update internal data structures if so. -// We return true if this PatchAllModules did any work, false else. +// according to Windows, and makes sure they're all patched. Most +// modules will already be in g_last_loaded, meaning we have already +// seen them and either patched them or determined they did not need +// to be patched. Others will not, which means we need to patch them +// (if necessary). Finally, we have to go through the existing +// g_module_libcs and see if any of those are *not* in the modules +// currently loaded by the executable. If so, we need to invalidate +// them. Returns true if we did any work (patching or invalidating), +// false otherwise. May update g_last_loaded as well. +// NOTE: you must hold the patch_all_modules_lock to access g_last_loaded. bool PatchAllModules() { std::vector<ModuleEntryCopy> modules; bool made_changes = false; const HANDLE hCurrentProcess = GetCurrentProcess(); - MODULEINFO mi; - DWORD cbNeeded = 0; + DWORD num_modules = 0; HMODULE hModules[kMaxModules]; // max # of modules we support in one process - if (::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), - &cbNeeded)) { - for (int i = 0; i < cbNeeded / sizeof(*hModules); ++i) { - if (i >= kMaxModules) { - printf("PERFTOOLS ERROR: Too many modules in this executable to try" - " to patch them all (if you need to, raise kMaxModules in" - " patch_functions.cc).\n"); - break; - } - if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) - modules.push_back(ModuleEntryCopy(hCurrentProcess, hModules[i], mi)); - } + if (!::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), + &num_modules)) { + num_modules = 0; + } + // EnumProcessModules actually sets the bytes written into hModules, + // so we need to divide to make num_modules actually be a module-count. + num_modules /= sizeof(*hModules); + if (num_modules >= kMaxModules) { + printf("PERFTOOLS ERROR: Too many modules in this executable to try" + " to patch them all (if you need to, raise kMaxModules in" + " patch_functions.cc).\n"); + num_modules = kMaxModules; } - // Now do the actual patching and unpatching. + // Now we handle the unpatching of modules we have in g_module_libcs + // but that were not found in EnumProcessModules. We need to + // invalidate them. To speed that up, we store the EnumProcessModules + // output in a set. + // At the same time, we prepare for the adding of new modules, by + // removing from hModules all the modules we know we've already + // patched (or decided don't need to be patched). At the end, + // hModules will hold only the modules that we need to consider patching. + std::set<HMODULE> currently_loaded_modules; { SpinLockHolder h(&patch_all_modules_lock); - for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { - if (!g_module_libcs[i]->is_valid()) - continue; - bool still_loaded = false; - for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); - it != modules.end(); ++it) { - if (g_module_libcs[i]->SameAsModuleEntry(*it)) { - // Both g_module_libcs[i] and it are still valid. Mark it by - // removing it from the vector; mark g_module_libcs[i] by - // setting a bool. - modules.erase(it); - still_loaded = true; - break; - } + if (!g_last_loaded) g_last_loaded = new std::set<HMODULE>; + // At the end of this loop, currently_loaded_modules contains the + // full list of EnumProcessModules, and hModules just the ones we + // haven't handled yet. + for (int i = 0; i < num_modules; ) { + currently_loaded_modules.insert(hModules[i]); + if (g_last_loaded->count(hModules[i]) > 0) { + hModules[i] = hModules[--num_modules]; // replace element i with tail + } else { + i++; // keep element i } - if (!still_loaded) { + } + // Now we do the unpatching/invalidation. + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (g_module_libcs[i]->patched() && + currently_loaded_modules.count(g_module_libcs[i]->hmodule()) == 0) { // Means g_module_libcs[i] is no longer loaded (no module entry matched). // We could call Unpatch() here, but why bother?
The module // has gone away, so nobody is going to call into it anyway. @@ -739,14 +723,28 @@ bool PatchAllModules() { made_changes = true; } } + // Update the loaded module cache. + g_last_loaded->swap(currently_loaded_modules); + } + + // Now that we know what modules are new, let's get the info we'll + // need to patch them. Note this *cannot* be done while holding the + // lock, since it needs to make windows calls (see the lock-inversion + // comments before the definition of patch_all_modules_lock). + MODULEINFO mi; + for (int i = 0; i < num_modules; i++) { + if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) + modules.push_back(ModuleEntryCopy(mi)); + } - // We've handled all the g_module_libcs. Now let's handle the rest - // of the module-entries: those that haven't already been loaded. - for (std::vector<ModuleEntryCopy>::const_iterator it = modules.begin(); + // Now we can do the patching of new modules. + { + SpinLockHolder h(&patch_all_modules_lock); + for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); it != modules.end(); ++it) { LibcInfo libc_info; if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines - PatchOneModuleLocked(libc_info); // updates num_patched_modules + PatchOneModuleLocked(libc_info); made_changes = true; } } @@ -759,6 +757,10 @@ bool PatchAllModules() { made_changes = true; } } + // TODO(csilvers): for this to be reliable, we need to also take + // into account if we *would* have patched any modules had they not + // already been loaded. (That is, made_changes should ignore + // g_last_loaded.) return made_changes; } @@ -766,59 +768,9 @@ bool PatchAllModules() { } // end unnamed namespace // --------------------------------------------------------------------- -// PatchWindowsFunctions() -// This is the function that is exposed to the outside world. -// It should be called before the program becomes multi-threaded, -// since main_executable_windows.Patch() is not thread-safe. -// --------------------------------------------------------------------- - -void PatchWindowsFunctions() { - // This does the libc patching in every module, and the main executable. - PatchAllModules(); - main_executable_windows.Patch(); -} - -#if 0 -// It's possible to unpatch all the functions when we are exiting. - -// The idea is to handle properly windows-internal data that is -// allocated before PatchWindowsFunctions is called. If all -// destruction happened in reverse order from construction, then we -// could call UnpatchWindowsFunctions at just the right time, so that -// that early-allocated data would be freed using the windows -// allocation functions rather than tcmalloc. The problem is that -// windows allocates some structures lazily, so it would allocate them -// late (using tcmalloc) and then try to deallocate them late as well. -// So instead of unpatching, we just modify all the tcmalloc routines -// so they call through to the libc rountines if the memory in -// question doesn't seem to have been allocated with tcmalloc. I keep -// this unpatch code around for reference. - -void UnpatchWindowsFunctions() { - // We need to go back to the system malloc/etc at global destruct time, - // so objects that were constructed before tcmalloc, using the system - // malloc, can destroy themselves using the system free. This depends - // on DLLs unloading in the reverse order in which they load! - // - // We also go back to the default HeapAlloc/etc, just for consistency. 
- // Who knows, it may help avoid weird bugs in some situations. - main_executable_windows.Unpatch(); - main_executable.Unpatch(); - if (libc1.is_valid()) libc1.Unpatch(); - if (libc2.is_valid()) libc2.Unpatch(); - if (libc3.is_valid()) libc3.Unpatch(); - if (libc4.is_valid()) libc4.Unpatch(); - if (libc5.is_valid()) libc5.Unpatch(); - if (libc6.is_valid()) libc6.Unpatch(); - if (libc7.is_valid()) libc7.Unpatch(); - if (libc8.is_valid()) libc8.Unpatch(); -} -#endif - -// --------------------------------------------------------------------- -// Now that we've done all the patching machinery, let's end the file -// by actually defining the functions we're patching in. Mostly these -// are simple wrappers around the do_* routines in tcmalloc.cc. +// Now that we've done all the patching machinery, let's actually +// define the functions we're patching in. Mostly these are +// simple wrappers around the do_* routines in tcmalloc.cc. // // In fact, we #include tcmalloc.cc to get at the tcmalloc internal // do_* functions, the better to write our own hook functions. @@ -1029,19 +981,107 @@ BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) { lpBaseAddress); } +// g_load_map holds a copy of Windows' refcount for how many times +// each currently loaded module has been loaded and unloaded. We use +// it as an optimization when the same module is loaded more than +// once: as long as the refcount stays above 1, we don't need to worry +// about patching because it's already patched. Likewise, we don't +// need to unpatch until the refcount drops to 0. g_load_map is +// maintained in LoadLibraryExW and FreeLibrary, and only covers +// modules explicitly loaded/freed via those interfaces. +static std::map<HMODULE, int>* g_load_map = NULL; + HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, HANDLE hFile, DWORD dwFlags) { - HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) - function_info_[kLoadLibraryExW].origstub_fn)( - lpFileName, hFile, dwFlags); - PatchAllModules(); - return rv; + HMODULE rv; + // Check to see if the module is already loaded; flag 0 gets a + // reference if it was. If it was already loaded there is no need + // to call PatchAllModules; the GetModuleHandleExW call has already + // increased the reference count, just as LoadLibraryExW would have. + if (::GetModuleHandleExW(0, lpFileName, &rv)) { + return rv; + } else { + // Not already loaded, so load it. + rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) + function_info_[kLoadLibraryExW].origstub_fn)( + lpFileName, hFile, dwFlags); + // This will patch any newly loaded libraries, if patching needs + // to be done. + PatchAllModules(); + + return rv; + } }
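[Editor's note: the wrappers above and below lean on two GetModuleHandleExW idioms that differ only in flags, and it may be worth separating them. A hypothetical sketch; AlreadyLoadedTakeRef and StillLoaded are invented names, and the flag constants require _WIN32_WINNT >= 0x0501, which config.h now sets.]

```cpp
// Hypothetical sketch of the two GetModuleHandleExW idioms used here.
#include <windows.h>

// With dwFlags == 0, success takes a new reference on the module --
// exactly the side effect a repeated LoadLibrary would have had.
static bool AlreadyLoadedTakeRef(LPCWSTR name, HMODULE* h) {
  return ::GetModuleHandleExW(0, name, h) != FALSE;
}

// With FROM_ADDRESS | UNCHANGED_REFCOUNT, this only asks "which module
// owns this address?" and leaves the refcount alone.
static bool StillLoaded(HMODULE h) {
  HMODULE owner = NULL;
  BOOL ok = ::GetModuleHandleExW(
      GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
          GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
      reinterpret_cast<LPCWSTR>(h),  // any address inside the module
      &owner);
  return ok && owner == h;
}
```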
BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) { BOOL rv = ((BOOL (WINAPI *)(HMODULE)) function_info_[kFreeLibrary].origstub_fn)(hLibModule); + + // Check to see if the module is still loaded by passing the base + // address and seeing if it comes back with the same address. If it + // is the same address it's still loaded, so the FreeLibrary() call + // was a noop, and there's no need to redo the patching. + HMODULE owner = NULL; + BOOL result = ::GetModuleHandleExW( + (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT), + (LPCWSTR)hLibModule, + &owner); + if (result && owner == hLibModule) + return rv; + PatchAllModules(); // this will fix up the list of patched libraries return rv; } + + +// --------------------------------------------------------------------- +// PatchWindowsFunctions() +// This is the function that is exposed to the outside world. +// It should be called before the program becomes multi-threaded, +// since main_executable_windows.Patch() is not thread-safe. +// --------------------------------------------------------------------- + +void PatchWindowsFunctions() { + // This does the libc patching in every module, and the main executable. + PatchAllModules(); + main_executable_windows.Patch(); +} + +#if 0 +// It's possible to unpatch all the functions when we are exiting. + +// The idea is to properly handle windows-internal data that is +// allocated before PatchWindowsFunctions is called. If all +// destruction happened in reverse order from construction, then we +// could call UnpatchWindowsFunctions at just the right time, so that +// the early-allocated data would be freed using the windows +// allocation functions rather than tcmalloc. The problem is that +// windows allocates some structures lazily, so it would allocate them +// late (using tcmalloc) and then try to deallocate them late as well. +// So instead of unpatching, we just modify all the tcmalloc routines +// so they call through to the libc routines if the memory in +// question doesn't seem to have been allocated with tcmalloc. I keep +// this unpatch code around for reference. + +void UnpatchWindowsFunctions() { + // We need to go back to the system malloc/etc at global destruct time, + // so objects that were constructed before tcmalloc, using the system + // malloc, can destroy themselves using the system free. This depends + // on DLLs unloading in the reverse order in which they load! + // + // We also go back to the default HeapAlloc/etc, just for consistency. + // Who knows, it may help avoid weird bugs in some situations. + main_executable_windows.Unpatch(); + main_executable.Unpatch(); + if (libc1.is_valid()) libc1.Unpatch(); + if (libc2.is_valid()) libc2.Unpatch(); + if (libc3.is_valid()) libc3.Unpatch(); + if (libc4.is_valid()) libc4.Unpatch(); + if (libc5.is_valid()) libc5.Unpatch(); + if (libc6.is_valid()) libc6.Unpatch(); + if (libc7.is_valid()) libc7.Unpatch(); + if (libc8.is_valid()) libc8.Unpatch(); +} +#endif
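[Editor's note: the alternative the comment above describes ("call through to the libc routines if the memory in question doesn't seem to have been allocated with tcmalloc") has roughly the following shape. This is a hypothetical sketch, not tcmalloc's actual code: IsTcmallocPtr stands in for tcmalloc's real ownership test and trivially returns false here, just to show the dispatch structure.]

```cpp
// Hypothetical sketch of dispatch-by-ownership instead of unpatching.
#include <cstdlib>

typedef void (*FreeFn)(void*);
static FreeFn origstub_free = &std::free;  // would be saved at patch time

// Stand-in for tcmalloc's real ownership test (a pagemap lookup).
static bool IsTcmallocPtr(void* /*p*/) { return false; }

static void Perftools_free_sketch(void* p) {
  if (p == NULL) return;
  if (IsTcmallocPtr(p)) {
    // tcmalloc's own deallocation path (do_free) would run here.
  } else {
    origstub_free(p);  // memory predates patching: use the original free
  }
}
```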