summaryrefslogtreecommitdiffstats
path: root/third_party/tcmalloc
diff options
context:
space:
mode:
authorglider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-20 12:22:51 +0000
committerglider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-20 12:22:51 +0000
commitdb3fb1cb119f862b96ca3de8f74ff647fef6f94e (patch)
treed547159ec152ac05ed793f9cac867b500a42a8f7 /third_party/tcmalloc
parentfa82f93da256dede111ee4143c340e55a195d7e3 (diff)
downloadchromium_src-db3fb1cb119f862b96ca3de8f74ff647fef6f94e.zip
chromium_src-db3fb1cb119f862b96ca3de8f74ff647fef6f94e.tar.gz
chromium_src-db3fb1cb119f862b96ca3de8f74ff647fef6f94e.tar.bz2
The newer version of tcmalloc should fix the problems with running tcmalloc under Valgrind.
Review URL: http://codereview.chromium.org/1735024 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@47789 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/tcmalloc')
-rw-r--r--third_party/tcmalloc/README.chromium5
-rw-r--r--third_party/tcmalloc/chromium/src/base/basictypes.h8
-rw-r--r--third_party/tcmalloc/chromium/src/base/dynamic_annotations.c148
-rw-r--r--third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc110
-rw-r--r--third_party/tcmalloc/chromium/src/base/dynamic_annotations.h664
-rw-r--r--third_party/tcmalloc/chromium/src/base/low_level_alloc.cc7
-rw-r--r--third_party/tcmalloc/chromium/src/base/vdso_support.cc2
-rw-r--r--third_party/tcmalloc/chromium/src/central_freelist.cc9
-rw-r--r--third_party/tcmalloc/chromium/src/common.h1
-rw-r--r--third_party/tcmalloc/chromium/src/config.h.in5
-rw-r--r--third_party/tcmalloc/chromium/src/config_linux.h5
-rw-r--r--third_party/tcmalloc/chromium/src/config_win.h8
-rw-r--r--third_party/tcmalloc/chromium/src/debugallocation.cc35
-rw-r--r--third_party/tcmalloc/chromium/src/google/heap-profiler.h5
-rw-r--r--third_party/tcmalloc/chromium/src/google/profiler.h6
-rw-r--r--third_party/tcmalloc/chromium/src/google/stacktrace.h18
-rw-r--r--third_party/tcmalloc/chromium/src/google/tcmalloc.h.in14
-rw-r--r--third_party/tcmalloc/chromium/src/heap-checker.cc155
-rw-r--r--third_party/tcmalloc/chromium/src/heap-profile-table.cc2
-rw-r--r--third_party/tcmalloc/chromium/src/heap-profile-table.h4
-rw-r--r--third_party/tcmalloc/chromium/src/heap-profiler.cc4
-rw-r--r--third_party/tcmalloc/chromium/src/internal_logging.h4
-rw-r--r--third_party/tcmalloc/chromium/src/malloc_extension.cc5
-rw-r--r--third_party/tcmalloc/chromium/src/malloc_hook.cc4
-rw-r--r--third_party/tcmalloc/chromium/src/memory_region_map.cc52
-rw-r--r--third_party/tcmalloc/chromium/src/page_heap.cc114
-rw-r--r--third_party/tcmalloc/chromium/src/page_heap.h71
-rw-r--r--third_party/tcmalloc/chromium/src/page_heap_allocator.h15
-rwxr-xr-xthird_party/tcmalloc/chromium/src/pprof1146
-rw-r--r--third_party/tcmalloc/chromium/src/span.h4
-rw-r--r--third_party/tcmalloc/chromium/src/stacktrace.cc40
-rw-r--r--third_party/tcmalloc/chromium/src/stacktrace_config.h1
-rw-r--r--third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h69
-rw-r--r--third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h120
-rw-r--r--third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h115
-rw-r--r--third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h7
-rw-r--r--third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h245
-rw-r--r--third_party/tcmalloc/chromium/src/symbolize.cc42
-rw-r--r--third_party/tcmalloc/chromium/src/system-alloc.cc43
-rw-r--r--third_party/tcmalloc/chromium/src/system-alloc.h6
-rw-r--r--third_party/tcmalloc/chromium/src/tcmalloc.cc250
-rw-r--r--third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc31
-rw-r--r--third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh10
-rw-r--r--third_party/tcmalloc/chromium/src/tests/page_heap_test.cc2
-rw-r--r--third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc6
-rw-r--r--third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh25
-rw-r--r--third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc2
-rw-r--r--third_party/tcmalloc/chromium/src/third_party/valgrind.h3924
-rw-r--r--third_party/tcmalloc/chromium/src/thread_cache.h34
-rw-r--r--third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c13
-rw-r--r--third_party/tcmalloc/chromium/src/windows/config.h8
-rw-r--r--third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h14
-rw-r--r--third_party/tcmalloc/chromium/src/windows/nm-pdb.c9
-rw-r--r--third_party/tcmalloc/chromium/src/windows/patch_functions.cc328
54 files changed, 6226 insertions, 1748 deletions
diff --git a/third_party/tcmalloc/README.chromium b/third_party/tcmalloc/README.chromium
index a8352a9..1ceda91 100644
--- a/third_party/tcmalloc/README.chromium
+++ b/third_party/tcmalloc/README.chromium
@@ -22,9 +22,8 @@ Contents:
The current revision is:
- Last Changed Rev: 77
- Last Changed Date: 2009-10-27 10:30:52 -0700 (Tue, 27 Oct 2009)
-
+ Last Changed Rev: 94
+ Last Changed Date: 2010-05-08 01:53:24 +0400 (Sat, 08 May 2010)
HOWTOs:
diff --git a/third_party/tcmalloc/chromium/src/base/basictypes.h b/third_party/tcmalloc/chromium/src/base/basictypes.h
index 9991413..ab9cdabc 100644
--- a/third_party/tcmalloc/chromium/src/base/basictypes.h
+++ b/third_party/tcmalloc/chromium/src/base/basictypes.h
@@ -308,6 +308,14 @@ class AssignAttributeStartEnd {
#endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
+#if defined(HAVE___ATTRIBUTE__) && (defined(__i386__) || defined(__x86_64__))
+# define CACHELINE_SIZE 64
+# define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
+#else
+# define CACHELINE_ALIGNED
+#endif // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__)
+
+
// The following enum should be used only as a constructor argument to indicate
// that the variable has static storage class, and that the constructor should
// do nothing to its state. It indicates to the reader that it is legal to
diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c
new file mode 100644
index 0000000..cdefaa7
--- /dev/null
+++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c
@@ -0,0 +1,148 @@
+/* Copyright (c) 2008-2009, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Kostya Serebryany
+ */
+
+#ifdef __cplusplus
+# error "This file should be built as pure C to avoid name mangling"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "base/dynamic_annotations.h"
+
+#ifdef __GNUC__
+/* valgrind.h uses gcc extensions so it won't build with other compilers */
+# ifdef HAVE_VALGRIND_H /* prefer the user's copy if they have it */
+# include <valgrind.h>
+# else /* otherwise just use the copy that we have */
+# include "third_party/valgrind.h"
+# endif
+#endif
+
+/* Each function is empty and is called (via a macro) only when dynamic
+ annotations are enabled. Dynamic tools capture the arguments at runtime. */
+
+#if DYNAMIC_ANNOTATIONS_ENABLED == 1
+
+void AnnotateRWLockCreate(const char *file, int line,
+ const volatile void *lock){}
+void AnnotateRWLockDestroy(const char *file, int line,
+ const volatile void *lock){}
+void AnnotateRWLockAcquired(const char *file, int line,
+ const volatile void *lock, long is_w){}
+void AnnotateRWLockReleased(const char *file, int line,
+ const volatile void *lock, long is_w){}
+void AnnotateBarrierInit(const char *file, int line,
+ const volatile void *barrier, long count,
+ long reinitialization_allowed) {}
+void AnnotateBarrierWaitBefore(const char *file, int line,
+ const volatile void *barrier) {}
+void AnnotateBarrierWaitAfter(const char *file, int line,
+ const volatile void *barrier) {}
+void AnnotateBarrierDestroy(const char *file, int line,
+ const volatile void *barrier) {}
+
+void AnnotateCondVarWait(const char *file, int line,
+ const volatile void *cv,
+ const volatile void *lock){}
+void AnnotateCondVarSignal(const char *file, int line,
+ const volatile void *cv){}
+void AnnotateCondVarSignalAll(const char *file, int line,
+ const volatile void *cv){}
+void AnnotatePublishMemoryRange(const char *file, int line,
+ const volatile void *address,
+ long size){}
+void AnnotateUnpublishMemoryRange(const char *file, int line,
+ const volatile void *address,
+ long size){}
+void AnnotatePCQCreate(const char *file, int line,
+ const volatile void *pcq){}
+void AnnotatePCQDestroy(const char *file, int line,
+ const volatile void *pcq){}
+void AnnotatePCQPut(const char *file, int line,
+ const volatile void *pcq){}
+void AnnotatePCQGet(const char *file, int line,
+ const volatile void *pcq){}
+void AnnotateNewMemory(const char *file, int line,
+ const volatile void *mem,
+ long size){}
+void AnnotateExpectRace(const char *file, int line,
+ const volatile void *mem,
+ const char *description){}
+void AnnotateBenignRace(const char *file, int line,
+ const volatile void *mem,
+ const char *description){}
+void AnnotateBenignRaceSized(const char *file, int line,
+ const volatile void *mem,
+ long size,
+ const char *description) {}
+void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
+ const volatile void *mu){}
+void AnnotateTraceMemory(const char *file, int line,
+ const volatile void *arg){}
+void AnnotateThreadName(const char *file, int line,
+ const char *name){}
+void AnnotateIgnoreReadsBegin(const char *file, int line){}
+void AnnotateIgnoreReadsEnd(const char *file, int line){}
+void AnnotateIgnoreWritesBegin(const char *file, int line){}
+void AnnotateIgnoreWritesEnd(const char *file, int line){}
+void AnnotateEnableRaceDetection(const char *file, int line, int enable){}
+void AnnotateNoOp(const char *file, int line,
+ const volatile void *arg){}
+void AnnotateFlushState(const char *file, int line){}
+
+#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 1 */
+
+static int GetRunningOnValgrind(void) {
+#ifdef RUNNING_ON_VALGRIND
+ if (RUNNING_ON_VALGRIND) return 1;
+#endif
+ /* TODO(csilvers): use GetenvBeforeMain() instead? Will need to
+ change it to be extern "C". */
+ char *running_on_valgrind_str = getenv("RUNNING_ON_VALGRIND");
+ if (running_on_valgrind_str) {
+ return strcmp(running_on_valgrind_str, "0") != 0;
+ }
+ return 0;
+}
+
+/* See the comments in dynamic_annotations.h */
+int RunningOnValgrind(void) {
+ static volatile int running_on_valgrind = -1;
+ /* C doesn't have thread-safe initialization of statics, and we
+ don't want to depend on pthread_once here, so hack it. */
+ int local_running_on_valgrind = running_on_valgrind;
+ if (local_running_on_valgrind == -1)
+ running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind();
+ return local_running_on_valgrind;
+}
diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc
deleted file mode 100644
index c8bbcd7..0000000
--- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Copyright (c) 2008, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Kostya Serebryany
- */
-
-#include <config.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "base/dynamic_annotations.h"
-#include "base/sysinfo.h"
-
-// Each function is empty and called (via a macro) only in debug mode.
-// The arguments are captured by dynamic tools at runtime.
-
-extern "C" void AnnotateRWLockCreate(const char *file, int line,
- const volatile void *lock){}
-extern "C" void AnnotateRWLockDestroy(const char *file, int line,
- const volatile void *lock){}
-extern "C" void AnnotateRWLockAcquired(const char *file, int line,
- const volatile void *lock, long is_w){}
-extern "C" void AnnotateRWLockReleased(const char *file, int line,
- const volatile void *lock, long is_w){}
-extern "C" void AnnotateCondVarWait(const char *file, int line,
- const volatile void *cv,
- const volatile void *lock){}
-extern "C" void AnnotateCondVarSignal(const char *file, int line,
- const volatile void *cv){}
-extern "C" void AnnotateCondVarSignalAll(const char *file, int line,
- const volatile void *cv){}
-extern "C" void AnnotatePublishMemoryRange(const char *file, int line,
- const volatile void *address,
- long size){}
-extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line,
- const volatile void *address,
- long size){}
-extern "C" void AnnotatePCQCreate(const char *file, int line,
- const volatile void *pcq){}
-extern "C" void AnnotatePCQDestroy(const char *file, int line,
- const volatile void *pcq){}
-extern "C" void AnnotatePCQPut(const char *file, int line,
- const volatile void *pcq){}
-extern "C" void AnnotatePCQGet(const char *file, int line,
- const volatile void *pcq){}
-extern "C" void AnnotateNewMemory(const char *file, int line,
- const volatile void *mem,
- long size){}
-extern "C" void AnnotateExpectRace(const char *file, int line,
- const volatile void *mem,
- const char *description){}
-extern "C" void AnnotateBenignRace(const char *file, int line,
- const volatile void *mem,
- const char *description){}
-extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
- const volatile void *mu){}
-extern "C" void AnnotateTraceMemory(const char *file, int line,
- const volatile void *arg){}
-extern "C" void AnnotateThreadName(const char *file, int line,
- const char *name){}
-extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line){}
-extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line){}
-extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line){}
-extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line){}
-extern "C" void AnnotateNoOp(const char *file, int line,
- const volatile void *arg){}
-
-static int GetRunningOnValgrind() {
- const char *running_on_valgrind_str = GetenvBeforeMain("RUNNING_ON_VALGRIND");
- if (running_on_valgrind_str) {
- return strcmp(running_on_valgrind_str, "0") != 0;
- }
- return 0;
-}
-
-// When running under valgrind, this function will be intercepted
-// and a non-zero value will be returned.
-// Some valgrind-based tools (e.g. callgrind) do not intercept functions,
-// so we also read environment variable.
-extern "C" int RunningOnValgrind() {
- static int running_on_valgrind = GetRunningOnValgrind();
- return running_on_valgrind;
-}
diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
index a2a268f..dae1a14 100644
--- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
+++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
@@ -1,10 +1,10 @@
/* Copyright (c) 2008, Google Inc.
* All rights reserved.
- *
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
- *
+ *
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
@@ -14,7 +14,7 @@
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
- *
+ *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -31,445 +31,471 @@
* Author: Kostya Serebryany
*/
-// This file defines dynamic annotations for use with dynamic analysis
-// tool such as valgrind, PIN, etc.
-//
-// Dynamic annotation is a source code annotation that affects
-// the generated code (that is, the annotation is not a comment).
-// Each such annotation is attached to a particular
-// instruction and/or to a particular object (address) in the program.
-//
-// The annotations that should be used by users are macros in all upper-case
-// (e.g., ANNOTATE_NEW_MEMORY).
-//
-// Actual implementation of these macros may differ depending on the
-// dynamic analysis tool being used.
-//
-// This file supports the following dynamic analysis tools:
-// - None (NDEBUG is defined).
-// Macros are defined empty.
-// - Helgrind (NDEBUG is not defined).
-// Macros are defined as calls to non-inlinable empty functions
-// that are intercepted by helgrind.
-//
+/* This file defines dynamic annotations for use with dynamic analysis
+ tools such as valgrind, PIN, etc.
+
+ Dynamic annotation is a source code annotation that affects
+ the generated code (that is, the annotation is not a comment).
+ Each such annotation is attached to a particular
+ instruction and/or to a particular object (address) in the program.
+
+ The annotations that should be used by users are macros in all upper-case
+ (e.g., ANNOTATE_NEW_MEMORY).
+
+ Actual implementation of these macros may differ depending on the
+ dynamic analysis tool being used.
+
+ See http://code.google.com/p/data-race-test/ for more information.
+
+ This file supports the following dynamic analysis tools:
+ - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero).
+ Macros are defined empty.
+ - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1).
+ Macros are defined as calls to non-inlinable empty functions
+ that are intercepted by Valgrind. */
+
#ifndef BASE_DYNAMIC_ANNOTATIONS_H_
#define BASE_DYNAMIC_ANNOTATIONS_H_
-#include "base/thread_annotations.h"
-
-// All the annotation macros are in effect only in debug mode.
-#ifndef NDEBUG
-
- // -------------------------------------------------------------
- // Annotations useful when implementing condition variables such as CondVar,
- // using conditional critical sections (Await/LockWhen) and when constructing
- // user-defined synchronization mechanisms.
- //
- // The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can
- // be used to define happens-before arcs in user-defined synchronization
- // mechanisms: the race detector will infer an arc from the former to the
- // latter when they share the same argument pointer.
- //
- // Example 1 (reference counting):
- //
- // void Unref() {
- // ANNOTATE_HAPPENS_BEFORE(&refcount_);
- // if (AtomicDecrementByOne(&refcount_) == 0) {
- // ANNOTATE_HAPPENS_AFTER(&refcount_);
- // delete this;
- // }
- // }
- //
- // Example 2 (message queue):
- //
- // void MyQueue::Put(Type *e) {
- // MutexLock lock(&mu_);
- // ANNOTATE_HAPPENS_BEFORE(e);
- // PutElementIntoMyQueue(e);
- // }
- //
- // Type *MyQueue::Get() {
- // MutexLock lock(&mu_);
- // Type *e = GetElementFromMyQueue();
- // ANNOTATE_HAPPENS_AFTER(e);
- // return e;
- // }
- //
- // Note: when possible, please use the existing reference counting and message
- // queue implementations instead of inventing new ones.
-
- // Report that wait on the condition variable at address "cv" has succeeded
- // and the lock at address "lock" is held.
+#ifndef DYNAMIC_ANNOTATIONS_ENABLED
+# define DYNAMIC_ANNOTATIONS_ENABLED 0
+#endif
+
+#if DYNAMIC_ANNOTATIONS_ENABLED != 0
+
+ /* -------------------------------------------------------------
+ Annotations useful when implementing condition variables such as CondVar,
+ using conditional critical sections (Await/LockWhen) and when constructing
+ user-defined synchronization mechanisms.
+
+ The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can
+ be used to define happens-before arcs in user-defined synchronization
+ mechanisms: the race detector will infer an arc from the former to the
+ latter when they share the same argument pointer.
+
+ Example 1 (reference counting):
+
+ void Unref() {
+ ANNOTATE_HAPPENS_BEFORE(&refcount_);
+ if (AtomicDecrementByOne(&refcount_) == 0) {
+ ANNOTATE_HAPPENS_AFTER(&refcount_);
+ delete this;
+ }
+ }
+
+ Example 2 (message queue):
+
+ void MyQueue::Put(Type *e) {
+ MutexLock lock(&mu_);
+ ANNOTATE_HAPPENS_BEFORE(e);
+ PutElementIntoMyQueue(e);
+ }
+
+ Type *MyQueue::Get() {
+ MutexLock lock(&mu_);
+ Type *e = GetElementFromMyQueue();
+ ANNOTATE_HAPPENS_AFTER(e);
+ return e;
+ }
+
+ Note: when possible, please use the existing reference counting and message
+ queue implementations instead of inventing new ones. */
+
+ /* Report that wait on the condition variable at address "cv" has succeeded
+ and the lock at address "lock" is held. */
#define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \
AnnotateCondVarWait(__FILE__, __LINE__, cv, lock)
- // Report that wait on the condition variable at "cv" has succeeded. Variant
- // w/o lock.
+ /* Report that wait on the condition variable at "cv" has succeeded. Variant
+ w/o lock. */
#define ANNOTATE_CONDVAR_WAIT(cv) \
AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL)
- // Report that we are about to signal on the condition variable at address
- // "cv".
+ /* Report that we are about to signal on the condition variable at address
+ "cv". */
#define ANNOTATE_CONDVAR_SIGNAL(cv) \
AnnotateCondVarSignal(__FILE__, __LINE__, cv)
- // Report that we are about to signal_all on the condition variable at "cv".
+ /* Report that we are about to signal_all on the condition variable at "cv". */
#define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \
AnnotateCondVarSignalAll(__FILE__, __LINE__, cv)
- // Annotations for user-defined synchronization mechanisms.
+ /* Annotations for user-defined synchronization mechanisms. */
#define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj)
#define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj)
- // Report that the bytes in the range [pointer, pointer+size) are about
- // to be published safely. The race checker will create a happens-before
- // arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to
- // subsequent accesses to this memory.
+ /* Report that the bytes in the range [pointer, pointer+size) are about
+ to be published safely. The race checker will create a happens-before
+ arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to
+ subsequent accesses to this memory.
+ Note: this annotation may not work properly if the race detector uses
+ sampling, i.e. does not observe all memory accesses.
+ */
#define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \
AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size)
- // Report that the bytes in the range [pointer, pointer+size) are not shared
- // between threads any more and can be safely used by the current thread w/o
- // synchronization. The race checker will create a happens-before arc from
- // all previous accesses to this memory to this call.
- //
- // This annotation could be applied to complex objects, such as STL
- // containers, with one condition: the accesses to the object itself
- // and its internal data should not be separated with any synchronization.
- //
- // Example that works:
- //
- // map<int, int> the_map;
- // void Thread1() {
- // MutexLock lock(&mu);
- // // Ok: accesses to the_map and its internal data is not separated by
- // // synchronization.
- // the_map[1]++;
- // }
- // void Thread2() {
- // {
- // MutexLock lock(&mu);
- // ...
- // // because of some reason we know that the_map will not be used by
- // // other threads any more
- // ANNOTATE_UNPUBLISH_MEMORY_RANGE(&the_map, sizeof(the_map));
- // }
- // the_map->DoSomething();
- // }
- //
- // Example that does not work (due to the way happens-before arcs are
- // represented in some race detectors):
- //
- // void Thread1() {
- // MutexLock lock(&mu);
- // int *guts_of_the_map = &(*the_map)[1];
- // // we have some synchronization between access to 'c' and its guts.
- // // This will make ANNOTATE_UNPUBLISH_MEMORY_RANGE in Thread2 useless.
- // some_other_lock_or_other_synchronization_utility.Lock();
- // (*guts_of_the_map)++;
- // ...
- // }
- //
- // void Thread1() { // same as above...
+ /* DEPRECATED. Don't use it. */
#define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \
AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size)
- // This annotation should be used to annotate thread-safe swapping of
- // containers. Required only when using hybrid (i.e. not pure happens-before)
- // detectors.
- //
- // This annotation has the same limitation as ANNOTATE_UNPUBLISH_MEMORY_RANGE
- // (see above).
- //
- // Example:
- // map<int, int> the_map;
- // void Thread1() {
- // MutexLock lock(&mu);
- // the_map[1]++;
- // }
- // void Thread2() {
- // map<int,int> tmp;
- // {
- // MutexLock lock(&mu);
- // the_map.swap(tmp);
- // ANNOTATE_SWAP_MEMORY_RANGE(&the_map, sizeof(the_map));
- // }
- // tmp->DoSomething();
- // }
+ /* DEPRECATED. Don't use it. */
#define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \
do { \
ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \
ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \
} while (0)
- // Instruct the tool to create a happens-before arc between mu->Unlock() and
- // mu->Lock(). This annotation may slow down the race detector and hide real
- // races. Normally it is used only when it would be difficult to annotate each
- // of the mutex's critical sections individually using the annotations above.
- // This annotation makes sense only for hybrid race detectors. For pure
- // happens-before detectors this is a no-op. For more details see
- // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid .
+ /* Instruct the tool to create a happens-before arc between mu->Unlock() and
+ mu->Lock(). This annotation may slow down the race detector and hide real
+ races. Normally it is used only when it would be difficult to annotate each
+ of the mutex's critical sections individually using the annotations above.
+ This annotation makes sense only for hybrid race detectors. For pure
+ happens-before detectors this is a no-op. For more details see
+ http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */
#define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \
AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu)
- // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX.
+ /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */
#define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \
AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu)
- // -------------------------------------------------------------
- // Annotations useful when defining memory allocators, or when memory that
- // was protected in one way starts to be protected in another.
+ /* -------------------------------------------------------------
+ Annotations useful when defining memory allocators, or when memory that
+ was protected in one way starts to be protected in another. */
- // Report that a new memory at "address" of size "size" has been allocated.
- // This might be used when the memory has been retrieved from a free list and
- // is about to be reused, or when a the locking discipline for a variable
- // changes.
+ /* Report that a new memory at "address" of size "size" has been allocated.
+ This might be used when the memory has been retrieved from a free list and
+ is about to be reused, or when the locking discipline for a variable
+ changes. */
#define ANNOTATE_NEW_MEMORY(address, size) \
AnnotateNewMemory(__FILE__, __LINE__, address, size)
- // -------------------------------------------------------------
- // Annotations useful when defining FIFO queues that transfer data between
- // threads.
+ /* -------------------------------------------------------------
+ Annotations useful when defining FIFO queues that transfer data between
+ threads. */
- // Report that the producer-consumer queue (such as ProducerConsumerQueue) at
- // address "pcq" has been created. The ANNOTATE_PCQ_* annotations
- // should be used only for FIFO queues. For non-FIFO queues use
- // ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get).
+ /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at
+ address "pcq" has been created. The ANNOTATE_PCQ_* annotations
+ should be used only for FIFO queues. For non-FIFO queues use
+ ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */
#define ANNOTATE_PCQ_CREATE(pcq) \
AnnotatePCQCreate(__FILE__, __LINE__, pcq)
- // Report that the queue at address "pcq" is about to be destroyed.
+ /* Report that the queue at address "pcq" is about to be destroyed. */
#define ANNOTATE_PCQ_DESTROY(pcq) \
AnnotatePCQDestroy(__FILE__, __LINE__, pcq)
- // Report that we are about to put an element into a FIFO queue at address
- // "pcq".
+ /* Report that we are about to put an element into a FIFO queue at address
+ "pcq". */
#define ANNOTATE_PCQ_PUT(pcq) \
AnnotatePCQPut(__FILE__, __LINE__, pcq)
- // Report that we've just got an element from a FIFO queue at address "pcq".
+ /* Report that we've just got an element from a FIFO queue at address "pcq". */
#define ANNOTATE_PCQ_GET(pcq) \
AnnotatePCQGet(__FILE__, __LINE__, pcq)
- // -------------------------------------------------------------
- // Annotations that suppress errors. It is usually better to express the
- // program's synchronization using the other annotations, but these can
- // be used when all else fails.
-
- // Report that we may have a benign race on at "address".
- // Insert at the point where "address" has been allocated, preferably close
- // to the point where the race happens.
- // See also ANNOTATE_BENIGN_RACE_STATIC.
- #define ANNOTATE_BENIGN_RACE(address, description) \
- AnnotateBenignRace(__FILE__, __LINE__, address, description)
-
- // Request the analysis tool to ignore all reads in the current thread
- // until ANNOTATE_IGNORE_READS_END is called.
- // Useful to ignore intentional racey reads, while still checking
- // other reads and all writes.
- // See also ANNOTATE_UNPROTECTED_READ.
+ /* -------------------------------------------------------------
+ Annotations that suppress errors. It is usually better to express the
+ program's synchronization using the other annotations, but these can
+ be used when all else fails. */
+
+ /* Report that we may have a benign race at "pointer", with size
+ "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the
+ point where "pointer" has been allocated, preferably close to the point
+ where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */
+ #define ANNOTATE_BENIGN_RACE(pointer, description) \
+ AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \
+ sizeof(*(pointer)), description)
+
+ /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to
+ the memory range [address, address+size). */
+ #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \
+ AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description)
+
+ /* Request the analysis tool to ignore all reads in the current thread
+ until ANNOTATE_IGNORE_READS_END is called.
+ Useful to ignore intentional racey reads, while still checking
+ other reads and all writes.
+ See also ANNOTATE_UNPROTECTED_READ. */
#define ANNOTATE_IGNORE_READS_BEGIN() \
AnnotateIgnoreReadsBegin(__FILE__, __LINE__)
- // Stop ignoring reads.
+ /* Stop ignoring reads. */
#define ANNOTATE_IGNORE_READS_END() \
AnnotateIgnoreReadsEnd(__FILE__, __LINE__)
- // Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes.
+ /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */
#define ANNOTATE_IGNORE_WRITES_BEGIN() \
AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
- // Stop ignoring writes.
+ /* Stop ignoring writes. */
#define ANNOTATE_IGNORE_WRITES_END() \
AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
- // Start ignoring all memory accesses (reads and writes).
+ /* Start ignoring all memory accesses (reads and writes). */
#define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \
do {\
ANNOTATE_IGNORE_READS_BEGIN();\
ANNOTATE_IGNORE_WRITES_BEGIN();\
}while(0)\
- // Stop ignoring all memory accesses.
+ /* Stop ignoring all memory accesses. */
#define ANNOTATE_IGNORE_READS_AND_WRITES_END() \
do {\
ANNOTATE_IGNORE_WRITES_END();\
ANNOTATE_IGNORE_READS_END();\
}while(0)\
- // -------------------------------------------------------------
- // Annotations useful for debugging.
+ /* Enable (enable!=0) or disable (enable==0) race detection for all threads.
+ This annotation could be useful if you want to skip expensive race analysis
+ during some period of program execution, e.g. during initialization. */
+ #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \
+ AnnotateEnableRaceDetection(__FILE__, __LINE__, enable)
- // Request to trace every access to "address".
+ /* -------------------------------------------------------------
+ Annotations useful for debugging. */
+
+ /* Request to trace every access to "address". */
#define ANNOTATE_TRACE_MEMORY(address) \
AnnotateTraceMemory(__FILE__, __LINE__, address)
- // Report the current thread name to a race detector.
+ /* Report the current thread name to a race detector. */
#define ANNOTATE_THREAD_NAME(name) \
AnnotateThreadName(__FILE__, __LINE__, name)
- // -------------------------------------------------------------
- // Annotations useful when implementing locks. They are not
- // normally needed by modules that merely use locks.
- // The "lock" argument is a pointer to the lock object.
+ /* -------------------------------------------------------------
+ Annotations useful when implementing locks. They are not
+ normally needed by modules that merely use locks.
+ The "lock" argument is a pointer to the lock object. */
- // Report that a lock has been created at address "lock".
+ /* Report that a lock has been created at address "lock". */
#define ANNOTATE_RWLOCK_CREATE(lock) \
AnnotateRWLockCreate(__FILE__, __LINE__, lock)
- // Report that the lock at address "lock" is about to be destroyed.
+ /* Report that the lock at address "lock" is about to be destroyed. */
#define ANNOTATE_RWLOCK_DESTROY(lock) \
AnnotateRWLockDestroy(__FILE__, __LINE__, lock)
- // Report that the lock at address "lock" has been acquired.
- // is_w=1 for writer lock, is_w=0 for reader lock.
+ /* Report that the lock at address "lock" has been acquired.
+ is_w=1 for writer lock, is_w=0 for reader lock. */
#define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \
AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w)
- // Report that the lock at address "lock" is about to be released.
+ /* Report that the lock at address "lock" is about to be released. */
#define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \
AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w)
- // -------------------------------------------------------------
- // Annotations useful for testing race detectors.
+ /* -------------------------------------------------------------
+ Annotations useful when implementing barriers. They are not
+ normally needed by modules that merely use barriers.
+ The "barrier" argument is a pointer to the barrier object. */
+
+ /* Report that the "barrier" has been initialized with initial "count".
+ If 'reinitialization_allowed' is true, initialization is allowed to happen
+ multiple times without calling barrier_destroy(). */
+ #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \
+ AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \
+ reinitialization_allowed)
+
+ /* Report that we are about to enter barrier_wait("barrier"). */
+ #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \
+ AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier)
+
+ /* Report that we just exited barrier_wait("barrier"). */
+ #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \
+ AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier)
+
+ /* Report that the "barrier" has been destroyed. */
+ #define ANNOTATE_BARRIER_DESTROY(barrier) \
+ AnnotateBarrierDestroy(__FILE__, __LINE__, barrier)
+
+ /* -------------------------------------------------------------
+ Annotations useful for testing race detectors. */
- // Report that we expect a race on the variable at "address".
- // Use only in unit tests for a race detector.
+ /* Report that we expect a race on the variable at "address".
+ Use only in unit tests for a race detector. */
#define ANNOTATE_EXPECT_RACE(address, description) \
AnnotateExpectRace(__FILE__, __LINE__, address, description)
- // A no-op. Insert where you like to test the interceptors.
+ /* A no-op. Insert where you like to test the interceptors. */
#define ANNOTATE_NO_OP(arg) \
AnnotateNoOp(__FILE__, __LINE__, arg)
-#else // NDEBUG is defined
-
- #define ANNOTATE_RWLOCK_CREATE(lock) // empty
- #define ANNOTATE_RWLOCK_DESTROY(lock) // empty
- #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) // empty
- #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) // empty
- #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) // empty
- #define ANNOTATE_CONDVAR_WAIT(cv) // empty
- #define ANNOTATE_CONDVAR_SIGNAL(cv) // empty
- #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) // empty
- #define ANNOTATE_HAPPENS_BEFORE(obj) // empty
- #define ANNOTATE_HAPPENS_AFTER(obj) // empty
- #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) // empty
- #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) // empty
- #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) // empty
- #define ANNOTATE_PCQ_CREATE(pcq) // empty
- #define ANNOTATE_PCQ_DESTROY(pcq) // empty
- #define ANNOTATE_PCQ_PUT(pcq) // empty
- #define ANNOTATE_PCQ_GET(pcq) // empty
- #define ANNOTATE_NEW_MEMORY(address, size) // empty
- #define ANNOTATE_EXPECT_RACE(address, description) // empty
- #define ANNOTATE_BENIGN_RACE(address, description) // empty
- #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty
- #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty
- #define ANNOTATE_TRACE_MEMORY(arg) // empty
- #define ANNOTATE_THREAD_NAME(name) // empty
- #define ANNOTATE_IGNORE_READS_BEGIN() // empty
- #define ANNOTATE_IGNORE_READS_END() // empty
- #define ANNOTATE_IGNORE_WRITES_BEGIN() // empty
- #define ANNOTATE_IGNORE_WRITES_END() // empty
- #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() // empty
- #define ANNOTATE_IGNORE_READS_AND_WRITES_END() // empty
- #define ANNOTATE_NO_OP(arg) // empty
-
-#endif // NDEBUG
-
-// Use the macros above rather than using these functions directly.
-extern "C" void AnnotateRWLockCreate(const char *file, int line,
- const volatile void *lock);
-extern "C" void AnnotateRWLockDestroy(const char *file, int line,
- const volatile void *lock);
-extern "C" void AnnotateRWLockAcquired(const char *file, int line,
- const volatile void *lock, long is_w);
-extern "C" void AnnotateRWLockReleased(const char *file, int line,
- const volatile void *lock, long is_w);
-extern "C" void AnnotateCondVarWait(const char *file, int line,
- const volatile void *cv,
- const volatile void *lock);
-extern "C" void AnnotateCondVarSignal(const char *file, int line,
- const volatile void *cv);
-extern "C" void AnnotateCondVarSignalAll(const char *file, int line,
- const volatile void *cv);
-extern "C" void AnnotatePublishMemoryRange(const char *file, int line,
- const volatile void *address,
- long size);
-extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line,
- const volatile void *address,
- long size);
-extern "C" void AnnotatePCQCreate(const char *file, int line,
- const volatile void *pcq);
-extern "C" void AnnotatePCQDestroy(const char *file, int line,
- const volatile void *pcq);
-extern "C" void AnnotatePCQPut(const char *file, int line,
- const volatile void *pcq);
-extern "C" void AnnotatePCQGet(const char *file, int line,
- const volatile void *pcq);
-extern "C" void AnnotateNewMemory(const char *file, int line,
+ /* Force the race detector to flush its state. The actual effect depends on
+ * the implementation of the detector. */
+ #define ANNOTATE_FLUSH_STATE() \
+ AnnotateFlushState(__FILE__, __LINE__)
+
+
+#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
+
+ #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */
+ #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */
+ #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */
+ #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */
+ #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */
+ #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */
+ #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */
+ #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */
+ #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */
+ #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */
+ #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */
+ #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */
+ #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */
+ #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */
+ #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */
+ #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */
+ #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */
+ #define ANNOTATE_PCQ_CREATE(pcq) /* empty */
+ #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */
+ #define ANNOTATE_PCQ_PUT(pcq) /* empty */
+ #define ANNOTATE_PCQ_GET(pcq) /* empty */
+ #define ANNOTATE_NEW_MEMORY(address, size) /* empty */
+ #define ANNOTATE_EXPECT_RACE(address, description) /* empty */
+ #define ANNOTATE_BENIGN_RACE(address, description) /* empty */
+ #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */
+ #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */
+ #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */
+ #define ANNOTATE_TRACE_MEMORY(arg) /* empty */
+ #define ANNOTATE_THREAD_NAME(name) /* empty */
+ #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */
+ #define ANNOTATE_IGNORE_READS_END() /* empty */
+ #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */
+ #define ANNOTATE_IGNORE_WRITES_END() /* empty */
+ #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */
+ #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */
+ #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */
+ #define ANNOTATE_NO_OP(arg) /* empty */
+ #define ANNOTATE_FLUSH_STATE() /* empty */
+
+#endif /* DYNAMIC_ANNOTATIONS_ENABLED */
+
+/* Use the macros above rather than using these functions directly. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+void AnnotateRWLockCreate(const char *file, int line,
+ const volatile void *lock);
+void AnnotateRWLockDestroy(const char *file, int line,
+ const volatile void *lock);
+void AnnotateRWLockAcquired(const char *file, int line,
+ const volatile void *lock, long is_w);
+void AnnotateRWLockReleased(const char *file, int line,
+ const volatile void *lock, long is_w);
+void AnnotateBarrierInit(const char *file, int line,
+ const volatile void *barrier, long count,
+ long reinitialization_allowed);
+void AnnotateBarrierWaitBefore(const char *file, int line,
+ const volatile void *barrier);
+void AnnotateBarrierWaitAfter(const char *file, int line,
+ const volatile void *barrier);
+void AnnotateBarrierDestroy(const char *file, int line,
+ const volatile void *barrier);
+void AnnotateCondVarWait(const char *file, int line,
+ const volatile void *cv,
+ const volatile void *lock);
+void AnnotateCondVarSignal(const char *file, int line,
+ const volatile void *cv);
+void AnnotateCondVarSignalAll(const char *file, int line,
+ const volatile void *cv);
+void AnnotatePublishMemoryRange(const char *file, int line,
+ const volatile void *address,
+ long size);
+void AnnotateUnpublishMemoryRange(const char *file, int line,
const volatile void *address,
long size);
-extern "C" void AnnotateExpectRace(const char *file, int line,
- const volatile void *address,
- const char *description);
-extern "C" void AnnotateBenignRace(const char *file, int line,
- const volatile void *address,
- const char *description);
-extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
- const volatile void *mu);
-extern "C" void AnnotateTraceMemory(const char *file, int line,
- const volatile void *arg);
-extern "C" void AnnotateThreadName(const char *file, int line,
- const char *name);
-extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line);
-extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line);
-extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line);
-extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line);
-extern "C" void AnnotateNoOp(const char *file, int line,
- const volatile void *arg);
-
-#ifndef NDEBUG
-
- // ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads.
- //
- // Instead of doing
- // ANNOTATE_IGNORE_READS_BEGIN();
- // ... = x;
- // ANNOTATE_IGNORE_READS_END();
- // one can use
- // ... = ANNOTATE_UNPROTECTED_READ(x);
+void AnnotatePCQCreate(const char *file, int line,
+ const volatile void *pcq);
+void AnnotatePCQDestroy(const char *file, int line,
+ const volatile void *pcq);
+void AnnotatePCQPut(const char *file, int line,
+ const volatile void *pcq);
+void AnnotatePCQGet(const char *file, int line,
+ const volatile void *pcq);
+void AnnotateNewMemory(const char *file, int line,
+ const volatile void *address,
+ long size);
+void AnnotateExpectRace(const char *file, int line,
+ const volatile void *address,
+ const char *description);
+void AnnotateBenignRace(const char *file, int line,
+ const volatile void *address,
+ const char *description);
+void AnnotateBenignRaceSized(const char *file, int line,
+ const volatile void *address,
+ long size,
+ const char *description);
+void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
+ const volatile void *mu);
+void AnnotateTraceMemory(const char *file, int line,
+ const volatile void *arg);
+void AnnotateThreadName(const char *file, int line,
+ const char *name);
+void AnnotateIgnoreReadsBegin(const char *file, int line);
+void AnnotateIgnoreReadsEnd(const char *file, int line);
+void AnnotateIgnoreWritesBegin(const char *file, int line);
+void AnnotateIgnoreWritesEnd(const char *file, int line);
+void AnnotateEnableRaceDetection(const char *file, int line, int enable);
+void AnnotateNoOp(const char *file, int line,
+ const volatile void *arg);
+void AnnotateFlushState(const char *file, int line);
+
+/* Return non-zero value if running under valgrind.
+
+ If "valgrind.h" is included into dynamic_annotations.c,
+ the regular valgrind mechanism will be used.
+ See http://valgrind.org/docs/manual/manual-core-adv.html about
+ RUNNING_ON_VALGRIND and other valgrind "client requests".
+ The file "valgrind.h" may be obtained by doing
+ svn co svn://svn.valgrind.org/valgrind/trunk/include
+
+ If for some reason you can't use "valgrind.h" or want to fake valgrind,
+ there are two ways to make this function return non-zero:
+ - Use environment variable: export RUNNING_ON_VALGRIND=1
+ - Make your tool intercept the function RunningOnValgrind() and
+ change its return value.
+ */
+int RunningOnValgrind(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus)
+
+ /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads.
+
+ Instead of doing
+ ANNOTATE_IGNORE_READS_BEGIN();
+ ... = x;
+ ANNOTATE_IGNORE_READS_END();
+ one can use
+ ... = ANNOTATE_UNPROTECTED_READ(x); */
template <class T>
- inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x)
- NO_THREAD_SAFETY_ANALYSIS {
+ inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) {
ANNOTATE_IGNORE_READS_BEGIN();
T res = x;
ANNOTATE_IGNORE_READS_END();
return res;
}
-
- // Apply ANNOTATE_BENIGN_RACE to a static variable.
+ /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */
#define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \
namespace { \
class static_var ## _annotator { \
public: \
static_var ## _annotator() { \
- ANNOTATE_BENIGN_RACE(&static_var, \
+ ANNOTATE_BENIGN_RACE_SIZED(&static_var, \
+ sizeof(static_var), \
# static_var ": " description); \
} \
}; \
static static_var ## _annotator the ## static_var ## _annotator;\
}
-#else // !NDEBUG
+#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
#define ANNOTATE_UNPROTECTED_READ(x) (x)
- #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) // empty
-
-#endif // !NDEBUG
-
-// Return non-zero value if running under valgrind.
-extern "C" int RunningOnValgrind();
+ #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */
+#endif /* DYNAMIC_ANNOTATIONS_ENABLED */
-#endif // BASE_DYNAMIC_ANNOTATIONS_H_
+#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */
diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc
index 2bbce54..7ca3953a 100644
--- a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc
+++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc
@@ -210,8 +210,9 @@ static const intptr_t kMagicUnallocated = ~kMagicAllocated;
namespace {
class ArenaLock {
public:
- explicit ArenaLock(LowLevelAlloc::Arena *arena) :
- left_(false), mask_valid_(false), arena_(arena) {
+ explicit ArenaLock(LowLevelAlloc::Arena *arena)
+ EXCLUSIVE_LOCK_FUNCTION(arena->mu)
+ : left_(false), mask_valid_(false), arena_(arena) {
if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) {
// We've decided not to support async-signal-safe arena use until
// there a demonstrated need. Here's how one could do it though
@@ -228,7 +229,7 @@ namespace {
this->arena_->mu.Lock();
}
~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); }
- void Leave() {
+ void Leave() UNLOCK_FUNCTION(arena_->mu) {
this->arena_->mu.Unlock();
#if 0
if (this->mask_valid_) {
diff --git a/third_party/tcmalloc/chromium/src/base/vdso_support.cc b/third_party/tcmalloc/chromium/src/base/vdso_support.cc
index ddaca37..fce7c2c 100644
--- a/third_party/tcmalloc/chromium/src/base/vdso_support.cc
+++ b/third_party/tcmalloc/chromium/src/base/vdso_support.cc
@@ -42,8 +42,8 @@
#include <fcntl.h>
#include "base/atomicops.h" // for MemoryBarrier
-#include "base/logging.h"
#include "base/linux_syscall_support.h"
+#include "base/logging.h"
#include "base/dynamic_annotations.h"
#include "base/basictypes.h" // for COMPILE_ASSERT
diff --git a/third_party/tcmalloc/chromium/src/central_freelist.cc b/third_party/tcmalloc/chromium/src/central_freelist.cc
index 674ff9b..5b7dfbb 100644
--- a/third_party/tcmalloc/chromium/src/central_freelist.cc
+++ b/third_party/tcmalloc/chromium/src/central_freelist.cc
@@ -266,8 +266,7 @@ void CentralFreeList::Populate() {
Span* span;
{
SpinLockHolder h(Static::pageheap_lock());
- span = Static::pageheap()->New(npages);
- if (span) Static::pageheap()->RegisterSizeClass(span, size_class_);
+ span = Static::pageheap()->New(npages, size_class_, kPageSize);
}
if (span == NULL) {
MESSAGE("tcmalloc: allocation failed", npages << kPageShift);
@@ -275,12 +274,6 @@ void CentralFreeList::Populate() {
return;
}
ASSERT(span->length == npages);
- // Cache sizeclass info eagerly. Locking is not necessary.
- // (Instead of being eager, we could just replace any stale info
- // about this span, but that seems to be no better in practice.)
- for (int i = 0; i < npages; i++) {
- Static::pageheap()->CacheSizeClass(span->start + i, size_class_);
- }
// Split the block into pieces and add to the free-list
// TODO: coloring of objects to avoid cache conflicts?
diff --git a/third_party/tcmalloc/chromium/src/common.h b/third_party/tcmalloc/chromium/src/common.h
index 53a0a0b..f9557c9 100644
--- a/third_party/tcmalloc/chromium/src/common.h
+++ b/third_party/tcmalloc/chromium/src/common.h
@@ -62,6 +62,7 @@ static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 8u * kPageSize;
static const size_t kAlignment = 8;
static const size_t kNumClasses = 61;
+static const size_t kLargeSizeClass = 0;
// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list. We
diff --git a/third_party/tcmalloc/chromium/src/config.h.in b/third_party/tcmalloc/chromium/src/config.h.in
index 1ad2642..49bbf0d 100644
--- a/third_party/tcmalloc/chromium/src/config.h.in
+++ b/third_party/tcmalloc/chromium/src/config.h.in
@@ -132,7 +132,7 @@
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
-/* Define to 1 if you have the <sys/ucontext.h> header file. */
+/* <sys/ucontext.h> is broken on redhat 7 */
#undef HAVE_SYS_UCONTEXT_H
/* Define to 1 if you have the <sys/wait.h> header file. */
@@ -150,6 +150,9 @@
/* Define to 1 if you have the <unwind.h> header file. */
#undef HAVE_UNWIND_H
+/* Define to 1 if you have the <valgrind.h> header file. */
+#undef HAVE_VALGRIND_H
+
/* define if your compiler has __attribute__ */
#undef HAVE___ATTRIBUTE__
diff --git a/third_party/tcmalloc/chromium/src/config_linux.h b/third_party/tcmalloc/chromium/src/config_linux.h
index 398f303..9786b3e 100644
--- a/third_party/tcmalloc/chromium/src/config_linux.h
+++ b/third_party/tcmalloc/chromium/src/config_linux.h
@@ -136,7 +136,7 @@
/* Define to 1 if compiler supports __thread */
#define HAVE_TLS 1
-/* Define to 1 if you have the <ucontext.h> header file. */
+/* <sys/ucontext.h> is broken on redhat 7 */
#define HAVE_UCONTEXT_H 1
/* Define to 1 if you have the <unistd.h> header file. */
@@ -145,6 +145,9 @@
/* Define to 1 if you have the <unwind.h> header file. */
#define HAVE_UNWIND_H 1
+/* Define to 1 if you have the <valgrind.h> header file. */
+#undef HAVE_VALGRIND_H
+
/* define if your compiler has __attribute__ */
#define HAVE___ATTRIBUTE__ 1
diff --git a/third_party/tcmalloc/chromium/src/config_win.h b/third_party/tcmalloc/chromium/src/config_win.h
index 30daf4f..236bd6b 100644
--- a/third_party/tcmalloc/chromium/src/config_win.h
+++ b/third_party/tcmalloc/chromium/src/config_win.h
@@ -255,10 +255,12 @@
// ---------------------------------------------------------------------
// Extra stuff not found in config.h.in
-// This must be defined before the windows.h is included. It's needed
-// for mutex.h, to give access to the TryLock method.
+// This must be defined before the windows.h is included. We need at
+// least 0x0400 for mutex.h to have access to TryLock, and at least
+// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx.
+// (This latter is an optimization we could take out if need be.)
#ifndef _WIN32_WINNT
-# define _WIN32_WINNT 0x0400
+# define _WIN32_WINNT 0x0501
#endif
// We want to make sure not to ever try to #include heap-checker.h
diff --git a/third_party/tcmalloc/chromium/src/debugallocation.cc b/third_party/tcmalloc/chromium/src/debugallocation.cc
index 1a9ddcb..949fbe9 100644
--- a/third_party/tcmalloc/chromium/src/debugallocation.cc
+++ b/third_party/tcmalloc/chromium/src/debugallocation.cc
@@ -1010,7 +1010,7 @@ static void *MemalignOverride(size_t align, size_t size,
const void *caller) __THROW
ATTRIBUTE_SECTION(google_malloc);
-void* operator new(size_t size)
+void* operator new(size_t size) throw (std::bad_alloc)
ATTRIBUTE_SECTION(google_malloc);
void* operator new(size_t size, const std::nothrow_t&) __THROW
ATTRIBUTE_SECTION(google_malloc);
@@ -1018,7 +1018,7 @@ void operator delete(void* p) __THROW
ATTRIBUTE_SECTION(google_malloc);
void operator delete(void* p, const std::nothrow_t&) __THROW
ATTRIBUTE_SECTION(google_malloc);
-void* operator new[](size_t size)
+void* operator new[](size_t size) throw (std::bad_alloc)
ATTRIBUTE_SECTION(google_malloc);
void* operator new[](size_t size, const std::nothrow_t&) __THROW
ATTRIBUTE_SECTION(google_malloc);
@@ -1176,12 +1176,12 @@ extern "C" void* pvalloc(size_t size) __THROW {
return p;
}
-extern "C" int mallopt(int cmd, int value) {
+extern "C" int mallopt(int cmd, int value) __THROW {
return BASE_MALLOPT(cmd, value);
}
#ifdef HAVE_STRUCT_MALLINFO
-extern "C" struct mallinfo mallinfo(void) {
+extern "C" struct mallinfo mallinfo(void) __THROW {
return BASE_MALLINFO();
}
#endif
@@ -1239,7 +1239,7 @@ inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) {
}
}
-void* operator new(size_t size) {
+void* operator new(size_t size) throw (std::bad_alloc) {
void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, false);
MallocHook::InvokeNewHook(ptr, size);
if (ptr == NULL) {
@@ -1259,7 +1259,8 @@ void operator delete(void* ptr) __THROW {
DebugDeallocate(ptr, MallocBlock::kNewType);
}
-// Compilers use this, though I can't see how it differs from normal delete.
+// Some STL implementations explicitly invoke this.
+// It is completely equivalent to a normal delete (delete never throws).
void operator delete(void* ptr, const std::nothrow_t&) __THROW {
MallocHook::InvokeDeleteHook(ptr);
DebugDeallocate(ptr, MallocBlock::kNewType);
@@ -1269,7 +1270,7 @@ void operator delete(void* ptr, const std::nothrow_t&) __THROW {
// Alloc/free stuff for debug operator new[] & friends
-void* operator new[](size_t size) {
+void* operator new[](size_t size) throw (std::bad_alloc) {
void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, false);
MallocHook::InvokeNewHook(ptr, size);
if (ptr == NULL) {
@@ -1289,7 +1290,8 @@ void operator delete[](void* ptr) __THROW {
DebugDeallocate(ptr, MallocBlock::kArrayNewType);
}
-// Compilers use this, though I can't see how it differs from normal delete.
+// Some STL implementations explicitly invoke this.
+// It is completely equivalent to a normal delete (delete never throws).
void operator delete[](void* ptr, const std::nothrow_t&) __THROW {
MallocHook::InvokeDeleteHook(ptr);
DebugDeallocate(ptr, MallocBlock::kArrayNewType);
@@ -1359,17 +1361,22 @@ class DebugMallocImplementation : public ParentImplementation {
static DebugMallocImplementation debug_malloc_implementation;
REGISTER_MODULE_INITIALIZER(debugallocation, {
- MallocExtension::Register(&debug_malloc_implementation);
-
- // When the program exits, check all blocks still in the free queue for
- // corruption.
- atexit(DanglingWriteChecker);
+ // Either we or valgrind will control memory management. We
+ // register our extension if we're the winner.
+ if (RunningOnValgrind()) {
+ // Let Valgrind use its own malloc (so don't register our extension).
+ } else {
+ MallocExtension::Register(&debug_malloc_implementation);
+ // When the program exits, check all blocks still in the free
+ // queue for corruption.
+ atexit(DanglingWriteChecker);
+ }
});
#ifdef TCMALLOC_FOR_DEBUGALLOCATION
// Redefine malloc_stats to use tcmalloc's implementation:
-extern "C" void malloc_stats(void) {
+extern "C" void malloc_stats(void) __THROW {
do_malloc_stats();
}
diff --git a/third_party/tcmalloc/chromium/src/google/heap-profiler.h b/third_party/tcmalloc/chromium/src/google/heap-profiler.h
index 5efaf64..57cb97a 100644
--- a/third_party/tcmalloc/chromium/src/google/heap-profiler.h
+++ b/third_party/tcmalloc/chromium/src/google/heap-profiler.h
@@ -71,12 +71,13 @@ extern "C" {
*/
PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix);
-/* Returns true if we are currently profiling the heap. This is true
+/* Returns non-zero if we are currently profiling the heap. (Returns
+ * an int rather than a bool so it's usable from C.) This is true
* between calls to HeapProfilerStart() and HeapProfilerStop(), and
* also if the program has been run with HEAPPROFILER, or some other
* way to turn on whole-program profiling.
*/
-bool IsHeapProfilerRunning();
+int IsHeapProfilerRunning();
/* Stop heap profiling. Can be restarted again with HeapProfilerStart(),
* but the currently accumulated profiling information will be cleared.
diff --git a/third_party/tcmalloc/chromium/src/google/profiler.h b/third_party/tcmalloc/chromium/src/google/profiler.h
index 74b936f..a6883f4 100644
--- a/third_party/tcmalloc/chromium/src/google/profiler.h
+++ b/third_party/tcmalloc/chromium/src/google/profiler.h
@@ -108,13 +108,15 @@ struct ProfilerOptions {
void *filter_in_thread_arg;
};
-/* Start profiling and write profile info into fname.
+/* Start profiling and write profile info into fname, discarding any
+ * existing profiling data in that file.
*
* This is equivalent to calling ProfilerStartWithOptions(fname, NULL).
*/
PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname);
-/* Start profiling and write profile into fname.
+/* Start profiling and write profile into fname, discarding any
+ * existing profiling data in that file.
*
* The profiler is configured using the options given by 'options'.
* Options which are not specified are given default values.
diff --git a/third_party/tcmalloc/chromium/src/google/stacktrace.h b/third_party/tcmalloc/chromium/src/google/stacktrace.h
index 8188ce3..fd186d6 100644
--- a/third_party/tcmalloc/chromium/src/google/stacktrace.h
+++ b/third_party/tcmalloc/chromium/src/google/stacktrace.h
@@ -49,23 +49,23 @@
// Skips the most recent "skip_count" stack frames (also skips the
// frame generated for the "GetStackFrames" routine itself), and then
// records the pc values for up to the next "max_depth" frames in
-// "pcs", and the corresponding stack frame sizes in "sizes". Returns
-// the number of values recorded in "pcs"/"sizes".
+// "result", and the corresponding stack frame sizes in "sizes".
+// Returns the number of values recorded in "result"/"sizes".
//
// Example:
// main() { foo(); }
// foo() { bar(); }
// bar() {
-// void* pcs[10];
+// void* result[10];
// int sizes[10];
-// int depth = GetStackFrames(pcs, sizes, 10, 1);
+// int depth = GetStackFrames(result, sizes, 10, 1);
// }
//
// The GetStackFrames call will skip the frame for "bar". It will
// return 2 and will produce pc values that map to the following
// procedures:
-// pcs[0] foo
-// pcs[1] main
+// result[0] foo
+// result[1] main
// (Actually, there may be a few more entries after "main" to account for
// startup procedures.)
// And corresponding stack frame sizes will also be recorded:
@@ -76,15 +76,15 @@
// be identified.
//
// This routine may return fewer stack frame entries than are
-// available. Also note that "pcs" and "sizes" must both be non-NULL.
-extern PERFTOOLS_DLL_DECL int GetStackFrames(void** pcs, int* sizes, int max_depth,
+// available. Also note that "result" and "sizes" must both be non-NULL.
+extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth,
int skip_count);
// Same as above, but to be used from a signal handler. The "uc" parameter
// should be the pointer to ucontext_t which was passed as the 3rd parameter
// to sa_sigaction signal handler. It may help the unwinder to get a
// better stack trace under certain conditions. The "uc" may safely be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** pcs, int* sizes, int max_depth,
+extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth,
int skip_count, const void *uc);
// This is similar to the GetStackFrames routine, except that it returns
diff --git a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in
index e5c873d..fbb70ab 100644
--- a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in
+++ b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in
@@ -60,7 +60,8 @@
#endif
#ifdef __cplusplus
-#include <new> // for nothrow_t
+#include <new> // for std::nothrow_t
+
extern "C" {
#endif
// Returns a human-readable version string. If major, minor,
@@ -91,16 +92,15 @@ extern "C" {
#ifdef __cplusplus
PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW;
PERFTOOLS_DLL_DECL void* tc_new(size_t size);
- PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW;
- PERFTOOLS_DLL_DECL void* tc_newarray(size_t size);
- PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
-
PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size,
const std::nothrow_t&) __THROW;
- PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size,
- const std::nothrow_t&) __THROW;
+ PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW;
PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p,
const std::nothrow_t&) __THROW;
+ PERFTOOLS_DLL_DECL void* tc_newarray(size_t size);
+ PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size,
+ const std::nothrow_t&) __THROW;
+ PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p,
const std::nothrow_t&) __THROW;
}
diff --git a/third_party/tcmalloc/chromium/src/heap-checker.cc b/third_party/tcmalloc/chromium/src/heap-checker.cc
index 82a7adb..2779c97 100644
--- a/third_party/tcmalloc/chromium/src/heap-checker.cc
+++ b/third_party/tcmalloc/chromium/src/heap-checker.cc
@@ -159,6 +159,23 @@ DEFINE_bool(heap_check_test_pointer_alignment,
"Set to true to check if the found leak can be due to "
"use of unaligned pointers");
+// Alignment at which all pointers in memory are supposed to be located;
+// use 1 if any alignment is ok.
+// heap_check_test_pointer_alignment flag guides if we try the value of 1.
+// The larger it can be, the lesser is the chance of missing real leaks.
+//
+// sizeof(void*) is correct. However gold (the new linker) has a bug where it
+// sometimes places global pointers on 4-byte boundaries, even when pointers
+// are 8 bytes long. While we are fixing the linker, degrade to 4-byte
+// alignment on all targets. http://b/1226481
+//
+static const size_t kPointerSourceAlignment = sizeof(void*);
+DEFINE_int32(heap_check_pointer_source_alignment,
+ EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT",
+ kPointerSourceAlignment),
+ "Alignment at which all pointers in memory are supposed to be "
+ "located. Use 1 if any alignment is ok.");
+
// A reasonable default to handle pointers inside of typical class objects:
// Too low and we won't be able to traverse pointers to normally-used
// nested objects and base parts of multiple-inherited objects.
@@ -245,13 +262,6 @@ static bool constructor_heap_profiling = false;
static const int heap_checker_info_level = 0;
//----------------------------------------------------------------------
-
-// Alignment at which all pointers in memory are supposed to be located;
-// use 1 if any alignment is ok.
-// heap_check_test_pointer_alignment flag guides if we try the value of 1.
-// The larger it can be, the lesser is the chance of missing real leaks.
-static const size_t kPointerSourceAlignment = sizeof(void*);
-
// Cancel our InitialMallocHook_* if present.
static void CancelInitialMallocHooks(); // defined below
@@ -484,7 +494,7 @@ HeapLeakChecker::Disabler::Disabler() {
// in a thread-safe manner.
int counter = get_thread_disable_counter();
set_thread_disable_counter(counter + 1);
- RAW_VLOG(1, "Increasing thread disable counter to %d", counter + 1);
+ RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1);
}
HeapLeakChecker::Disabler::~Disabler() {
@@ -492,7 +502,7 @@ HeapLeakChecker::Disabler::~Disabler() {
RAW_DCHECK(counter > 0, "");
if (counter > 0) {
set_thread_disable_counter(counter - 1);
- RAW_VLOG(1, "Decreasing thread disable counter to %d", counter);
+ RAW_VLOG(10, "Decreasing thread disable counter to %d", counter);
} else {
RAW_VLOG(0, "Thread disable counter underflow : %d", counter);
}
@@ -525,7 +535,7 @@ static void NewHook(const void* ptr, size_t size) {
if (ptr != NULL) {
const int counter = get_thread_disable_counter();
const bool ignore = (counter > 0);
- RAW_VLOG(7, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size,
+ RAW_VLOG(16, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size,
int(counter));
{ SpinLockHolder l(&heap_checker_lock);
if (size > max_heap_object_size) max_heap_object_size = size;
@@ -540,17 +550,17 @@ static void NewHook(const void* ptr, size_t size) {
}
}
}
- RAW_VLOG(8, "Alloc Recorded: %p of %"PRIuS"", ptr, size);
+ RAW_VLOG(17, "Alloc Recorded: %p of %"PRIuS"", ptr, size);
}
}
static void DeleteHook(const void* ptr) {
if (ptr != NULL) {
- RAW_VLOG(7, "Recording Free %p", ptr);
+ RAW_VLOG(16, "Recording Free %p", ptr);
{ SpinLockHolder l(&heap_checker_lock);
if (heap_checker_on) heap_profile->RecordFree(ptr);
}
- RAW_VLOG(8, "Free Recorded: %p", ptr);
+ RAW_VLOG(17, "Free Recorded: %p", ptr);
}
}
@@ -584,7 +594,7 @@ static StackDirection stack_direction = UNKNOWN_DIRECTION;
static void RegisterStackLocked(const void* top_ptr) {
RAW_DCHECK(heap_checker_lock.IsHeld(), "");
RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
- RAW_VLOG(1, "Thread stack at %p", top_ptr);
+ RAW_VLOG(10, "Thread stack at %p", top_ptr);
uintptr_t top = AsInt(top_ptr);
stack_tops->insert(top); // add for later use
@@ -598,12 +608,12 @@ static void RegisterStackLocked(const void* top_ptr) {
if (MemoryRegionMap::FindAndMarkStackRegion(top, &region)) {
// Make the proper portion of the stack live:
if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
- RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes",
+ RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes",
top_ptr, region.end_addr - top);
live_objects->push_back(AllocObject(top_ptr, region.end_addr - top,
THREAD_DATA));
} else { // GROWS_TOWARDS_HIGH_ADDRESSES
- RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes",
+ RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes",
AsPtr(region.start_addr),
top - region.start_addr);
live_objects->push_back(AllocObject(AsPtr(region.start_addr),
@@ -619,7 +629,7 @@ static void RegisterStackLocked(const void* top_ptr) {
uintptr_t start = AsInt(span->ptr);
uintptr_t end = start + span->size;
if (start <= top && top < end) {
- RAW_VLOG(2, "Stack at %p is inside /proc/self/maps chunk %p..%p",
+ RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p",
top_ptr, AsPtr(start), AsPtr(end));
// Shrink start..end region by chopping away the memory regions in
// MemoryRegionMap that land in it to undo merging of regions
@@ -640,17 +650,17 @@ static void RegisterStackLocked(const void* top_ptr) {
}
}
if (stack_start != start || stack_end != end) {
- RAW_VLOG(2, "Stack at %p is actually inside memory chunk %p..%p",
+ RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p",
top_ptr, AsPtr(stack_start), AsPtr(stack_end));
}
// Make the proper portion of the stack live:
if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
- RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes",
+ RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes",
top_ptr, stack_end - top);
live_objects->push_back(
AllocObject(top_ptr, stack_end - top, THREAD_DATA));
} else { // GROWS_TOWARDS_HIGH_ADDRESSES
- RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes",
+ RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes",
AsPtr(stack_start), top - stack_start);
live_objects->push_back(
AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA));
@@ -723,14 +733,14 @@ static void MakeDisabledLiveCallbackLocked(
// and the rest of the region where the stack lives can well
// contain outdated stack variables which are not live anymore,
// hence should not be treated as such.
- RAW_VLOG(2, "Not %s-disabling %"PRIuS" bytes at %p"
+ RAW_VLOG(11, "Not %s-disabling %"PRIuS" bytes at %p"
": have stack inside: %p",
(stack_disable ? "stack" : "range"),
info.object_size, ptr, AsPtr(*iter));
return;
}
}
- RAW_VLOG(2, "%s-disabling %"PRIuS" bytes at %p",
+ RAW_VLOG(11, "%s-disabling %"PRIuS" bytes at %p",
(stack_disable ? "Stack" : "Range"), info.object_size, ptr);
live_objects->push_back(AllocObject(ptr, info.object_size,
MUST_BE_ON_HEAP));
@@ -755,7 +765,7 @@ static void RecordGlobalDataLocked(uintptr_t start_address,
// Ignore non-writeable regions.
if (strchr(permissions, 'w') == NULL) return;
if (filename == NULL || *filename == '\0') filename = "UNNAMED";
- RAW_VLOG(2, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR,
+ RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR,
filename, start_address, end_address);
(*library_live_objects)[filename].
push_back(AllocObject(AsPtr(start_address),
@@ -814,12 +824,12 @@ void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library,
// does not call user code.
}
if (depth) {
- RAW_VLOG(1, "Disabling allocations from %s at depth %d:", library, depth);
+ RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth);
DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth);
if (IsLibraryNamed(library, "/libpthread") ||
IsLibraryNamed(library, "/libdl") ||
IsLibraryNamed(library, "/ld")) {
- RAW_VLOG(1, "Global memory regions made by %s will be live data",
+ RAW_VLOG(10, "Global memory regions made by %s will be live data",
library);
if (global_region_caller_ranges == NULL) {
global_region_caller_ranges =
@@ -936,7 +946,7 @@ static enum {
va_list /*ap*/) {
RAW_DCHECK(heap_checker_lock.IsHeld(), "");
thread_listing_status = CALLBACK_STARTED;
- RAW_VLOG(2, "Found %d threads (from pid %d)", num_threads, getpid());
+ RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid());
if (FLAGS_heap_check_ignore_global_live) {
UseProcMapsLocked(RECORD_GLOBAL_DATA);
@@ -951,7 +961,7 @@ static enum {
// the leak checking thread itself is handled
// specially via self_thread_stack, not here:
if (thread_pids[i] == self_thread_pid) continue;
- RAW_VLOG(2, "Handling thread with pid %d", thread_pids[i]);
+ RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]);
#if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER)
i386_regs thread_regs;
#define sys_ptrace(r, p, a, d) syscall(SYS_ptrace, (r), (p), (a), (d))
@@ -967,7 +977,7 @@ static enum {
// register pointers still being in the registers and not on the stack):
for (void** p = reinterpret_cast<void**>(&thread_regs);
p < reinterpret_cast<void**>(&thread_regs + 1); ++p) {
- RAW_VLOG(3, "Thread register %p", *p);
+ RAW_VLOG(12, "Thread register %p", *p);
thread_registers.push_back(*p);
}
} else {
@@ -982,7 +992,7 @@ static enum {
if (thread_registers.size()) {
// Make thread registers be live heap data sources.
// we rely here on the fact that vector is in one memory chunk:
- RAW_VLOG(2, "Live registers at %p of %"PRIuS" bytes",
+ RAW_VLOG(11, "Live registers at %p of %"PRIuS" bytes",
&thread_registers[0], thread_registers.size() * sizeof(void*));
live_objects->push_back(AllocObject(&thread_registers[0],
thread_registers.size() * sizeof(void*),
@@ -1005,7 +1015,7 @@ static const void* self_thread_stack_top;
void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() {
RAW_DCHECK(heap_checker_lock.IsHeld(), "");
RAW_DCHECK(MemoryRegionMap::LockIsHeld(), "");
- RAW_VLOG(2, "Handling self thread with pid %d", self_thread_pid);
+ RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid);
// Register our own stack:
// Important that all stack ranges (including the one here)
@@ -1019,7 +1029,7 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() {
for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin();
object != ignored_objects->end(); ++object) {
const void* ptr = AsPtr(object->first);
- RAW_VLOG(2, "Ignored live object at %p of %"PRIuS" bytes",
+ RAW_VLOG(11, "Ignored live object at %p of %"PRIuS" bytes",
ptr, object->second);
live_objects->
push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP));
@@ -1132,10 +1142,10 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() {
}
}
// Now get and use live_objects from the final version of l->second:
- if (VLOG_IS_ON(2)) {
+ if (VLOG_IS_ON(11)) {
for (LiveObjectsStack::const_iterator i = l->second.begin();
i != l->second.end(); ++i) {
- RAW_VLOG(2, "Library live region at %p of %"PRIuPTR" bytes",
+ RAW_VLOG(11, "Library live region at %p of %"PRIuPTR" bytes",
i->ptr, i->size);
}
}
@@ -1240,7 +1250,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) {
RAW_LOG(ERROR, "Thread stacks not found for %d threads. "
"Will likely report false leak positives.", r);
} else {
- RAW_VLOG(2, "Thread stacks appear to be found for all threads");
+ RAW_VLOG(11, "Thread stacks appear to be found for all threads");
}
} else {
RAW_LOG(WARNING, "Not looking for thread stacks; "
@@ -1256,7 +1266,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) {
IgnoreNonThreadLiveObjectsLocked();
}
if (live_objects_total) {
- RAW_VLOG(1, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes",
+ RAW_VLOG(10, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes",
live_objects_total, live_bytes_total);
}
// Free these: we made them here and heap_profile never saw them
@@ -1266,7 +1276,8 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) {
}
// Alignment at which we should consider pointer positions
-// in IgnoreLiveObjectsLocked. Use 1 if any alignment is ok.
+// in IgnoreLiveObjectsLocked. Will normally use the value of
+// FLAGS_heap_check_pointer_source_alignment.
static size_t pointer_source_alignment = kPointerSourceAlignment;
// Global lock for HeapLeakChecker::DoNoLeaks
// to protect pointer_source_alignment.
@@ -1314,7 +1325,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED);
live_object_count += 1;
live_byte_count += size;
}
- RAW_VLOG(4, "Looking for heap pointers in %p of %"PRIuS" bytes",
+ RAW_VLOG(13, "Looking for heap pointers in %p of %"PRIuS" bytes",
object, size);
const char* const whole_object = object;
size_t const whole_size = size;
@@ -1351,7 +1362,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED);
if (can_be_on_heap) {
const void* ptr = reinterpret_cast<const void*>(addr);
// Too expensive (inner loop): manually uncomment when debugging:
- // RAW_VLOG(8, "Trying pointer to %p at %p", ptr, object);
+ // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object);
size_t object_size;
if (HaveOnHeapLocked(&ptr, &object_size) &&
heap_profile->MarkAsLive(ptr)) {
@@ -1360,15 +1371,15 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED);
// a heap object which is in fact leaked.
// I.e. in very rare and probably not repeatable/lasting cases
// we might miss some real heap memory leaks.
- RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p "
+ RAW_VLOG(14, "Found pointer to %p of %"PRIuS" bytes at %p "
"inside %p of size %"PRIuS"",
ptr, object_size, object, whole_object, whole_size);
- if (VLOG_IS_ON(6)) {
+ if (VLOG_IS_ON(15)) {
// log call stacks to help debug how come something is not a leak
HeapProfileTable::AllocInfo alloc;
- bool r = heap_profile->FindAllocDetails(ptr, &alloc);
- r = r; // suppress compiler warning in non-debug mode
- RAW_DCHECK(r, ""); // sanity
+ if (!heap_profile->FindAllocDetails(ptr, &alloc)) {
+ RAW_LOG(FATAL, "FindAllocDetails failed on ptr %p", ptr);
+ }
RAW_LOG(INFO, "New live %p object's alloc stack:", ptr);
for (int i = 0; i < alloc.stack_depth; ++i) {
RAW_LOG(INFO, " @ %p", alloc.call_stack[i]);
@@ -1386,7 +1397,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED);
live_objects_total += live_object_count;
live_bytes_total += live_byte_count;
if (live_object_count) {
- RAW_VLOG(1, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s",
+ RAW_VLOG(10, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s",
live_object_count, live_byte_count, name, name2);
}
}
@@ -1408,7 +1419,7 @@ void HeapLeakChecker::IgnoreObject(const void* ptr) {
if (!HaveOnHeapLocked(&ptr, &object_size)) {
RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr);
} else {
- RAW_VLOG(1, "Going to ignore live object at %p of %"PRIuS" bytes",
+ RAW_VLOG(10, "Going to ignore live object at %p of %"PRIuS" bytes",
ptr, object_size);
if (ignored_objects == NULL) {
ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap)))
@@ -1434,7 +1445,7 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) {
if (object != ignored_objects->end() && object_size == object->second) {
ignored_objects->erase(object);
found = true;
- RAW_VLOG(1, "Now not going to ignore live object "
+ RAW_VLOG(10, "Now not going to ignore live object "
"at %p of %"PRIuS" bytes", ptr, object_size);
}
}
@@ -1483,7 +1494,7 @@ void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) {
const HeapProfileTable::Stats& t = heap_profile->total();
const size_t start_inuse_bytes = t.alloc_size - t.free_size;
const size_t start_inuse_allocs = t.allocs - t.frees;
- RAW_VLOG(1, "Start check \"%s\" profile: %"PRIuS" bytes "
+ RAW_VLOG(10, "Start check \"%s\" profile: %"PRIuS" bytes "
"in %"PRIuS" objects",
name_, start_inuse_bytes, start_inuse_allocs);
} else {
@@ -1612,7 +1623,7 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) {
{
// Heap activity in other threads is paused during this function
// (i.e. until we got all profile difference info).
- SpinLockHolder l(&heap_checker_lock);
+ SpinLockHolder hl(&heap_checker_lock);
if (heap_checker_on == false) {
if (name_ != NULL) { // leak checking enabled when created the checker
RAW_LOG(WARNING, "Heap leak checker got turned off after checker "
@@ -1649,6 +1660,8 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) {
// Make the heap profile, other threads are locked out.
HeapProfileTable::Snapshot* base =
reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_);
+ RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, "");
+ pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment;
IgnoreAllLiveObjectsLocked(&a_local_var);
leaks = heap_profile->NonLiveSnapshot(base);
@@ -1668,23 +1681,28 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) {
initial_allocs, Allocator::alloc_count());
}
} else if (FLAGS_heap_check_test_pointer_alignment) {
- // Try with reduced pointer aligment
- pointer_source_alignment = 1;
- IgnoreAllLiveObjectsLocked(&a_local_var);
- HeapProfileTable::Snapshot* leaks_wo_align =
- heap_profile->NonLiveSnapshot(base);
- pointer_source_alignment = kPointerSourceAlignment;
- if (leaks_wo_align->Empty()) {
- RAW_LOG(WARNING, "Found no leaks without pointer alignment: "
- "something might be placing pointers at "
- "unaligned addresses! This needs to be fixed.");
+ if (pointer_source_alignment == 1) {
+ RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: "
+ "--heap_check_pointer_source_alignment was already set to 1");
} else {
- RAW_LOG(INFO, "Found leaks without pointer alignment as well: "
- "unaligned pointers must not be the cause of leaks.");
- RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help "
- "to diagnose the leaks.");
+ // Try with reduced pointer alignment
+ pointer_source_alignment = 1;
+ IgnoreAllLiveObjectsLocked(&a_local_var);
+ HeapProfileTable::Snapshot* leaks_wo_align =
+ heap_profile->NonLiveSnapshot(base);
+ pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment;
+ if (leaks_wo_align->Empty()) {
+ RAW_LOG(WARNING, "Found no leaks without pointer alignment: "
+ "something might be placing pointers at "
+ "unaligned addresses! This needs to be fixed.");
+ } else {
+ RAW_LOG(INFO, "Found leaks without pointer alignment as well: "
+ "unaligned pointers must not be the cause of leaks.");
+ RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help "
+ "to diagnose the leaks.");
+ }
+ heap_profile->ReleaseSnapshot(leaks_wo_align);
}
- heap_profile->ReleaseSnapshot(leaks_wo_align);
}
if (leaks != NULL) {
@@ -1741,7 +1759,7 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) {
SuggestPprofCommand(pprof_file);
{
- SpinLockHolder l(&heap_checker_lock);
+ SpinLockHolder hl(&heap_checker_lock);
heap_profile->ReleaseSnapshot(leaks);
Allocator::Free(pprof_file);
}
@@ -1874,6 +1892,7 @@ static bool internal_init_start_has_run = false;
}
// Set all flags
+ RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, "");
if (FLAGS_heap_check == "minimal") {
// The least we can check.
FLAGS_heap_check_before_constructors = false; // from after main
@@ -2043,7 +2062,7 @@ bool HeapLeakChecker::NoGlobalLeaks() {
// we never delete or change main_heap_checker once it's set:
HeapLeakChecker* main_hc = GlobalChecker();
if (main_hc) {
- RAW_VLOG(1, "Checking for whole-program memory leaks");
+ RAW_VLOG(10, "Checking for whole-program memory leaks");
// The program is over, so it's safe to symbolize addresses (which
// requires a fork) because no serious work is expected to be done
// after this. Symbolizing is really useful -- knowing what
@@ -2165,7 +2184,7 @@ void HeapLeakChecker::BeforeConstructorsLocked() {
RAW_CHECK(heap_profile == NULL, "");
heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable)))
HeapProfileTable(&Allocator::Allocate, &Allocator::Free);
- RAW_VLOG(1, "Starting tracking the heap");
+ RAW_VLOG(10, "Starting tracking the heap");
heap_checker_on = true;
}
@@ -2329,7 +2348,7 @@ void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address,
value.start_address = AsInt(start_address);
value.max_depth = max_depth;
if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) {
- RAW_VLOG(1, "Disabling leak checking in stack traces "
+ RAW_VLOG(10, "Disabling leak checking in stack traces "
"under frame addresses between %p..%p",
start_address, end_address);
} else { // check that this is just a verbatim repetition
@@ -2352,7 +2371,7 @@ inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr,
const uintptr_t addr = AsInt(*ptr);
if (heap_profile->FindInsideAlloc(
*ptr, max_heap_object_size, ptr, object_size)) {
- RAW_VLOG(7, "Got pointer into %p at +%"PRIuPTR" offset",
+ RAW_VLOG(16, "Got pointer into %p at +%"PRIuPTR" offset",
*ptr, addr - AsInt(*ptr));
return true;
}
diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.cc b/third_party/tcmalloc/chromium/src/heap-profile-table.cc
index 66e4f20..ecaf75f 100644
--- a/third_party/tcmalloc/chromium/src/heap-profile-table.cc
+++ b/third_party/tcmalloc/chromium/src/heap-profile-table.cc
@@ -99,7 +99,7 @@ const char HeapProfileTable::kFileExt[] = ".heap";
//----------------------------------------------------------------------
static const int kHashTableSize = 179999; // Size for table_.
-/*static*/ const int HeapProfileTable::kMaxStackDepth = 32;
+/*static*/ const int HeapProfileTable::kMaxStackDepth;
//----------------------------------------------------------------------
diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.h b/third_party/tcmalloc/chromium/src/heap-profile-table.h
index 5403257..c9bee15 100644
--- a/third_party/tcmalloc/chromium/src/heap-profile-table.h
+++ b/third_party/tcmalloc/chromium/src/heap-profile-table.h
@@ -52,8 +52,8 @@ class HeapProfileTable {
// Extension to be used for heap pforile files.
static const char kFileExt[];
- // Longest stack trace we record. Defined in the .cc file.
- static const int kMaxStackDepth;
+ // Longest stack trace we record.
+ static const int kMaxStackDepth = 32;
// data types ----------------------------
diff --git a/third_party/tcmalloc/chromium/src/heap-profiler.cc b/third_party/tcmalloc/chromium/src/heap-profiler.cc
index a1c643a9..3055f4ce 100644
--- a/third_party/tcmalloc/chromium/src/heap-profiler.cc
+++ b/third_party/tcmalloc/chromium/src/heap-profiler.cc
@@ -524,9 +524,9 @@ extern "C" void HeapProfilerStart(const char* prefix) {
filename_prefix[prefix_length] = '\0';
}
-extern "C" bool IsHeapProfilerRunning() {
+extern "C" int IsHeapProfilerRunning() {
SpinLockHolder l(&heap_lock);
- return is_on;
+ return is_on ? 1 : 0; // return an int, because C code doesn't have bool
}
extern "C" void HeapProfilerStop() {
diff --git a/third_party/tcmalloc/chromium/src/internal_logging.h b/third_party/tcmalloc/chromium/src/internal_logging.h
index 0cb9ba2..731b2d9 100644
--- a/third_party/tcmalloc/chromium/src/internal_logging.h
+++ b/third_party/tcmalloc/chromium/src/internal_logging.h
@@ -119,7 +119,9 @@ do { \
#ifndef NDEBUG
#define ASSERT(cond) CHECK_CONDITION(cond)
#else
-#define ASSERT(cond) ((void) 0)
+#define ASSERT(cond) \
+ do { \
+ } while (0 && (cond))
#endif
// Print into buffer
diff --git a/third_party/tcmalloc/chromium/src/malloc_extension.cc b/third_party/tcmalloc/chromium/src/malloc_extension.cc
index 4ce262f..c2f8b54 100644
--- a/third_party/tcmalloc/chromium/src/malloc_extension.cc
+++ b/third_party/tcmalloc/chromium/src/malloc_extension.cc
@@ -187,7 +187,10 @@ MallocExtension* MallocExtension::instance() {
void MallocExtension::Register(MallocExtension* implementation) {
perftools_pthread_once(&module_init, InitModule);
// When running under valgrind, our custom malloc is replaced with
- // valgrind's one and malloc extensions will not work.
+ // valgrind's one and malloc extensions will not work. (Note:
+ // callers should be responsible for checking that they are the
+ // malloc that is really being run, before calling Register. This
+ // is just here as an extra sanity check.)
if (!RunningOnValgrind()) {
current_instance = implementation;
}
diff --git a/third_party/tcmalloc/chromium/src/malloc_hook.cc b/third_party/tcmalloc/chromium/src/malloc_hook.cc
index 2a7f542..4315b86 100644
--- a/third_party/tcmalloc/chromium/src/malloc_hook.cc
+++ b/third_party/tcmalloc/chromium/src/malloc_hook.cc
@@ -326,8 +326,8 @@ extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth,
return 0;
for (int i = 0; i < depth; ++i) { // stack[0] is our immediate caller
if (InHookCaller(stack[i])) {
- RAW_VLOG(4, "Found hooked allocator at %d: %p <- %p",
- i, stack[i], stack[i+1]);
+ RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p",
+ i, stack[i], stack[i+1]);
i += 1; // skip hook caller frame
depth -= i; // correct depth
if (depth > max_depth) depth = max_depth;
diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.cc b/third_party/tcmalloc/chromium/src/memory_region_map.cc
index 05fdc06..f6bed45 100644
--- a/third_party/tcmalloc/chromium/src/memory_region_map.cc
+++ b/third_party/tcmalloc/chromium/src/memory_region_map.cc
@@ -181,7 +181,7 @@ static MemoryRegionMap::RegionSetRep regions_rep;
static bool recursive_insert = false;
void MemoryRegionMap::Init(int max_stack_depth) {
- RAW_VLOG(2, "MemoryRegionMap Init");
+ RAW_VLOG(10, "MemoryRegionMap Init");
RAW_CHECK(max_stack_depth >= 0, "");
// Make sure we don't overflow the memory in region stacks:
RAW_CHECK(max_stack_depth <= kMaxStackDepth,
@@ -192,7 +192,7 @@ void MemoryRegionMap::Init(int max_stack_depth) {
if (client_count_ > 1) {
// not first client: already did initialization-proper
Unlock();
- RAW_VLOG(2, "MemoryRegionMap Init increment done");
+ RAW_VLOG(10, "MemoryRegionMap Init increment done");
return;
}
// Set our hooks and make sure no other hooks existed:
@@ -217,17 +217,17 @@ void MemoryRegionMap::Init(int max_stack_depth) {
// recursive_insert = false; as InsertRegionLocked will also construct
// regions_ on demand for us.
Unlock();
- RAW_VLOG(2, "MemoryRegionMap Init done");
+ RAW_VLOG(10, "MemoryRegionMap Init done");
}
bool MemoryRegionMap::Shutdown() {
- RAW_VLOG(2, "MemoryRegionMap Shutdown");
+ RAW_VLOG(10, "MemoryRegionMap Shutdown");
Lock();
RAW_CHECK(client_count_ > 0, "");
client_count_ -= 1;
if (client_count_ != 0) { // not last client; need not really shutdown
Unlock();
- RAW_VLOG(2, "MemoryRegionMap Shutdown decrement done");
+ RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done");
return true;
}
CheckMallocHooks(); // we assume no other hooks
@@ -244,7 +244,7 @@ bool MemoryRegionMap::Shutdown() {
RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used");
}
Unlock();
- RAW_VLOG(2, "MemoryRegionMap Shutdown done");
+ RAW_VLOG(10, "MemoryRegionMap Shutdown done");
return deleted_arena;
}
@@ -336,7 +336,7 @@ bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top,
Lock();
const Region* region = DoFindRegionLocked(stack_top);
if (region != NULL) {
- RAW_VLOG(2, "Stack at %p is inside region %p..%p",
+ RAW_VLOG(10, "Stack at %p is inside region %p..%p",
reinterpret_cast<void*>(stack_top),
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
@@ -361,7 +361,7 @@ MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() {
}
inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) {
- RAW_VLOG(4, "Inserting region %p..%p from %p",
+ RAW_VLOG(12, "Inserting region %p..%p from %p",
reinterpret_cast<void*>(region.start_addr),
reinterpret_cast<void*>(region.end_addr),
reinterpret_cast<void*>(region.caller()));
@@ -385,10 +385,10 @@ inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) {
// This inserts and allocates permanent storage for region
// and its call stack data: it's safe to do it now:
regions_->insert(region);
- RAW_VLOG(4, "Inserted region %p..%p :",
+ RAW_VLOG(12, "Inserted region %p..%p :",
reinterpret_cast<void*>(region.start_addr),
reinterpret_cast<void*>(region.end_addr));
- if (VLOG_IS_ON(4)) LogAllLocked();
+ if (VLOG_IS_ON(12)) LogAllLocked();
}
// These variables are local to MemoryRegionMap::InsertRegionLocked()
@@ -425,7 +425,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) {
// and taken into account when the recursion unwinds.
// Do the insert:
if (recursive_insert) { // recursion: save in saved_regions
- RAW_VLOG(4, "Saving recursive insert of region %p..%p from %p",
+ RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p",
reinterpret_cast<void*>(region.start_addr),
reinterpret_cast<void*>(region.end_addr),
reinterpret_cast<void*>(region.caller()));
@@ -436,7 +436,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) {
saved_regions[saved_regions_count++] = region;
} else { // not a recusrive call
if (regions_ == NULL) { // init regions_
- RAW_VLOG(4, "Initializing region set");
+ RAW_VLOG(12, "Initializing region set");
regions_ = regions_rep.region_set();
recursive_insert = true;
new(regions_) RegionSet();
@@ -470,7 +470,7 @@ void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) {
max_stack_depth_, kStripFrames + 1)
: 0;
region.set_call_stack_depth(depth); // record stack info fully
- RAW_VLOG(2, "New global region %p..%p from %p",
+ RAW_VLOG(10, "New global region %p..%p from %p",
reinterpret_cast<void*>(region.start_addr),
reinterpret_cast<void*>(region.end_addr),
reinterpret_cast<void*>(region.caller()));
@@ -499,7 +499,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
// An exact match, so it's safe to remove.
--saved_regions_count;
--put_pos;
- RAW_VLOG(2, ("Insta-Removing saved region %p..%p; "
+ RAW_VLOG(10, ("Insta-Removing saved region %p..%p; "
"now have %d saved regions"),
reinterpret_cast<void*>(start_addr),
reinterpret_cast<void*>(end_addr),
@@ -523,7 +523,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
uintptr_t start_addr = reinterpret_cast<uintptr_t>(start);
uintptr_t end_addr = start_addr + size;
// subtract start_addr, end_addr from all the regions
- RAW_VLOG(2, "Removing global region %p..%p; have %"PRIuS" regions",
+ RAW_VLOG(10, "Removing global region %p..%p; have %"PRIuS" regions",
reinterpret_cast<void*>(start_addr),
reinterpret_cast<void*>(end_addr),
regions_->size());
@@ -533,12 +533,12 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
for (RegionSet::iterator region = regions_->lower_bound(sample);
region != regions_->end() && region->start_addr < end_addr;
/*noop*/) {
- RAW_VLOG(5, "Looking at region %p..%p",
+ RAW_VLOG(13, "Looking at region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
if (start_addr <= region->start_addr &&
region->end_addr <= end_addr) { // full deletion
- RAW_VLOG(4, "Deleting region %p..%p",
+ RAW_VLOG(12, "Deleting region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
RegionSet::iterator d = region;
@@ -547,7 +547,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
continue;
} else if (region->start_addr < start_addr &&
end_addr < region->end_addr) { // cutting-out split
- RAW_VLOG(4, "Splitting region %p..%p in two",
+ RAW_VLOG(12, "Splitting region %p..%p in two",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
// Make another region for the start portion:
@@ -560,13 +560,13 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
const_cast<Region&>(*region).set_start_addr(end_addr);
} else if (end_addr > region->start_addr &&
start_addr <= region->start_addr) { // cut from start
- RAW_VLOG(4, "Start-chopping region %p..%p",
+ RAW_VLOG(12, "Start-chopping region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
const_cast<Region&>(*region).set_start_addr(end_addr);
} else if (start_addr > region->start_addr &&
start_addr < region->end_addr) { // cut from end
- RAW_VLOG(4, "End-chopping region %p..%p",
+ RAW_VLOG(12, "End-chopping region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
// Can't just modify region->end_addr (it's the sorting key):
@@ -582,11 +582,11 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
}
++region;
}
- RAW_VLOG(4, "Removed region %p..%p; have %"PRIuS" regions",
+ RAW_VLOG(12, "Removed region %p..%p; have %"PRIuS" regions",
reinterpret_cast<void*>(start_addr),
reinterpret_cast<void*>(end_addr),
regions_->size());
- if (VLOG_IS_ON(4)) LogAllLocked();
+ if (VLOG_IS_ON(12)) LogAllLocked();
Unlock();
}
@@ -596,7 +596,7 @@ void MemoryRegionMap::MmapHook(const void* result,
int fd, off_t offset) {
// TODO(maxim): replace all 0x%"PRIxS" by %p when RAW_VLOG uses a safe
// snprintf reimplementation that does not malloc to pretty-print NULL
- RAW_VLOG(2, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu "
+ RAW_VLOG(10, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu "
"prot %d flags %d fd %d offs %lld",
reinterpret_cast<uintptr_t>(result), size,
reinterpret_cast<uint64>(start), prot, flags, fd,
@@ -607,7 +607,7 @@ void MemoryRegionMap::MmapHook(const void* result,
}
void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) {
- RAW_VLOG(2, "MUnmap of %p %"PRIuS"", ptr, size);
+ RAW_VLOG(10, "MUnmap of %p %"PRIuS"", ptr, size);
if (size != 0) {
RecordRegionRemoval(ptr, size);
}
@@ -617,7 +617,7 @@ void MemoryRegionMap::MremapHook(const void* result,
const void* old_addr, size_t old_size,
size_t new_size, int flags,
const void* new_addr) {
- RAW_VLOG(2, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" "
+ RAW_VLOG(10, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" "
"to %"PRIuS" flags %d new_addr=0x%"PRIxPTR,
(uintptr_t)result, (uintptr_t)old_addr,
old_size, new_size, flags,
@@ -631,7 +631,7 @@ void MemoryRegionMap::MremapHook(const void* result,
extern "C" void* __sbrk(ptrdiff_t increment); // defined in libc
void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) {
- RAW_VLOG(2, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment);
+ RAW_VLOG(10, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment);
if (result != reinterpret_cast<void*>(-1)) {
if (increment > 0) {
void* new_end = sbrk(0);
diff --git a/third_party/tcmalloc/chromium/src/page_heap.cc b/third_party/tcmalloc/chromium/src/page_heap.cc
index 31130e9..a256b64 100644
--- a/third_party/tcmalloc/chromium/src/page_heap.cc
+++ b/third_party/tcmalloc/chromium/src/page_heap.cc
@@ -61,50 +61,65 @@ PageHeap::PageHeap()
}
}
-Span* PageHeap::New(Length n) {
+// Returns the minimum number of pages necessary to ensure that an
+// allocation of size n can be aligned to the given alignment.
+static Length AlignedAllocationSize(Length n, size_t alignment) {
+ ASSERT(alignment >= kPageSize);
+ return n + tcmalloc::pages(alignment - kPageSize);
+}
+
+Span* PageHeap::New(Length n, size_t sc, size_t align) {
ASSERT(Check());
ASSERT(n > 0);
+ if (align < kPageSize) {
+ align = kPageSize;
+ }
+
+ Length aligned_size = AlignedAllocationSize(n, align);
+
// Find first size >= n that has a non-empty list
- for (Length s = n; s < kMaxPages; s++) {
+ for (Length s = aligned_size; s < kMaxPages; s++) {
Span* ll = &free_[s].normal;
// If we're lucky, ll is non-empty, meaning it has a suitable span.
if (!DLL_IsEmpty(ll)) {
ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST);
- return Carve(ll->next, n);
+ return Carve(ll->next, n, sc, align);
}
// Alternatively, maybe there's a usable returned span.
ll = &free_[s].returned;
if (!DLL_IsEmpty(ll)) {
ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
- return Carve(ll->next, n);
+ return Carve(ll->next, n, sc, align);
}
// Still no luck, so keep looking in larger classes.
}
- Span* result = AllocLarge(n);
+ Span* result = AllocLarge(n, sc, align);
if (result != NULL) return result;
// Grow the heap and try again
- if (!GrowHeap(n)) {
+ if (!GrowHeap(aligned_size)) {
ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes);
ASSERT(Check());
return NULL;
}
- return AllocLarge(n);
+ return AllocLarge(n, sc, align);
}
-Span* PageHeap::AllocLarge(Length n) {
- // find the best span (closest to n in size).
+Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) {
+ // Find the best span (closest to n in size).
// The following loops implements address-ordered best-fit.
Span *best = NULL;
+ Length aligned_size = AlignedAllocationSize(n, align);
+
// Search through normal list
for (Span* span = large_.normal.next;
span != &large_.normal;
span = span->next) {
- if (span->length >= n) {
+ if (span->length >= aligned_size) {
if ((best == NULL)
|| (span->length < best->length)
|| ((span->length == best->length) && (span->start < best->start))) {
@@ -118,7 +133,7 @@ Span* PageHeap::AllocLarge(Length n) {
for (Span* span = large_.returned.next;
span != &large_.returned;
span = span->next) {
- if (span->length >= n) {
+ if (span->length >= aligned_size) {
if ((best == NULL)
|| (span->length < best->length)
|| ((span->length == best->length) && (span->start < best->start))) {
@@ -128,19 +143,18 @@ Span* PageHeap::AllocLarge(Length n) {
}
}
- return best == NULL ? NULL : Carve(best, n);
+ return best == NULL ? NULL : Carve(best, n, sc, align);
}
Span* PageHeap::Split(Span* span, Length n) {
ASSERT(0 < n);
ASSERT(n < span->length);
- ASSERT(span->location == Span::IN_USE);
- ASSERT(span->sizeclass == 0);
+ ASSERT((span->location != Span::IN_USE) || span->sizeclass == 0);
Event(span, 'T', n);
const int extra = span->length - n;
Span* leftover = NewSpan(span->start + n, extra);
- ASSERT(leftover->location == Span::IN_USE);
+ leftover->location = span->location;
Event(leftover, 'U', extra);
RecordSpan(leftover);
pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
@@ -161,43 +175,71 @@ void PageHeap::DecommitSpan(Span* span) {
stats_.committed_bytes -= span->length << kPageShift;
}
-Span* PageHeap::Carve(Span* span, Length n) {
+Span* PageHeap::Carve(Span* span, Length n, size_t sc, size_t align) {
ASSERT(n > 0);
ASSERT(span->location != Span::IN_USE);
+ ASSERT(align >= kPageSize);
const int old_location = span->location;
+
+ Length align_pages = align >> kPageShift;
RemoveFromFreeList(span);
- span->location = Span::IN_USE;
- Event(span, 'A', n);
+
+ if (span->start & (align_pages - 1)) {
+ Length skip_for_alignment = align_pages - (span->start & (align_pages - 1));
+ Span* aligned = Split(span, skip_for_alignment);
+
+ // The next span of |span| was just split -- no need to
+ // coalesce them. The previous span of |span| was not previously coalesced
+ // with |span|, i.e. is NULL or has location other than |old_location|.
+ const PageID p = span->start;
+ const Length n = span->length;
+ Span* prev = GetDescriptor(p-1);
+ ASSERT(prev == NULL ||
+ prev->location == Span::IN_USE ||
+ prev->location != old_location);
+ PrependToFreeList(span); // Skip coalescing - no candidates possible
+ span = aligned;
+ }
const int extra = span->length - n;
ASSERT(extra >= 0);
if (extra > 0) {
- Span* leftover = NewSpan(span->start + n, extra);
- leftover->location = old_location;
- Event(leftover, 'S', extra);
- RecordSpan(leftover);
-
+ Span* leftover = Split(span, n);
// The previous span of |leftover| was just splitted -- no need to
// coalesce them. The next span of |leftover| was not previously coalesced
- // with |span|, i.e. is NULL or has got location other than |old_location|.
+ // with |span|, i.e. is NULL or has location other than |old_location|.
const PageID p = leftover->start;
const Length len = leftover->length;
Span* next = GetDescriptor(p+len);
ASSERT (next == NULL ||
next->location == Span::IN_USE ||
next->location != leftover->location);
-
- PrependToFreeList(leftover); // Skip coalescing - no candidates possible
- span->length = n;
- pagemap_.set(span->start + n - 1, span);
+ PrependToFreeList(leftover);
}
+
+
ASSERT(Check());
if (old_location == Span::ON_RETURNED_FREELIST) {
// We need to recommit this address space.
CommitSpan(span);
}
- ASSERT(span->location == Span::IN_USE);
- ASSERT(span->length == n);
+
+ span->location = Span::IN_USE;
+ span->sizeclass = sc;
+ Event(span, 'A', n);
+
+ // Cache sizeclass info eagerly. Locking is not necessary.
+ // (Instead of being eager, we could just replace any stale info
+ // about this span, but that seems to be no better in practice.)
+ CacheSizeClass(span->start, sc);
+
+ if (sc != kLargeSizeClass) {
+ for (Length i = 1; i < n; i++) {
+ pagemap_.set(span->start + i, span);
+ CacheSizeClass(span->start + i, sc);
+ }
+ }
+
ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes);
return span;
}
@@ -379,18 +421,6 @@ Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
return released_pages;
}
-void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
- // Associate span object with all interior pages as well
- ASSERT(span->location == Span::IN_USE);
- ASSERT(GetDescriptor(span->start) == span);
- ASSERT(GetDescriptor(span->start+span->length-1) == span);
- Event(span, 'C', sc);
- span->sizeclass = sc;
- for (Length i = 1; i < span->length-1; i++) {
- pagemap_.set(span->start+i, span);
- }
-}
-
static double MB(uint64_t bytes) {
return bytes / 1048576.0;
}
diff --git a/third_party/tcmalloc/chromium/src/page_heap.h b/third_party/tcmalloc/chromium/src/page_heap.h
index 52acedb..63f21b2 100644
--- a/third_party/tcmalloc/chromium/src/page_heap.h
+++ b/third_party/tcmalloc/chromium/src/page_heap.h
@@ -101,21 +101,49 @@ class PERFTOOLS_DLL_DECL PageHeap {
public:
PageHeap();
- // Allocate a run of "n" pages. Returns zero if out of memory.
- // Caller should not pass "n == 0" -- instead, n should have
- // been rounded up already.
- Span* New(Length n);
+ // Allocate a run of "n" pages. Returns NULL if out of memory.
+ // Caller should not pass "n == 0" -- instead, n should have been
+ // rounded up already. The span will be used for allocating objects
+ // with the specified sizeclass sc (sc must be zero for large
+ // objects). The first page of the span will be aligned to the value
+ // specified by align, which must be a power of two.
+ Span* New(Length n, size_t sc, size_t align);
// Delete the span "[p, p+n-1]".
// REQUIRES: span was returned by earlier call to New() and
// has not yet been deleted.
void Delete(Span* span);
- // Mark an allocated span as being used for small objects of the
- // specified size-class.
- // REQUIRES: span was returned by an earlier call to New()
- // and has not yet been deleted.
- void RegisterSizeClass(Span* span, size_t sc);
+ // Gets either the size class of addr, if it is a small object, or its span.
+ // Return:
+ // if addr is invalid:
+ // leave *out_sc and *out_span unchanged and return false;
+ // if addr is valid and has a small size class:
+ // *out_sc = the size class
+ // *out_span = <undefined>
+ // return true
+ // if addr is valid and has a large size class:
+ // *out_sc = kLargeSizeClass
+ // *out_span = the span pointer
+ // return true
+ bool GetSizeClassOrSpan(void* addr, size_t* out_sc, Span** out_span) {
+ const PageID p = reinterpret_cast<uintptr_t>(addr) >> kPageShift;
+ size_t cl = GetSizeClassIfCached(p);
+ Span* span = NULL;
+
+ if (cl != kLargeSizeClass) {
+ ASSERT(cl == GetDescriptor(p)->sizeclass);
+ } else {
+ span = GetDescriptor(p);
+ if (!span) {
+ return false;
+ }
+ cl = span->sizeclass;
+ }
+ *out_span = span;
+ *out_sc = cl;
+ return true;
+ }
// Split an allocated span into two spans: one of length "n" pages
// followed by another span of length "span->length - n" pages.
@@ -123,14 +151,29 @@ class PERFTOOLS_DLL_DECL PageHeap {
// Returns a pointer to the second span.
//
// REQUIRES: "0 < n < span->length"
- // REQUIRES: span->location == IN_USE
- // REQUIRES: span->sizeclass == 0
+ // REQUIRES: a) the span is free or b) sizeclass == 0
Span* Split(Span* span, Length n);
// Return the descriptor for the specified page. Returns NULL if
// this PageID was not allocated previously.
inline Span* GetDescriptor(PageID p) const {
- return reinterpret_cast<Span*>(pagemap_.get(p));
+ Span* ret = reinterpret_cast<Span*>(pagemap_.get(p));
+#ifndef NDEBUG
+ if (ret != NULL && ret->location == Span::IN_USE) {
+ size_t cl = GetSizeClassIfCached(p);
+ // Three cases:
+ // - The object is not cached
+ // - The object is cached correctly
+ // - It is a large object and we're not looking at the first
+ // page. This happens in coalescing.
+ ASSERT(cl == kLargeSizeClass || cl == ret->sizeclass ||
+ (ret->start != p && ret->sizeclass == kLargeSizeClass));
+ // If the object is sampled, it must be kLargeSizeClass
+ ASSERT(ret->sizeclass == kLargeSizeClass || !ret->sample);
+ }
+#endif
+
+ return ret;
}
// Dump state to stderr
@@ -234,7 +277,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
// length exactly "n" and mark it as non-free so it can be returned
// to the client. After all that, decrease free_pages_ by n and
// return span.
- Span* Carve(Span* span, Length n);
+ Span* Carve(Span* span, Length n, size_t sc, size_t align);
void RecordSpan(Span* span) {
pagemap_.set(span->start, span);
@@ -245,7 +288,7 @@ class PERFTOOLS_DLL_DECL PageHeap {
// Allocate a large span of length == n. If successful, returns a
// span of exactly the specified length. Else, returns NULL.
- Span* AllocLarge(Length n);
+ Span* AllocLarge(Length n, size_t sc, size_t align);
// Coalesce span with neighboring spans if possible, prepend to
// appropriate free list, and adjust stats.
diff --git a/third_party/tcmalloc/chromium/src/page_heap_allocator.h b/third_party/tcmalloc/chromium/src/page_heap_allocator.h
index 20e1ab1..3f75939 100644
--- a/third_party/tcmalloc/chromium/src/page_heap_allocator.h
+++ b/third_party/tcmalloc/chromium/src/page_heap_allocator.h
@@ -44,7 +44,7 @@ class PageHeapAllocator {
// allocated and their constructors might not have run by the time some
// other static variable tries to allocate memory.
void Init() {
- ASSERT(kAlignedSize <= kAllocIncrement);
+ ASSERT(sizeof(T) <= kAllocIncrement);
inuse_ = 0;
free_area_ = NULL;
free_avail_ = 0;
@@ -60,8 +60,9 @@ class PageHeapAllocator {
result = free_list_;
free_list_ = *(reinterpret_cast<void**>(result));
} else {
- if (free_avail_ < kAlignedSize) {
- // Need more room
+ if (free_avail_ < sizeof(T)) {
+ // Need more room. We assume that MetaDataAlloc returns
+ // suitably aligned memory.
free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
if (free_area_ == NULL) {
CRASH("FATAL ERROR: Out of memory trying to allocate internal "
@@ -71,8 +72,8 @@ class PageHeapAllocator {
free_avail_ = kAllocIncrement;
}
result = free_area_;
- free_area_ += kAlignedSize;
- free_avail_ -= kAlignedSize;
+ free_area_ += sizeof(T);
+ free_avail_ -= sizeof(T);
}
inuse_++;
return reinterpret_cast<T*>(result);
@@ -90,10 +91,6 @@ class PageHeapAllocator {
// How much to allocate from system at a time
static const int kAllocIncrement = 128 << 10;
- // Aligned size of T
- static const size_t kAlignedSize
- = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
-
// Free area from which to carve new objects
char* free_area_;
size_t free_avail_;
diff --git a/third_party/tcmalloc/chromium/src/pprof b/third_party/tcmalloc/chromium/src/pprof
index fec0c9e..8aff380 100755
--- a/third_party/tcmalloc/chromium/src/pprof
+++ b/third_party/tcmalloc/chromium/src/pprof
@@ -89,11 +89,10 @@ my %obj_tool_map = (
);
my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local
my $GV = "gv";
+my $KCACHEGRIND = "kcachegrind";
my $PS2PDF = "ps2pdf";
# These are used for dynamic profiles
-my $WGET = "wget";
-my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets
-my $CURL = "curl";
+my $URL_FETCHER = "curl -s";
# These are the web pages that servers need to support for dynamic profiles
my $HEAP_PAGE = "/pprof/heap";
@@ -107,6 +106,12 @@ my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?";
my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST
my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
+# These are the web pages that can be named on the command line.
+# All the alternatives must begin with /.
+my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" .
+ "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" .
+ "$FILTEREDPROFILE_PAGE)";
+
# default binary name
my $UNKNOWN_BINARY = "(unknown)";
@@ -175,12 +180,14 @@ Output type:
--text Generate text report
--callgrind Generate callgrind format to stdout
--gv Generate Postscript and display
+ --web Generate SVG and display
--list=<regexp> Generate source listing of matching routines
--disasm=<regexp> Generate disassembly of matching routines
--symbols Print demangled symbol names found at given addresses
--dot Generate DOT file to stdout
--ps Generate Postcript to stdout
--pdf Generate PDF to stdout
+ --svg Generate SVG to stdout
--gif Generate GIF to stdout
--raw Generate symbolized pprof data (useful with remote fetch)
@@ -223,6 +230,8 @@ pprof /bin/ls ls.prof
Enters "interactive" mode
pprof --text /bin/ls ls.prof
Outputs one line per procedure
+pprof --web /bin/ls ls.prof
+ Displays annotated call-graph in web browser
pprof --gv /bin/ls ls.prof
Displays annotated call-graph via 'gv'
pprof --gv --focus=Mutex /bin/ls ls.prof
@@ -233,6 +242,9 @@ pprof --list=getdir /bin/ls ls.prof
(Per-line) annotated source listing for getdir()
pprof --disasm=getdir /bin/ls ls.prof
(Per-PC) annotated disassembly for getdir()
+
+pprof http://localhost:1234/
+ Enters "interactive" mode
pprof --text localhost:1234
Outputs one line per procedure for localhost:1234
pprof --raw localhost:1234 > ./local.raw
@@ -292,10 +304,12 @@ sub Init() {
$main::opt_disasm = "";
$main::opt_symbols = 0;
$main::opt_gv = 0;
+ $main::opt_web = 0;
$main::opt_dot = 0;
$main::opt_ps = 0;
$main::opt_pdf = 0;
$main::opt_gif = 0;
+ $main::opt_svg = 0;
$main::opt_raw = 0;
$main::opt_nodecount = 80;
@@ -330,13 +344,16 @@ sub Init() {
# Are we using $SYMBOL_PAGE?
$main::use_symbol_page = 0;
+ # Files returned by TempName.
+ %main::tempnames = ();
+
# Type of profile we are dealing with
# Supported types:
- # cpu
- # heap
- # growth
- # contention
- $main::profile_type = ''; # Empty type means "unknown"
+ # cpu
+ # heap
+ # growth
+ # contention
+ $main::profile_type = ''; # Empty type means "unknown"
GetOptions("help!" => \$main::opt_help,
"version!" => \$main::opt_version,
@@ -355,9 +372,11 @@ sub Init() {
"disasm=s" => \$main::opt_disasm,
"symbols!" => \$main::opt_symbols,
"gv!" => \$main::opt_gv,
+ "web!" => \$main::opt_web,
"dot!" => \$main::opt_dot,
"ps!" => \$main::opt_ps,
"pdf!" => \$main::opt_pdf,
+ "svg!" => \$main::opt_svg,
"gif!" => \$main::opt_gif,
"raw!" => \$main::opt_raw,
"interactive!" => \$main::opt_interactive,
@@ -380,8 +399,8 @@ sub Init() {
"tools=s" => \$main::opt_tools,
"test!" => \$main::opt_test,
"debug!" => \$main::opt_debug,
- # Undocumented flags used only by unittests:
- "test_stride=i" => \$main::opt_test_stride,
+ # Undocumented flags used only by unittests:
+ "test_stride=i" => \$main::opt_test_stride,
) || usage("Invalid option(s)");
# Deal with the standard --help and --version
@@ -433,9 +452,11 @@ sub Init() {
($main::opt_disasm eq '' ? 0 : 1) +
($main::opt_symbols == 0 ? 0 : 1) +
$main::opt_gv +
+ $main::opt_web +
$main::opt_dot +
$main::opt_ps +
$main::opt_pdf +
+ $main::opt_svg +
$main::opt_gif +
$main::opt_raw +
$main::opt_interactive +
@@ -510,20 +531,6 @@ sub Init() {
ConfigureObjTools($main::prog)
}
- # Check what flags our commandline utilities support
- if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) {
- my @lines = <TFILE>;
- if (grep(/unrecognized/, @lines) > 0) {
- # grep found 'unrecognized' token from WGET, clear WGET flags
- $WGET_FLAGS = "";
- }
- close(TFILE);
- }
- # TODO(csilvers): check all the other binaries and objtools to see
- # if they are installed and what flags they support, and store that
- # in a data structure here, rather than scattering these tests about.
- # Then, ideally, rewrite code to use wget OR curl OR GET or ...
-
# Break the opt_list_prefix into the prefix_list array
@prefix_list = split (',', $main::opt_lib_prefix);
@@ -634,9 +641,24 @@ sub Main() {
} else {
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
if ($main::opt_gv) {
- RunGV(PsTempName($main::next_tmpfile), "");
+ RunGV(TempName($main::next_tmpfile, "ps"), "");
+ } elsif ($main::opt_web) {
+ my $tmp = TempName($main::next_tmpfile, "svg");
+ RunWeb($tmp);
+ # The command we run might hand the file name off
+ # to an already running browser instance and then exit.
+ # Normally, we'd remove $tmp on exit (right now),
+ # but fork a child to remove $tmp a little later, so that the
+ # browser has time to load it first.
+ delete $main::tempnames{$tmp};
+ if (fork() == 0) {
+ sleep 5;
+ unlink($tmp);
+ exit(0);
+ }
}
} else {
+ cleanup();
exit(1);
}
}
@@ -667,7 +689,7 @@ sub ReadlineMightFail {
sub RunGV {
my $fname = shift;
- my $bg = shift; # "" or " &" if we should run in background
+ my $bg = shift; # "" or " &" if we should run in background
if (!system("$GV --version >/dev/null 2>&1")) {
# Options using double dash are supported by this gv version.
# Also, turn on noantialias to better handle bug in gv for
@@ -682,6 +704,41 @@ sub RunGV {
}
}
+sub RunWeb {
+ my $fname = shift;
+ print STDERR "Loading web page file:///$fname\n";
+
+ if (`uname` =~ /Darwin/) {
+ # OS X: open will use standard preference for SVG files.
+ system("/usr/bin/open", $fname);
+ return;
+ }
+
+ # Some kind of Unix; try generic symlinks, then specific browsers.
+ # (Stop once we find one.)
+ # Works best if the browser is already running.
+ my @alt = (
+ "/etc/alternatives/gnome-www-browser",
+ "/etc/alternatives/x-www-browser",
+ "google-chrome",
+ "firefox",
+ );
+ foreach my $b (@alt) {
+ if (system($b, $fname) == 0) {
+ return;
+ }
+ }
+
+ print STDERR "Could not load web browser.\n";
+}
+
+sub RunKcachegrind {
+ my $fname = shift;
+ my $bg = shift; # "" or " &" if we should run in background
+ print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n";
+ system("$KCACHEGRIND " . $fname . $bg);
+}
+
##### Interactive helper routines #####
@@ -689,10 +746,11 @@ sub InteractiveMode {
$| = 1; # Make output unbuffered for interactive mode
my ($orig_profile, $symbols, $libs, $total) = @_;
- print "Welcome to pprof! For help, type 'help'.\n";
+ print STDERR "Welcome to pprof! For help, type 'help'.\n";
- # Use ReadLine if it's installed.
- if ( !ReadlineMightFail() &&
+ # Use ReadLine if it's installed and input comes from a console.
+ if ( -t STDIN &&
+ !ReadlineMightFail() &&
defined(eval {require Term::ReadLine}) ) {
my $term = new Term::ReadLine 'pprof';
while ( defined ($_ = $term->readline('(pprof) '))) {
@@ -703,7 +761,7 @@ sub InteractiveMode {
}
} else { # don't have readline
while (1) {
- print "(pprof) ";
+ print STDERR "(pprof) ";
$_ = <STDIN>;
last if ! defined $_ ;
s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -727,13 +785,13 @@ sub InteractiveCommand {
my($orig_profile, $symbols, $libs, $total, $command) = @_;
$_ = $command; # just to make future m//'s easier
if (!defined($_)) {
- print "\n";
+ print STDERR "\n";
return 0;
}
- if (m/^ *quit/) {
+ if (m/^\s*quit/) {
return 0;
}
- if (m/^ *help/) {
+ if (m/^\s*help/) {
InteractiveHelpMessage();
return 1;
}
@@ -745,7 +803,7 @@ sub InteractiveCommand {
$main::opt_gv = 0;
$main::opt_cum = 0;
- if (m/^ *(text|top)(\d*) *(.*)/) {
+ if (m/^\s*(text|top)(\d*)\s*(.*)/) {
$main::opt_text = 1;
my $line_limit = ($2 ne "") ? int($2) : 10;
@@ -764,7 +822,24 @@ sub InteractiveCommand {
PrintText($symbols, $flat, $cumulative, $total, $line_limit);
return 1;
}
- if (m/^ *list *(.+)/) {
+ if (m/^\s*callgrind\s*([^ \n]*)/) {
+ $main::opt_callgrind = 1;
+
+ # Get derived profiles
+ my $calls = ExtractCalls($symbols, $orig_profile);
+ my $filename = $1;
+ if ( $1 eq '' ) {
+ $filename = TempName($main::next_tmpfile, "callgrind");
+ }
+ PrintCallgrind($calls, $filename);
+ if ( $1 eq '' ) {
+ RunKcachegrind($filename, " & ");
+ $main::next_tmpfile++;
+ }
+
+ return 1;
+ }
+ if (m/^\s*list\s*(.+)/) {
$main::opt_list = 1;
my $routine;
@@ -781,7 +856,7 @@ sub InteractiveCommand {
PrintListing($libs, $flat, $cumulative, $routine);
return 1;
}
- if (m/^ *disasm *(.+)/) {
+ if (m/^\s*disasm\s*(.+)/) {
$main::opt_disasm = 1;
my $routine;
@@ -799,12 +874,18 @@ sub InteractiveCommand {
PrintDisassembly($libs, $flat, $cumulative, $routine, $total);
return 1;
}
- if (m/^ *gv *(.*)/) {
- $main::opt_gv = 1;
+ if (m/^\s*(gv|web)\s*(.*)/) {
+ $main::opt_gv = 0;
+ $main::opt_web = 0;
+ if ($1 eq "gv") {
+ $main::opt_gv = 1;
+ } elsif ($1 eq "web") {
+ $main::opt_web = 1;
+ }
my $focus;
my $ignore;
- ($focus, $ignore) = ParseInteractiveArgs($1);
+ ($focus, $ignore) = ParseInteractiveArgs($2);
# Process current profile to account for various settings
my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore);
@@ -815,11 +896,19 @@ sub InteractiveCommand {
my $cumulative = CumulativeProfile($reduced);
if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
- RunGV(PsTempName($main::next_tmpfile), " &");
+ if ($main::opt_gv) {
+ RunGV(TempName($main::next_tmpfile, "ps"), " &");
+ } elsif ($main::opt_web) {
+ RunWeb(TempName($main::next_tmpfile, "svg"));
+ }
$main::next_tmpfile++;
}
return 1;
}
+ if (m/^\s*$/) {
+ return 1;
+ }
+ print STDERR "Unknown command: try 'help'.\n";
return 1;
}
@@ -856,7 +945,7 @@ sub ProcessProfile {
}
sub InteractiveHelpMessage {
- print <<ENDOFHELP;
+ print STDERR <<ENDOFHELP;
Interactive pprof mode
Commands:
@@ -868,6 +957,14 @@ Commands:
the "focus" regular expression matches a routine name on the stack
trace.
+ web
+ web [focus] [-ignore1] [-ignore2]
+ Like GV, but displays profile in your web browser instead of using
+ Ghostview. Works best if your web browser is already running.
+ To change the browser that gets used:
+ On Linux, set the /etc/alternatives/gnome-www-browser symlink.
+ On OS X, change the Finder association for SVG files.
+
list [routine_regexp] [-ignore1] [-ignore2]
Show source listing of routines whose names match "routine_regexp"
@@ -882,6 +979,10 @@ Commands:
Show disassembly of routines whose names match "routine_regexp",
annotated with sample counts.
+ callgrind
+ callgrind [filename]
+ Generates callgrind file. If no filename is given, kcachegrind is called.
+
help - This listing
quit or ^D - End pprof
@@ -913,16 +1014,19 @@ sub ParseInteractiveArgs {
}
}
if ($ignore ne "") {
- print "Ignoring samples in call stacks that match '$ignore'\n";
+ print STDERR "Ignoring samples in call stacks that match '$ignore'\n";
}
return ($focus, $ignore);
}
##### Output code #####
-sub PsTempName {
+sub TempName {
my $fnum = shift;
- return "$main::tmpfile_ps" . "." . "$fnum" . ".ps";
+ my $ext = shift;
+ my $file = "$main::tmpfile_ps.$fnum.$ext";
+ $main::tempnames{$file} = 1;
+ return $file;
}
# Print profile data in packed binary format (64-bit) to standard out
@@ -1045,7 +1149,15 @@ sub PrintText {
# Print the call graph in a way that's suiteable for callgrind.
sub PrintCallgrind {
my $calls = shift;
- printf("events: Hits\n\n");
+ my $filename;
+ if ($main::opt_interactive) {
+ $filename = shift;
+ print STDERR "Writing callgrind file to '$filename'.\n"
+ } else {
+ $filename = "&STDOUT";
+ }
+ open(CG, ">".$filename );
+ printf CG ("events: Hits\n\n");
foreach my $call ( map { $_->[0] }
sort { $a->[1] cmp $b ->[1] ||
$a->[2] <=> $b->[2] }
@@ -1057,13 +1169,15 @@ sub PrintCallgrind {
my ( $caller_file, $caller_line, $caller_function,
$callee_file, $callee_line, $callee_function ) =
( $1, $2, $3, $5, $6, $7 );
- printf("fl=$caller_file\nfn=$caller_function\n");
+
+
+ printf CG ("fl=$caller_file\nfn=$caller_function\n");
if (defined $6) {
- printf("cfl=$callee_file\n");
- printf("cfn=$callee_function\n");
- printf("calls=$count $callee_line\n");
+ printf CG ("cfl=$callee_file\n");
+ printf CG ("cfn=$callee_function\n");
+ printf CG ("calls=$count $callee_line\n");
}
- printf("$caller_line $count\n\n");
+ printf CG ("$caller_line $count\n\n");
}
}
@@ -1385,7 +1499,7 @@ sub SourceLine {
return undef;
}
my $lines = [];
- push(@{$lines}, ""); # So we can use 1-based line numbers as indices
+ push(@{$lines}, ""); # So we can use 1-based line numbers as indices
while (<FILE>) {
push(@{$lines}, $_);
}
@@ -1477,8 +1591,8 @@ sub PrintDisassembledFunction {
# Find run of instructions for this range of source lines
my $first_inst = $i;
while (($i <= $#instructions) &&
- ($instructions[$i]->[2] >= $first_line) &&
- ($instructions[$i]->[2] <= $last_line)) {
+ ($instructions[$i]->[2] >= $first_line) &&
+ ($instructions[$i]->[2] <= $last_line)) {
$e = $instructions[$i];
$flat_sum{$e->[2]} += $flat_count[$i];
$cum_sum{$e->[2]} += $cum_count[$i];
@@ -1490,16 +1604,16 @@ sub PrintDisassembledFunction {
for (my $l = $first_line; $l <= $last_line; $l++) {
my $line = SourceLine($current_file, $l);
if (!defined($line)) {
- $line = "?\n";
+ $line = "?\n";
next;
} else {
$line =~ s/^\s+//;
}
printf("%6s %6s %5d: %s",
- UnparseAlt($flat_sum{$l}),
- UnparseAlt($cum_sum{$l}),
- $l,
- $line);
+ UnparseAlt($flat_sum{$l}),
+ UnparseAlt($cum_sum{$l}),
+ $l,
+ $line);
}
# Print disassembly
@@ -1516,9 +1630,9 @@ sub PrintDisassembledFunction {
while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments
printf("%6s %6s %8s: %6s\n",
- UnparseAlt($flat_count[$x]),
- UnparseAlt($cum_count[$x]),
- $address,
+ UnparseAlt($flat_count[$x]),
+ UnparseAlt($cum_count[$x]),
+ $address,
$d);
}
}
@@ -1542,7 +1656,7 @@ sub PrintDot {
# Find nodes to include
my @list = (sort { abs(GetEntry($cumulative, $b)) <=>
abs(GetEntry($cumulative, $a))
- || $a cmp $b }
+ || $a cmp $b }
keys(%{$cumulative}));
my $last = $nodecount - 1;
if ($last > $#list) {
@@ -1554,7 +1668,6 @@ sub PrintDot {
}
if ($last < 0) {
print STDERR "No nodes to print\n";
- cleanup();
return 0;
}
@@ -1567,11 +1680,14 @@ sub PrintDot {
# Open DOT output file
my $output;
if ($main::opt_gv) {
- $output = "| $DOT -Tps2 >" . PsTempName($main::next_tmpfile);
+ $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps");
} elsif ($main::opt_ps) {
$output = "| $DOT -Tps2";
} elsif ($main::opt_pdf) {
$output = "| $DOT -Tps2 | $PS2PDF - -";
+ } elsif ($main::opt_web || $main::opt_svg) {
+ # We need to post-process the SVG, so write to a temporary file always.
+ $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg");
} elsif ($main::opt_gif) {
$output = "| $DOT -Tgif";
} else {
@@ -1682,7 +1798,10 @@ sub PrintDot {
my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
if ($fraction > 1) { $fraction = 1; }
my $w = $fraction * 2;
- #if ($w < 1) { $w = 1; }
+ if ($w < 1 && ($main::opt_web || $main::opt_svg)) {
+ # SVG output treats line widths < 1 poorly.
+ $w = 1;
+ }
# Dot sometimes segfaults if given edge weights that are too large, so
# we cap the weights at a large value
@@ -1706,11 +1825,312 @@ sub PrintDot {
}
print DOT ("}\n");
-
close(DOT);
+
+ if ($main::opt_web || $main::opt_svg) {
+ # Rewrite SVG to be more usable inside web browser.
+ RewriteSvg(TempName($main::next_tmpfile, "svg"));
+ }
+
return 1;
}
+# Post-process the SVG file that dot wrote so it can be panned and zoomed
+# inside a web browser.
+#
+# Argument: $svgfile -- path of the temporary SVG file.  The file is read
+# and then removed.  With --svg ($main::opt_svg) the rewritten document is
+# printed to standard output; otherwise it is written back to $svgfile.
+# Dies if the file cannot be opened for reading or for writing.
+sub RewriteSvg {
+  my $svgfile = shift;
+
+  # Three-argument open: the file name is taken literally, never parsed for
+  # mode characters or stripped of surrounding whitespace.
+  open(my $in, '<', $svgfile) || die "open temp svg: $!";
+  my $svg = join('', <$in>);
+  close($in);
+  unlink $svgfile;
+
+  # Dot's SVG output is
+  #
+  # <svg width="___" height="___"
+  # viewBox="___" xmlns=...>
+  # <g id="graph0" transform="...">
+  # ...
+  # </g>
+  # </svg>
+  #
+  # Change it to
+  #
+  # <svg width="100%" height="100%"
+  # xmlns=...>
+  # $svg_javascript
+  # <g id="viewport" transform="translate(0,0)">
+  # <g id="graph0" transform="...">
+  # ...
+  # </g>
+  # </g>
+  # </svg>
+
+  # Fix width, height; drop viewBox.
+  $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/;
+
+  # Insert script, viewport <g> above first <g>
+  my $svg_javascript = SvgJavascript();
+  my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n";
+  $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/;
+
+  # Insert final </g> above </svg>.
+  $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/;
+  # NOTE(review): this renames dot's own <g id="graphN"> to id="viewport" as
+  # well, so the document ends up with two groups carrying that id, unlike
+  # the layout sketched above.  Kept as-is; confirm whether it is intended.
+  $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/;
+
+  if ($main::opt_svg) {
+    # --svg: write to standard output.
+    print $svg;
+  } else {
+    # Write back to temporary file.
+    open(my $out, '>', $svgfile) || die "open $svgfile: $!";
+    print $out $svg;
+    close($out);
+  }
+}
+
+# Return the <script> element (a locally modified copy of the SVGPan 1.2
+# library) that RewriteSvg inserts into dot's SVG output.  The script adds
+# mouse panning and wheel zooming of the element with id "viewport".
+#
+# The text is a single-quoted here-document, so nothing in it is
+# interpolated by perl; it is returned exactly as written below.
+sub SvgJavascript {
+  return <<'EOF';
+<script type="text/ecmascript"><![CDATA[
+// SVGPan
+// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/
+// Local modification: if(true || ...) below to force panning, never moving.
+// Local modification: setupHandlers sizes the root element directly, and
+// handleMouseWheel tolerates being called before the first mouse press.
+
+/**
+ * SVGPan library 1.2
+ * ====================
+ *
+ * Given an unique existing element with id "viewport", including the
+ * the library into any SVG adds the following capabilities:
+ *
+ * - Mouse panning
+ * - Mouse zooming (using the wheel)
+ * - Object dargging
+ *
+ * Known issues:
+ *
+ * - Zooming (while panning) on Safari has still some issues
+ *
+ * Releases:
+ *
+ * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui
+ * Fixed a bug with browser mouse handler interaction
+ *
+ * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui
+ * Updated the zoom code to support the mouse wheel on Safari/Chrome
+ *
+ * 1.0, Andrea Leofreddi
+ * First release
+ *
+ * This code is licensed under the following BSD license:
+ *
+ * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those of the
+ * authors and should not be interpreted as representing official policies, either expressed
+ * or implied, of Andrea Leofreddi.
+ */
+
+var root = document.documentElement;
+
+var state = 'none', stateTarget, stateOrigin, stateTf;
+
+setupHandlers(root);
+
+/**
+ * Register handlers
+ */
+function setupHandlers(root){
+  setAttributes(root, {
+    "onmousedown" : "handleMouseDown(evt)",
+    "onmousemove" : "handleMouseMove(evt)",
+    "onmouseup" : "handleMouseUp(evt)"
+    //"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element
+  });
+
+  if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0)
+    window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari
+  else
+    window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others
+
+  // The root element is the <svg> element itself; make it fill the window.
+  setAttributes(root, {
+    "width" : "100%",
+    "height" : "100%"
+  });
+}
+
+/**
+ * Instance an SVGPoint object with given event coordinates.
+ */
+function getEventPoint(evt) {
+  var p = root.createSVGPoint();
+
+  p.x = evt.clientX;
+  p.y = evt.clientY;
+
+  return p;
+}
+
+/**
+ * Sets the current transform matrix of an element.
+ */
+function setCTM(element, matrix) {
+  var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")";
+
+  element.setAttribute("transform", s);
+}
+
+/**
+ * Dumps a matrix to a string (useful for debug).
+ */
+function dumpMatrix(matrix) {
+  var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n 0, 0, 1 ]";
+
+  return s;
+}
+
+/**
+ * Sets attributes of an element.
+ */
+function setAttributes(element, attributes){
+  for (var i in attributes)
+    element.setAttributeNS(null, i, attributes[i]);
+}
+
+/**
+ * Handle mouse move event.
+ */
+function handleMouseWheel(evt) {
+  if(evt.preventDefault)
+    evt.preventDefault();
+
+  evt.returnValue = false;
+
+  var svgDoc = evt.target.ownerDocument;
+
+  var delta;
+
+  if(evt.wheelDelta)
+    delta = evt.wheelDelta / 3600; // Chrome/Safari
+  else
+    delta = evt.detail / -90; // Mozilla
+
+  var z = 1 + delta; // Zoom factor: 0.9/1.1
+
+  var g = svgDoc.getElementById("viewport");
+
+  var p = getEventPoint(evt);
+
+  p = p.matrixTransform(g.getCTM().inverse());
+
+  // Compute new scale matrix in current mouse position
+  var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y);
+
+  setCTM(g, g.getCTM().multiply(k));
+
+  // stateTf is only assigned by handleMouseDown; skip until it exists.
+  if(stateTf)
+    stateTf = stateTf.multiply(k.inverse());
+}
+
+/**
+ * Handle mouse move event.
+ */
+function handleMouseMove(evt) {
+  if(evt.preventDefault)
+    evt.preventDefault();
+
+  evt.returnValue = false;
+
+  var svgDoc = evt.target.ownerDocument;
+
+  var g = svgDoc.getElementById("viewport");
+
+  if(state == 'pan') {
+    // Pan mode
+    var p = getEventPoint(evt).matrixTransform(stateTf);
+
+    setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y));
+  } else if(state == 'move') {
+    // Move mode
+    var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse());
+
+    setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM()));
+
+    stateOrigin = p;
+  }
+}
+
+/**
+ * Handle click event.
+ */
+function handleMouseDown(evt) {
+  if(evt.preventDefault)
+    evt.preventDefault();
+
+  evt.returnValue = false;
+
+  var svgDoc = evt.target.ownerDocument;
+
+  var g = svgDoc.getElementById("viewport");
+
+  if(true || evt.target.tagName == "svg") {
+    // Pan mode
+    state = 'pan';
+
+    stateTf = g.getCTM().inverse();
+
+    stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
+  } else {
+    // Move mode
+    state = 'move';
+
+    stateTarget = evt.target;
+
+    stateTf = g.getCTM().inverse();
+
+    stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
+  }
+}
+
+/**
+ * Handle mouse button release event.
+ */
+function handleMouseUp(evt) {
+  if(evt.preventDefault)
+    evt.preventDefault();
+
+  evt.returnValue = false;
+
+  var svgDoc = evt.target.ownerDocument;
+
+  if(state == 'pan' || state == 'move') {
+    // Quit pan mode
+    state = '';
+  }
+}
+
+]]></script>
+EOF
+}
+
# Translate a stack of addresses into a stack of symbols
sub TranslateStack {
my $symbols = shift;
@@ -1806,7 +2226,7 @@ sub Unparse {
}
}
} elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) {
- return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
+ return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
} else {
return sprintf("%d", $num);
}
@@ -1947,42 +2367,42 @@ sub RemoveUninterestingFrames {
'malloc',
'free',
'memalign',
- 'posix_memalign',
+ 'posix_memalign',
'pvalloc',
'valloc',
'realloc',
- 'tc_calloc',
+ 'tc_calloc',
'tc_cfree',
'tc_malloc',
'tc_free',
'tc_memalign',
- 'tc_posix_memalign',
+ 'tc_posix_memalign',
'tc_pvalloc',
'tc_valloc',
'tc_realloc',
- 'tc_new',
- 'tc_delete',
- 'tc_newarray',
- 'tc_deletearray',
- 'tc_new_nothrow',
- 'tc_newarray_nothrow',
- 'do_malloc',
+ 'tc_new',
+ 'tc_delete',
+ 'tc_newarray',
+ 'tc_deletearray',
+ 'tc_new_nothrow',
+ 'tc_newarray_nothrow',
+ 'do_malloc',
'::do_malloc', # new name -- got moved to an unnamed ns
'::do_malloc_or_cpp_alloc',
'DoSampledAllocation',
- 'simple_alloc::allocate',
- '__malloc_alloc_template::allocate',
+ 'simple_alloc::allocate',
+ '__malloc_alloc_template::allocate',
'__builtin_delete',
'__builtin_new',
'__builtin_vec_delete',
'__builtin_vec_new',
'operator new',
'operator new[]',
- # These mark the beginning/end of our custom sections
- '__start_google_malloc',
- '__stop_google_malloc',
- '__start_malloc_hook',
- '__stop_malloc_hook') {
+ # These mark the beginning/end of our custom sections
+ '__start_google_malloc',
+ '__stop_google_malloc',
+ '__start_malloc_hook',
+ '__stop_malloc_hook') {
$skip{$name} = 1;
$skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything
}
@@ -1999,11 +2419,11 @@ sub RemoveUninterestingFrames {
# TODO(dpeng): this should not be necessary; it's taken
# care of by the general 2nd-pc mechanism below.
foreach my $name ('ProfileData::Add', # historical
- 'ProfileData::prof_handler', # historical
- 'CpuProfiler::prof_handler',
+ 'ProfileData::prof_handler', # historical
+ 'CpuProfiler::prof_handler',
'__FRAME_END__',
- '__pthread_sighandler',
- '__restore') {
+ '__pthread_sighandler',
+ '__restore') {
$skip{$name} = 1;
}
} else {
@@ -2042,10 +2462,10 @@ sub RemoveUninterestingFrames {
my @path = ();
foreach my $a (@addrs) {
if (exists($symbols->{$a})) {
- my $func = $symbols->{$a}->[0];
- if ($skip{$func} || ($func =~ m/$skip_regexp/)) {
- next;
- }
+ my $func = $symbols->{$a}->[0];
+ if ($skip{$func} || ($func =~ m/$skip_regexp/)) {
+ next;
+ }
}
push(@path, $a);
}
@@ -2070,8 +2490,8 @@ sub ReduceProfile {
# To avoid double-counting due to recursion, skip a stack-trace
# entry if it has already been seen
if (!$seen{$e}) {
- $seen{$e} = 1;
- push(@path, $e);
+ $seen{$e} = 1;
+ push(@path, $e);
}
}
my $reduced_path = join("\n", @path);
@@ -2265,28 +2685,11 @@ sub AddEntries {
AddEntry($profile, (join "\n", @k), $count);
}
-sub IsSymbolizedProfileFile {
- my $file_name = shift;
-
- if (!(-e $file_name) || !(-r $file_name)) {
- return 0;
- }
-
- $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash
- my $symbol_marker = $&;
- # Check if the file contains a symbol-section marker.
- open(TFILE, "<$file_name");
- my @lines = <TFILE>;
- my $result = grep(/^--- *$symbol_marker/, @lines);
- close(TFILE);
- return $result > 0;
-}
-
##### Code to profile a server dynamically #####
sub CheckSymbolPage {
my $url = SymbolPageURL();
- open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |");
+ open(SYMBOL, "$URL_FETCHER '$url' |");
my $line = <SYMBOL>;
$line =~ s/\r//g; # turn windows-looking lines into unix-looking lines
close(SYMBOL);
@@ -2305,33 +2708,45 @@ sub CheckSymbolPage {
sub IsProfileURL {
my $profile_name = shift;
- my ($host, $port, $path) = ParseProfileURL($profile_name);
- return defined($host) and defined($port) and defined($path);
+ if (-f $profile_name) {
+ printf STDERR "Using local file $profile_name.\n";
+ return 0;
+ }
+ return 1;
}
sub ParseProfileURL {
my $profile_name = shift;
- if (defined($profile_name) &&
- $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|.*($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) {
- # $6 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after
- # the hostname, as long as that everything is the empty string,
- # a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc.
- # So "$6 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "".
- return ($2, $3, $6 || $5);
- }
- return ();
+
+ if (!defined($profile_name) || $profile_name eq "") {
+ return ();
+ }
+
+ # Split profile URL - matches all non-empty strings, so no test.
+ $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,;
+
+ my $proto = $1 || "http://";
+ my $hostport = $2;
+ my $prefix = $3;
+ my $profile = $4 || "/";
+
+ my $host = $hostport;
+ $host =~ s/:.*//;
+
+ my $baseurl = "$proto$hostport$prefix";
+ return ($host, $baseurl, $profile);
}
# We fetch symbols from the first profile argument.
sub SymbolPageURL {
- my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]);
- return "http://$host:$port$SYMBOL_PAGE";
+ my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
+ return "$baseURL$SYMBOL_PAGE";
}
sub FetchProgramName() {
- my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]);
- my $url = "http://$host:$port$PROGRAM_NAME_PAGE";
- my $command_line = "$WGET $WGET_FLAGS -qO- '$url'";
+ my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
+ my $url = "$baseURL$PROGRAM_NAME_PAGE";
+ my $command_line = "$URL_FETCHER '$url'";
open(CMDLINE, "$command_line |") or error($command_line);
my $cmdline = <CMDLINE>;
$cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -2348,7 +2763,7 @@ sub FetchProgramName() {
# curl. Redirection happens on borg hosts.
sub ResolveRedirectionForCurl {
my $url = shift;
- my $command_line = "$CURL -s --head '$url'";
+ my $command_line = "$URL_FETCHER --head '$url'";
open(CMDLINE, "$command_line |") or error($command_line);
while (<CMDLINE>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
@@ -2360,6 +2775,20 @@ sub ResolveRedirectionForCurl {
return $url;
}
+# Add a timeout flat to URL_FETCHER
+sub AddFetchTimeout {
+ my $fetcher = shift;
+ my $timeout = shift;
+ if (defined($timeout)) {
+ if ($fetcher =~ m/\bcurl -s/) {
+ $fetcher .= sprintf(" --max-time %d", $timeout);
+ } elsif ($fetcher =~ m/\brpcget\b/) {
+ $fetcher .= sprintf(" --deadline=%d", $timeout);
+ }
+ }
+ return $fetcher;
+}
+
# Reads a symbol map from the file handle name given as $1, returning
# the resulting symbol map. Also processes variables relating to symbols.
# Currently, the only variable processed is 'binary=<value>' which updates
@@ -2404,7 +2833,6 @@ sub FetchSymbols {
my $pcset = shift;
my $symbol_map = shift;
-
my %seen = ();
my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq
@@ -2414,12 +2842,16 @@ sub FetchSymbols {
open(POSTFILE, ">$main::tmpfile_sym");
print POSTFILE $post_data;
close(POSTFILE);
-
+
my $url = SymbolPageURL();
- # Here we use curl for sending data via POST since old
- # wget doesn't have --post-file option.
- $url = ResolveRedirectionForCurl($url);
- my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'";
+
+ my $command_line;
+ if ($URL_FETCHER =~ m/\bcurl -s/) {
+ $url = ResolveRedirectionForCurl($url);
+ $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'";
+ } else {
+ $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'";
+ }
# We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
my $cppfilt = $obj_tool_map{"c++filt"};
open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
@@ -2464,10 +2896,10 @@ sub BaseName {
sub MakeProfileBaseName {
my ($binary_name, $profile_name) = @_;
- my ($host, $port, $path) = ParseProfileURL($profile_name);
+ my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
my $binary_shortname = BaseName($binary_name);
- return sprintf("%s.%s.%s-port%s",
- $binary_shortname, $main::op_time, $host, $port);
+ return sprintf("%s.%s.%s",
+ $binary_shortname, $main::op_time, $host);
}
sub FetchDynamicProfile {
@@ -2479,7 +2911,7 @@ sub FetchDynamicProfile {
if (!IsProfileURL($profile_name)) {
return $profile_name;
} else {
- my ($host, $port, $path) = ParseProfileURL($profile_name);
+ my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
if ($path eq "" || $path eq "/") {
# Missing type specifier defaults to cpu-profile
$path = $PROFILE_PAGE;
@@ -2487,37 +2919,28 @@ sub FetchDynamicProfile {
my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
- my $url;
- my $wget_timeout;
- if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) {
- if ($path =~ m/$PROFILE_PAGE/) {
- $url = sprintf("http://$host:$port$path?seconds=%d",
- $main::opt_seconds);
+ my $url = "$baseURL$path";
+ my $fetch_timeout = undef;
+ if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) {
+ if ($path =~ m/[?]/) {
+ $url .= "&";
} else {
- if ($profile_name =~ m/[?]/) {
- $profile_name .= "&"
- } else {
- $profile_name .= "?"
- }
- $url = sprintf("http://$profile_name" . "seconds=%d",
- $main::opt_seconds);
+ $url .= "?";
}
- $wget_timeout = sprintf("--timeout=%d",
- int($main::opt_seconds * 1.01 + 60));
+ $url .= sprintf("seconds=%d", $main::opt_seconds);
+ $fetch_timeout = $main::opt_seconds * 1.01 + 60;
} else {
# For non-CPU profiles, we add a type-extension to
# the target profile file name.
my $suffix = $path;
$suffix =~ s,/,.,g;
- $profile_file .= "$suffix";
- $url = "http://$host:$port$path";
- $wget_timeout = "";
+ $profile_file .= $suffix;
}
my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof");
- if (!(-d $profile_dir)) {
+ if (! -d $profile_dir) {
mkdir($profile_dir)
- || die("Unable to create profile directory $profile_dir: $!\n");
+ || die("Unable to create profile directory $profile_dir: $!\n");
}
my $tmp_profile = "$profile_dir/.tmp.$profile_file";
my $real_profile = "$profile_dir/$profile_file";
@@ -2526,14 +2949,15 @@ sub FetchDynamicProfile {
return $real_profile;
}
- my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'";
- if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){
+ my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
+ my $cmd = "$fetcher '$url' > '$tmp_profile'";
+ if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/){
print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n";
if ($encourage_patience) {
print STDERR "Be patient...\n";
}
} else {
- print STDERR "Fetching $path profile from $host:$port to\n ${real_profile}\n";
+ print STDERR "Fetching $path profile from $url to\n ${real_profile}\n";
}
(system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
@@ -2580,6 +3004,7 @@ sub FetchDynamicProfilesRecurse {
} else {
$position = 1 | ($position << 1);
TryCollectProfile($maxlevel, $level, $position);
+ cleanup();
exit(0);
}
}
@@ -2603,22 +3028,69 @@ sub TryCollectProfile {
# Provide a small streaming-read module to handle very large
# cpu-profile files. Stream in chunks along a sliding window.
+# Provides an interface to get one 'slot', correctly handling
+# endian-ness differences. A slot is one 32-bit or 64-bit word
+# (depending on the input profile). We tell endianness and bit-size
+# for the profile by looking at the first 8 bytes: in cpu profiles,
+# the second slot is always 3 (we'll accept anything that's not 0).
BEGIN {
package CpuProfileStream;
sub new {
- my ($class, $file) = @_;
- my $self = { file => $file,
- base => 0,
- stride => 512 * 1024, # must be a multiple of |long|
- slots => []
+ my ($class, $file, $fname) = @_;
+ my $self = { file => $file,
+ base => 0,
+ stride => 512 * 1024, # must be a multiple of bitsize/8
+ slots => [],
+ unpack_code => "", # N for big-endian, V for little
};
bless $self, $class;
# Let unittests adjust the stride
if ($main::opt_test_stride > 0) {
$self->{stride} = $main::opt_test_stride;
}
- $self->overflow();
+ # Read the first two slots to figure out bitsize and endianness.
+ my $slots = $self->{slots};
+ my $str;
+ read($self->{file}, $str, 8);
+ # Set the global $address_length based on what we see here.
+ # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars).
+ $address_length = ($str eq (chr(0)x8)) ? 16 : 8;
+ if ($address_length == 8) {
+ if (substr($str, 6, 2) eq chr(0)x2) {
+ $self->{unpack_code} = 'V'; # Little-endian.
+ } elsif (substr($str, 4, 2) eq chr(0)x2) {
+ $self->{unpack_code} = 'N'; # Big-endian
+ } else {
+ ::error("$fname: header size >= 2**16\n");
+ }
+ @$slots = unpack($self->{unpack_code} . "*", $str);
+ } else {
+ # If we're a 64-bit profile, make sure we're a 64-bit-capable
+ # perl. Otherwise, each slot will be represented as a float
+ # instead of an int64, losing precision and making all the
+ # 64-bit addresses wrong. We *could* try to handle this with
+ # software emulation of 64-bit ints, but that's added complexity
+ # for no clear benefit (yet). We use 'Q' to test for 64-bit-ness;
+ # perl docs say it's only available on 64-bit perl systems.
+ my $has_q = 0;
+ eval { $has_q = pack("Q", "1") ? 1 : 1; };
+ if (!$has_q) {
+ ::error("$fname: need a 64-bit perl to process this 64-bit profile.\n");
+ }
+ read($self->{file}, $str, 8);
+ if (substr($str, 4, 4) eq chr(0)x4) {
+ # We'd love to use 'Q', but it's a) not universal, b) not endian-proof.
+ $self->{unpack_code} = 'V'; # Little-endian.
+ } elsif (substr($str, 0, 4) eq chr(0)x4) {
+ $self->{unpack_code} = 'N'; # Big-endian
+ } else {
+ ::error("$fname: header size >= 2**32\n");
+ }
+ my @pair = unpack($self->{unpack_code} . "*", $str);
+ # Since we know one of the pair is 0, it's fine to just add them.
+ @$slots = (0, $pair[0] + $pair[1]);
+ }
return $self;
}
@@ -2629,7 +3101,25 @@ BEGIN {
$self->{base} += $#$slots + 1; # skip over data we're replacing
my $str;
read($self->{file}, $str, $self->{stride});
- @$slots = unpack("L*", $str);
+ if ($address_length == 8) { # the 32-bit case
+ # This is the easy case: unpack provides 32-bit unpacking primitives.
+ @$slots = unpack($self->{unpack_code} . "*", $str);
+ } else {
+ # We need to unpack 32 bits at a time and combine.
+ my @b32_values = unpack($self->{unpack_code} . "*", $str);
+ my @b64_values = ();
+ for (my $i = 0; $i < $#b32_values; $i += 2) {
+ # TODO(csilvers): if this is a 32-bit perl, the math below
+ # could end up in a too-large int, which perl will promote
+ # to a double, losing necessary precision. Deal with that.
+ if ($self->{unpack_code} eq 'V') { # little-endian
+ push(@b64_values, $b32_values[$i] + $b32_values[$i+1] * (2**32));
+ } else {
+ push(@b64_values, $b32_values[$i] * (2**32) + $b32_values[$i+1]);
+ }
+ }
+ @$slots = @b64_values;
+ }
}
# Access the i-th long in the file (logically), or -1 at EOF.
@@ -2638,16 +3128,16 @@ BEGIN {
my $slots = $self->{slots};
while ($#$slots >= 0) {
if ($idx < $self->{base}) {
- # The only time we expect a reference to $slots[$i - something]
- # after referencing $slots[$i] is reading the very first header.
- # Since $stride > |header|, that shouldn't cause any lookback
- # errors. And everything after the header is sequential.
- print STDERR "Unexpected look-back reading CPU profile";
- return -1; # shrug, don't know what better to return
+ # The only time we expect a reference to $slots[$i - something]
+ # after referencing $slots[$i] is reading the very first header.
+ # Since $stride > |header|, that shouldn't cause any lookback
+ # errors. And everything after the header is sequential.
+ print STDERR "Unexpected look-back reading CPU profile";
+ return -1; # shrug, don't know what better to return
} elsif ($idx > $self->{base} + $#$slots) {
- $self->overflow();
+ $self->overflow();
} else {
- return $slots->[$idx - $self->{base}];
+ return $slots->[$idx - $self->{base}];
}
}
# If we get here, $slots is [], which means we've reached EOF
@@ -2655,6 +3145,44 @@ BEGIN {
}
}
+# Return the next line from the profile file, assuming it's a text
+# line (which in this case means, doesn't start with a NUL byte).  If
+# it's not a text line, return "" and leave the file position where it
+# was.  At EOF (or if nothing can be read), return undef, like perl does.
+# Carriage returns are stripped from the returned line.
+# Input file should be in binmode.
+sub ReadProfileLine {
+  local *PROFILE = shift;
+  my $firstchar = "";
+  # Peek at the first byte to tell text from binary data.
+  my $nread = read(PROFILE, $firstchar, 1);
+  if (!$nread) {
+    # Nothing was consumed, so there is nothing to unread.  Seeking back
+    # here would step onto the file's last byte and hand it out again as
+    # a bogus extra line instead of reporting EOF.
+    return undef;
+  }
+  seek(PROFILE, -1, 1);    # unread the firstchar
+  if ($firstchar eq "\0") {
+    return "";
+  }
+  my $line = <PROFILE>;
+  if (defined($line)) {
+    $line =~ s/\r//g;    # turn windows-looking lines into unix-looking lines
+  }
+  return $line;
+}
+
+# Return true if $file_name is an existing, readable file whose first line
+# is a symbol-section marker: "---", optional spaces, then the last path
+# component of $SYMBOL_PAGE.  Returns 0 if the file is missing, unreadable,
+# cannot be opened, or does not start with a non-empty text line.
+sub IsSymbolizedProfileFile {
+  my $file_name = shift;
+  if (!(-e $file_name) || !(-r $file_name)) {
+    return 0;
+  }
+  # Check if the file contains a symbol-section marker.
+  # Three-argument open takes the name literally (no mode parsing, no
+  # whitespace stripping); a file we cannot open is simply "not symbolized".
+  open(TFILE, '<', $file_name) || return 0;
+  binmode TFILE;
+  my $firstline = ReadProfileLine(*TFILE);
+  close(TFILE);
+  if (!$firstline) {
+    return 0;
+  }
+  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
+  my $symbol_marker = $&;
+  return $firstline =~ /^--- *$symbol_marker/;
+}
+
# Parse profile generated by common/profiler.cc and return a reference
# to a map:
# $result->{version} Version number of profile file
@@ -2689,28 +3217,17 @@ sub ReadProfile {
# whole firstline, since it may be gigabytes(!) of data.
open(PROFILE, "<$fname") || error("$fname: $!\n");
binmode PROFILE; # New perls do UTF-8 processing
- my $firstchar = "";
- my $header = "";
- read(PROFILE, $firstchar, 1);
- seek(PROFILE, -1, 1); # unread the firstchar
- if ($firstchar ne "\0") {
- $header = <PROFILE>;
- $header =~ s/\r//g; # turn windows-looking lines into unix-looking lines
+ my $header = ReadProfileLine(*PROFILE);
+ if (!defined($header)) { # means "at EOF"
+ error("Profile is empty.\n");
}
my $symbols;
if ($header =~ m/^--- *$symbol_marker/o) {
- # read the symbol section of the symbolized profile file
+ # Read the symbol section of the symbolized profile file.
$symbols = ReadSymbols(*PROFILE{IO});
-
- # read the next line to get the header for the remaining profile
- $header = "";
- read(PROFILE, $firstchar, 1);
- seek(PROFILE, -1, 1); # unread the firstchar
- if ($firstchar ne "\0") {
- $header = <PROFILE>;
- $header =~ s/\r//g;
- }
+ # Read the next line to get the header for the remaining profile.
+ $header = ReadProfileLine(*PROFILE) || "";
}
my $result;
@@ -2752,6 +3269,33 @@ sub ReadProfile {
return $result;
}
+# Adjust the caller pcs of a stack trace so they map back to the call
+# instruction: every address after the first is passed through
+# AddressSub(..., "0x1"); the first address is kept unchanged.
+#
+# $stack is a whitespace-separated list of addresses.  The result is
+# re-joined using the first whitespace character found in $stack.  When
+# reading a symbolized profile ($main::use_symbolized_profile) the stack is
+# returned untouched, because the adjustment was already made when that
+# file was written.
+#
+# We apply the same logic to all readers, though ReadCPUProfile uses an
+# independent implementation.
+sub FixCallerAddresses {
+  my $stack = shift;
+  return $stack if $main::use_symbolized_profile;
+
+  $stack =~ /(\s)/;
+  my $delimiter = $1;
+  my @addrs = split(' ', $stack);
+  # Start from a copy so that slot 0 keeps its original value.
+  my @fixedaddrs = @addrs;
+  foreach my $idx (1 .. $#addrs) {
+    $fixedaddrs[$idx] = AddressSub($addrs[$idx], "0x1");
+  }
+  return join $delimiter, @fixedaddrs;
+}
+
# CPU profile reader
sub ReadCPUProfile {
my $prog = shift;
@@ -2763,10 +3307,7 @@ sub ReadCPUProfile {
my $pcs = {};
# Parse string into array of slots.
- # L! cannot be used because with a native 64-bit build, it will cause
- # 1) a valid 64-bit profile to use the 32-bit codepath, and
- # 2) a valid 32-bit profile to be unrecognized.
- my $slots = CpuProfileStream->new(*PROFILE);
+ my $slots = CpuProfileStream->new(*PROFILE, $fname);
# Read header. The current header version is a 5-element structure
# containing:
@@ -2775,108 +3316,50 @@ sub ReadCPUProfile {
# 2: format version (0)
# 3: sampling period (usec)
# 4: unused padding (always 0)
- # The header words are 32-bit or 64-bit depending on the ABI of the program
- # that generated the profile. In the 64-bit case, since our x86-architecture
- # machines are little-endian, the actual value of each of these elements is
- # in the first 32-bit word, and the second is always zero. The @slots array
- # above was read as a sequence of 32-bit words in both cases, so we need to
- # explicitly check for both cases. A typical slot sequence for each is:
- # 32-bit: 0 3 0 100 0
- # 64-bit: 0 0 3 0 0 0 100 0 0 0
- #
if ($slots->get(0) != 0 ) {
error("$fname: not a profile file, or old format profile file\n");
}
- if ($slots->get(1) >= 3) {
- # Normal 32-bit header:
- $version = $slots->get(2);
- $period = $slots->get(3);
- $i = 2 + $slots->get(1);
- $address_length = 8;
-
- # Parse profile
- while ($slots->get($i) != -1) {
- my $n = $slots->get($i++);
- my $d = $slots->get($i++);
- if ($slots->get($i) == 0) {
- # End of profile data marker
- $i += $d;
- last;
- }
-
- # Make key out of the stack entries
- my @k = ();
- for (my $j = 0; $j < $d; $j++) {
- my $pc = sprintf("%08x", $slots->get($i+$j));
- $pcs->{$pc} = 1;
- push @k, $pc;
- }
-
- AddEntry($profile, (join "\n", @k), $n);
+ $i = 2 + $slots->get(1);
+ $version = $slots->get(2);
+ $period = $slots->get(3);
+ # Do some sanity checking on these header values.
+ if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) {
+ error("$fname: not a profile file, or corrupted profile file\n");
+ }
+
+ # Parse profile
+ while ($slots->get($i) != -1) {
+ my $n = $slots->get($i++);
+ my $d = $slots->get($i++);
+ if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth?
+ # Report where the bad depth was found; the limit enforced here is 2**16.
+ my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8));
+ print STDERR "At index $i (address $addr):\n";
+ error("$fname: stack trace depth > 2**16\n");
+ }
+ if ($slots->get($i) == 0) {
+ # End of profile data marker
$i += $d;
+ last;
}
- # Normal 64-bit header: All entries are doubled in size. The first
- # word (little-endian) should contain the real value, the second should
- # be zero.
- } elsif ($slots->get(1) != 0 ||
- $slots->get(2) < 3 ||
- $slots->get(3) != 0 ||
- $slots->get(5) != 0 ||
- $slots->get(7) != 0) {
- error("$fname: not a profile file, or old format profile file\n");
- } else {
- $version = $slots->get(4);
- $period = $slots->get(6);
- $i = 4 + 2 * $slots->get(2);
- $address_length = 16;
-
- # Parse profile
- while ($slots->get($i) != -1) {
- my $n = $slots->get($i++);
- my $nhi = $slots->get($i++);
- # Huge counts may coerce to floating point, keeping scale, not precision
- if ($nhi != 0) { $n += $nhi*(2**32); }
- my $d = $slots->get($i++);
- if ($slots->get($i++) != 0) {
- my $addr = sprintf("%o", 4 * $i);
- print STDERR "At index $i ($addr):\n";
- error("$fname: stack trace depth >= 2**32\n");
+ # Make key out of the stack entries
+ my @k = ();
+ for (my $j = 0; $j < $d; $j++) {
+ my $pc = $slots->get($i+$j);
+ # Subtract one from caller pc so we map back to call instr.
+ # However, don't do this if we're reading a symbolized profile
+ # file, in which case the subtract-one was done when the file
+ # was written.
+ if ($j > 0 && !$main::use_symbolized_profile) {
+ $pc--;
}
- if ($slots->get($i) == 0 && $slots->get($i+1) == 0) {
- # End of profile data marker
- $i += 2 * $d;
- last;
- }
-
- # Make key out of the stack entries
- my @k = ();
- for (my $j = 0; $j < $d; $j++) {
- my $pclo = $slots->get($i++);
- my $pchi = $slots->get($i++);
- if ($pclo == -1 || $pchi == -1) {
- error("$fname: Unexpected EOF when reading stack of depth $d\n");
- }
-
- # Subtract one from caller pc so we map back to call instr.
- # However, don't do this if we're reading a symbolized profile
- # file, in which case the subtract-one was done when the file
- # was written.
- if ($j > 0 && !$main::use_symbolized_profile) {
- if ($pclo == 0) {
- $pchi--;
- $pclo = 0xffffffff;
- } else {
- $pclo--;
- }
- }
-
- my $pc = sprintf("%08x%08x", $pchi, $pclo);
- $pcs->{$pc} = 1;
- push @k, $pc;
- }
- AddEntry($profile, (join "\n", @k), $n);
+ $pc = sprintf("%0*x", $address_length, $pc);
+ $pcs->{$pc} = 1;
+ push @k, $pc;
}
+
+ AddEntry($profile, (join "\n", @k), $n);
+ $i += $d;
}
# Parse map
@@ -2947,18 +3430,18 @@ sub ReadHeapProfile {
# found for profiles generated locally, and the others for
# remote profiles.
if (($type eq "heapprofile") || ($type !~ /heap/) ) {
- # No need to adjust for the sampling rate with heap-profiler-derived data
- $sampling_algorithm = 0;
+ # No need to adjust for the sampling rate with heap-profiler-derived data
+ $sampling_algorithm = 0;
} elsif ($type =~ /_v2/) {
- $sampling_algorithm = 2; # version 2 sampling
+ $sampling_algorithm = 2; # version 2 sampling
if (defined($sample_period) && ($sample_period ne '')) {
- $sample_adjustment = int($sample_period);
- }
+ $sample_adjustment = int($sample_period);
+ }
} else {
- $sampling_algorithm = 1; # version 1 sampling
+ $sampling_algorithm = 1; # version 1 sampling
if (defined($sample_period) && ($sample_period ne '')) {
- $sample_adjustment = int($sample_period)/2;
- }
+ $sample_adjustment = int($sample_period)/2;
+ }
}
} else {
# We detect whether or not this is a remote-heap profile by checking
@@ -2970,7 +3453,7 @@ sub ReadHeapProfile {
my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
if (($n1 == $n2) && ($s1 == $s2)) {
# This is likely to be a remote-heap based sample profile
- $sampling_algorithm = 1;
+ $sampling_algorithm = 1;
}
}
}
@@ -2984,7 +3467,7 @@ sub ReadHeapProfile {
print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n";
} else {
printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n",
- $sample_adjustment);
+ $sample_adjustment);
}
if ($sampling_algorithm > 1) {
# We don't bother printing anything for the original version (version 1)
@@ -3001,7 +3484,7 @@ sub ReadHeapProfile {
if (/^MAPPED_LIBRARIES:/) {
# Read the /proc/self/maps data
while (<PROFILE>) {
- s/\r//g; # turn windows-looking lines into unix-looking lines
+ s/\r//g; # turn windows-looking lines into unix-looking lines
$map .= $_;
}
last;
@@ -3011,7 +3494,7 @@ sub ReadHeapProfile {
# Read /proc/self/maps data as formatted by DumpAddressMap()
my $buildvar = "";
while (<PROFILE>) {
- s/\r//g; # turn windows-looking lines into unix-looking lines
+ s/\r//g; # turn windows-looking lines into unix-looking lines
# Parse "build=<dir>" specification if supplied
if (m/^\s*build=(.*)\n/) {
$buildvar = $1;
@@ -3066,7 +3549,7 @@ sub ReadHeapProfile {
}
my @counts = ($n1, $s1, $n2, $s2);
- AddEntries($profile, $pcs, $stack, $counts[$index]);
+ AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
}
}
@@ -3086,7 +3569,7 @@ sub ReadSynchProfile {
my $profile = {};
my $pcs = {};
my $sampling_period = 1;
- my $cyclespernanosec = 2.8; # Default assumption for old binaries
+ my $cyclespernanosec = 2.8; # Default assumption for old binaries
my $seen_clockrate = 0;
my $line;
@@ -3112,7 +3595,7 @@ sub ReadSynchProfile {
$count *= $sampling_period;
my @values = ($cycles, $count, $cycles / $count);
- AddEntries($profile, $pcs, $stack, $values[$index]);
+ AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]);
} elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
$line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
@@ -3127,7 +3610,7 @@ sub ReadSynchProfile {
# Adjust for sampling done by application
$cycles *= $sampling_period;
- AddEntries($profile, $pcs, $stack, $cycles);
+ AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles);
} elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) {
my ($variable, $value) = ($1,$2);
@@ -3308,8 +3791,8 @@ sub ParseTextSectionHeaderFromOtool {
} elsif ($line =~ /segname (\w+)/) {
$segname = $1;
} elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") &&
- $sectname eq "__text" &&
- $segname eq "__TEXT")) {
+ $sectname eq "__text" &&
+ $segname eq "__TEXT")) {
next;
} elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) {
$vma = $1;
@@ -3369,7 +3852,7 @@ sub ParseLibraries {
my $finish;
my $offset;
my $lib;
- if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*)?)$/i) {
+ if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) {
# Full line from /proc/self/maps. Example:
# 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so
$start = HexExtend($1);
@@ -3675,7 +4158,7 @@ sub MapToSymbols {
if ($debug) { print("---- $image ---\n"); }
for (my $i = 0; $i <= $#{$pclist}; $i++) {
# addr2line always reads hex addresses, and does not need '0x' prefix.
- if ($debug) { printf("%s\n", $pclist->[$i]); }
+ if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); }
printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset));
if (defined($sep_address)) {
printf ADDRESSES ("%s\n", $sep_address);
@@ -3727,7 +4210,7 @@ sub MapToSymbols {
$symbols->{$pcstr} = $sym;
}
unshift(@{$sym}, $function, $filelinenum, $fullfunction);
- if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
+ if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
if (!defined($sep_address)) {
# Inlining is off, so this entry ends immediately
$count++;
@@ -3783,7 +4266,7 @@ sub MapSymbolsWithNM {
}
return 1;
}
-
+
sub ShortFunctionName {
my $function = shift;
while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types
@@ -3830,6 +4313,8 @@ sub ConfigureObjTools {
if ($file_type =~ /Mach-O/) {
# OS X uses otool to examine Mach-O files, rather than objdump.
$obj_tool_map{"otool"} = "otool";
+ $obj_tool_map{"addr2line"} = "false"; # no addr2line
+ $obj_tool_map{"objdump"} = "false"; # no objdump
}
# Go fill in %obj_tool_map with the pathnames to use:
@@ -3876,9 +4361,8 @@ sub ConfigureTool {
sub cleanup {
unlink($main::tmpfile_sym);
- for (my $i = 0; $i < $main::next_tmpfile; $i++) {
- unlink(PsTempName($i));
- }
+ unlink(keys %main::tempnames);
+
# We leave any collected profiles in $HOME/pprof in case the user wants
# to look at them later. We print a message informing them of this.
if ((scalar(@main::profile_files) > 0) &&
@@ -3921,7 +4405,7 @@ sub GetProcedureBoundariesViaNm {
my $routine = "";
while (<NM>) {
s/\r//g; # turn windows-looking lines into unix-looking lines
- if (m/^([0-9a-f]+) (.) (..*)/) {
+ if (m/^\s*([0-9a-f]+) (.) (..*)/) {
my $start_val = $1;
my $type = $2;
my $this_routine = $3;
@@ -3942,12 +4426,12 @@ sub GetProcedureBoundariesViaNm {
# we'll just go ahead and process the first entry (which never
# got touched in the queue), and ignore the others.
if ($start_val eq $last_start && $type =~ /t/i) {
- # We are the 'T' symbol at this address, replace previous symbol.
- $routine = $this_routine;
- next;
+ # We are the 'T' symbol at this address, replace previous symbol.
+ $routine = $this_routine;
+ next;
} elsif ($start_val eq $last_start) {
- # We're not the 'T' symbol at this address, so ignore us.
- next;
+ # We're not the 'T' symbol at this address, so ignore us.
+ next;
}
if ($this_routine eq $sep_symbol) {
@@ -3962,7 +4446,7 @@ sub GetProcedureBoundariesViaNm {
if (defined($routine) && $routine =~ m/$regexp/) {
$symbol_table->{$routine} = [HexExtend($last_start),
- HexExtend($start_val)];
+ HexExtend($start_val)];
}
$last_start = $start_val;
$routine = $this_routine;
@@ -3981,9 +4465,8 @@ sub GetProcedureBoundariesViaNm {
# TODO(csilvers): do better here.
if (defined($routine) && $routine =~ m/$regexp/) {
$symbol_table->{$routine} = [HexExtend($last_start),
- HexExtend($last_start)];
+ HexExtend($last_start)];
}
-
return $symbol_table;
}
@@ -4029,9 +4512,13 @@ sub GetProcedureBoundaries {
# -D to at least get *exported* symbols. If we can't use --demangle,
# we use c++filt instead, if it exists on this system.
my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" .
- " $image 2>/dev/null $cppfilt_flag",
- "$nm -D -n $flatten_flag $demangle_flag" .
- " $image 2>/dev/null $cppfilt_flag");
+ " $image 2>/dev/null $cppfilt_flag",
+ "$nm -D -n $flatten_flag $demangle_flag" .
+ " $image 2>/dev/null $cppfilt_flag",
+ # 6nm is for Go binaries
+ "6nm $image 2>/dev/null | sort",
+ );
+
# If the executable is an MS Windows PDB-format executable, we'll
# have set up obj_tool_map("nm_pdb"). In this case, we actually
# want to use both unix nm and windows-specific nm_pdb, since
@@ -4263,4 +4750,3 @@ sub RunUnitTests {
}
exit ($error_count);
}
-
diff --git a/third_party/tcmalloc/chromium/src/span.h b/third_party/tcmalloc/chromium/src/span.h
index ab9a796..b3483ca 100644
--- a/third_party/tcmalloc/chromium/src/span.h
+++ b/third_party/tcmalloc/chromium/src/span.h
@@ -60,6 +60,10 @@ struct Span {
int value[64];
#endif
+ void* start_ptr() {
+ return reinterpret_cast<void*>(start << kPageShift);  // start shifted left by kPageShift, reinterpreted as an address
+ }
+
// What freelist the span is on: IN_USE if on none, or normal or returned
enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST };
};
diff --git a/third_party/tcmalloc/chromium/src/stacktrace.cc b/third_party/tcmalloc/chromium/src/stacktrace.cc
index d158eea..68cb865 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace.cc
+++ b/third_party/tcmalloc/chromium/src/stacktrace.cc
@@ -57,7 +57,45 @@
#include "stacktrace_config.h"
#if defined(STACKTRACE_INL_HEADER)
-# include STACKTRACE_INL_HEADER
+
+#define IS_STACK_FRAMES 0  // 0: the generated function has no sizes parameter
+#define IS_WITH_CONTEXT 0  // 0: the generated function has no ucp parameter
+#define GET_STACK_TRACE_OR_FRAMES \
+ GetStackTrace(void **result, int max_depth, int skip_count)
+#include STACKTRACE_INL_HEADER  // first inclusion: defines GetStackTrace()
+#undef IS_STACK_FRAMES
+#undef IS_WITH_CONTEXT
+#undef GET_STACK_TRACE_OR_FRAMES
+
+#define IS_STACK_FRAMES 1  // 1: the generated function also takes int *sizes
+#define IS_WITH_CONTEXT 0
+#define GET_STACK_TRACE_OR_FRAMES \
+ GetStackFrames(void **result, int *sizes, int max_depth, int skip_count)
+#include STACKTRACE_INL_HEADER  // second inclusion: defines GetStackFrames()
+#undef IS_STACK_FRAMES
+#undef IS_WITH_CONTEXT
+#undef GET_STACK_TRACE_OR_FRAMES
+
+#define IS_STACK_FRAMES 0
+#define IS_WITH_CONTEXT 1  // 1: the generated function also takes const void *ucp
+#define GET_STACK_TRACE_OR_FRAMES \
+ GetStackTraceWithContext(void **result, int max_depth, \
+ int skip_count, const void *ucp)
+#include STACKTRACE_INL_HEADER  // third inclusion: defines GetStackTraceWithContext()
+#undef IS_STACK_FRAMES
+#undef IS_WITH_CONTEXT
+#undef GET_STACK_TRACE_OR_FRAMES
+
+#define IS_STACK_FRAMES 1
+#define IS_WITH_CONTEXT 1
+#define GET_STACK_TRACE_OR_FRAMES \
+ GetStackFramesWithContext(void **result, int *sizes, int max_depth, \
+ int skip_count, const void *ucp)
+#include STACKTRACE_INL_HEADER  // fourth inclusion: defines GetStackFramesWithContext()
+#undef IS_STACK_FRAMES
+#undef IS_WITH_CONTEXT
+#undef GET_STACK_TRACE_OR_FRAMES
+
#elif 0
// This is for the benefit of code analysis tools that may have
// trouble with the computed #include above.
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_config.h b/third_party/tcmalloc/chromium/src/stacktrace_config.h
index b58ab1d..18f16ab 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_config.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_config.h
@@ -53,6 +53,7 @@
# define STACKTRACE_SKIP_CONTEXT_ROUTINES 1
# elif defined(HAVE_LIBUNWIND_H) // a proxy for having libunwind installed
# define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h"
+# define STACKTRACE_USES_LIBUNWIND 1
# elif defined(__linux)
# error Cannnot calculate stack trace: need either libunwind or frame-pointers (see INSTALL file)
# else
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h
index 490cd9d..0e72ee7 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h
@@ -34,57 +34,32 @@
//
// Note: The glibc implementation may cause a call to malloc.
// This can cause a deadlock in HeapProfiler.
+
+#ifndef BASE_STACKTRACE_GENERIC_INL_H_
+#define BASE_STACKTRACE_GENERIC_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
#include <execinfo.h>
#include <string.h>
#include "google/stacktrace.h"
+#endif // BASE_STACKTRACE_GENERIC_INL_H_
-// If you change this function, also change GetStackFrames below.
-int GetStackTrace(void** result, int max_depth, int skip_count) {
- static const int kStackLength = 64;
- void * stack[kStackLength];
- int size;
-
- size = backtrace(stack, kStackLength);
- skip_count++; // we want to skip the current frame as well
- int result_count = size - skip_count;
- if (result_count < 0)
- result_count = 0;
- if (result_count > max_depth)
- result_count = max_depth;
- for (int i = 0; i < result_count; i++)
- result[i] = stack[i + skip_count];
-
- return result_count;
-}
+// Note: this part of the file is included several times.
+// Do not put globals below.
-// If you change this function, also change GetStackTrace above:
-//
-// This GetStackFrames routine shares a lot of code with GetStackTrace
-// above. This code could have been refactored into a common routine,
-// and then both GetStackTrace/GetStackFrames could call that routine.
-// There are two problems with that:
-//
-// (1) The performance of the refactored-code suffers substantially - the
-// refactored needs to be able to record the stack trace when called
-// from GetStackTrace, and both the stack trace and stack frame sizes,
-// when called from GetStackFrames - this introduces enough new
-// conditionals that GetStackTrace performance can degrade by as much
-// as 50%.
+// The following 4 functions are generated from the code below:
+// GetStack{Trace,Frames}()
+// GetStack{Trace,Frames}WithContext()
//
-// (2) Whether the refactored routine gets inlined into GetStackTrace and
-// GetStackFrames depends on the compiler, and we can't guarantee the
-// behavior either-way, even with "__attribute__ ((always_inline))"
-// or "__attribute__ ((noinline))". But we need this guarantee or the
-// frame counts may be off by one.
-//
-// Both (1) and (2) can be addressed without this code duplication, by
-// clever use of template functions, and by defining GetStackTrace and
-// GetStackFrames as macros that expand to these template functions.
-// However, this approach comes with its own set of problems - namely,
-// macros and preprocessor trouble - for example, if GetStackTrace
-// and/or GetStackFrames is ever defined as a member functions in some
-// class, we are in trouble.
-int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+// (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+int GET_STACK_TRACE_OR_FRAMES {
static const int kStackLength = 64;
void * stack[kStackLength];
int size;
@@ -97,10 +72,12 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
if (result_count > max_depth)
result_count = max_depth;
for (int i = 0; i < result_count; i++)
- pcs[i] = stack[i + skip_count];
+ result[i] = stack[i + skip_count];
+#if IS_STACK_FRAMES
// No implementation for finding out the stack frame sizes yet.
memset(sizes, 0, sizeof(*sizes) * result_count);
+#endif
return result_count;
}
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h
index d9d829a..a1d5249 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h
@@ -32,6 +32,11 @@
//
// Produce stack trace using libunwind
+#ifndef BASE_STACKTRACE_LIBUNWIND_INL_H_
+#define BASE_STACKTRACE_LIBUNWIND_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
// We only need local unwinder.
#define UNW_LOCAL_ONLY
@@ -52,73 +57,30 @@ extern "C" {
// cases, we return 0 to indicate the situation.
static __thread int recursive;
-// If you change this function, also change GetStackFrames below.
-int GetStackTrace(void** result, int max_depth, int skip_count) {
- void *ip;
- int n = 0;
- unw_cursor_t cursor;
- unw_context_t uc;
+#endif // BASE_STACKTRACE_LIBUNWIND_INL_H_
- if (recursive) {
- return 0;
- }
- ++recursive;
-
- unw_getcontext(&uc);
- int ret = unw_init_local(&cursor, &uc);
- assert(ret >= 0);
- skip_count++; // Do not include the "GetStackTrace" frame
-
- while (n < max_depth) {
- if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
- break;
- }
- if (skip_count > 0) {
- skip_count--;
- } else {
- result[n++] = ip;
- }
- if (unw_step(&cursor) <= 0) {
- break;
- }
- }
- --recursive;
- return n;
-}
+// Note: this part of the file is included several times.
+// Do not put globals below.
-// If you change this function, also change GetStackTrace above:
-//
-// This GetStackFrames routine shares a lot of code with GetStackTrace
-// above. This code could have been refactored into a common routine,
-// and then both GetStackTrace/GetStackFrames could call that routine.
-// There are two problems with that:
+// The following 4 functions are generated from the code below:
+// GetStack{Trace,Frames}()
+// GetStack{Trace,Frames}WithContext()
//
-// (1) The performance of the refactored-code suffers substantially - the
-// refactored needs to be able to record the stack trace when called
-// from GetStackTrace, and both the stack trace and stack frame sizes,
-// when called from GetStackFrames - this introduces enough new
-// conditionals that GetStackTrace performance can degrade by as much
-// as 50%.
-//
-// (2) Whether the refactored routine gets inlined into GetStackTrace and
-// GetStackFrames depends on the compiler, and we can't guarantee the
-// behavior either-way, even with "__attribute__ ((always_inline))"
-// or "__attribute__ ((noinline))". But we need this guarantee or the
-// frame counts may be off by one.
-//
-// Both (1) and (2) can be addressed without this code duplication, by
-// clever use of template functions, and by defining GetStackTrace and
-// GetStackFrames as macros that expand to these template functions.
-// However, this approach comes with its own set of problems - namely,
-// macros and preprocessor trouble - for example, if GetStackTrace
-// and/or GetStackFrames is ever defined as a member functions in some
-// class, we are in trouble.
-int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+// (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+int GET_STACK_TRACE_OR_FRAMES {
void *ip;
int n = 0;
unw_cursor_t cursor;
unw_context_t uc;
+#if IS_STACK_FRAMES
unw_word_t sp = 0, next_sp = 0;
+#endif
if (recursive) {
return 0;
@@ -126,31 +88,41 @@ int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
++recursive;
unw_getcontext(&uc);
- RAW_CHECK(unw_init_local(&cursor, &uc) >= 0, "unw_init_local failed");
- skip_count++; // Do not include the "GetStackFrames" frame
+ int ret = unw_init_local(&cursor, &uc);
+ assert(ret >= 0);
+ skip_count++; // Do not include current frame
while (skip_count--) {
- if (unw_step(&cursor) <= 0 ||
- unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) {
+ if (unw_step(&cursor) <= 0) {
goto out;
}
+#if IS_STACK_FRAMES
+ if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) {
+ goto out;
+ }
+#endif
}
+
while (n < max_depth) {
- sp = next_sp;
- if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0)
+ if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) {
break;
- if (unw_step(&cursor) <= 0 ||
- unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) {
- // We couldn't step any further (possibly because we reached _start).
- // Provide the last good PC we've got, and get out.
- sizes[n] = 0;
- pcs[n++] = ip;
+ }
+#if IS_STACK_FRAMES
+ sizes[n] = 0;
+#endif
+ result[n++] = ip;
+ if (unw_step(&cursor) <= 0) {
+ break;
+ }
+#if IS_STACK_FRAMES
+ sp = next_sp;
+ // unw_get_reg() returns a negative value on failure. Stop there;
+ // sizes[n - 1] then keeps the 0 stored before the step.
+ if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) {
break;
}
- sizes[n] = next_sp - sp;
- pcs[n++] = ip;
+ sizes[n - 1] = next_sp - sp;
+#endif
}
- out:
+out:
--recursive;
return n;
}
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h
index 5631e49..9a07eea 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h
@@ -36,6 +36,11 @@
// http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK
// Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882
+#ifndef BASE_STACKTRACE_POWERPC_INL_H_
+#define BASE_STACKTRACE_POWERPC_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
#include <stdint.h> // for uintptr_t
#include <stdlib.h> // for NULL
#include <google/stacktrace.h>
@@ -71,9 +76,23 @@ static void **NextStackFrame(void **old_sp) {
// This ensures that GetStackTrace sets up the Link Register properly.
void StacktracePowerPCDummyFunction() __attribute__((noinline));
void StacktracePowerPCDummyFunction() { __asm__ volatile(""); }
+#endif // BASE_STACKTRACE_POWERPC_INL_H_
+
+// Note: this part of the file is included several times.
+// Do not put globals below.
-// If you change this function, also change GetStackFrames below.
-int GetStackTrace(void** result, int max_depth, int skip_count) {
+// The following 4 functions are generated from the code below:
+// GetStack{Trace,Frames}()
+// GetStack{Trace,Frames}WithContext()
+//
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+// (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+int GET_STACK_TRACE_OR_FRAMES {
void **sp;
// Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther)
// and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a
@@ -95,11 +114,29 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
// This routine forces the compiler (at least gcc) to push it anyway.
StacktracePowerPCDummyFunction();
+#if IS_STACK_FRAMES
+ // Note we do *not* increment skip_count here for the SYSV ABI. If
+ // we did, the list of stack frames wouldn't properly match up with
+ // the list of return addresses. Note this means the top pc entry
+ // is probably bogus for linux/ppc (and other SYSV-ABI systems).
+#else
// The LR save area is used by the callee, so the top entry is bogus.
skip_count++;
+#endif
int n = 0;
while (sp && n < max_depth) {
+#if IS_STACK_FRAMES
+ // The GetStackFrames routine is called when we are in some
+ // informational context (the failure signal handler for example).
+ // Use the non-strict unwinding rules to produce a stack trace
+ // that is as complete as possible (even if it contains a few bogus
+ // entries in some rare cases).
+ void **next_sp = NextStackFrame<false>(sp);
+#else
+ void **next_sp = NextStackFrame<true>(sp);
+#endif
+
if (skip_count > 0) {
skip_count--;
} else {
@@ -120,85 +157,15 @@ int GetStackTrace(void** result, int max_depth, int skip_count) {
#else
#error Need to specify the PPC ABI for your archiecture.
#endif
- }
- // Use strict unwinding rules.
- sp = NextStackFrame<true>(sp);
- }
- return n;
-}
-
-// If you change this function, also change GetStackTrace above:
-//
-// This GetStackFrames routine shares a lot of code with GetStackTrace
-// above. This code could have been refactored into a common routine,
-// and then both GetStackTrace/GetStackFrames could call that routine.
-// There are two problems with that:
-//
-// (1) The performance of the refactored-code suffers substantially - the
-// refactored needs to be able to record the stack trace when called
-// from GetStackTrace, and both the stack trace and stack frame sizes,
-// when called from GetStackFrames - this introduces enough new
-// conditionals that GetStackTrace performance can degrade by as much
-// as 50%.
-//
-// (2) Whether the refactored routine gets inlined into GetStackTrace and
-// GetStackFrames depends on the compiler, and we can't guarantee the
-// behavior either-way, even with "__attribute__ ((always_inline))"
-// or "__attribute__ ((noinline))". But we need this guarantee or the
-// frame counts may be off by one.
-//
-// Both (1) and (2) can be addressed without this code duplication, by
-// clever use of template functions, and by defining GetStackTrace and
-// GetStackFrames as macros that expand to these template functions.
-// However, this approach comes with its own set of problems - namely,
-// macros and preprocessor trouble - for example, if GetStackTrace
-// and/or GetStackFrames is ever defined as a member functions in some
-// class, we are in trouble.
-int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) {
- void **sp;
-#ifdef __APPLE__
- __asm__ volatile ("mr %0,r1" : "=r" (sp));
-#else
- __asm__ volatile ("mr %0,1" : "=r" (sp));
-#endif
- StacktracePowerPCDummyFunction();
- // Note we do *not* increment skip_count here for the SYSV ABI. If
- // we did, the list of stack frames wouldn't properly match up with
- // the list of return addresses. Note this means the top pc entry
- // is probably bogus for linux/ppc (and other SYSV-ABI systems).
-
- int n = 0;
- while (sp && n < max_depth) {
- // The GetStackFrames routine is called when we are in some
- // informational context (the failure signal handler for example).
- // Use the non-strict unwinding rules to produce a stack trace
- // that is as complete as possible (even if it contains a few bogus
- // entries in some rare cases).
- void **next_sp = NextStackFrame<false>(sp);
- if (skip_count > 0) {
- skip_count--;
- } else {
-#if defined(_CALL_AIX) || defined(_CALL_DARWIN)
- pcs[n++] = *(sp+2);
-#elif defined(_CALL_SYSV)
- pcs[n++] = *(sp+1);
-#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
- // This check is in case the compiler doesn't define _CALL_AIX/etc.
- pcs[n++] = *(sp+2);
-#elif defined(__linux)
- // This check is in case the compiler doesn't define _CALL_SYSV.
- pcs[n++] = *(sp+1);
-#else
-#error Need to specify the PPC ABI for your archiecture.
-#endif
+#if IS_STACK_FRAMES
+ // NOTE(review): the pc store just above (outside this hunk) appears to be
+ // result[n++], so n has already advanced and this frame's size goes in
+ // slot n - 1 -- confirm against the full file.
if (next_sp > sp) {
- sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
+ sizes[n - 1] = (uintptr_t)next_sp - (uintptr_t)sp;
} else {
// A frame-size of 0 is used to indicate unknown frame size.
- sizes[n] = 0;
+ sizes[n - 1] = 0;
}
- n++;
+#endif
}
sp = next_sp;
}
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
index 892cd7c..bbd4c43 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
@@ -49,6 +49,11 @@
// This code is inspired by a patch from David Vitek:
// http://code.google.com/p/google-perftools/issues/detail?id=83
+#ifndef BASE_STACKTRACE_WIN32_INL_H_
+#define BASE_STACKTRACE_WIN32_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
#include "config.h"
#include <windows.h> // for GetProcAddress and GetModuleHandle
#include <assert.h>
@@ -82,3 +87,5 @@ PERFTOOLS_DLL_DECL int GetStackFrames(void** /* pcs */,
assert(0 == "Not yet implemented");
return 0;
}
+
+#endif // BASE_STACKTRACE_WIN32_INL_H_
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
index 05701e7..6753fdb 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
@@ -31,17 +31,13 @@
// Author: Sanjay Ghemawat
//
// Produce stack trace
-//
-// NOTE: there is code duplication between
-// GetStackTrace, GetStackTraceWithContext, GetStackFrames and
-// GetStackFramesWithContext. If you update one, update them all.
-//
-// There is no easy way to avoid this, because inlining
-// interferes with skip_count, and there is no portable
-// way to turn inlining off, or force it always on.
-#include "config.h"
+#ifndef BASE_STACKTRACE_X86_INL_H_
+#define BASE_STACKTRACE_X86_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+#include "config.h"
#include <stdlib.h> // for NULL
#include <assert.h>
#if defined(HAVE_SYS_UCONTEXT_H)
@@ -190,8 +186,8 @@ static void **NextStackFrame(void **old_sp, const void *uc) {
const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
// This kernel does not use frame pointer in its VDSO code,
// and so %ebp is not suitable for unwinding.
- const void **const reg_ebp =
- reinterpret_cast<const void **>(ucv->uc_mcontext.gregs[REG_EBP]);
+ void **const reg_ebp =
+ reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]);
const unsigned char *const reg_eip =
reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]);
if (new_sp == reg_ebp &&
@@ -269,209 +265,24 @@ static void **NextStackFrame(void **old_sp, const void *uc) {
return new_sp;
}
-// If you change this function, see NOTE at the top of file.
-// Same as above, but with signal ucontext_t pointer.
-int GetStackTraceWithContext(void** result,
- int max_depth,
- int skip_count,
- const void *uc) {
- void **sp;
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
- // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
- // It's always correct on llvm, and the techniques below aren't (in
- // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
- // so we also prefer __builtin_frame_address when running under llvm.
- sp = reinterpret_cast<void**>(__builtin_frame_address(0));
-#elif defined(__i386__)
- // Stack frame format:
- // sp[0] pointer to previous frame
- // sp[1] caller address
- // sp[2] first argument
- // ...
- // NOTE: This will break under llvm, since result is a copy and not in sp[2]
- sp = (void **)&result - 2;
-#elif defined(__x86_64__)
- unsigned long rbp;
- // Move the value of the register %rbp into the local variable rbp.
- // We need 'volatile' to prevent this instruction from getting moved
- // around during optimization to before function prologue is done.
- // An alternative way to achieve this
- // would be (before this __asm__ instruction) to call Noop() defined as
- // static void Noop() __attribute__ ((noinline)); // prevent inlining
- // static void Noop() { asm(""); } // prevent optimizing-away
- __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
- // Arguments are passed in registers on x86-64, so we can't just
- // offset from &result
- sp = (void **) rbp;
-#else
-# error Using stacktrace_x86-inl.h on a non x86 architecture!
-#endif
-
- int n = 0;
- while (sp && n < max_depth) {
- if (*(sp+1) == reinterpret_cast<void *>(0)) {
- // In 64-bit code, we often see a frame that
- // points to itself and has a return address of 0.
- break;
- }
- if (skip_count > 0) {
- skip_count--;
- } else {
- result[n++] = *(sp+1);
- }
- // Use strict unwinding rules.
- sp = NextStackFrame<true, true>(sp, uc);
- }
- return n;
-}
-
-int GetStackTrace(void** result, int max_depth, int skip_count) {
- void **sp;
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
- // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
- // It's always correct on llvm, and the techniques below aren't (in
- // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
- // so we also prefer __builtin_frame_address when running under llvm.
- sp = reinterpret_cast<void**>(__builtin_frame_address(0));
-#elif defined(__i386__)
- // Stack frame format:
- // sp[0] pointer to previous frame
- // sp[1] caller address
- // sp[2] first argument
- // ...
- // NOTE: This will break under llvm, since result is a copy and not in sp[2]
- sp = (void **)&result - 2;
-#elif defined(__x86_64__)
- unsigned long rbp;
- // Move the value of the register %rbp into the local variable rbp.
- // We need 'volatile' to prevent this instruction from getting moved
- // around during optimization to before function prologue is done.
- // An alternative way to achieve this
- // would be (before this __asm__ instruction) to call Noop() defined as
- // static void Noop() __attribute__ ((noinline)); // prevent inlining
- // static void Noop() { asm(""); } // prevent optimizing-away
- __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
- // Arguments are passed in registers on x86-64, so we can't just
- // offset from &result
- sp = (void **) rbp;
-#else
-# error Using stacktrace_x86-inl.h on a non x86 architecture!
-#endif
+#endif // BASE_STACKTRACE_X86_INL_H_
- int n = 0;
- while (sp && n < max_depth) {
- if (*(sp+1) == reinterpret_cast<void *>(0)) {
- // In 64-bit code, we often see a frame that
- // points to itself and has a return address of 0.
- break;
- }
- if (skip_count > 0) {
- skip_count--;
- } else {
- result[n++] = *(sp+1);
- }
- // Use strict unwinding rules.
- sp = NextStackFrame<true, false>(sp, NULL);
- }
- return n;
-}
+// Note: this part of the file is included several times.
+// Do not put globals below.
-// If you change this function, see NOTE at the top of file.
-//
-// This GetStackFrames routine shares a lot of code with GetStackTrace
-// above. This code could have been refactored into a common routine,
-// and then both GetStackTrace/GetStackFrames could call that routine.
-// There are two problems with that:
+// The following 4 functions are generated from the code below:
+// GetStack{Trace,Frames}()
+// GetStack{Trace,Frames}WithContext()
//
-// (1) The performance of the refactored-code suffers substantially - the
-// refactored needs to be able to record the stack trace when called
-// from GetStackTrace, and both the stack trace and stack frame sizes,
-// when called from GetStackFrames - this introduces enough new
-// conditionals that GetStackTrace performance can degrade by as much
-// as 50%.
-//
-// (2) Whether the refactored routine gets inlined into GetStackTrace and
-// GetStackFrames depends on the compiler, and we can't guarantee the
-// behavior either-way, even with "__attribute__ ((always_inline))"
-// or "__attribute__ ((noinline))". But we need this guarantee or the
-// frame counts may be off by one.
-//
-// Both (1) and (2) can be addressed without this code duplication, by
-// clever use of template functions, and by defining GetStackTrace and
-// GetStackFrames as macros that expand to these template functions.
-// However, this approach comes with its own set of problems - namely,
-// macros and preprocessor trouble - for example, if GetStackTrace
-// and/or GetStackFrames is ever defined as a member functions in some
-// class, we are in trouble.
-int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
- void **sp;
-#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
- // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
- // It's always correct on llvm, and the techniques below aren't (in
- // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
- // so we also prefer __builtin_frame_address when running under llvm.
- sp = reinterpret_cast<void**>(__builtin_frame_address(0));
-#elif defined(__i386__)
- // Stack frame format:
- // sp[0] pointer to previous frame
- // sp[1] caller address
- // sp[2] first argument
- // ...
- sp = (void **)&pcs - 2;
-#elif defined(__x86_64__)
- unsigned long rbp;
- // Move the value of the register %rbp into the local variable rbp.
- // We need 'volatile' to prevent this instruction from getting moved
- // around during optimization to before function prologue is done.
- // An alternative way to achieve this
- // would be (before this __asm__ instruction) to call Noop() defined as
- // static void Noop() __attribute__ ((noinline)); // prevent inlining
- // static void Noop() { asm(""); } // prevent optimizing-away
- __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
- // Arguments are passed in registers on x86-64, so we can't just
- // offset from &result
- sp = (void **) rbp;
-#else
-# error Using stacktrace_x86-inl.h on a non x86 architecture!
-#endif
-
- int n = 0;
- while (sp && n < max_depth) {
- if (*(sp+1) == reinterpret_cast<void *>(0)) {
- // In 64-bit code, we often see a frame that
- // points to itself and has a return address of 0.
- break;
- }
- // The GetStackFrames routine is called when we are in some
- // informational context (the failure signal handler for example).
- // Use the non-strict unwinding rules to produce a stack trace
- // that is as complete as possible (even if it contains a few bogus
- // entries in some rare cases).
- void **next_sp = NextStackFrame<false, false>(sp, NULL);
- if (skip_count > 0) {
- skip_count--;
- } else {
- pcs[n] = *(sp+1);
- if (next_sp > sp) {
- sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
- } else {
- // A frame-size of 0 is used to indicate unknown frame size.
- sizes[n] = 0;
- }
- n++;
- }
- sp = next_sp;
- }
- return n;
-}
+// These functions take the following args:
+// void** result: the stack-trace, as an array
+// int* sizes: the size of each stack frame, as an array
+// (GetStackFrames* only)
+// int max_depth: the size of the result (and sizes) array(s)
+// int skip_count: how many stack pointers to skip before storing in result
+// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
-// If you change this function, see NOTE at the top of file.
-// Same as above, but with signal ucontext_t pointer.
-int GetStackFramesWithContext(void** pcs,
- int* sizes,
- int max_depth,
- int skip_count,
- const void *uc) {
+int GET_STACK_TRACE_OR_FRAMES {
void **sp;
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
// __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
@@ -511,22 +322,22 @@ int GetStackFramesWithContext(void** pcs,
// points to itself and has a return address of 0.
break;
}
- // The GetStackFrames routine is called when we are in some
- // informational context (the failure signal handler for example).
- // Use the non-strict unwinding rules to produce a stack trace
- // that is as complete as possible (even if it contains a few bogus
- // entries in some rare cases).
- void **next_sp = NextStackFrame<false, true>(sp, uc);
+#if !IS_WITH_CONTEXT
+ const void *const ucp = NULL;
+#endif
+ void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
if (skip_count > 0) {
skip_count--;
} else {
- pcs[n] = *(sp+1);
+ result[n] = *(sp+1);
+#if IS_STACK_FRAMES
if (next_sp > sp) {
sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
} else {
// A frame-size of 0 is used to indicate unknown frame size.
sizes[n] = 0;
}
+#endif
n++;
}
sp = next_sp;
diff --git a/third_party/tcmalloc/chromium/src/symbolize.cc b/third_party/tcmalloc/chromium/src/symbolize.cc
index 9dd890e..ff45e3e 100644
--- a/third_party/tcmalloc/chromium/src/symbolize.cc
+++ b/third_party/tcmalloc/chromium/src/symbolize.cc
@@ -87,16 +87,40 @@ int SymbolTable::Symbolize() {
#else
// All this work is to do two-way communication. ugh.
extern char* program_invocation_name; // gcc provides this
- int child_in[2]; // file descriptors
- int child_out[2]; // for now, we don't worry about child_err
- if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_in) == -1) {
- return 0;
- }
- if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_out) == -1) {
- close(child_in[0]);
- close(child_in[1]);
- return 0;
+ int *child_in = NULL; // file descriptors
+ int *child_out = NULL; // for now, we don't worry about child_err
+ int child_fds[5][2]; // socketpair may be called up to five times below
+
+ // The client program may close its stdin and/or stdout and/or stderr
+ // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
+ // In this case the communication between the forked processes may be broken
+ // if either the parent or the child tries to close or duplicate these
+ // descriptors. The loop below produces two pairs of file descriptors, each
+ // greater than 2 (stderr).
+ for (int i = 0; i < 5; i++) {
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
+ for (int j = 0; j < i; j++) {
+ close(child_fds[j][0]);
+ close(child_fds[j][1]);
+ }
+ return 0; // fail only after closing every earlier pair (also when i == 0)
+ } else {
+ if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
+ if (child_in == NULL) {
+ child_in = child_fds[i];
+ } else {
+ child_out = child_fds[i];
+ for (int j = 0; j < i; j++) {
+ if (child_fds[j] == child_in) continue;
+ close(child_fds[j][0]);
+ close(child_fds[j][1]);
+ }
+ break;
+ }
+ }
+ }
+ }
+
switch (fork()) {
case -1: { // error
close(child_in[0]);
diff --git a/third_party/tcmalloc/chromium/src/system-alloc.cc b/third_party/tcmalloc/chromium/src/system-alloc.cc
index 21d9b43..29bed80 100644
--- a/third_party/tcmalloc/chromium/src/system-alloc.cc
+++ b/third_party/tcmalloc/chromium/src/system-alloc.cc
@@ -78,7 +78,7 @@ union MemoryAligner {
void* p;
double d;
size_t s;
-};
+} CACHELINE_ALIGNED;
static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
@@ -150,6 +150,10 @@ bool RegisterSystemAllocator(SysAllocator *a, int priority) {
void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
size_t alignment) {
+#ifndef HAVE_SBRK
+ failed_ = true;
+ return NULL;
+#else
// Check if we should use sbrk allocation.
// FLAGS_malloc_skip_sbrk starts out as false (its uninitialized
// state) and eventually gets initialized to the specified value. Note
@@ -164,16 +168,16 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
// a strict check here
if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL;
- // could theoretically return the "extra" bytes here, but this
- // is simple and correct.
- if (actual_size) {
- *actual_size = size;
- }
-
// This doesn't overflow because TCMalloc_SystemAlloc has already
// tested for overflow at the alignment boundary.
size = ((size + alignment - 1) / alignment) * alignment;
+ // "actual_size" indicates that the bytes from the returned pointer
+ // p up to and including (p + actual_size - 1) have been allocated.
+ if (actual_size) {
+ *actual_size = size;
+ }
+
// Check that we we're not asking for so much more memory that we'd
// wrap around the end of the virtual address space. (This seems
// like something sbrk() should check for us, and indeed opensolaris
@@ -216,6 +220,7 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
ptr += alignment - (ptr & (alignment-1));
}
return reinterpret_cast<void*>(ptr);
+#endif // HAVE_SBRK
}
void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) {
@@ -238,12 +243,6 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
return NULL;
}
- // could theoretically return the "extra" bytes here, but this
- // is simple and correct.
- if (actual_size) {
- *actual_size = size;
- }
-
// Enforce page alignment
if (pagesize == 0) pagesize = getpagesize();
if (alignment < pagesize) alignment = pagesize;
@@ -253,6 +252,12 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
}
size = aligned_size;
+ // "actual_size" indicates that the bytes from the returned pointer
+ // p up to and including (p + actual_size - 1) have been allocated.
+ if (actual_size) {
+ *actual_size = size;
+ }
+
// Ask for extra memory if alignment > pagesize
size_t extra = 0;
if (alignment > pagesize) {
@@ -328,12 +333,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
initialized = true;
}
- // could theoretically return the "extra" bytes here, but this
- // is simple and correct.
- if (actual_size) {
- *actual_size = size;
- }
-
// Enforce page alignment
if (pagesize == 0) pagesize = getpagesize();
if (alignment < pagesize) alignment = pagesize;
@@ -343,6 +342,12 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
}
size = aligned_size;
+ // "actual_size" indicates that the bytes from the returned pointer
+ // p up to and including (p + actual_size - 1) have been allocated.
+ if (actual_size) {
+ *actual_size = size;
+ }
+
// Ask for extra memory if alignment > pagesize
size_t extra = 0;
if (alignment > pagesize) {
diff --git a/third_party/tcmalloc/chromium/src/system-alloc.h b/third_party/tcmalloc/chromium/src/system-alloc.h
index 60affed..8d982ef 100644
--- a/third_party/tcmalloc/chromium/src/system-alloc.h
+++ b/third_party/tcmalloc/chromium/src/system-alloc.h
@@ -48,7 +48,11 @@
// may optionally return more bytes than asked for (i.e. return an
// entire "huge" page if a huge page allocator is in use).
//
-// The returned pointer is a multiple of "alignment" if non-zero.
+// The returned pointer is a multiple of "alignment" if non-zero. The
+// returned pointer will always be aligned suitably for holding a
+// void*, double, or size_t. In addition, if this platform defines
+// CACHELINE_ALIGNED, the return pointer will always be cacheline
+// aligned.
//
// Returns NULL when out of memory.
extern void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes,
diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
index 6acead8..79825ce 100644
--- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
+++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
@@ -228,8 +228,9 @@ extern "C" {
ATTRIBUTE_SECTION(google_malloc);
void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW
ATTRIBUTE_SECTION(google_malloc);
- // Surprisingly, compilers use a nothrow-delete internally. See, eg:
- // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
+ // Surprisingly, standard C++ library implementations use a
+ // nothrow-delete internally. See, eg:
+ // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW
ATTRIBUTE_SECTION(google_malloc);
void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW
@@ -253,9 +254,9 @@ extern "C" {
// NOTE: we make many of these symbols weak, but do so in the makefile
// (via objcopy -W) and not here. That ends up being more portable.
# define ALIAS(x) __attribute__ ((alias (x)))
-void* operator new(size_t size) ALIAS("tc_new");
+void* operator new(size_t size) throw (std::bad_alloc) ALIAS("tc_new");
void operator delete(void* p) __THROW ALIAS("tc_delete");
-void* operator new[](size_t size) ALIAS("tc_newarray");
+void* operator new[](size_t size) throw (std::bad_alloc) ALIAS("tc_newarray");
void operator delete[](void* p) __THROW ALIAS("tc_deletearray");
void* operator new(size_t size, const std::nothrow_t&) __THROW
ALIAS("tc_new_nothrow");
@@ -264,7 +265,7 @@ void* operator new[](size_t size, const std::nothrow_t&) __THROW
void operator delete(void* size, const std::nothrow_t&) __THROW
ALIAS("tc_delete_nothrow");
void operator delete[](void* size, const std::nothrow_t&) __THROW
- ALIAS("tc_deletearray_nothrow");
+ ALIAS("tc_deletearray_nothrow");
extern "C" {
void* malloc(size_t size) __THROW ALIAS("tc_malloc");
void free(void* ptr) __THROW ALIAS("tc_free");
@@ -804,7 +805,17 @@ TCMallocGuard::TCMallocGuard() {
tc_free(tc_malloc(1));
ThreadCache::InitTSD();
tc_free(tc_malloc(1));
- MallocExtension::Register(new TCMallocImplementation);
+ // Either we, or debugallocation.cc, or valgrind will control memory
+ // management. We register our extension if we're the winner.
+#ifdef TCMALLOC_FOR_DEBUGALLOCATION
+ // Let debugallocation register its extension.
+#else
+ if (RunningOnValgrind()) {
+ // Let Valgrind use its own malloc (so don't register our extension).
+ } else {
+ MallocExtension::Register(new TCMallocImplementation);
+ }
+#endif
}
}
@@ -826,7 +837,28 @@ static TCMallocGuard module_enter_exit_hook;
// Helpers for the exported routines below
//-------------------------------------------------------------------
-static Span* DoSampledAllocation(size_t size) {
+static inline void* CheckedMallocResult(void *result) {
+ Span* fetched_span;
+ size_t cl;
+
+ if (result != NULL) {
+ ASSERT(Static::pageheap()->GetSizeClassOrSpan(result, &cl, &fetched_span));
+ }
+
+ return result;
+}
+
+static inline void* SpanToMallocResult(Span *span) {
+ Span* fetched_span = NULL;
+ size_t cl = 0;
+ ASSERT(Static::pageheap()->GetSizeClassOrSpan(span->start_ptr(),
+ &cl, &fetched_span));
+ ASSERT(cl == kLargeSizeClass);
+ ASSERT(span == fetched_span);
+ return span->start_ptr();
+}
+
+static void* DoSampledAllocation(size_t size) {
// Grab the stack trace outside the heap lock
StackTrace tmp;
tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1);
@@ -834,7 +866,8 @@ static Span* DoSampledAllocation(size_t size) {
SpinLockHolder h(Static::pageheap_lock());
// Allocate span
- Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
+ Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size),
+ kLargeSizeClass, kPageSize);
if (span == NULL) {
return NULL;
}
@@ -851,26 +884,7 @@ static Span* DoSampledAllocation(size_t size) {
span->objects = stack;
tcmalloc::DLL_Prepend(Static::sampled_objects(), span);
- return span;
-}
-
-static inline bool CheckCachedSizeClass(void *ptr) {
- PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
- size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
- return cached_value == 0 ||
- cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
-}
-
-static inline void* CheckedMallocResult(void *result)
-{
- ASSERT(result == 0 || CheckCachedSizeClass(result));
- return result;
-}
-
-static inline void* SpanToMallocResult(Span *span) {
- Static::pageheap()->CacheSizeClass(span->start, 0);
- return
- CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
+ return SpanToMallocResult(span);
}
// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with
@@ -916,24 +930,39 @@ inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) {
return tc_new_mode ? cpp_memalign(align, size) : do_memalign(align, size);
}
+// Must be called with the page lock held.
+inline bool should_report_large(Length num_pages) {
+ const int64 threshold = large_alloc_threshold;
+ if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
+ // Increase the threshold by 1/8 every time we generate a report.
+ // We cap the threshold at 8GB to avoid overflow problems.
+ large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
+ ? threshold + threshold/8 : 8ll<<30);
+ return true;
+ }
+ return false;
+}
+
// Helper for do_malloc().
-inline void* do_malloc_pages(Length num_pages) {
- Span *span;
- bool report_large = false;
- {
+inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
+ void* result;
+ bool report_large;
+
+ Length num_pages = tcmalloc::pages(size);
+ size = num_pages << kPageShift;
+
+ if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
+ result = DoSampledAllocation(size);
+
SpinLockHolder h(Static::pageheap_lock());
- span = Static::pageheap()->New(num_pages);
- const int64 threshold = large_alloc_threshold;
- if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
- // Increase the threshold by 1/8 every time we generate a report.
- // We cap the threshold at 8GB to avoid overflow problems.
- large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
- ? threshold + threshold/8 : 8ll<<30);
- report_large = true;
- }
+ report_large = should_report_large(num_pages);
+ } else {
+ SpinLockHolder h(Static::pageheap_lock());
+ Span* span = Static::pageheap()->New(num_pages, kLargeSizeClass, kPageSize);
+ result = (span == NULL ? NULL : SpanToMallocResult(span));
+ report_large = should_report_large(num_pages);
}
- void* result = (span == NULL ? NULL : SpanToMallocResult(span));
if (report_large) {
ReportLargeAlloc(num_pages, result);
}
@@ -945,17 +974,19 @@ inline void* do_malloc(size_t size) {
// The following call forces module initialization
ThreadCache* heap = ThreadCache::GetCache();
- if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
- Span* span = DoSampledAllocation(size);
- if (span != NULL) {
- ret = SpanToMallocResult(span);
+ if (size <= kMaxSize) {
+ size_t cl = Static::sizemap()->SizeClass(size);
+ size = Static::sizemap()->class_to_size(cl);
+
+ if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
+ ret = DoSampledAllocation(size);
+ } else {
+ // The common case, and also the simplest. This just pops the
+ // size-appropriate freelist, after replenishing it if it's empty.
+ ret = CheckedMallocResult(heap->Allocate(size, cl));
}
- } else if (size <= kMaxSize) {
- // The common case, and also the simplest. This just pops the
- // size-appropriate freelist, after replenishing it if it's empty.
- ret = CheckedMallocResult(heap->Allocate(size));
} else {
- ret = do_malloc_pages(tcmalloc::pages(size));
+ ret = do_malloc_pages(heap, size);
}
if (ret == NULL) errno = ENOMEM;
return ret;
@@ -983,28 +1014,22 @@ static inline ThreadCache* GetCacheIfPresent() {
inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
if (ptr == NULL) return;
ASSERT(Static::pageheap() != NULL); // Should not call free() before malloc()
- const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
- Span* span = NULL;
- size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
-
- if (cl == 0) {
- span = Static::pageheap()->GetDescriptor(p);
- if (!span) {
- // span can be NULL because the pointer passed in is invalid
- // (not something returned by malloc or friends), or because the
- // pointer was allocated with some other allocator besides
- // tcmalloc. The latter can happen if tcmalloc is linked in via
- // a dynamic library, but is not listed last on the link line.
- // In that case, libraries after it on the link line will
- // allocate with libc malloc, but free with tcmalloc's free.
- (*invalid_free_fn)(ptr); // Decide how to handle the bad free request
- return;
- }
- cl = span->sizeclass;
- Static::pageheap()->CacheSizeClass(p, cl);
+ Span* span;
+ size_t cl;
+
+ if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
+ // result can be false because the pointer passed in is invalid
+ // (not something returned by malloc or friends), or because the
+ // pointer was allocated with some other allocator besides
+ // tcmalloc. The latter can happen if tcmalloc is linked in via
+ // a dynamic library, but is not listed last on the link line.
+ // In that case, libraries after it on the link line will
+ // allocate with libc malloc, but free with tcmalloc's free.
+ (*invalid_free_fn)(ptr); // Decide how to handle the bad free request
+ return;
}
- if (cl != 0) {
- ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
+
+ if (cl != kLargeSizeClass) {
ThreadCache* heap = GetCacheIfPresent();
if (heap != NULL) {
heap->Deallocate(ptr, cl);
@@ -1015,8 +1040,7 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
}
} else {
SpinLockHolder h(Static::pageheap_lock());
- ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
- ASSERT(span != NULL && span->start == p);
+ ASSERT(span != NULL && ptr == span->start_ptr());
if (span->sample) {
tcmalloc::DLL_Remove(span);
Static::stacktrace_allocator()->Delete(
@@ -1036,20 +1060,17 @@ inline size_t GetSizeWithCallback(void* ptr,
size_t (*invalid_getsize_fn)(void*)) {
if (ptr == NULL)
return 0;
- const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
- size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
- if (cl != 0) {
+
+ Span* span;
+ size_t cl;
+ if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
+ return (*invalid_getsize_fn)(ptr);
+ }
+
+ if (cl != kLargeSizeClass) {
return Static::sizemap()->ByteSizeForClass(cl);
} else {
- Span *span = Static::pageheap()->GetDescriptor(p);
- if (span == NULL) { // means we do not own this memory
- return (*invalid_getsize_fn)(ptr);
- } else if (span->sizeclass != 0) {
- Static::pageheap()->CacheSizeClass(p, span->sizeclass);
- return Static::sizemap()->ByteSizeForClass(span->sizeclass);
- } else {
- return span->length << kPageShift;
- }
+ return span->length << kPageShift;
}
}
@@ -1136,47 +1157,18 @@ void* do_memalign(size_t align, size_t size) {
}
if (cl < kNumClasses) {
ThreadCache* heap = ThreadCache::GetCache();
- return CheckedMallocResult(heap->Allocate(
- Static::sizemap()->class_to_size(cl)));
+ size = Static::sizemap()->class_to_size(cl);
+ return CheckedMallocResult(heap->Allocate(size, cl));
}
}
// We will allocate directly from the page heap
SpinLockHolder h(Static::pageheap_lock());
- if (align <= kPageSize) {
- // Any page-level allocation will be fine
- // TODO: We could put the rest of this page in the appropriate
- // TODO: cache but it does not seem worth it.
- Span* span = Static::pageheap()->New(tcmalloc::pages(size));
- return span == NULL ? NULL : SpanToMallocResult(span);
- }
-
- // Allocate extra pages and carve off an aligned portion
- const Length alloc = tcmalloc::pages(size + align);
- Span* span = Static::pageheap()->New(alloc);
- if (span == NULL) return NULL;
-
- // Skip starting portion so that we end up aligned
- Length skip = 0;
- while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
- skip++;
- }
- ASSERT(skip < alloc);
- if (skip > 0) {
- Span* rest = Static::pageheap()->Split(span, skip);
- Static::pageheap()->Delete(span);
- span = rest;
- }
-
- // Skip trailing portion that we do not need to return
- const Length needed = tcmalloc::pages(size);
- ASSERT(span->length >= needed);
- if (span->length > needed) {
- Span* trailer = Static::pageheap()->Split(span, needed);
- Static::pageheap()->Delete(trailer);
- }
- return SpanToMallocResult(span);
+ // Any page-level allocation will be fine
+ Span* span = Static::pageheap()->New(tcmalloc::pages(size),
+ kLargeSizeClass, align);
+ return span == NULL ? NULL : SpanToMallocResult(span);
}
// Helpers for use by exported routines below:
@@ -1392,8 +1384,7 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
return p;
}
-extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(
- size_t size, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW {
void* p = cpp_alloc(size, true);
MallocHook::InvokeNewHook(p, size);
return p;
@@ -1404,10 +1395,10 @@ extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW {
do_free(p);
}
-// Compilers define and use this (via ::operator delete(ptr, nothrow)).
+// Standard C++ library implementations define and use this
+// (via ::operator delete(ptr, nothrow)).
// But it's really the same as normal delete, so we just do the same thing.
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(
- void* p, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW {
MallocHook::InvokeDeleteHook(p);
do_free(p);
}
@@ -1423,8 +1414,8 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) {
return p;
}
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(
- size_t size, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
+ __THROW {
void* p = cpp_alloc(size, true);
MallocHook::InvokeNewHook(p, size);
return p;
@@ -1435,8 +1426,7 @@ extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW {
do_free(p);
}
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(
- void* p, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW {
MallocHook::InvokeDeleteHook(p);
do_free(p);
}
diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
index ca00e36..c482187 100644
--- a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
+++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
@@ -75,7 +75,14 @@ static int test_counter = 0; // incremented every time the macro is called
// This flag won't be compiled in in opt mode.
DECLARE_int32(max_free_queue_size);
+// Test match as well as mismatch rules:
TEST(DebugAllocationTest, DeallocMismatch) {
+ // malloc can be matched only by free
+ // new can be matched only by delete and delete(nothrow)
+ // new[] can be matched only by delete[] and delete[](nothrow)
+ // new(nothrow) can be matched only by delete and delete(nothrow)
+ // new(nothrow)[] can be matched only by delete[] and delete[](nothrow)
+
// Allocate with malloc.
{
int* x = static_cast<int*>(malloc(sizeof(*x)));
@@ -88,17 +95,41 @@ TEST(DebugAllocationTest, DeallocMismatch) {
// Allocate with new.
{
int* x = new int;
+ int* y = new int;
IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]");
delete x;
+ ::operator delete(y, std::nothrow);
}
// Allocate with new[].
{
int* x = new int[1];
+ int* y = new int[1];
+ IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
+ IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete");
+ delete [] x;
+ ::operator delete[](y, std::nothrow);
+ }
+
+ // Allocate with new(nothrow).
+ {
+ int* x = new(std::nothrow) int;
+ int* y = new(std::nothrow) int;
+ IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
+ IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]");
+ delete x;
+ ::operator delete(y, std::nothrow);
+ }
+
+ // Allocate with new(nothrow)[].
+ {
+ int* x = new(std::nothrow) int[1];
+ int* y = new(std::nothrow) int[1];
IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete");
delete [] x;
+ ::operator delete[](y, std::nothrow);
}
}
diff --git a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
index 9f0c08c..4a83fc2 100644
--- a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
+++ b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
@@ -139,13 +139,13 @@ EARLY_MSG="Starting tracking the heap$"
Test 60 0 "$EARLY_MSG" "" \
HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
- PERFTOOLS_VERBOSE=1 || exit 5
+ PERFTOOLS_VERBOSE=10 || exit 5
Test 60 0 "MemoryRegionMap Init$" "" \
HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
- PERFTOOLS_VERBOSE=2 || exit 6
+ PERFTOOLS_VERBOSE=11 || exit 6
Test 60 0 "" "$EARLY_MSG" \
HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
- PERFTOOLS_VERBOSE=-2 || exit 7
+ PERFTOOLS_VERBOSE=-11 || exit 7
# These invocations should fail with very high probability,
# rather than return 0 or hang (1 == exit(1), 134 == abort(), 139 = SIGSEGV):
@@ -162,10 +162,10 @@ Test 60 1 "MakeALeak" "" \
# Test that very early log messages are present and controllable:
Test 60 1 "Starting tracking the heap$" "" \
- HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=1 \
+ HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=10 \
|| exit 11
Test 60 1 "" "Starting tracking the heap" \
- HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-1 \
+ HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-10 \
|| exit 12
cd / # so we're not in TMPDIR when we delete it
diff --git a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
index 9120b78..fd444da 100644
--- a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
+++ b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
@@ -26,7 +26,7 @@ static void TestPageHeap_Stats() {
CheckStats(ph, 0, 0, 0);
// Allocate a span 's1'
- tcmalloc::Span* s1 = ph->New(256);
+ tcmalloc::Span* s1 = ph->New(256, kLargeSizeClass, kPageSize);
CheckStats(ph, 256, 0, 0);
// Split span 's1' into 's1', 's2'. Delete 's2'
diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
index 1908b03..19371b7 100644
--- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
+++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
@@ -56,12 +56,11 @@ static void test_other_thread() {
int i, m;
char b[128];
+ MutexLock ml(&mutex);
for (m = 0; m < 1000000; ++m) { // run millions of times
for (i = 0; i < g_iters; ++i ) {
- MutexLock ml(&mutex);
result ^= i;
}
- MutexLock ml(&mutex);
snprintf(b, sizeof(b), "%d", result); // get some libc action
}
#endif
@@ -70,12 +69,11 @@ static void test_other_thread() {
static void test_main_thread() {
int i, m;
char b[128];
+ MutexLock ml(&mutex);
for (m = 0; m < 1000000; ++m) { // run millions of times
for (i = 0; i < g_iters; ++i ) {
- MutexLock ml(&mutex);
result ^= i;
}
- MutexLock ml(&mutex);
snprintf(b, sizeof(b), "%d", result); // get some libc action
}
}
diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
index 5766f2e..4668fa7 100644
--- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
+++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
@@ -206,28 +206,27 @@ CPUPROFILE="$TMPDIR/p5" "$PROFILER2" 50 || RegisterFailure
CPUPROFILE="$TMPDIR/p6" "$PROFILER2" 100 || RegisterFailure
VerifySimilar p5 "$PROFILER2_REALNAME" p6 "$PROFILER2_REALNAME" 2
-# When we compile with threads, things take a lot longer even when we only use 1
-CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 10 || RegisterFailure
-CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 20 || RegisterFailure
+CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 30 || RegisterFailure
+CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 60 || RegisterFailure
VerifySimilar p5b "$PROFILER3_REALNAME" p5c "$PROFILER3_REALNAME" 2
# Now try what happens when we use threads
-"$PROFILER3" 5 2 "$TMPDIR/p7" || RegisterFailure
-"$PROFILER3" 10 2 "$TMPDIR/p8" || RegisterFailure
+"$PROFILER3" 30 2 "$TMPDIR/p7" || RegisterFailure
+"$PROFILER3" 60 2 "$TMPDIR/p8" || RegisterFailure
VerifySimilar p7 "$PROFILER3_REALNAME" p8 "$PROFILER3_REALNAME" 2
-"$PROFILER4" 5 2 "$TMPDIR/p9" || RegisterFailure
-"$PROFILER4" 10 2 "$TMPDIR/p10" || RegisterFailure
+"$PROFILER4" 30 2 "$TMPDIR/p9" || RegisterFailure
+"$PROFILER4" 60 2 "$TMPDIR/p10" || RegisterFailure
VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2
# More threads!
-"$PROFILER4" 2 3 "$TMPDIR/p9" || RegisterFailure
-"$PROFILER4" 4 3 "$TMPDIR/p10" || RegisterFailure
+"$PROFILER4" 25 3 "$TMPDIR/p9" || RegisterFailure
+"$PROFILER4" 50 3 "$TMPDIR/p10" || RegisterFailure
VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2
# Compare how much time the main thread takes compared to the other threads
# Recall the main thread runs twice as long as the other threads, by design.
-"$PROFILER4" 2 4 "$TMPDIR/p11" || RegisterFailure
+"$PROFILER4" 20 4 "$TMPDIR/p11" || RegisterFailure
VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2
# Test symbol save and restore
@@ -236,14 +235,14 @@ VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2
>"$TMPDIR/p13" 2>/dev/null || RegisterFailure
VerifyIdentical p12 "$PROFILER1_REALNAME" p13 "" || RegisterFailure
-"$PROFILER3" 5 2 "$TMPDIR/p14" || RegisterFailure
+"$PROFILER3" 30 2 "$TMPDIR/p14" || RegisterFailure
"$PPROF" $PPROF_FLAGS "$PROFILER3_REALNAME" "$TMPDIR/p14" --raw \
>"$TMPDIR/p15" 2>/dev/null || RegisterFailure
VerifyIdentical p14 "$PROFILER3_REALNAME" p15 "" || RegisterFailure
# Test using ITIMER_REAL instead of ITIMER_PROF.
-env CPUPROFILE_REALTIME=1 "$PROFILER3" 5 2 "$TMPDIR/p16" || RegisterFailure
-env CPUPROFILE_REALTIME=1 "$PROFILER3" 10 2 "$TMPDIR/p17" || RegisterFailure
+env CPUPROFILE_REALTIME=1 "$PROFILER3" 30 2 "$TMPDIR/p16" || RegisterFailure
+env CPUPROFILE_REALTIME=1 "$PROFILER3" 60 2 "$TMPDIR/p17" || RegisterFailure
VerifySimilar p16 "$PROFILER3_REALNAME" p17 "$PROFILER3_REALNAME" 2
diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc
index 25bfd6a..6b2ec26 100644
--- a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc
+++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc
@@ -977,7 +977,7 @@ static int RunAllTests(int argc, char** argv) {
}
// This code stresses some of the memory allocation via STL.
- // In particular, it calls operator delete(void*, nothrow_t).
+ // It may call operator delete(void*, nothrow_t).
fprintf(LOGSTREAM, "Testing STL use\n");
{
std::vector<int> v;
diff --git a/third_party/tcmalloc/chromium/src/third_party/valgrind.h b/third_party/tcmalloc/chromium/src/third_party/valgrind.h
new file mode 100644
index 0000000..577c59a
--- /dev/null
+++ b/third_party/tcmalloc/chromium/src/third_party/valgrind.h
@@ -0,0 +1,3924 @@
+/* -*- c -*-
+ ----------------------------------------------------------------
+
+ Notice that the following BSD-style license applies to this one
+ file (valgrind.h) only. The rest of Valgrind is licensed under the
+ terms of the GNU General Public License, version 2, unless
+ otherwise indicated. See the COPYING file in the source
+ distribution for details.
+
+ ----------------------------------------------------------------
+
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2000-2008 Julian Seward. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. The origin of this software must not be misrepresented; you must
+ not claim that you wrote the original software. If you use this
+ software in a product, an acknowledgment in the product
+ documentation would be appreciated but is not required.
+
+ 3. Altered source versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+
+ 4. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ----------------------------------------------------------------
+
+ Notice that the above BSD-style license applies to this one file
+ (valgrind.h) only. The entire rest of Valgrind is licensed under
+ the terms of the GNU General Public License, version 2. See the
+ COPYING file in the source distribution for details.
+
+ ----------------------------------------------------------------
+*/
+
+
+/* This file is for inclusion into client (your!) code.
+
+ You can use these macros to manipulate and query Valgrind's
+ execution inside your own programs.
+
+ The resulting executables will still run without Valgrind, just a
+ little bit more slowly than they otherwise would, but otherwise
+ unchanged. When not running on valgrind, each client request
+ consumes very few (eg. 7) instructions, so the resulting performance
+ loss is negligible unless you plan to execute client requests
+ millions of times per second. Nevertheless, if that is still a
+ problem, you can compile with the NVALGRIND symbol defined (gcc
+ -DNVALGRIND) so that client requests are not even compiled in. */
+
+#ifndef __VALGRIND_H
+#define __VALGRIND_H
+
+#include <stdarg.h>
+
+/* Nb: this file might be included in a file compiled with -ansi. So
+ we can't use C++ style "//" comments nor the "asm" keyword (instead
+ use "__asm__"). */
+
+/* Derive some tags indicating what the target platform is. Note
+ that in this file we're using the compiler's CPP symbols for
+ identifying architectures, which are different to the ones we use
+ within the rest of Valgrind. Note, __powerpc__ is active for both
+ 32 and 64-bit PPC, whereas __powerpc64__ is only active for the
+ latter (on Linux, that is). */
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64_linux
+#undef PLAT_ppc32_aix5
+#undef PLAT_ppc64_aix5
+
+#if !defined(_AIX) && defined(__i386__)
+# define PLAT_x86_linux 1
+#elif !defined(_AIX) && defined(__x86_64__)
+# define PLAT_amd64_linux 1
+#elif !defined(_AIX) && defined(__powerpc__) && !defined(__powerpc64__)
+# define PLAT_ppc32_linux 1
+#elif !defined(_AIX) && defined(__powerpc__) && defined(__powerpc64__)
+# define PLAT_ppc64_linux 1
+#elif defined(_AIX) && defined(__64BIT__)
+# define PLAT_ppc64_aix5 1
+#elif defined(_AIX) && !defined(__64BIT__)
+# define PLAT_ppc32_aix5 1
+#endif
+
+
+/* If we're not compiling for our target platform, don't generate
+ any inline asms. */
+#if !defined(PLAT_x86_linux) && !defined(PLAT_amd64_linux) \
+ && !defined(PLAT_ppc32_linux) && !defined(PLAT_ppc64_linux) \
+ && !defined(PLAT_ppc32_aix5) && !defined(PLAT_ppc64_aix5)
+# if !defined(NVALGRIND)
+# define NVALGRIND 1
+# endif
+#endif
+
+
+/* ------------------------------------------------------------------ */
+/* ARCHITECTURE SPECIFICS for SPECIAL INSTRUCTIONS. There is nothing */
+/* in here of use to end-users -- skip to the next section. */
+/* ------------------------------------------------------------------ */
+
+#if defined(NVALGRIND)
+
+/* Define NVALGRIND to completely remove the Valgrind magic sequence
+ from the compiled code (analogous to NDEBUG's effects on
+ assert()) */
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ { \
+ (_zzq_rlval) = (_zzq_default); \
+ }
+
+#else /* ! NVALGRIND */
+
+/* The following defines the magic code sequences which the JITter
+ spots and handles magically. Don't look too closely at them as
+ they will rot your brain.
+
+ The assembly code sequences for all architectures are in this one
+ file. This is because this file must be stand-alone, and we don't
+ want to have multiple files.
+
+ For VALGRIND_DO_CLIENT_REQUEST, we must ensure that the default
+ value gets put in the return slot, so that everything works when
+ this is executed not under Valgrind. Args are passed in a memory
+ block, and so there's no intrinsic limit to the number that could
+ be passed, but it's currently five.
+
+ The macro args are:
+ _zzq_rlval result lvalue
+ _zzq_default default value (result returned when running on real CPU)
+ _zzq_request request code
+ _zzq_arg1..5 request params
+
+ The other two macros are used to support function wrapping, and are
+ a lot simpler. VALGRIND_GET_NR_CONTEXT returns the value of the
+ guest's NRADDR pseudo-register and whatever other information is
+ needed to safely call the original function from the wrapper: on
+ ppc64-linux, the R2 value at the divert point is also needed. This
+ information is abstracted into a user-visible type, OrigFn.
+
+ VALGRIND_CALL_NOREDIR_* behaves the same as the following on the
+ guest, but guarantees that the branch instruction will not be
+ redirected: x86: call *%eax, amd64: call *%rax, ppc32/ppc64:
+ branch-and-link-to-r11. VALGRIND_CALL_NOREDIR is just text, not a
+ complete inline asm, since it needs to be combined with more magic
+ inline asm stuff to be useful.
+*/
+
+/* ------------------------- x86-linux ------------------------- */
+
+#if defined(PLAT_x86_linux)
+
+typedef
+ struct {
+ unsigned int nraddr; /* where's the code? */
+ }
+ OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "roll $3, %%edi ; roll $13, %%edi\n\t" \
+ "roll $29, %%edi ; roll $19, %%edi\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ { volatile unsigned int _zzq_args[6]; \
+ volatile unsigned int _zzq_result; \
+ _zzq_args[0] = (unsigned int)(_zzq_request); \
+ _zzq_args[1] = (unsigned int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned int)(_zzq_arg5); \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %EDX = client_request ( %EAX ) */ \
+ "xchgl %%ebx,%%ebx" \
+ : "=d" (_zzq_result) \
+ : "a" (&_zzq_args[0]), "0" (_zzq_default) \
+ : "cc", "memory" \
+ ); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ volatile unsigned int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %EAX = guest_NRADDR */ \
+ "xchgl %%ecx,%%ecx" \
+ : "=a" (__addr) \
+ : \
+ : "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ }
+
+#define VALGRIND_CALL_NOREDIR_EAX \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* call-noredir *%EAX */ \
+ "xchgl %%edx,%%edx\n\t"
+#endif /* PLAT_x86_linux */
+
+/* ------------------------ amd64-linux ------------------------ */
+
+#if defined(PLAT_amd64_linux)
+
+typedef
+ struct {
+ unsigned long long int nraddr; /* where's the code? */
+ }
+ OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rolq $3, %%rdi ; rolq $13, %%rdi\n\t" \
+ "rolq $61, %%rdi ; rolq $51, %%rdi\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ { volatile unsigned long long int _zzq_args[6]; \
+ volatile unsigned long long int _zzq_result; \
+ _zzq_args[0] = (unsigned long long int)(_zzq_request); \
+ _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %RDX = client_request ( %RAX ) */ \
+ "xchgq %%rbx,%%rbx" \
+ : "=d" (_zzq_result) \
+ : "a" (&_zzq_args[0]), "0" (_zzq_default) \
+ : "cc", "memory" \
+ ); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ volatile unsigned long long int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %RAX = guest_NRADDR */ \
+ "xchgq %%rcx,%%rcx" \
+ : "=a" (__addr) \
+ : \
+ : "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ }
+
+#define VALGRIND_CALL_NOREDIR_RAX \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* call-noredir *%RAX */ \
+ "xchgq %%rdx,%%rdx\n\t"
+#endif /* PLAT_amd64_linux */
+
+/* ------------------------ ppc32-linux ------------------------ */
+
+#if defined(PLAT_ppc32_linux)
+
+typedef
+ struct {
+ unsigned int nraddr; /* where's the code? */
+ }
+ OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \
+ "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned int _zzq_args[6]; \
+ unsigned int _zzq_result; \
+ unsigned int* _zzq_ptr; \
+ _zzq_args[0] = (unsigned int)(_zzq_request); \
+ _zzq_args[1] = (unsigned int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned int)(_zzq_arg5); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile("mr 3,%1\n\t" /*default*/ \
+ "mr 4,%2\n\t" /*ptr*/ \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1\n\t" \
+ "mr %0,3" /*result*/ \
+ : "=b" (_zzq_result) \
+ : "b" (_zzq_default), "b" (_zzq_ptr) \
+ : "cc", "memory", "r3", "r4"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ unsigned int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "cc", "memory", "r3" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+#endif /* PLAT_ppc32_linux */
+
+/* ------------------------ ppc64-linux ------------------------ */
+
+#if defined(PLAT_ppc64_linux)
+
+typedef
+ struct {
+ unsigned long long int nraddr; /* where's the code? */
+ unsigned long long int r2; /* what tocptr do we need? */
+ }
+ OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \
+ "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned long long int _zzq_args[6]; \
+ register unsigned long long int _zzq_result __asm__("r3"); \
+ register unsigned long long int* _zzq_ptr __asm__("r4"); \
+ _zzq_args[0] = (unsigned long long int)(_zzq_request); \
+ _zzq_args[1] = (unsigned long long int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned long long int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned long long int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned long long int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned long long int)(_zzq_arg5); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1" \
+ : "=r" (_zzq_result) \
+ : "0" (_zzq_default), "r" (_zzq_ptr) \
+ : "cc", "memory"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ register unsigned long long int __addr __asm__("r3"); \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2" \
+ : "=r" (__addr) \
+ : \
+ : "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR_GPR2 */ \
+ "or 4,4,4" \
+ : "=r" (__addr) \
+ : \
+ : "cc", "memory" \
+ ); \
+ _zzq_orig->r2 = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc64_linux */
+
+/* ------------------------ ppc32-aix5 ------------------------- */
+
+#if defined(PLAT_ppc32_aix5)
+
+typedef
+ struct {
+ unsigned int nraddr; /* where's the code? */
+ unsigned int r2; /* what tocptr do we need? */
+ }
+ OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rlwinm 0,0,3,0,0 ; rlwinm 0,0,13,0,0\n\t" \
+ "rlwinm 0,0,29,0,0 ; rlwinm 0,0,19,0,0\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned int _zzq_args[7]; \
+ register unsigned int _zzq_result; \
+ register unsigned int* _zzq_ptr; \
+ _zzq_args[0] = (unsigned int)(_zzq_request); \
+ _zzq_args[1] = (unsigned int)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned int)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned int)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned int)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned int)(_zzq_arg5); \
+ _zzq_args[6] = (unsigned int)(_zzq_default); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile("mr 4,%1\n\t" \
+ "lwz 3, 24(4)\n\t" \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1\n\t" \
+ "mr %0,3" \
+ : "=b" (_zzq_result) \
+ : "b" (_zzq_ptr) \
+ : "r3", "r4", "cc", "memory"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ register unsigned int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR_GPR2 */ \
+ "or 4,4,4\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->r2 = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc32_aix5 */
+
+/* ------------------------ ppc64-aix5 ------------------------- */
+
+#if defined(PLAT_ppc64_aix5)
+
+typedef
+ struct {
+ unsigned long long int nraddr; /* where's the code? */
+ unsigned long long int r2; /* what tocptr do we need? */
+ }
+ OrigFn;
+
+#define __SPECIAL_INSTRUCTION_PREAMBLE \
+ "rotldi 0,0,3 ; rotldi 0,0,13\n\t" \
+ "rotldi 0,0,61 ; rotldi 0,0,51\n\t"
+
+#define VALGRIND_DO_CLIENT_REQUEST( \
+ _zzq_rlval, _zzq_default, _zzq_request, \
+ _zzq_arg1, _zzq_arg2, _zzq_arg3, _zzq_arg4, _zzq_arg5) \
+ \
+ { unsigned long long int _zzq_args[7]; \
+ register unsigned long long int _zzq_result; \
+ register unsigned long long int* _zzq_ptr; \
+ _zzq_args[0] = (unsigned int long long)(_zzq_request); \
+ _zzq_args[1] = (unsigned int long long)(_zzq_arg1); \
+ _zzq_args[2] = (unsigned int long long)(_zzq_arg2); \
+ _zzq_args[3] = (unsigned int long long)(_zzq_arg3); \
+ _zzq_args[4] = (unsigned int long long)(_zzq_arg4); \
+ _zzq_args[5] = (unsigned int long long)(_zzq_arg5); \
+ _zzq_args[6] = (unsigned int long long)(_zzq_default); \
+ _zzq_ptr = _zzq_args; \
+ __asm__ volatile("mr 4,%1\n\t" \
+ "ld 3, 48(4)\n\t" \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = client_request ( %R4 ) */ \
+ "or 1,1,1\n\t" \
+ "mr %0,3" \
+ : "=b" (_zzq_result) \
+ : "b" (_zzq_ptr) \
+ : "r3", "r4", "cc", "memory"); \
+ _zzq_rlval = _zzq_result; \
+ }
+
+#define VALGRIND_GET_NR_CONTEXT(_zzq_rlval) \
+ { volatile OrigFn* _zzq_orig = &(_zzq_rlval); \
+ register unsigned long long int __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR */ \
+ "or 2,2,2\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->nraddr = __addr; \
+ __asm__ volatile(__SPECIAL_INSTRUCTION_PREAMBLE \
+ /* %R3 = guest_NRADDR_GPR2 */ \
+ "or 4,4,4\n\t" \
+ "mr %0,3" \
+ : "=b" (__addr) \
+ : \
+ : "r3", "cc", "memory" \
+ ); \
+ _zzq_orig->r2 = __addr; \
+ }
+
+#define VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ __SPECIAL_INSTRUCTION_PREAMBLE \
+ /* branch-and-link-to-noredir *%R11 */ \
+ "or 3,3,3\n\t"
+
+#endif /* PLAT_ppc64_aix5 */
+
+/* Insert assembly code for other platforms here... */
+
+#endif /* NVALGRIND */
+
+
+/* ------------------------------------------------------------------ */
+/* PLATFORM SPECIFICS for FUNCTION WRAPPING. This is all very */
+/* ugly. It's the least-worst tradeoff I can think of. */
+/* ------------------------------------------------------------------ */
+
+/* This section defines magic (a.k.a. appalling-hack) macros for doing
+ guaranteed-no-redirection calls, so as to get from function
+ wrappers to the functions they are wrapping. The whole point is to
+ construct standard call sequences, but to do the call itself with a
+ special no-redirect call pseudo-instruction that the JIT
+ understands and handles specially. This section is long and
+ repetitious, and I can't see a way to make it shorter.
+
+ The naming scheme is as follows:
+
+ CALL_FN_{W,v}_{v,W,WW,WWW,WWWW,5W,6W,7W,etc}
+
+ 'W' stands for "word" and 'v' for "void". Hence there are
+ different macros for calling arity 0, 1, 2, 3, 4, etc, functions,
+ and for each, the possibility of returning a word-typed result, or
+ no result.
+*/
+
+/* Use these to write the name of your wrapper. NOTE: duplicates
+ VG_WRAP_FUNCTION_Z{U,Z} in pub_tool_redir.h. */
+
+#define I_WRAP_SONAME_FNNAME_ZU(soname,fnname) \
+ _vgwZU_##soname##_##fnname
+
+#define I_WRAP_SONAME_FNNAME_ZZ(soname,fnname) \
+ _vgwZZ_##soname##_##fnname
+
+/* Use this macro from within a wrapper function to collect the
+ context (address and possibly other info) of the original function.
+ Once you have that you can then use it in one of the CALL_FN_
+ macros. The type of the argument _lval is OrigFn. */
+#define VALGRIND_GET_ORIG_FN(_lval) VALGRIND_GET_NR_CONTEXT(_lval)
+
+/* Derivatives of the main macros below, for calling functions
+ returning void. */
+
+#define CALL_FN_v_v(fnptr) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_v(_junk,fnptr); } while (0)
+
+#define CALL_FN_v_W(fnptr, arg1) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_W(_junk,fnptr,arg1); } while (0)
+
+#define CALL_FN_v_WW(fnptr, arg1,arg2) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_WW(_junk,fnptr,arg1,arg2); } while (0)
+
+#define CALL_FN_v_WWW(fnptr, arg1,arg2,arg3) \
+ do { volatile unsigned long _junk; \
+ CALL_FN_W_WWW(_junk,fnptr,arg1,arg2,arg3); } while (0)
+
+/* ------------------------- x86-linux ------------------------- */
+
+#if defined(PLAT_x86_linux)
+
+/* These regs are trashed by the hidden call. No need to mention eax
+ as gcc can already see that, and mentioning it causes gcc to bomb. */
+#define __CALLER_SAVED_REGS /*"eax"*/ "ecx", "edx"
+
+/* These CALL_FN_ macros assume that on x86-linux, sizeof(unsigned
+ long) == 4. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[1]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[2]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ __asm__ volatile( \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $4, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3]; \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ __asm__ volatile( \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $8, %%esp\n" \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { /* x86-linux: 3-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[4]; /* [0] = target address, [1..3] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ __asm__ volatile( \
+ "pushl 12(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $12, %%esp\n" /* drop the 12 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { /* x86-linux: 4-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[5]; /* [0] = target address, [1..4] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ __asm__ volatile( \
+ "pushl 16(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $16, %%esp\n" /* drop the 16 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { /* x86-linux: 5-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[6]; /* [0] = target address, [1..5] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ __asm__ volatile( \
+ "pushl 20(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $20, %%esp\n" /* drop the 20 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { /* x86-linux: 6-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[7]; /* [0] = target address, [1..6] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ __asm__ volatile( \
+ "pushl 24(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $24, %%esp\n" /* drop the 24 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { /* x86-linux: 7-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[8]; /* [0] = target address, [1..7] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ __asm__ volatile( \
+ "pushl 28(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $28, %%esp\n" /* drop the 28 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { /* x86-linux: 8-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[9]; /* [0] = target address, [1..8] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ __asm__ volatile( \
+ "pushl 32(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $32, %%esp\n" /* drop the 32 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { /* x86-linux: 9-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[10]; /* [0] = target address, [1..9] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ __asm__ volatile( \
+ "pushl 36(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $36, %%esp\n" /* drop the 36 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { /* x86-linux: 10-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[11]; /* [0] = target address, [1..10] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ __asm__ volatile( \
+ "pushl 40(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $40, %%esp\n" /* drop the 40 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
+ arg6,arg7,arg8,arg9,arg10, \
+ arg11) \
+ do { /* x86-linux: 11-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[12]; /* [0] = target address, [1..11] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ __asm__ volatile( \
+ "pushl 44(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 40(%%eax)\n\t" \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $44, %%esp\n" /* drop the 44 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5, \
+ arg6,arg7,arg8,arg9,arg10, \
+ arg11,arg12) \
+ do { /* x86-linux: 12-arg call to orig.nraddr; %eax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[13]; /* [0] = target address, [1..12] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ _argvec[12] = (unsigned long)(arg12); \
+ __asm__ volatile( \
+ "pushl 48(%%eax)\n\t" /* args are pushed last-to-first */ \
+ "pushl 44(%%eax)\n\t" \
+ "pushl 40(%%eax)\n\t" \
+ "pushl 36(%%eax)\n\t" \
+ "pushl 32(%%eax)\n\t" \
+ "pushl 28(%%eax)\n\t" \
+ "pushl 24(%%eax)\n\t" \
+ "pushl 20(%%eax)\n\t" \
+ "pushl 16(%%eax)\n\t" \
+ "pushl 12(%%eax)\n\t" \
+ "pushl 8(%%eax)\n\t" \
+ "pushl 4(%%eax)\n\t" \
+ "movl (%%eax), %%eax\n\t" /* target->%eax */ \
+ VALGRIND_CALL_NOREDIR_EAX \
+ "addl $48, %%esp\n" /* drop the 48 bytes pushed above */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#endif /* PLAT_x86_linux */
+
+/* ------------------------ amd64-linux ------------------------ */
+
+#if defined(PLAT_amd64_linux)
+
+/* ARGREGS: rdi rsi rdx rcx r8 r9 (the rest on stack in R-to-L order) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS /*"rax",*/ "rcx", "rdx", "rsi", /* rax is left out: the CALL_FN_ asm blocks already bind it as the "a" in/out operand */ \
+ "rdi", "r8", "r9", "r10", "r11"
+
+/* These CALL_FN_ macros assume that on amd64-linux, sizeof(unsigned
+ long) == 8. */
+
+/* NB 9 Sept 07. There is a nasty kludge here in all these CALL_FN_
+ macros. In order not to trash the stack redzone, we need to drop
+ %rsp by 128 before the hidden call, and restore afterwards. The
+ nastiness is that it is only by luck that the stack still appears
+ to be unwindable during the hidden call - since then the behaviour
+ of any routine using this macro does not match what the CFI data
+ says. Sigh.
+
+ Why is this important? Imagine that a wrapper has a stack
+ allocated local, and passes to the hidden call, a pointer to it.
+ Because gcc does not know about the hidden call, it may allocate
+ that local in the redzone. Unfortunately the hidden call may then
+ trash it before it comes to use it. So we must step clear of the
+ redzone, for the duration of the hidden call, to make it safe.
+
+ Probably the same problem afflicts the other redzone-style ABIs too
+ (ppc64-linux, ppc32-aix5, ppc64-aix5); but for those, the stack is
+ self describing (none of this CFI nonsense) so at least messing
+ with the stack pointer doesn't give a danger of non-unwindable
+ stack. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { /* amd64-linux: 0-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[1]; /* [0] = target address */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { /* amd64-linux: 1-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[2]; /* [0] = target address, [1] = arg1 */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "movq 8(%%rax), %%rdi\n\t" /* arg1->%rdi */ \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { /* amd64-linux: 2-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3]; /* [0] = target address, [1..2] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "movq 16(%%rax), %%rsi\n\t" /* arg2->%rsi */ \
+ "movq 8(%%rax), %%rdi\n\t" /* arg1->%rdi */ \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { /* amd64-linux: 3-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[4]; /* [0] = target address, [1..3] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "movq 24(%%rax), %%rdx\n\t" /* arg3->%rdx */ \
+ "movq 16(%%rax), %%rsi\n\t" /* arg2->%rsi */ \
+ "movq 8(%%rax), %%rdi\n\t" /* arg1->%rdi */ \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { /* amd64-linux: 4-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[5]; /* [0] = target address, [1..4] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "movq 32(%%rax), %%rcx\n\t" /* arg4->%rcx */ \
+ "movq 24(%%rax), %%rdx\n\t" /* arg3->%rdx */ \
+ "movq 16(%%rax), %%rsi\n\t" /* arg2->%rsi */ \
+ "movq 8(%%rax), %%rdi\n\t" /* arg1->%rdi */ \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { /* amd64-linux: 5-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[6]; /* [0] = target address, [1..5] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "movq 40(%%rax), %%r8\n\t" /* arg5->%r8 */ \
+ "movq 32(%%rax), %%rcx\n\t" /* arg4->%rcx */ \
+ "movq 24(%%rax), %%rdx\n\t" /* arg3->%rdx */ \
+ "movq 16(%%rax), %%rsi\n\t" /* arg2->%rsi */ \
+ "movq 8(%%rax), %%rdi\n\t" /* arg1->%rdi */ \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { /* amd64-linux: 6-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[7]; /* [0] = target address, [1..6] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "movq 48(%%rax), %%r9\n\t" /* arg6->%r9 */ \
+ "movq 40(%%rax), %%r8\n\t" /* arg5->%r8 */ \
+ "movq 32(%%rax), %%rcx\n\t" /* arg4->%rcx */ \
+ "movq 24(%%rax), %%rdx\n\t" /* arg3->%rdx */ \
+ "movq 16(%%rax), %%rsi\n\t" /* arg2->%rsi */ \
+ "movq 8(%%rax), %%rdi\n\t" /* arg1->%rdi */ \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $128,%%rsp\n\t" /* restore %rsp only AFTER the call, so the redzone stays protected during it */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { /* amd64-linux: 7-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[8]; /* [0] = target address, [1..7] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "pushq 56(%%rax)\n\t" /* arg7 goes on the stack */ \
+ "movq 48(%%rax), %%r9\n\t" /* args 1-6 go in rdi rsi rdx rcx r8 r9 */ \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $8, %%rsp\n" /* drop the 8 bytes of stack args */ \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { /* amd64-linux: 8-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[9]; /* [0] = target address, [1..8] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "pushq 64(%%rax)\n\t" /* args 7-8 go on the stack, last first */ \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" /* args 1-6 go in rdi rsi rdx rcx r8 r9 */ \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $16, %%rsp\n" /* drop the 16 bytes of stack args */ \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { /* amd64-linux: 9-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[10]; /* [0] = target address, [1..9] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "pushq 72(%%rax)\n\t" /* args 7-9 go on the stack, last first */ \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" /* args 1-6 go in rdi rsi rdx rcx r8 r9 */ \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $24, %%rsp\n" /* drop the 24 bytes of stack args */ \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { /* amd64-linux: 10-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[11]; /* [0] = target address, [1..10] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "pushq 80(%%rax)\n\t" /* args 7-10 go on the stack, last first */ \
+ "pushq 72(%%rax)\n\t" \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" /* args 1-6 go in rdi rsi rdx rcx r8 r9 */ \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $32, %%rsp\n" /* drop the 32 bytes of stack args */ \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { /* amd64-linux: 11-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[12]; /* [0] = target address, [1..11] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "pushq 88(%%rax)\n\t" /* args 7-11 go on the stack, last first */ \
+ "pushq 80(%%rax)\n\t" \
+ "pushq 72(%%rax)\n\t" \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" /* args 1-6 go in rdi rsi rdx rcx r8 r9 */ \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $40, %%rsp\n" /* drop the 40 bytes of stack args */ \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { /* amd64-linux: 12-arg call to orig.nraddr; %rax result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[13]; /* [0] = target address, [1..12] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ _argvec[9] = (unsigned long)(arg9); \
+ _argvec[10] = (unsigned long)(arg10); \
+ _argvec[11] = (unsigned long)(arg11); \
+ _argvec[12] = (unsigned long)(arg12); \
+ __asm__ volatile( \
+ "subq $128,%%rsp\n\t" /* step below the 128-byte redzone */ \
+ "pushq 96(%%rax)\n\t" /* args 7-12 go on the stack, last first */ \
+ "pushq 88(%%rax)\n\t" \
+ "pushq 80(%%rax)\n\t" \
+ "pushq 72(%%rax)\n\t" \
+ "pushq 64(%%rax)\n\t" \
+ "pushq 56(%%rax)\n\t" \
+ "movq 48(%%rax), %%r9\n\t" /* args 1-6 go in rdi rsi rdx rcx r8 r9 */ \
+ "movq 40(%%rax), %%r8\n\t" \
+ "movq 32(%%rax), %%rcx\n\t" \
+ "movq 24(%%rax), %%rdx\n\t" \
+ "movq 16(%%rax), %%rsi\n\t" \
+ "movq 8(%%rax), %%rdi\n\t" \
+ "movq (%%rax), %%rax\n\t" /* target->%rax */ \
+ VALGRIND_CALL_NOREDIR_RAX \
+ "addq $48, %%rsp\n" /* drop the 48 bytes of stack args */ \
+ "addq $128,%%rsp\n\t" /* undo the redzone skip after the call */ \
+ : /*out*/ "=a" (_res) \
+ : /*in*/ "a" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#endif /* PLAT_amd64_linux */
+
+/* ------------------------ ppc32-linux ------------------------ */
+
+#if defined(PLAT_ppc32_linux)
+
+/* This is useful for finding out about the on-stack stuff:
+
+ extern int f9 ( int,int,int,int,int,int,int,int,int );
+ extern int f10 ( int,int,int,int,int,int,int,int,int,int );
+ extern int f11 ( int,int,int,int,int,int,int,int,int,int,int );
+ extern int f12 ( int,int,int,int,int,int,int,int,int,int,int,int );
+
+ int g9 ( void ) {
+ return f9(11,22,33,44,55,66,77,88,99);
+ }
+ int g10 ( void ) {
+ return f10(11,22,33,44,55,66,77,88,99,110);
+ }
+ int g11 ( void ) {
+ return f11(11,22,33,44,55,66,77,88,99,110,121);
+ }
+ int g12 ( void ) {
+ return f12(11,22,33,44,55,66,77,88,99,110,121,132);
+ }
+*/
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS /* appended to the clobber list of every ppc32-linux CALL_FN_ asm block */ \
+ "lr", "ctr", "xer", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13"
+
+/* These CALL_FN_ macros assume that on ppc32-linux,
+ sizeof(unsigned long) == 4. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { /* ppc32-linux: 0-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[1]; /* [0] = target address */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { /* ppc32-linux: 1-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[2]; /* [0] = target address, [1] = arg1 */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)(arg1); /* parenthesized so the cast covers the whole argument expression */ \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { /* ppc32-linux: 2-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3]; /* [0] = target address, [1..2] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ /* args are parenthesized so each cast covers the whole expression */ \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { /* ppc32-linux: 3-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[4]; /* [0] = target address, [1..3] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ /* args are parenthesized so each cast covers the whole expression */ \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { /* ppc32-linux: 4-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[5]; /* [0] = target address, [1..4] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ /* args are parenthesized so each cast covers the whole expression */ \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { /* ppc32-linux: 5-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[6]; /* [0] = target address, [1..5] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ /* args are parenthesized so each cast covers the whole expression */ \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { /* ppc32-linux: 6-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[7]; /* [0] = target address, [1..6] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ /* args are parenthesized so each cast covers the whole expression */ \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { /* ppc32-linux: 7-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[8]; /* [0] = target address, [1..7] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ /* args are parenthesized so each cast covers the whole expression */ \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 9,28(11)\n\t" \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { /* ppc32-linux: 8-arg call to orig.nraddr; r3 result is stored in lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[9]; /* [0] = target address, [1..8] = args */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ /* args are parenthesized so each cast covers the whole expression */ \
+ _argvec[1] = (unsigned long)(arg1); \
+ _argvec[2] = (unsigned long)(arg2); \
+ _argvec[3] = (unsigned long)(arg3); \
+ _argvec[4] = (unsigned long)(arg4); \
+ _argvec[5] = (unsigned long)(arg5); \
+ _argvec[6] = (unsigned long)(arg6); \
+ _argvec[7] = (unsigned long)(arg7); \
+ _argvec[8] = (unsigned long)(arg8); \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" \
+ "lwz 5,12(11)\n\t" \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" \
+ "lwz 8,24(11)\n\t" \
+ "lwz 9,28(11)\n\t" \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { /* ppc32-linux: call orig.nraddr with 9 word args (arg9 on the stack); result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[10]; /* [0] = target, [1..9] = arg1..arg9 */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ _argvec[9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "addi 1,1,-16\n\t" /* lower r1 by 16 bytes to make room for the stack-passed arg */ \
+ /* arg9: loaded from 36(r11) and stored to 8(r1); r3 is only scratch here */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,8(1)\n\t" \
+ /* args1-8 go in r3..r10 */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5,12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8,24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9,28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "addi 1,1,16\n\t" /* undo the r1 adjustment made above */ \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { /* ppc32-linux: call orig.nraddr with 10 word args (arg9-10 on the stack); result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[11]; /* [0] = target, [1..10] = arg1..arg10 */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ _argvec[9] = (unsigned long)arg9; \
+ _argvec[10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "addi 1,1,-16\n\t" /* lower r1 by 16 bytes to make room for the stack-passed args */ \
+ /* arg10: 40(r11) -> 12(r1); r3 is only scratch here */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,12(1)\n\t" \
+ /* arg9: 36(r11) -> 8(r1) */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,8(1)\n\t" \
+ /* args1-8 go in r3..r10 */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5,12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8,24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9,28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "addi 1,1,16\n\t" /* undo the r1 adjustment made above */ \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { /* ppc32-linux: call orig.nraddr with 11 word args (arg9-11 on the stack); result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[12]; /* [0] = target, [1..11] = arg1..arg11 */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ _argvec[9] = (unsigned long)arg9; \
+ _argvec[10] = (unsigned long)arg10; \
+ _argvec[11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "addi 1,1,-32\n\t" /* lower r1 by 32 bytes to make room for the stack-passed args */ \
+ /* arg11: 44(r11) -> 16(r1); r3 is only scratch here */ \
+ "lwz 3,44(11)\n\t" \
+ "stw 3,16(1)\n\t" \
+ /* arg10: 40(r11) -> 12(r1) */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,12(1)\n\t" \
+ /* arg9: 36(r11) -> 8(r1) */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,8(1)\n\t" \
+ /* args1-8 go in r3..r10 */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5,12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8,24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9,28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "addi 1,1,32\n\t" /* undo the r1 adjustment made above */ \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { /* ppc32-linux: call orig.nraddr with 12 word args (arg9-12 on the stack); result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[13]; /* [0] = target, [1..12] = arg1..arg12 */ \
+ volatile unsigned long _res; \
+ _argvec[0] = (unsigned long)_orig.nraddr; \
+ _argvec[1] = (unsigned long)arg1; \
+ _argvec[2] = (unsigned long)arg2; \
+ _argvec[3] = (unsigned long)arg3; \
+ _argvec[4] = (unsigned long)arg4; \
+ _argvec[5] = (unsigned long)arg5; \
+ _argvec[6] = (unsigned long)arg6; \
+ _argvec[7] = (unsigned long)arg7; \
+ _argvec[8] = (unsigned long)arg8; \
+ _argvec[9] = (unsigned long)arg9; \
+ _argvec[10] = (unsigned long)arg10; \
+ _argvec[11] = (unsigned long)arg11; \
+ _argvec[12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[0] */ \
+ "addi 1,1,-32\n\t" /* lower r1 by 32 bytes to make room for the stack-passed args */ \
+ /* arg12: 48(r11) -> 20(r1); r3 is only scratch here */ \
+ "lwz 3,48(11)\n\t" \
+ "stw 3,20(1)\n\t" \
+ /* arg11: 44(r11) -> 16(r1) */ \
+ "lwz 3,44(11)\n\t" \
+ "stw 3,16(1)\n\t" \
+ /* arg10: 40(r11) -> 12(r1) */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,12(1)\n\t" \
+ /* arg9: 36(r11) -> 8(r1) */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,8(1)\n\t" \
+ /* args1-8 go in r3..r10 */ \
+ "lwz 3,4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4,8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5,12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6,16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7,20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8,24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9,28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10,32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11,0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "addi 1,1,32\n\t" /* undo the r1 adjustment made above */ \
+ "mr %0,3" /* r3 -> _res */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[0]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#endif /* PLAT_ppc32_linux */
+
+/* ------------------------ ppc64-linux ------------------------ */
+
+#if defined(PLAT_ppc64_linux)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. The list (lr, ctr, xer,
+ cr0-cr7, r0 and r2-r13) is appended to the clobber ("trash") section
+ of every ppc64-linux CALL_FN_ asm statement. */
+#define __CALLER_SAVED_REGS \
+ "lr", "ctr", "xer", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13"
+
+/* These CALL_FN_ macros assume that on ppc64-linux, sizeof(unsigned
+ long) == 8. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { /* ppc64-linux: call orig.nraddr with no args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+0]; /* [0] r2 save slot, [1] callee r2, [2] target */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { /* ppc64-linux: call orig.nraddr with 1 word arg and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+1]; /* [0] r2 save slot, [1] callee r2, [2] target, [3] arg1 */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { /* ppc64-linux: call orig.nraddr with 2 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+2]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { /* ppc64-linux: call orig.nraddr with 3 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+3]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { /* ppc64-linux: call orig.nraddr with 4 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+4]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { /* ppc64-linux: call orig.nraddr with 5 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+5]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { /* ppc64-linux: call orig.nraddr with 6 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+6]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { /* ppc64-linux: call orig.nraddr with 7 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+7]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { /* ppc64-linux: call orig.nraddr with 8 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+8]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)" /* restore tocptr */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { /* ppc64-linux: call orig.nraddr with 9 word args (arg9 on the stack), r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+9]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "addi 1,1,-128\n\t" /* expand stack frame */ \
+ /* arg9: 72(r11) -> 112(r1); r3 is only scratch here */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 go in r3..r10 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ "addi 1,1,128" /* restore frame */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { /* ppc64-linux: call orig.nraddr with 10 word args (arg9-10 on the stack), r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+10]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "addi 1,1,-128\n\t" /* expand stack frame */ \
+ /* arg10: 80(r11) -> 120(r1); r3 is only scratch here */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9: 72(r11) -> 112(r1) */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 go in r3..r10 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ "addi 1,1,128" /* restore frame */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { /* ppc64-linux: call orig.nraddr with 11 word args (arg9-11 on the stack), r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+11]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "addi 1,1,-144\n\t" /* expand stack frame */ \
+ /* arg11: 88(r11) -> 128(r1); r3 is only scratch here */ \
+ "ld 3,88(11)\n\t" \
+ "std 3,128(1)\n\t" \
+ /* arg10: 80(r11) -> 120(r1) */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9: 72(r11) -> 112(r1) */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 go in r3..r10 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ "addi 1,1,144" /* restore frame */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { /* ppc64-linux: call orig.nraddr with 12 word args (arg9-12 on the stack), r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+12]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ _argvec[2+12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ "std 2,-16(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "addi 1,1,-144\n\t" /* expand stack frame */ \
+ /* arg12: 96(r11) -> 136(r1); r3 is only scratch here */ \
+ "ld 3,96(11)\n\t" \
+ "std 3,136(1)\n\t" \
+ /* arg11: 88(r11) -> 128(r1) */ \
+ "ld 3,88(11)\n\t" \
+ "std 3,128(1)\n\t" \
+ /* arg10: 80(r11) -> 120(r1) */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" \
+ /* arg9: 72(r11) -> 112(r1) */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" \
+ /* args1-8 go in r3..r10 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ "addi 1,1,144" /* restore frame */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#endif /* PLAT_ppc64_linux */
+
+/* ------------------------ ppc32-aix5 ------------------------- */
+
+#if defined(PLAT_ppc32_aix5)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. The list (lr, ctr, xer,
+ cr0-cr7, r0 and r2-r13) is appended to the clobber ("trash") section
+ of every ppc32-aix5 CALL_FN_ asm statement. */
+#define __CALLER_SAVED_REGS \
+ "lr", "ctr", "xer", \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
+ "r11", "r12", "r13"
+
+/* Expand the stack frame, copying enough info that unwinding
+ still works. Trashes r3. Concretely: lowers r1 by _n_fr bytes, then
+ copies the word at the old 0(r1) (now at _n_fr(r1)) to the new 0(r1),
+ using r3 as the temporary. _n_fr is pasted into the asm text with #,
+ so it must be a literal number. */
+
+#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) \
+ "addi 1,1,-" #_n_fr "\n\t" \
+ "lwz 3," #_n_fr "(1)\n\t" \
+ "stw 3,0(1)\n\t"
+
+#define VG_CONTRACT_FRAME_BY(_n_fr) /* asm text that raises r1 by _n_fr bytes, undoing the expand macro's r1 adjustment */ \
+ "addi 1,1," #_n_fr "\n\t"
+
+/* These CALL_FN_ macros assume that on ppc32-aix5, sizeof(unsigned
+ long) == 4. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { /* ppc32-aix5: call orig.nraddr with no args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+0]; /* [0] r2 save slot, [1] callee r2, [2] target */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512, old 0(r1) word copied to new 0(r1) */ \
+ "stw 2,-8(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { /* ppc32-aix5: call orig.nraddr with 1 word arg and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+1]; /* [0] r2 save slot, [1] callee r2, [2] target, [3] arg1 */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512, old 0(r1) word copied to new 0(r1) */ \
+ "stw 2,-8(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { /* ppc32-aix5: call orig.nraddr with 2 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+2]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512, old 0(r1) word copied to new 0(r1) */ \
+ "stw 2,-8(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { /* ppc32-aix5: call orig.nraddr with 3 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+3]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512, old 0(r1) word copied to new 0(r1) */ \
+ "stw 2,-8(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { /* ppc32-aix5: call orig.nraddr with 4 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+4]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512, old 0(r1) word copied to new 0(r1) */ \
+ "stw 2,-8(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { /* ppc32-aix5: call orig.nraddr with 5 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+5]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512, old 0(r1) word copied to new 0(r1) */ \
+ "stw 2,-8(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { /* ppc32-aix5: call orig.nraddr with 6 word args and r2 = orig.r2; result -> lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+6]; /* [0] r2 save slot, [1] callee r2, [2] target, [3..] args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; \
+ _argvec[2] = (unsigned long)_orig.nraddr; \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) /* r1 -= 512, old 0(r1) word copied to new 0(r1) */ \
+ "stw 2,-8(11)\n\t" /* save tocptr (r2) into _argvec[0] */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr: r2 = _argvec[1] */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 /* does the call via r11 (per its name; defined elsewhere) */ \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2] again; it held the target */ \
+ "mr %0,3\n\t" /* r3 -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) /* r1 += 512 */ \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast result to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { /* ppc32-aix5: call orig with 7 args (loaded into r3..r9), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+7]; /* 3 bookkeeping slots + 7 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { /* ppc32-aix5: call orig with 8 args (loaded into r3..r10), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+8]; /* 3 bookkeeping slots + 8 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { /* ppc32-aix5: call orig with 9 args (8 in r3..r10, arg9 on the stack), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+9]; /* 3 bookkeeping slots + 9 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(64) /* 64 further bytes of frame; arg9 is stored into them below */ \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" /* r3 is only a scratch register here; it gets arg1 afterwards */ \
+ "stw 3,56(1)\n\t" /* arg9 -> 56(r1) */ \
+ /* args1-8 */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(64) /* undo both frame expansions, innermost first */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { /* ppc32-aix5: call orig with 10 args (8 in r3..r10, arg9-10 on the stack), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+10]; /* 3 bookkeeping slots + 10 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(64) /* 64 further bytes of frame; arg9-10 are stored into them below */ \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" /* r3 is only a scratch register here; it gets arg1 afterwards */ \
+ "stw 3,60(1)\n\t" /* arg10 -> 60(r1) */ \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,56(1)\n\t" /* arg9 -> 56(r1) */ \
+ /* args1-8 */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(64) /* undo both frame expansions, innermost first */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { /* ppc32-aix5: call orig with 11 args (8 in r3..r10, arg9-11 on the stack), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+11]; /* 3 bookkeeping slots + 11 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(72) /* 72 further bytes of frame; arg9-11 are stored into them below */ \
+ /* arg11 */ \
+ "lwz 3,44(11)\n\t" /* r3 is only a scratch register here; it gets arg1 afterwards */ \
+ "stw 3,64(1)\n\t" /* arg11 -> 64(r1) */ \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,60(1)\n\t" /* arg10 -> 60(r1) */ \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,56(1)\n\t" /* arg9 -> 56(r1) */ \
+ /* args1-8 */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(72) /* undo both frame expansions, innermost first */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { /* ppc32-aix5: call orig with 12 args (8 in r3..r10, arg9-12 on the stack), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+12]; /* 3 bookkeeping slots + 12 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ _argvec[2+12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "stw 2,-8(11)\n\t" /* save tocptr */ \
+ "lwz 2,-4(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(72) /* 72 further bytes of frame; arg9-12 are stored into them below */ \
+ /* arg12 */ \
+ "lwz 3,48(11)\n\t" /* r3 is only a scratch register here; it gets arg1 afterwards */ \
+ "stw 3,68(1)\n\t" /* arg12 -> 68(r1) */ \
+ /* arg11 */ \
+ "lwz 3,44(11)\n\t" \
+ "stw 3,64(1)\n\t" /* arg11 -> 64(r1) */ \
+ /* arg10 */ \
+ "lwz 3,40(11)\n\t" \
+ "stw 3,60(1)\n\t" /* arg10 -> 60(r1) */ \
+ /* arg9 */ \
+ "lwz 3,36(11)\n\t" \
+ "stw 3,56(1)\n\t" /* arg9 -> 56(r1) */ \
+ /* args1-8 */ \
+ "lwz 3, 4(11)\n\t" /* arg1->r3 */ \
+ "lwz 4, 8(11)\n\t" /* arg2->r4 */ \
+ "lwz 5, 12(11)\n\t" /* arg3->r5 */ \
+ "lwz 6, 16(11)\n\t" /* arg4->r6 */ \
+ "lwz 7, 20(11)\n\t" /* arg5->r7 */ \
+ "lwz 8, 24(11)\n\t" /* arg6->r8 */ \
+ "lwz 9, 28(11)\n\t" /* arg7->r9 */ \
+ "lwz 10, 32(11)\n\t" /* arg8->r10 */ \
+ "lwz 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "lwz 2,-8(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(72) /* undo both frame expansions, innermost first */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#endif /* PLAT_ppc32_aix5 */
+
+/* ------------------------ ppc64-aix5 ------------------------- */
+
+#if defined(PLAT_ppc64_aix5)
+
+/* ARGREGS: r3 r4 r5 r6 r7 r8 r9 r10 (the rest on stack somewhere) */
+
+/* These regs are trashed by the hidden call. */
+#define __CALLER_SAVED_REGS /* ppc64-aix5 clobber list, appended to the trash list of each CALL_FN_ asm */ \
+ "lr", "ctr", "xer", /* special-purpose registers */ \
+ "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", /* all eight condition-register fields */ \
+ "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", /* includes the ARGREGS r3..r10 */ \
+ "r11", "r12", "r13"
+
+/* Expand the stack frame, copying enough info that unwinding
+ still works. Trashes r3. */
+
+#define VG_EXPAND_FRAME_BY_trashes_r3(_n_fr) /* _n_fr is stringized, so it must be a literal byte count */ \
+ "addi 1,1,-" #_n_fr "\n\t" /* r1 -= _n_fr */ \
+ "ld 3," #_n_fr "(1)\n\t" /* r3 = doubleword that was at 0(r1) before the adjustment */ \
+ "std 3,0(1)\n\t" /* ...copied to 0(r1) of the enlarged frame */
+
+#define VG_CONTRACT_FRAME_BY(_n_fr) /* inverse of the expand macro; pass the same literal byte count */ \
+ "addi 1,1," #_n_fr "\n\t" /* r1 += _n_fr */
+
+/* These CALL_FN_ macros assume that on ppc64-aix5, sizeof(unsigned
+ long) == 8. */
+
+#define CALL_FN_W_v(lval, orig) \
+ do { /* ppc64-aix5: call orig with no args, result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+0]; /* 3 bookkeeping slots, no args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_W(lval, orig, arg1) \
+ do { /* ppc64-aix5: call orig with 1 arg (loaded into r3), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+1]; /* 3 bookkeeping slots + 1 arg */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WW(lval, orig, arg1,arg2) \
+ do { /* ppc64-aix5: call orig with 2 args (loaded into r3..r4), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+2]; /* 3 bookkeeping slots + 2 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WWW(lval, orig, arg1,arg2,arg3) \
+ do { /* ppc64-aix5: call orig with 3 args (loaded into r3..r5), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+3]; /* 3 bookkeeping slots + 3 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_WWWW(lval, orig, arg1,arg2,arg3,arg4) \
+ do { /* ppc64-aix5: call orig with 4 args (loaded into r3..r6), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+4]; /* 3 bookkeeping slots + 4 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_5W(lval, orig, arg1,arg2,arg3,arg4,arg5) \
+ do { /* ppc64-aix5: call orig with 5 args (loaded into r3..r7), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+5]; /* 3 bookkeeping slots + 5 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_6W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6) \
+ do { /* ppc64-aix5: call orig with 6 args (loaded into r3..r8), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+6]; /* 3 bookkeeping slots + 6 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_7W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7) \
+ do { /* ppc64-aix5: call orig with 7 args (loaded into r3..r9), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+7]; /* 3 bookkeeping slots + 7 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_8W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8) \
+ do { /* ppc64-aix5: call orig with 8 args (loaded into r3..r10), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+8]; /* 3 bookkeeping slots + 8 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_9W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9) \
+ do { /* ppc64-aix5: call orig with 9 args (8 in r3..r10, arg9 on the stack), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+9]; /* 3 bookkeeping slots + 9 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(128) /* 128 further bytes of frame; arg9 is stored into them below */ \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" /* r3 is only a scratch register here; it gets arg1 afterwards */ \
+ "std 3,112(1)\n\t" /* arg9 -> 112(r1) */ \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(128) /* undo both frame expansions, innermost first */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_10W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10) \
+ do { /* ppc64-aix5: call orig with 10 args (8 in r3..r10, arg9-10 on the stack), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+10]; /* 3 bookkeeping slots + 10 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(128) /* 128 further bytes of frame; arg9-10 are stored into them below */ \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" /* r3 is only a scratch register here; it gets arg1 afterwards */ \
+ "std 3,120(1)\n\t" /* arg10 -> 120(r1) */ \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" /* arg9 -> 112(r1) */ \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(128) /* undo both frame expansions, innermost first */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_11W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11) \
+ do { /* ppc64-aix5: call orig with 11 args (8 in r3..r10, arg9-11 on the stack), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+11]; /* 3 bookkeeping slots + 11 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(144) /* 144 further bytes of frame; arg9-11 are stored into them below */ \
+ /* arg11 */ \
+ "ld 3,88(11)\n\t" /* r3 is only a scratch register here; it gets arg1 afterwards */ \
+ "std 3,128(1)\n\t" /* arg11 -> 128(r1) */ \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" /* arg10 -> 120(r1) */ \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" /* arg9 -> 112(r1) */ \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(144) /* undo both frame expansions, innermost first */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#define CALL_FN_W_12W(lval, orig, arg1,arg2,arg3,arg4,arg5,arg6, \
+ arg7,arg8,arg9,arg10,arg11,arg12) \
+ do { /* ppc64-aix5: call orig with 12 args (8 in r3..r10, arg9-12 on the stack), result into lval */ \
+ volatile OrigFn _orig = (orig); \
+ volatile unsigned long _argvec[3+12]; /* 3 bookkeeping slots + 12 args */ \
+ volatile unsigned long _res; \
+ /* _argvec[0] holds current r2 across the call */ \
+ _argvec[1] = (unsigned long)_orig.r2; /* r2 (tocptr) the target runs with */ \
+ _argvec[2] = (unsigned long)_orig.nraddr; /* address that gets branched to */ \
+ _argvec[2+1] = (unsigned long)arg1; \
+ _argvec[2+2] = (unsigned long)arg2; \
+ _argvec[2+3] = (unsigned long)arg3; \
+ _argvec[2+4] = (unsigned long)arg4; \
+ _argvec[2+5] = (unsigned long)arg5; \
+ _argvec[2+6] = (unsigned long)arg6; \
+ _argvec[2+7] = (unsigned long)arg7; \
+ _argvec[2+8] = (unsigned long)arg8; \
+ _argvec[2+9] = (unsigned long)arg9; \
+ _argvec[2+10] = (unsigned long)arg10; \
+ _argvec[2+11] = (unsigned long)arg11; \
+ _argvec[2+12] = (unsigned long)arg12; \
+ __asm__ volatile( \
+ "mr 11,%1\n\t" /* r11 = &_argvec[2]; negative offsets reach slots [1] and [0] */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(512) \
+ "std 2,-16(11)\n\t" /* save tocptr */ \
+ "ld 2,-8(11)\n\t" /* use nraddr's tocptr */ \
+ VG_EXPAND_FRAME_BY_trashes_r3(144) /* 144 further bytes of frame; arg9-12 are stored into them below */ \
+ /* arg12 */ \
+ "ld 3,96(11)\n\t" /* r3 is only a scratch register here; it gets arg1 afterwards */ \
+ "std 3,136(1)\n\t" /* arg12 -> 136(r1) */ \
+ /* arg11 */ \
+ "ld 3,88(11)\n\t" \
+ "std 3,128(1)\n\t" /* arg11 -> 128(r1) */ \
+ /* arg10 */ \
+ "ld 3,80(11)\n\t" \
+ "std 3,120(1)\n\t" /* arg10 -> 120(r1) */ \
+ /* arg9 */ \
+ "ld 3,72(11)\n\t" \
+ "std 3,112(1)\n\t" /* arg9 -> 112(r1) */ \
+ /* args1-8 */ \
+ "ld 3, 8(11)\n\t" /* arg1->r3 */ \
+ "ld 4, 16(11)\n\t" /* arg2->r4 */ \
+ "ld 5, 24(11)\n\t" /* arg3->r5 */ \
+ "ld 6, 32(11)\n\t" /* arg4->r6 */ \
+ "ld 7, 40(11)\n\t" /* arg5->r7 */ \
+ "ld 8, 48(11)\n\t" /* arg6->r8 */ \
+ "ld 9, 56(11)\n\t" /* arg7->r9 */ \
+ "ld 10, 64(11)\n\t" /* arg8->r10 */ \
+ "ld 11, 0(11)\n\t" /* target->r11 */ \
+ VALGRIND_BRANCH_AND_LINK_TO_NOREDIR_R11 \
+ "mr 11,%1\n\t" /* r11 was overwritten with the target; reload &_argvec[2] */ \
+ "mr %0,3\n\t" /* r3 after the call -> _res */ \
+ "ld 2,-16(11)\n\t" /* restore tocptr */ \
+ VG_CONTRACT_FRAME_BY(144) /* undo both frame expansions, innermost first */ \
+ VG_CONTRACT_FRAME_BY(512) \
+ : /*out*/ "=r" (_res) \
+ : /*in*/ "r" (&_argvec[2]) \
+ : /*trash*/ "cc", "memory", __CALLER_SAVED_REGS \
+ ); \
+ lval = (__typeof__(lval)) _res; /* cast the raw word back to lval's own type */ \
+ } while (0)
+
+#endif /* PLAT_ppc64_aix5 */
+
+
+/* ------------------------------------------------------------------ */
+/* ARCHITECTURE INDEPENDENT MACROS for CLIENT REQUESTS. */
+/* */
+/* ------------------------------------------------------------------ */
+
+/* Some request codes. There are many more of these, but most are not
+ exposed to end-user view. These are the public ones, all of the
+ form 0x1000 + small_number.
+
+ Core ones are in the range 0x00000000--0x0000ffff. The non-public
+ ones start at 0x2000.
+*/
+
+/* These macros are used by tools -- they must be public, but don't
+ embed them into other programs. */
+#define VG_USERREQ_TOOL_BASE(a,b) /* request-code base: low byte of a in bits 31..24, low byte of b in bits 23..16 */ \
+ ((unsigned int)(((a)&0xff) << 24 | ((b)&0xff) << 16)) /* low 16 bits are left zero */
+#define VG_IS_TOOL_USERREQ(a, b, v) /* nonzero when request code v carries the (a,b) base in its upper 16 bits */ \
+ (VG_USERREQ_TOOL_BASE(a,b) == ((v) & 0xffff0000)) /* the low 16 bits of v are masked off before comparing */
+
+/* !! ABIWARNING !! ABIWARNING !! ABIWARNING !! ABIWARNING !!
+ This enum comprises an ABI exported by Valgrind to programs
+ which use client requests. DO NOT CHANGE THE ORDER OF THESE
+ ENTRIES, NOR DELETE ANY -- add new ones at the end. */
+typedef
+ enum { VG_USERREQ__RUNNING_ON_VALGRIND = 0x1001, /* issued by the RUNNING_ON_VALGRIND macro */
+ VG_USERREQ__DISCARD_TRANSLATIONS = 0x1002, /* issued by VALGRIND_DISCARD_TRANSLATIONS */
+
+ /* These allow any function to be called from the simulated
+ CPU but run on the real CPU. Nb: the first arg passed to
+ the function is always the ThreadId of the running
+ thread! So CLIENT_CALL0 actually requires a 1 arg
+ function, etc. */
+ VG_USERREQ__CLIENT_CALL0 = 0x1101, /* callee receives the ThreadId only */
+ VG_USERREQ__CLIENT_CALL1 = 0x1102, /* ThreadId + 1 further arg */
+ VG_USERREQ__CLIENT_CALL2 = 0x1103, /* ThreadId + 2 further args */
+ VG_USERREQ__CLIENT_CALL3 = 0x1104, /* ThreadId + 3 further args */
+
+ /* Can be useful in regression testing suites -- eg. can
+ send Valgrind's output to /dev/null and still count
+ errors. */
+ VG_USERREQ__COUNT_ERRORS = 0x1201,
+
+ /* These are useful and can be interpreted by any tool that
+ tracks malloc() et al, by using vg_replace_malloc.c. */
+ VG_USERREQ__MALLOCLIKE_BLOCK = 0x1301,
+ VG_USERREQ__FREELIKE_BLOCK = 0x1302,
+ /* Memory pool support. */
+ VG_USERREQ__CREATE_MEMPOOL = 0x1303, /* mempool codes continue the 0x13xx numbering */
+ VG_USERREQ__DESTROY_MEMPOOL = 0x1304,
+ VG_USERREQ__MEMPOOL_ALLOC = 0x1305,
+ VG_USERREQ__MEMPOOL_FREE = 0x1306,
+ VG_USERREQ__MEMPOOL_TRIM = 0x1307,
+ VG_USERREQ__MOVE_MEMPOOL = 0x1308,
+ VG_USERREQ__MEMPOOL_CHANGE = 0x1309,
+ VG_USERREQ__MEMPOOL_EXISTS = 0x130a,
+
+ /* Allow printfs to valgrind log. */
+ VG_USERREQ__PRINTF = 0x1401, /* issued by VALGRIND_PRINTF() */
+ VG_USERREQ__PRINTF_BACKTRACE = 0x1402, /* issued by VALGRIND_PRINTF_BACKTRACE() */
+
+ /* Stack support. */
+ VG_USERREQ__STACK_REGISTER = 0x1501,
+ VG_USERREQ__STACK_DEREGISTER = 0x1502,
+ VG_USERREQ__STACK_CHANGE = 0x1503 /* currently last; per the ABI warning, new codes go after it */
+ } Vg_ClientRequest;
+
+#if !defined(__GNUC__)
+# define __extension__ /* defined to nothing so the keyword disappears when the compiler is not GNU C */
+#endif
+
+/* Returns the number of Valgrinds this code is running under. That
+ is, 0 if running natively, 1 if running under Valgrind, 2 if
+ running under Valgrind which is running under another Valgrind,
+ etc. */
+#define RUNNING_ON_VALGRIND __extension__ \
+ ({unsigned int _qzz_res; /* statement expression: the macro's value is the final _qzz_res */ \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0 /* if not */, \
+ VG_USERREQ__RUNNING_ON_VALGRIND, \
+ 0, 0, 0, 0, 0); /* all five argument slots are passed as 0 */ \
+ _qzz_res; \
+ })
+
+
+/* Discard translation of code in the range [_qzz_addr .. _qzz_addr +
+ _qzz_len - 1]. Useful if you are debugging a JITter or some such,
+ since it provides a way to make sure valgrind will retranslate the
+ invalidated area. Returns no value. */
+#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__DISCARD_TRANSLATIONS, \
+ _qzz_addr, _qzz_len, 0, 0, 0); \
+ }
+
+
+/* These requests are for getting Valgrind itself to print something.
+ Possibly with a backtrace. This is a really ugly hack. */
+
+#if defined(NVALGRIND)
+
+# define VALGRIND_PRINTF(...)
+# define VALGRIND_PRINTF_BACKTRACE(...)
+
+#else /* NVALGRIND */
+
+/* Modern GCC will optimize the static routine out if unused,
+ and unused attribute will shut down warnings about it. */
+static int VALGRIND_PRINTF(const char *format, ...)
+ __attribute__((format(__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF(const char *format, ...)
+{
+ unsigned long _qzz_res;
+ va_list vargs;
+ va_start(vargs, format);
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF,
+ (unsigned long)format, (unsigned long)vargs,
+ 0, 0, 0);
+ va_end(vargs);
+ return (int)_qzz_res;
+}
+
+static int VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+ __attribute__((format(__printf__, 1, 2), __unused__));
+static int
+VALGRIND_PRINTF_BACKTRACE(const char *format, ...)
+{
+ unsigned long _qzz_res;
+ va_list vargs;
+ va_start(vargs, format);
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, VG_USERREQ__PRINTF_BACKTRACE,
+ (unsigned long)format, (unsigned long)vargs,
+ 0, 0, 0);
+ va_end(vargs);
+ return (int)_qzz_res;
+}
+
+#endif /* NVALGRIND */
+
+
+/* These requests allow control to move from the simulated CPU to the
+ real CPU, calling an arbitrary function.
+
+ Note that the current ThreadId is inserted as the first argument.
+ So this call:
+
+ VALGRIND_NON_SIMD_CALL2(f, arg1, arg2)
+
+ requires f to have this signature:
+
+ Word f(Word tid, Word arg1, Word arg2)
+
+ where "Word" is a word-sized type.
+
+ Note that these client requests are not entirely reliable. For example,
+ if you call a function with them that subsequently calls printf(),
+ there's a high chance Valgrind will crash. Generally, your prospects of
+ these working are made higher if the called function does not refer to
+ any global variables, and does not refer to any libc or other functions
+ (printf et al). Any kind of entanglement with libc or dynamic linking is
+ likely to have a bad outcome, for tricky reasons which we've grappled
+ with a lot in the past.
+*/
+#define VALGRIND_NON_SIMD_CALL0(_qyy_fn) \
+ __extension__ \
+ ({unsigned long _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__CLIENT_CALL0, \
+ _qyy_fn, \
+ 0, 0, 0, 0); \
+ _qyy_res; \
+ })
+
+#define VALGRIND_NON_SIMD_CALL1(_qyy_fn, _qyy_arg1) \
+ __extension__ \
+ ({unsigned long _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__CLIENT_CALL1, \
+ _qyy_fn, \
+ _qyy_arg1, 0, 0, 0); \
+ _qyy_res; \
+ })
+
+#define VALGRIND_NON_SIMD_CALL2(_qyy_fn, _qyy_arg1, _qyy_arg2) \
+ __extension__ \
+ ({unsigned long _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__CLIENT_CALL2, \
+ _qyy_fn, \
+ _qyy_arg1, _qyy_arg2, 0, 0); \
+ _qyy_res; \
+ })
+
+#define VALGRIND_NON_SIMD_CALL3(_qyy_fn, _qyy_arg1, _qyy_arg2, _qyy_arg3) \
+ __extension__ \
+ ({unsigned long _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__CLIENT_CALL3, \
+ _qyy_fn, \
+ _qyy_arg1, _qyy_arg2, \
+ _qyy_arg3, 0); \
+ _qyy_res; \
+ })
+
+
+/* Counts the number of errors that have been recorded by a tool. Nb:
+ the tool must record the errors with VG_(maybe_record_error)() or
+ VG_(unique_error)() for them to be counted. */
+#define VALGRIND_COUNT_ERRORS \
+ __extension__ \
+ ({unsigned int _qyy_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qyy_res, 0 /* default return */, \
+ VG_USERREQ__COUNT_ERRORS, \
+ 0, 0, 0, 0, 0); \
+ _qyy_res; \
+ })
+
+/* Mark a block of memory as having been allocated by a malloc()-like
+ function. `addr' is the start of the usable block (ie. after any
+ redzone). `rzB' is redzone size if the allocator can apply redzones;
+ use '0' if not. Adding redzones makes it more likely Valgrind will spot
+ block overruns. `is_zeroed' indicates if the memory is zeroed, as it is
+ for calloc(). Put it immediately after the point where a block is
+ allocated.
+
+ If you're using Memcheck: If you're allocating memory via superblocks,
+ and then handing out small chunks of each superblock, if you don't have
+ redzones on your small blocks, it's worth marking the superblock with
+ VALGRIND_MAKE_MEM_NOACCESS when it's created, so that block overruns are
+ detected. But if you can put redzones on, it's probably better to not do
+ this, so that messages for small overruns are described in terms of the
+ small block rather than the superblock (but if you have a big overrun
+ that skips over a redzone, you could miss an error this way). See
+ memcheck/tests/custom_alloc.c for an example.
+
+ WARNING: if your allocator uses malloc() or 'new' to allocate
+ superblocks, rather than mmap() or brk(), this will not work properly --
+ you'll likely get assertion failures during leak detection. This is
+ because Valgrind doesn't like seeing overlapping heap blocks. Sorry.
+
+ Nb: block must be freed via a free()-like function specified
+ with VALGRIND_FREELIKE_BLOCK or mismatch errors will occur. */
+#define VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MALLOCLIKE_BLOCK, \
+ addr, sizeB, rzB, is_zeroed, 0); \
+ }
+
+/* Mark a block of memory as having been freed by a free()-like function.
+ `rzB' is redzone size; it must match that given to
+ VALGRIND_MALLOCLIKE_BLOCK. Memory not freed will be detected by the leak
+ checker. Put it immediately after the point where the block is freed. */
+#define VALGRIND_FREELIKE_BLOCK(addr, rzB) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__FREELIKE_BLOCK, \
+ addr, rzB, 0, 0, 0); \
+ }
+
+/* Create a memory pool. */
+#define VALGRIND_CREATE_MEMPOOL(pool, rzB, is_zeroed) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__CREATE_MEMPOOL, \
+ pool, rzB, is_zeroed, 0, 0); \
+ }
+
+/* Destroy a memory pool. */
+#define VALGRIND_DESTROY_MEMPOOL(pool) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__DESTROY_MEMPOOL, \
+ pool, 0, 0, 0, 0); \
+ }
+
+/* Associate a piece of memory with a memory pool. */
+#define VALGRIND_MEMPOOL_ALLOC(pool, addr, size) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_ALLOC, \
+ pool, addr, size, 0, 0); \
+ }
+
+/* Disassociate a piece of memory from a memory pool. */
+#define VALGRIND_MEMPOOL_FREE(pool, addr) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_FREE, \
+ pool, addr, 0, 0, 0); \
+ }
+
+/* Disassociate any pieces outside a particular range. */
+#define VALGRIND_MEMPOOL_TRIM(pool, addr, size) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_TRIM, \
+ pool, addr, size, 0, 0); \
+ }
+
+/* Move a memory pool: the pool anchored at poolA is now anchored at poolB. */
+#define VALGRIND_MOVE_MEMPOOL(poolA, poolB) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MOVE_MEMPOOL, \
+ poolA, poolB, 0, 0, 0); \
+ }
+
+/* Resize and/or move a piece associated with a memory pool. */
+#define VALGRIND_MEMPOOL_CHANGE(pool, addrA, addrB, size) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_CHANGE, \
+ pool, addrA, addrB, size, 0); \
+ }
+
+/* Return 1 if a mempool exists, else 0. */
+#define VALGRIND_MEMPOOL_EXISTS(pool) \
+ ({unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__MEMPOOL_EXISTS, \
+ pool, 0, 0, 0, 0); \
+ _qzz_res; \
+ })
+
+/* Mark a piece of memory as being a stack. Returns a stack id. */
+#define VALGRIND_STACK_REGISTER(start, end) \
+ ({unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__STACK_REGISTER, \
+ start, end, 0, 0, 0); \
+ _qzz_res; \
+ })
+
+/* Unmark the piece of memory associated with a stack id as being a
+ stack. */
+#define VALGRIND_STACK_DEREGISTER(id) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__STACK_DEREGISTER, \
+ id, 0, 0, 0, 0); \
+ }
+
+/* Change the start and end address of the stack id. */
+#define VALGRIND_STACK_CHANGE(id, start, end) \
+ {unsigned int _qzz_res; \
+ VALGRIND_DO_CLIENT_REQUEST(_qzz_res, 0, \
+ VG_USERREQ__STACK_CHANGE, \
+ id, start, end, 0, 0); \
+ }
+
+
+#undef PLAT_x86_linux
+#undef PLAT_amd64_linux
+#undef PLAT_ppc32_linux
+#undef PLAT_ppc64_linux
+#undef PLAT_ppc32_aix5
+#undef PLAT_ppc64_aix5
+
+#endif /* __VALGRIND_H */
diff --git a/third_party/tcmalloc/chromium/src/thread_cache.h b/third_party/tcmalloc/chromium/src/thread_cache.h
index 4c6a233..1165447 100644
--- a/third_party/tcmalloc/chromium/src/thread_cache.h
+++ b/third_party/tcmalloc/chromium/src/thread_cache.h
@@ -79,7 +79,9 @@ class ThreadCache {
// Total byte size in cache
size_t Size() const { return size_; }
- void* Allocate(size_t size);
+ // Allocate an object of the given size and class. The size given
+ // must be the same as the size of the class in the size map.
+ void* Allocate(size_t size, size_t cl);
void Deallocate(void* ptr, size_t size_class);
void Scavenge();
@@ -293,15 +295,18 @@ class ThreadCache {
// across all ThreadCaches. Protected by Static::pageheap_lock.
static ssize_t unclaimed_cache_space_;
- // Warning: the offset of list_ affects performance. On general
- // principles, we don't like list_[x] to span multiple L1 cache
- // lines. However, merely placing list_ at offset 0 here seems to
- // cause cache conflicts.
+ // This class is laid out with the most frequently used fields
+ // first so that hot elements are placed on the same cache line.
size_t size_; // Combined size of data
size_t max_size_; // size_ > max_size_ --> Scavenge()
- pthread_t tid_; // Which thread owns it
+
+ // We sample allocations, biased by the size of the allocation
+ Sampler sampler_; // A sampler
+
FreeList list_[kNumClasses]; // Array indexed by size-class
+
+ pthread_t tid_; // Which thread owns it
bool in_setspecific_; // In call to pthread_setspecific?
// Allocate a new heap. REQUIRES: Static::pageheap_lock is held.
@@ -313,9 +318,10 @@ class ThreadCache {
static void DeleteCache(ThreadCache* heap);
static void RecomputePerThreadCacheSize();
- // We sample allocations, biased by the size of the allocation
- Sampler sampler_; // A sampler
-};
+ // Ensure that this class is cacheline-aligned. This is critical for
+ // performance, as false sharing would negate many of the benefits
+ // of a per-thread cache.
+} CACHELINE_ALIGNED;
// Allocator for thread heaps
// This is logically part of the ThreadCache class, but MSVC, at
@@ -331,15 +337,15 @@ inline bool ThreadCache::SampleAllocation(size_t k) {
return sampler_.SampleAllocation(k);
}
-inline void* ThreadCache::Allocate(size_t size) {
+inline void* ThreadCache::Allocate(size_t size, size_t cl) {
ASSERT(size <= kMaxSize);
- const size_t cl = Static::sizemap()->SizeClass(size);
- const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl);
+ ASSERT(size == Static::sizemap()->ByteSizeForClass(cl));
+
FreeList* list = &list_[cl];
if (list->empty()) {
- return FetchFromCentralCache(cl, alloc_size);
+ return FetchFromCentralCache(cl, size);
}
- size_ -= alloc_size;
+ size_ -= size;
return list->Pop();
}
diff --git a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
index 97b614b..5c65a03 100644
--- a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
+++ b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
@@ -48,6 +48,12 @@
#define SEARCH_CAP (1024*1024)
#define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols"
+void usage() {
+ fprintf(stderr, "usage: "
+ "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n");
+ fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n");
+}
+
int main(int argc, char *argv[]) {
DWORD error;
HANDLE process;
@@ -74,10 +80,11 @@ int main(int argc, char *argv[]) {
}
filename = argv[i+1];
i++; /* to skip over filename too */
+ } else if (strcmp(argv[i], "--help") == 0) {
+ usage();
+ exit(0);
} else {
- fprintf(stderr, "usage: "
- "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n");
- fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n");
+ usage();
exit(1);
}
}
diff --git a/third_party/tcmalloc/chromium/src/windows/config.h b/third_party/tcmalloc/chromium/src/windows/config.h
index 99de82c..b5d9bb6 100644
--- a/third_party/tcmalloc/chromium/src/windows/config.h
+++ b/third_party/tcmalloc/chromium/src/windows/config.h
@@ -261,10 +261,12 @@
// ---------------------------------------------------------------------
// Extra stuff not found in config.h.in
-// This must be defined before the windows.h is included. It's needed
-// for mutex.h, to give access to the TryLock method.
+// This must be defined before the windows.h is included. We need at
+// least 0x0400 for mutex.h to have access to TryLock, and at least
+// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx.
+// (This latter is an optimization we could take out if need be.)
#ifndef _WIN32_WINNT
-# define _WIN32_WINNT 0x0400
+# define _WIN32_WINNT 0x0501
#endif
// We want to make sure not to ever try to #include heap-checker.h
diff --git a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
index 4b97b15..663b7f9 100644
--- a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
+++ b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
@@ -61,7 +61,8 @@
#endif
#ifdef __cplusplus
-#include <new> // for nothrow_t
+#include <new> // for std::nothrow_t
+
extern "C" {
#endif
// Returns a human-readable version string. If major, minor,
@@ -92,16 +93,15 @@ extern "C" {
#ifdef __cplusplus
PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW;
PERFTOOLS_DLL_DECL void* tc_new(size_t size);
- PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW;
- PERFTOOLS_DLL_DECL void* tc_newarray(size_t size);
- PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
-
PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size,
const std::nothrow_t&) __THROW;
- PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size,
- const std::nothrow_t&) __THROW;
+ PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW;
PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p,
const std::nothrow_t&) __THROW;
+ PERFTOOLS_DLL_DECL void* tc_newarray(size_t size);
+ PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size,
+ const std::nothrow_t&) __THROW;
+ PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p,
const std::nothrow_t&) __THROW;
}
diff --git a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
index 726d345..9beb21d 100644
--- a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
+++ b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
@@ -180,6 +180,10 @@ static void ShowSymbolInfo(HANDLE process, ULONG64 module_base) {
#endif
}
+void usage() {
+ fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n");
+}
+
int main(int argc, char *argv[]) {
DWORD error;
HANDLE process;
@@ -195,12 +199,15 @@ int main(int argc, char *argv[]) {
for (i = 1; i < argc; i++) {
if (strcmp(argv[i], "--demangle") == 0 || strcmp(argv[i], "-C") == 0) {
symopts |= SYMOPT_UNDNAME;
+ } else if (strcmp(argv[i], "--help") == 0) {
+ usage();
+ exit(0);
} else {
break;
}
}
if (i != argc - 1) {
- fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n");
+ usage();
exit(1);
}
filename = argv[i];
diff --git a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
index c1ed37f..deb841b 100644
--- a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
+++ b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
@@ -83,6 +83,7 @@
#endif
#include <windows.h>
+#include <stdio.h>
#include <malloc.h> // for _msize and _expand
#include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
#include <set>
@@ -96,8 +97,6 @@
// The maximum number of modules we allow to be in one executable
const int kMaxModules = 8182;
-// The maximum size of a module's basename
-const int kMaxModuleNameSize = 256;
// These are hard-coded, unfortunately. :-( They are also probably
// compiler specific. See get_mangled_names.cc, in this directory,
@@ -145,13 +144,15 @@ class LibcInfo {
LibcInfo() {
memset(this, 0, sizeof(*this)); // easiest way to initialize the array
}
- bool SameAs(const LibcInfo& that) const;
- bool SameAsModuleEntry(const ModuleEntryCopy& module_entry) const;
-
- bool patched() const { return is_valid() && module_name_[0] != '\0'; }
- const char* module_name() const { return is_valid() ? module_name_ : ""; }
+ bool patched() const { return is_valid(); }
void set_is_valid(bool b) { is_valid_ = b; }
+ // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx:
+ // "The load address of a module (lpBaseOfDll) is the same as the HMODULE
+ // value."
+ HMODULE hmodule() const {
+ return reinterpret_cast<HMODULE>(const_cast<void*>(module_base_address_));
+ }
// Populates all the windows_fn_[] vars based on our module info.
// Returns false if windows_fn_ is all NULL's, because there's
@@ -167,7 +168,6 @@ class LibcInfo {
memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_));
this->module_base_address_ = that.module_base_address_;
this->module_base_size_ = that.module_base_size_;
- memcpy(this->module_name_, that.module_name_, sizeof(module_name_));
}
enum {
@@ -207,7 +207,6 @@ class LibcInfo {
const void *module_base_address_;
size_t module_base_size_;
- char module_name_[kMaxModuleNameSize];
public:
// These shouldn't have to be public, since only subclasses of
@@ -285,10 +284,8 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
// This is a subset of MODDULEENTRY32, that we need for patching.
struct ModuleEntryCopy {
- LPVOID modBaseAddr;
+ LPVOID modBaseAddr; // the same as hmodule
DWORD modBaseSize;
- HMODULE hModule;
- TCHAR szModule[kMaxModuleNameSize];
// This is not part of MODDULEENTRY32, but is needed to avoid making
// windows syscalls while we're holding patch_all_modules_lock (see
// lock-inversion comments at patch_all_modules_lock definition, below).
@@ -297,26 +294,16 @@ struct ModuleEntryCopy {
ModuleEntryCopy() {
modBaseAddr = NULL;
modBaseSize = 0;
- hModule = NULL;
- strcpy(szModule, "<executable>");
for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
rgProcAddresses[i] = LibcInfo::static_fn(i);
}
- ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) {
+ ModuleEntryCopy(const MODULEINFO& mi) {
this->modBaseAddr = mi.lpBaseOfDll;
this->modBaseSize = mi.SizeOfImage;
- this->hModule = hmodule;
- // TODO(csilvers): we could make more efficient by calling these
- // lazily (not until the vars are needed, which is often never).
- // However, there's tricky business with calling windows functions
- // inside the patch_all_modules_lock (see the lock inversion
- // comments with the patch_all_modules_lock definition, below), so
- // it's safest to do it all here, where no lock is needed.
- ::GetModuleBaseNameA(hprocess, hmodule,
- this->szModule, sizeof(this->szModule));
for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
- rgProcAddresses[i] =
- (GenericFnPtr)::GetProcAddress(hModule, LibcInfo::function_name(i));
+ rgProcAddresses[i] = (GenericFnPtr)::GetProcAddress(
+ reinterpret_cast<const HMODULE>(mi.lpBaseOfDll),
+ LibcInfo::function_name(i));
}
};
@@ -479,18 +466,6 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
{ "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary },
};
-bool LibcInfo::SameAs(const LibcInfo& that) const {
- return (is_valid() &&
- module_base_address_ == that.module_base_address_ &&
- module_base_size_ == that.module_base_size_);
-}
-
-bool LibcInfo::SameAsModuleEntry(const ModuleEntryCopy& module_entry) const {
- return (is_valid() &&
- module_base_address_ == module_entry.modBaseAddr &&
- module_base_size_ == module_entry.modBaseSize);
-}
-
bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
// First, store the location of the function to patch before
// patching it. If none of these functions are found in the module,
@@ -552,10 +527,9 @@ bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
CHECK(windows_fn_[kFree]);
CHECK(windows_fn_[kRealloc]);
- // OK, we successfully patched. Let's store our member information.
+ // OK, we successfully populated. Let's store our member information.
module_base_address_ = module_entry.modBaseAddr;
module_base_size_ = module_entry.modBaseSize;
- strcpy(module_name_, module_entry.szModule);
return true;
}
@@ -636,14 +610,6 @@ void WindowsInfo::Unpatch() {
// You should hold the patch_all_modules_lock when calling this.
void PatchOneModuleLocked(const LibcInfo& me_info) {
- // Double-check we haven't seen this module before.
- for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) {
- if (g_module_libcs[i]->SameAs(me_info)) {
- fprintf(stderr, "%s:%d: FATAL PERFTOOLS ERROR: %s double-patched somehow.\n",
- __FILE__, __LINE__, g_module_libcs[i]->module_name());
- CHECK(false);
- }
- }
// If we don't already have info on this module, let's add it. This
// is where we're sad that each libcX has a different type, so we
// can't use an array; instead, we have to use a switch statement.
@@ -686,52 +652,70 @@ void PatchMainExecutableLocked() {
// patch_all_modules_lock, inside PatchAllModules().
static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED);
+// g_last_loaded: The set of modules that were loaded the last time
+// PatchAllModules was called. This is an optimization for only
+// looking at modules that were added or removed since the last call.
+static std::set<HMODULE> *g_last_loaded;
+
// Iterates over all the modules currently loaded by the executable,
-// and makes sure they're all patched. For ones that aren't, we patch
-// them in. We also check that every module we had patched in the
-// past is still loaded, and update internal data structures if so.
-// We return true if this PatchAllModules did any work, false else.
+// according to windows, and makes sure they're all patched. Most
+// modules will already be in g_last_loaded, meaning we have already
+// loaded and either patched them or determined they did not need to
+// be patched. Others will not, which means we need to patch them
+// (if necessary). Finally, we have to go through the existing
+// g_module_libcs and see if any of those are *not* in the modules
+// currently loaded by the executable. If so, we need to invalidate
+// them. Returns true if we did any work (patching or invalidating),
+// false if we were a noop. May update g_last_loaded as well.
+// NOTE: you must hold the patch_all_modules_lock to access g_last_loaded.
bool PatchAllModules() {
std::vector<ModuleEntryCopy> modules;
bool made_changes = false;
const HANDLE hCurrentProcess = GetCurrentProcess();
- MODULEINFO mi;
- DWORD cbNeeded = 0;
+ DWORD num_modules = 0;
HMODULE hModules[kMaxModules]; // max # of modules we support in one process
- if (::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules),
- &cbNeeded)) {
- for (int i = 0; i < cbNeeded / sizeof(*hModules); ++i) {
- if (i >= kMaxModules) {
- printf("PERFTOOLS ERROR: Too many modules in this executable to try"
- " to patch them all (if you need to, raise kMaxModules in"
- " patch_functions.cc).\n");
- break;
- }
- if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi)))
- modules.push_back(ModuleEntryCopy(hCurrentProcess, hModules[i], mi));
- }
+ if (!::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules),
+ &num_modules)) {
+ num_modules = 0;
+ }
+ // EnumProcessModules reports a size in bytes (the space needed for all
+ // the module handles), so we divide to make num_modules a module-count.
+ num_modules /= sizeof(*hModules);
+ if (num_modules >= kMaxModules) {
+ printf("PERFTOOLS ERROR: Too many modules in this executable to try"
+ " to patch them all (if you need to, raise kMaxModules in"
+ " patch_functions.cc).\n");
+ num_modules = kMaxModules;
}
- // Now do the actual patching and unpatching.
+ // Now we handle the unpatching of modules we have in g_module_libcs
+ // but that were not found in EnumProcessModules. We need to
+ // invalidate them. To speed that up, we store the EnumProcessModules
+ // output in a set.
+ // At the same time, we prepare for the adding of new modules, by
+ // removing from hModules all the modules we know we've already
+ // patched (or decided don't need to be patched). At the end,
+ // hModules will hold only the modules that we need to consider patching.
+ std::set<HMODULE> currently_loaded_modules;
{
SpinLockHolder h(&patch_all_modules_lock);
- for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) {
- if (!g_module_libcs[i]->is_valid())
- continue;
- bool still_loaded = false;
- for (std::vector<ModuleEntryCopy>::iterator it = modules.begin();
- it != modules.end(); ++it) {
- if (g_module_libcs[i]->SameAsModuleEntry(*it)) {
- // Both g_module_libcs[i] and it are still valid. Mark it by
- // removing it from the vector; mark g_module_libcs[i] by
- // setting a bool.
- modules.erase(it);
- still_loaded = true;
- break;
- }
+ if (!g_last_loaded) g_last_loaded = new std::set<HMODULE>;
+ // At the end of this loop, currently_loaded_modules contains the
+ // full list of EnumProcessModules, and hModules just the ones we
+ // haven't handled yet.
+ for (int i = 0; i < num_modules; ) {
+ currently_loaded_modules.insert(hModules[i]);
+ if (g_last_loaded->count(hModules[i]) > 0) {
+ hModules[i] = hModules[--num_modules]; // replace element i with tail
+ } else {
+ i++; // keep element i
}
- if (!still_loaded) {
+ }
+ // Now we do the unpatching/invalidation.
+ for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) {
+ if (g_module_libcs[i]->patched() &&
+ currently_loaded_modules.count(g_module_libcs[i]->hmodule()) == 0) {
// Means g_module_libcs[i] is no longer loaded (no me32 matched).
// We could call Unpatch() here, but why bother? The module
// has gone away, so nobody is going to call into it anyway.
@@ -739,14 +723,28 @@ bool PatchAllModules() {
made_changes = true;
}
}
+ // Update the loaded module cache.
+ g_last_loaded->swap(currently_loaded_modules);
+ }
+
+ // Now that we know what modules are new, let's get the info we'll
+ // need to patch them. Note this *cannot* be done while holding the
+ // lock, since it needs to make windows calls (see the lock-inversion
+ // comments before the definition of patch_all_modules_lock).
+ MODULEINFO mi;
+ for (int i = 0; i < num_modules; i++) {
+ if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi)))
+ modules.push_back(ModuleEntryCopy(mi));
+ }
- // We've handled all the g_module_libcs. Now let's handle the rest
- // of the module-entries: those that haven't already been loaded.
- for (std::vector<ModuleEntryCopy>::const_iterator it = modules.begin();
+ // Now we can do the patching of new modules.
+ {
+ SpinLockHolder h(&patch_all_modules_lock);
+ for (std::vector<ModuleEntryCopy>::iterator it = modules.begin();
it != modules.end(); ++it) {
LibcInfo libc_info;
if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines
- PatchOneModuleLocked(libc_info); // updates num_patched_modules
+ PatchOneModuleLocked(libc_info);
made_changes = true;
}
}
@@ -759,6 +757,10 @@ bool PatchAllModules() {
made_changes = true;
}
}
+ // TODO(csilvers): for this to be reliable, we need to also take
+ // into account if we *would* have patched any modules had they not
+ // already been loaded. (That is, made_changes should ignore
+ // g_last_loaded.)
return made_changes;
}
@@ -766,59 +768,9 @@ bool PatchAllModules() {
} // end unnamed namespace
// ---------------------------------------------------------------------
-// PatchWindowsFunctions()
-// This is the function that is exposed to the outside world.
-// It should be called before the program becomes multi-threaded,
-// since main_executable_windows.Patch() is not thread-safe.
-// ---------------------------------------------------------------------
-
-void PatchWindowsFunctions() {
- // This does the libc patching in every module, and the main executable.
- PatchAllModules();
- main_executable_windows.Patch();
-}
-
-#if 0
-// It's possible to unpatch all the functions when we are exiting.
-
-// The idea is to handle properly windows-internal data that is
-// allocated before PatchWindowsFunctions is called. If all
-// destruction happened in reverse order from construction, then we
-// could call UnpatchWindowsFunctions at just the right time, so that
-// that early-allocated data would be freed using the windows
-// allocation functions rather than tcmalloc. The problem is that
-// windows allocates some structures lazily, so it would allocate them
-// late (using tcmalloc) and then try to deallocate them late as well.
-// So instead of unpatching, we just modify all the tcmalloc routines
-// so they call through to the libc rountines if the memory in
-// question doesn't seem to have been allocated with tcmalloc. I keep
-// this unpatch code around for reference.
-
-void UnpatchWindowsFunctions() {
- // We need to go back to the system malloc/etc at global destruct time,
- // so objects that were constructed before tcmalloc, using the system
- // malloc, can destroy themselves using the system free. This depends
- // on DLLs unloading in the reverse order in which they load!
- //
- // We also go back to the default HeapAlloc/etc, just for consistency.
- // Who knows, it may help avoid weird bugs in some situations.
- main_executable_windows.Unpatch();
- main_executable.Unpatch();
- if (libc1.is_valid()) libc1.Unpatch();
- if (libc2.is_valid()) libc2.Unpatch();
- if (libc3.is_valid()) libc3.Unpatch();
- if (libc4.is_valid()) libc4.Unpatch();
- if (libc5.is_valid()) libc5.Unpatch();
- if (libc6.is_valid()) libc6.Unpatch();
- if (libc7.is_valid()) libc7.Unpatch();
- if (libc8.is_valid()) libc8.Unpatch();
-}
-#endif
-
-// ---------------------------------------------------------------------
-// Now that we've done all the patching machinery, let's end the file
-// by actually defining the functions we're patching in. Mostly these
-// are simple wrappers around the do_* routines in tcmalloc.cc.
+// Now that we've done all the patching machinery, let's actually
+// define the functions we're patching in. Mostly these are
+// simple wrappers around the do_* routines in tcmalloc.cc.
//
// In fact, we #include tcmalloc.cc to get at the tcmalloc internal
// do_* functions, the better to write our own hook functions.
@@ -1029,19 +981,107 @@ BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) {
lpBaseAddress);
}
+// g_load_map holds a copy of windows' refcount for how many times
+// each currently loaded module has been loaded and unloaded. We use
+// it as an optimization when the same module is loaded more than
+// once: as long as the refcount stays above 1, we don't need to worry
+// about patching because it's already patched. Likewise, we don't
+// need to unpatch until the refcount drops to 0. g_load_map is
+// maintained in LoadLibraryExW and FreeLibrary, and only covers
+// modules explicitly loaded/freed via those interfaces.
+static std::map<HMODULE, int>* g_load_map = NULL;
+
HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName,
HANDLE hFile,
DWORD dwFlags) {
- HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD))
- function_info_[kLoadLibraryExW].origstub_fn)(
- lpFileName, hFile, dwFlags);
- PatchAllModules();
- return rv;
+ HMODULE rv;
+ // Check to see if the module is already loaded, flag 0 gets a
+ // reference if it was loaded. If it was loaded no need to call
+ // PatchAllModules, just increase the reference count to match
+ // what GetModuleHandleExW does internally inside windows.
+ if (::GetModuleHandleExW(0, lpFileName, &rv)) {
+ return rv;
+ } else {
+ // Not already loaded, so load it.
+ rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD))
+ function_info_[kLoadLibraryExW].origstub_fn)(
+ lpFileName, hFile, dwFlags);
+ // This will patch any newly loaded libraries, if patching needs
+ // to be done.
+ PatchAllModules();
+
+ return rv;
+ }
}
BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) {
BOOL rv = ((BOOL (WINAPI *)(HMODULE))
function_info_[kFreeLibrary].origstub_fn)(hLibModule);
+
+ // Check to see if the module is still loaded by passing the base
+ // address and seeing if it comes back with the same address. If it
+ // is the same address it's still loaded, so the FreeLibrary() call
+ // was a noop, and there's no need to redo the patching.
+ HMODULE owner = NULL;
+ BOOL result = ::GetModuleHandleExW(
+ (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
+ GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT),
+ (LPCWSTR)hLibModule,
+ &owner);
+ if (result && owner == hLibModule)
+ return rv;
+
PatchAllModules(); // this will fix up the list of patched libraries
return rv;
}
+
+
+// ---------------------------------------------------------------------
+// PatchWindowsFunctions()
+// This is the function that is exposed to the outside world.
+// It should be called before the program becomes multi-threaded,
+// since main_executable_windows.Patch() is not thread-safe.
+// ---------------------------------------------------------------------
+
+void PatchWindowsFunctions() {
+ // This does the libc patching in every module, and the main executable.
+ PatchAllModules();
+ main_executable_windows.Patch();
+}
+
+#if 0
+// It's possible to unpatch all the functions when we are exiting.
+
+// The idea is to handle properly windows-internal data that is
+// allocated before PatchWindowsFunctions is called. If all
+// destruction happened in reverse order from construction, then we
+// could call UnpatchWindowsFunctions at just the right time, so that
+// that early-allocated data would be freed using the windows
+// allocation functions rather than tcmalloc. The problem is that
+// windows allocates some structures lazily, so it would allocate them
+// late (using tcmalloc) and then try to deallocate them late as well.
+// So instead of unpatching, we just modify all the tcmalloc routines
+// so they call through to the libc routines if the memory in
+// question doesn't seem to have been allocated with tcmalloc. I keep
+// this unpatch code around for reference.
+
+void UnpatchWindowsFunctions() {
+ // We need to go back to the system malloc/etc at global destruct time,
+ // so objects that were constructed before tcmalloc, using the system
+ // malloc, can destroy themselves using the system free. This depends
+ // on DLLs unloading in the reverse order in which they load!
+ //
+ // We also go back to the default HeapAlloc/etc, just for consistency.
+ // Who knows, it may help avoid weird bugs in some situations.
+ main_executable_windows.Unpatch();
+ main_executable.Unpatch();
+ if (libc1.is_valid()) libc1.Unpatch();
+ if (libc2.is_valid()) libc2.Unpatch();
+ if (libc3.is_valid()) libc3.Unpatch();
+ if (libc4.is_valid()) libc4.Unpatch();
+ if (libc5.is_valid()) libc5.Unpatch();
+ if (libc6.is_valid()) libc6.Unpatch();
+ if (libc7.is_valid()) libc7.Unpatch();
+ if (libc8.is_valid()) libc8.Unpatch();
+}
+#endif