diff options
author | glider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-06-17 07:59:58 +0000 |
---|---|---|
committer | glider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-06-17 07:59:58 +0000 |
commit | b6081702c2cb8094e45c3a58ea92915d4b972ddf (patch) | |
tree | c2229158e90a3342b986dfa1562a4607b7a57c9f /third_party/tcmalloc/chromium | |
parent | ad26ef4a0c0a170f4a167eea95aed749b791cc95 (diff) | |
download | chromium_src-b6081702c2cb8094e45c3a58ea92915d4b972ddf.zip chromium_src-b6081702c2cb8094e45c3a58ea92915d4b972ddf.tar.gz chromium_src-b6081702c2cb8094e45c3a58ea92915d4b972ddf.tar.bz2 |
Merge google-perftools r109 (the current contents of third_party/tcmalloc/vendor)
with the forked Chromium version of tcmalloc.
This change also requires some fixes to base/allocator/allocator.gyp: new tcmalloc
source files added, unittest_utils.cc disabled (tcmalloc has its own snprintf()
implementation now)
Review URL: http://codereview.chromium.org/7050034
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@89452 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/tcmalloc/chromium')
102 files changed, 5581 insertions, 1966 deletions
diff --git a/third_party/tcmalloc/chromium/src/base/arm_instruction_set_select.h b/third_party/tcmalloc/chromium/src/base/arm_instruction_set_select.h new file mode 100644 index 0000000..a47e6bb --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/arm_instruction_set_select.h @@ -0,0 +1,79 @@ +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Author: Alexander Levitskiy +// +// Generalizes the plethora of ARM flavors available to an easier to manage set +// Defs reference is at https://wiki.edubuntu.org/ARM/Thumb2PortingHowto + +#ifndef ARM_INSTRUCTION_SET_SELECT_H_ +#define ARM_INSTRUCTION_SET_SELECT_H_ + +#if defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7A__) +# define ARMV7 1 +#endif + +#if defined(ARMV7) || \ + defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6ZK__) +# define ARMV6 1 +#endif + +#if defined(ARMV6) || \ + defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__) || \ + defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define ARMV5 1 +#endif + +#if defined(ARMV5) || \ + defined(__ARM_ARCH_4__) || \ + defined(__ARM_ARCH_4T__) +# define ARMV4 1 +#endif + +#if defined(ARMV4) || \ + defined(__ARM_ARCH_3__) || \ + defined(__ARM_ARCH_3M__) +# define ARMV3 1 +#endif + +#if defined(ARMV3) || \ + defined(__ARM_ARCH_2__) +# define ARMV2 1 +#endif + +#endif // ARM_INSTRUCTION_SET_SELECT_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-gcc.h b/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-gcc.h new file mode 100644 index 0000000..423e993 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-gcc.h @@ -0,0 +1,234 @@ +/* Copyright (c) 2010, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Lei Zhang, Sasha Levitskiy + */ + +// This file is an internal atomic implementation, use base/atomicops.h instead. +// +// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears. + +#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ +#define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ + +#include <stdio.h> +#include "base/basictypes.h" // For COMPILE_ASSERT + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +// 0xffff0fc0 is the hard coded address of a function provided by +// the kernel which implements an atomic compare-exchange. On older +// ARM architecture revisions (pre-v6) this may be implemented using +// a syscall. 
This address is stable, and in active use (hard coded) +// by at least glibc-2.7 and the Android C library. +// pLinuxKernelCmpxchg has both acquire and release barrier sematincs. +typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value, + Atomic32 new_value, + volatile Atomic32* ptr); +LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute__((weak)) = + (LinuxKernelCmpxchgFunc) 0xffff0fc0; + +typedef void (*LinuxKernelMemoryBarrierFunc)(void); +LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) = + (LinuxKernelMemoryBarrierFunc) 0xffff0fa0; + + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value = *ptr; + do { + if (!pLinuxKernelCmpxchg(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (pLinuxKernelCmpxchg(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + for (;;) { + // Atomic exchange the old value with an incremented one. + Atomic32 old_value = *ptr; + Atomic32 new_value = old_value + increment; + if (pLinuxKernelCmpxchg(old_value, new_value, + const_cast<Atomic32*>(ptr)) == 0) { + // The exchange took place as expected. + return new_value; + } + // Otherwise, *ptr changed mid-loop and we need to retry. 
+ } +} + +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + return Barrier_AtomicIncrement(ptr, increment); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void MemoryBarrier() { + pLinuxKernelMemoryBarrier(); +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + + +// 64-bit versions are not implemented yet. 
+ +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + NotImplementedFatalError("NoBarrier_AtomicIncrement"); + return 0; +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + NotImplementedFatalError("Barrier_AtomicIncrement"); + return 0; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("NoBarrier_Store"); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_AtomicExchange(ptr, value); + // acts as a barrier in this implementation +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Release_Store"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("NoBarrier_Load"); + return 0; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +} // namespace base::subtle +} 
// namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-generic.h b/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-generic.h new file mode 100644 index 0000000..7882b0d --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-generic.h @@ -0,0 +1,236 @@ +// Copyright (c) 2003, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// --- +// +// Author: Lei Zhang, Sasha Levitskiy +// +// This file is an internal atomic implementation, use base/atomicops.h instead. +// +// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears. + +#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_ +#define BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_ + +#include <stdio.h> +#include <stdlib.h> +#include "base/macros.h" // For COMPILE_ASSERT +#include "base/port.h" // ATTRIBUTE_WEAK + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +// 0xffff0fc0 is the hard coded address of a function provided by +// the kernel which implements an atomic compare-exchange. On older +// ARM architecture revisions (pre-v6) this may be implemented using +// a syscall. This address is stable, and in active use (hard coded) +// by at least glibc-2.7 and the Android C library. +// pLinuxKernelCmpxchg has both acquire and release barrier sematincs. +typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value, + Atomic32 new_value, + volatile Atomic32* ptr); +LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg ATTRIBUTE_WEAK = + (LinuxKernelCmpxchgFunc) 0xffff0fc0; + +typedef void (*LinuxKernelMemoryBarrierFunc)(void); +LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier ATTRIBUTE_WEAK = + (LinuxKernelMemoryBarrierFunc) 0xffff0fa0; + + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value = *ptr; + do { + if (!pLinuxKernelCmpxchg(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (pLinuxKernelCmpxchg(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + 
Atomic32 increment) { + for (;;) { + // Atomic exchange the old value with an incremented one. + Atomic32 old_value = *ptr; + Atomic32 new_value = old_value + increment; + if (pLinuxKernelCmpxchg(old_value, new_value, + const_cast<Atomic32*>(ptr)) == 0) { + // The exchange took place as expected. + return new_value; + } + // Otherwise, *ptr changed mid-loop and we need to retry. + } +} + +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + return Barrier_AtomicIncrement(ptr, increment); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void MemoryBarrier() { + pLinuxKernelMemoryBarrier(); +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + + +// 64-bit versions are not implemented yet. 
+ +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + NotImplementedFatalError("NoBarrier_AtomicIncrement"); + return 0; +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + NotImplementedFatalError("Barrier_AtomicIncrement"); + return 0; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("NoBarrier_Store"); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Acquire_Store64"); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Release_Store"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("NoBarrier_Load"); + return 0; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("Atomic64 Acquire_Load"); + return 0; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("Atomic64 Release_Load"); + return 0; +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("Atomic64 Acquire_CompareAndSwap"); + return 0; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("Atomic64 Release_CompareAndSwap"); + return 0; +} + +} // namespace 
base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_ARM_GENERIC_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-v6plus.h b/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-v6plus.h new file mode 100644 index 0000000..ee09f32 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-arm-v6plus.h @@ -0,0 +1,244 @@ +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// --- +// +// Author: Sasha Levitskiy +// based on atomicops-internals by Sanjay Ghemawat +// +// This file is an internal atomic implementation, use base/atomicops.h instead. +// +// This code implements ARM atomics for architectures V6 and newer. + +#ifndef BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_ +#define BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_ + +#include <stdio.h> +#include <stdlib.h> +#include "base/basictypes.h" // For COMPILE_ASSERT + +typedef int32_t Atomic32; + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +// 32-bit low-level ops + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 oldval, res; + do { + __asm__ __volatile__( + "ldrex %1, [%3]\n" + "mov %0, #0\n" + "teq %1, %4\n" + "strexeq %0, %5, [%3]\n" + : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr) + : "r" (ptr), "Ir" (old_value), "r" (new_value) + : "cc"); + } while (res); + return oldval; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 tmp, old; + __asm__ __volatile__( + "1:\n" + "ldrex %1, [%2]\n" + "strex %0, %3, [%2]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (tmp), "=&r" (old) + : "r" (ptr), "r" (new_value) + : "cc", "memory"); + return old; +} + +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + Atomic32 tmp, res; + __asm__ __volatile__( + "1:\n" + "ldrex %1, [%2]\n" + "add %1, %1, %3\n" + "strex %0, %1, [%2]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (tmp), "=&r"(res) + : "r" (ptr), "r"(increment) + : "cc", "memory"); + return res; +} + +inline void MemoryBarrier() { + __asm__ __volatile__("dmb" : : : "memory"); +} + +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + Atomic32 tmp, res; + __asm__ __volatile__( + "1:\n" + "ldrex %1, [%2]\n" + "add %1, %1, %3\n" + "dmb\n" + "strex %0, %1, [%2]\n" + "teq %0, #0\n" + "bne 1b" + : "=&r" (tmp), "=&r"(res) + : "r" (ptr), 
"r"(increment) + : "cc", "memory"); + return res; +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + MemoryBarrier(); + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit versions are not implemented yet. 
+ +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + NotImplementedFatalError("NoBarrier_AtomicIncrement"); + return 0; +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + NotImplementedFatalError("Barrier_AtomicIncrement"); + return 0; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("NoBarrier_Store"); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Acquire_Store64"); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + NotImplementedFatalError("Release_Store"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("NoBarrier_Load"); + return 0; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("Atomic64 Acquire_Load"); + return 0; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + NotImplementedFatalError("Atomic64 Release_Load"); + return 0; +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("Atomic64 Acquire_CompareAndSwap"); + return 0; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + NotImplementedFatalError("Atomic64 Release_CompareAndSwap"); + return 0; +} + +} // namespace 
subtle ends +} // namespace base ends + +#endif // BASE_ATOMICOPS_INTERNALS_ARM_V6PLUS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86-msvc.h b/third_party/tcmalloc/chromium/src/base/atomicops-internals-windows.h index d50894c..58782a17 100644 --- a/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86-msvc.h +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-windows.h @@ -31,12 +31,12 @@ * Author: Sanjay Ghemawat */ -// Implementation of atomic operations for x86. This file should not -// be included directly. Clients should instead include -// "base/atomicops.h". +// Implementation of atomic operations using Windows API +// functions. This file should not be included directly. Clients +// should instead include "base/atomicops.h". -#ifndef BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ -#define BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ +#ifndef BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ +#define BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ #include <stdio.h> #include <stdlib.h> @@ -257,8 +257,8 @@ inline Atomic64 Release_Load(volatile const Atomic64* ptr) { // 64-bit low-level operations on 32-bit platform -// TBD(vchen): The GNU assembly below must be converted to MSVC inline -// assembly. +// TODO(vchen): The GNU assembly below must be converted to MSVC inline +// assembly. Then the file should be renamed to ...-x86-mscv.h, probably. 
inline void NotImplementedFatalError(const char *function_name) { fprintf(stderr, "64-bit %s() not implemented on this platform\n", @@ -411,4 +411,4 @@ inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, } // namespace base::subtle } // namespace base -#endif // BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ +#endif // BASE_ATOMICOPS_INTERNALS_WINDOWS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops.h b/third_party/tcmalloc/chromium/src/base/atomicops.h index 0f3d3ef..17e8a27 100644 --- a/third_party/tcmalloc/chromium/src/base/atomicops.h +++ b/third_party/tcmalloc/chromium/src/base/atomicops.h @@ -86,14 +86,18 @@ // TODO(csilvers): figure out ARCH_PIII/ARCH_K8 (perhaps via ./configure?) // ------------------------------------------------------------------------ +#include "base/arm_instruction_set_select.h" + // TODO(csilvers): match piii, not just __i386. Also, match k8 #if defined(__MACH__) && defined(__APPLE__) #include "base/atomicops-internals-macosx.h" -#elif defined(_MSC_VER) && defined(_M_IX86) -#include "base/atomicops-internals-x86-msvc.h" -#elif defined(__MINGW32__) && defined(__i386__) -#include "base/atomicops-internals-x86-msvc.h" -#elif defined(__GNUC__) && (defined(__i386) || defined(ARCH_K8)) +#elif defined(__GNUC__) && defined(ARMV6) +#include "base/atomicops-internals-arm-v6plus.h" +#elif defined(ARMV3) +#include "base/atomicops-internals-arm-generic.h" +#elif defined(_WIN32) +#include "base/atomicops-internals-windows.h" +#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__)) #include "base/atomicops-internals-x86.h" #elif defined(__linux__) && defined(__PPC__) #include "base/atomicops-internals-linuxppc.h" diff --git a/third_party/tcmalloc/chromium/src/base/basictypes.h b/third_party/tcmalloc/chromium/src/base/basictypes.h index ab9cdabc..0f21fca 100644 --- a/third_party/tcmalloc/chromium/src/base/basictypes.h +++ b/third_party/tcmalloc/chromium/src/base/basictypes.h @@ -109,7 +109,7 @@ const int64 kint64min = ( 
((( int64) kint32min) << 32) | 0 ); // Also allow for printing of a pthread_t. #define GPRIuPTHREAD "lu" #define GPRIxPTHREAD "lx" -#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) +#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) || defined(__FreeBSD__) #define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast<uintptr_t>(pthreadt) #else #define PRINTABLE_PTHREAD(pthreadt) pthreadt diff --git a/third_party/tcmalloc/chromium/src/base/cycleclock.h b/third_party/tcmalloc/chromium/src/base/cycleclock.h index 8af664ed..6d6822a 100644 --- a/third_party/tcmalloc/chromium/src/base/cycleclock.h +++ b/third_party/tcmalloc/chromium/src/base/cycleclock.h @@ -46,33 +46,45 @@ #define GOOGLE_BASE_CYCLECLOCK_H_ #include "base/basictypes.h" // make sure we get the def for int64 +#include "base/arm_instruction_set_select.h" #if defined(__MACH__) && defined(__APPLE__) -#include <mach/mach_time.h> +# include <mach/mach_time.h> +#endif +// For MSVC, we want the __rdtsc intrinsic, declared in <intrin.h>. +// Unfortunately, in some environments, <windows.h> and <intrin.h> have +// conflicting declarations of some other intrinsics, breaking compilation. +// Therefore, we simply declare __rdtsc ourselves. See also +// http://connect.microsoft.com/VisualStudio/feedback/details/262047 +#if defined(_MSC_VER) +extern "C" uint64 __rdtsc(); +#pragma intrinsic(__rdtsc) +#endif +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> #endif // NOTE: only i386 and x86_64 have been well tested. // PPC, sparc, alpha, and ia64 are based on // http://peter.kuscsik.com/wordpress/?p=14 -// with modifications by m3b. cf +// with modifications by m3b. See also // https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h struct CycleClock { // This should return the number of cycles since power-on. Thread-safe. 
static inline int64 Now() { #if defined(__MACH__) && defined(__APPLE__) - // this goes at the top because we need ALL Macs, regardless - // of architecture, to return the number of "mach time units" - // that have passes since startup. See sysinfo.cc where - // InitializeSystemInfo() sets the supposed cpu clock frequency of macs - // to the number of mach time units per second, not actual + // this goes at the top because we need ALL Macs, regardless of + // architecture, to return the number of "mach time units" that + // have passed since startup. See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of + // macs to the number of mach time units per second, not actual // CPU clock frequency (which can change in the face of CPU - // frequency scaling). also note that when the Mac sleeps, - // this counter pauses; it does not continue counting, nor resets - // to zero. + // frequency scaling). Also note that when the Mac sleeps, this + // counter pauses; it does not continue counting, nor does it + // reset to zero. return mach_absolute_time(); #elif defined(__i386__) int64 ret; - __asm__ volatile ("rdtsc" - : "=A" (ret) ); + __asm__ volatile ("rdtsc" : "=A" (ret) ); return ret; #elif defined(__x86_64__) || defined(__amd64__) uint64 low, high; @@ -82,7 +94,7 @@ struct CycleClock { // This returns a time-base, which is not always precisely a cycle-count. 
int64 tbl, tbu0, tbu1; asm("mftbu %0" : "=r" (tbu0)); - asm("mftb %0" : "=r" (tbl )); + asm("mftb %0" : "=r" (tbl)); asm("mftbu %0" : "=r" (tbu1)); tbl &= -static_cast<int64>(tbu0 == tbu1); // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) @@ -96,11 +108,31 @@ struct CycleClock { int64 itc; asm("mov %0 = ar.itc" : "=r" (itc)); return itc; -#elif defined(_MSC_VER) && defined(_M_IX86) - _asm rdtsc +#elif defined(_MSC_VER) + return __rdtsc(); +#elif defined(ARMV3) +#if defined(ARMV6) // V6 is the earliest arch that has a standard cyclecount + uint32 pmccntr; + uint32 pmuseren; + uint32 pmcntenset; + // Read the user mode perf monitor counter access permissions. + asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren)); + if (pmuseren & 1) { // Allows reading perfmon counters for user mode code. + asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset)); + if (pmcntenset & 0x80000000ul) { // Is it counting? + asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr)); + // The counter is set up to count every 64th cycle + return static_cast<int64>(pmccntr) * 64; // Should optimize to << 6 + } + } +#endif + struct timeval tv; + gettimeofday(&tv, NULL); + return static_cast<int64>(tv.tv_sec) * 1000000 + tv.tv_usec; #else - // We could define __alpha here as well, but it only has a 32-bit - // timer (good for like 4 seconds), which isn't very useful. +// The soft failover to a generic implementation is automatic only for ARM. +// For other platforms the developer is expected to make an attempt to create +// a fast implementation and use generic version if nothing better is available. #error You need to define CycleTimer for your O/S and CPU #endif } diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c new file mode 100644 index 0000000..1005f90 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.c @@ -0,0 +1,176 @@ +/* Copyright (c) 2008-2009, Google Inc. 
+ * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Kostya Serebryany + */ + +#ifdef __cplusplus +# error "This file should be built as pure C to avoid name mangling" +#endif + +#include "config.h" +#include <stdlib.h> +#include <string.h> + +#include "base/dynamic_annotations.h" + +#ifdef __GNUC__ +/* valgrind.h uses gcc extensions so it won't build with other compilers */ +# ifdef HAVE_VALGRIND_H /* prefer the user's copy if they have it */ +# include <valgrind.h> +# else /* otherwise just use the copy that we have */ +# include "third_party/valgrind.h" +# endif +#endif + +/* Each function is empty and called (via a macro) only in debug mode. + The arguments are captured by dynamic tools at runtime. */ + +#if DYNAMIC_ANNOTATIONS_ENABLED == 1 + +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock){} +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w){} +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed) {} +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier) {} +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier) {} + +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock){} +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv){} +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv){} +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void 
*address, + long size){} +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq){} +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq){} +void AnnotateNewMemory(const char *file, int line, + const volatile void *mem, + long size){} +void AnnotateExpectRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void AnnotateBenignRace(const char *file, int line, + const volatile void *mem, + const char *description){} +void AnnotateBenignRaceSized(const char *file, int line, + const volatile void *mem, + long size, + const char *description) {} +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu){} +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg){} +void AnnotateThreadName(const char *file, int line, + const char *name){} +void AnnotateIgnoreReadsBegin(const char *file, int line){} +void AnnotateIgnoreReadsEnd(const char *file, int line){} +void AnnotateIgnoreWritesBegin(const char *file, int line){} +void AnnotateIgnoreWritesEnd(const char *file, int line){} +void AnnotateEnableRaceDetection(const char *file, int line, int enable){} +void AnnotateNoOp(const char *file, int line, + const volatile void *arg){} +void AnnotateFlushState(const char *file, int line){} + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 1 */ + +static int GetRunningOnValgrind(void) { +#ifdef RUNNING_ON_VALGRIND + if (RUNNING_ON_VALGRIND) return 1; +#endif +#ifdef _MSC_VER + /* Visual Studio can complain about getenv, so use a windows equivalent. */ + char value[100] = "1"; /* something that is not "0" */ + int res = GetEnvironmentVariableA("RUNNING_ON_VALGRIND", + value, sizeof(value)); + /* value will remain "1" if the called failed for some reason. 
*/ + return (res > 0 && strcmp(value, "0") != 0); +#else + /* TODO(csilvers): use GetenvBeforeMain() instead? Will need to + * change it to be extern "C". + */ + char *running_on_valgrind_str = getenv("RUNNING_ON_VALGRIND"); + if (running_on_valgrind_str) { + return strcmp(running_on_valgrind_str, "0") != 0; + } + return 0; +#endif +} + +/* See the comments in dynamic_annotations.h */ +int RunningOnValgrind(void) { + static volatile int running_on_valgrind = -1; + int local_running_on_valgrind = running_on_valgrind; + /* C doesn't have thread-safe initialization of statics, and we + don't want to depend on pthread_once here, so hack it. */ + ANNOTATE_BENIGN_RACE(&running_on_valgrind, "safe hack"); + if (local_running_on_valgrind == -1) + running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind(); + return local_running_on_valgrind; +} + +/* See the comments in dynamic_annotations.h */ +double ValgrindSlowdown(void) { + /* Same initialization hack as in RunningOnValgrind(). */ + static volatile double slowdown = 0.0; + double local_slowdown = slowdown; + ANNOTATE_BENIGN_RACE(&slowdown, "safe hack"); + if (RunningOnValgrind() == 0) { + return 1.0; + } + if (local_slowdown == 0.0) { + char *env = getenv("VALGRIND_SLOWDOWN"); + slowdown = local_slowdown = env ? 
atof(env) : 50.0; + } + return local_slowdown; +} diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc deleted file mode 100644 index e69de29..0000000 --- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc +++ /dev/null diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h index 3980b24..811bb5e 100644 --- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h +++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h @@ -246,6 +246,12 @@ ANNOTATE_IGNORE_READS_END();\ }while(0)\ + /* Enable (enable!=0) or disable (enable==0) race detection for all threads. + This annotation could be useful if you want to skip expensive race analysis + during some period of program execution, e.g. during initialization. */ + #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ + AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) + /* ------------------------------------------------------------- Annotations useful for debugging. */ @@ -358,11 +364,47 @@ #define ANNOTATE_IGNORE_WRITES_END() /* empty */ #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ + #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ #define ANNOTATE_NO_OP(arg) /* empty */ #define ANNOTATE_FLUSH_STATE() /* empty */ #endif /* DYNAMIC_ANNOTATIONS_ENABLED */ +/* Macro definitions for GCC attributes that allow static thread safety + analysis to recognize and use some of the dynamic annotations as + escape hatches. + TODO(lcwu): remove the check for __SUPPORT_DYN_ANNOTATION__ once the + default crosstool/GCC supports these GCC attributes. 
*/ + +#define ANNOTALYSIS_STATIC_INLINE +#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ; + +#if defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) \ + && (!defined(SWIG)) && defined(__SUPPORT_DYN_ANNOTATION__) + +#if DYNAMIC_ANNOTATIONS_ENABLED == 0 +#define ANNOTALYSIS_ONLY 1 +#undef ANNOTALYSIS_STATIC_INLINE +#define ANNOTALYSIS_STATIC_INLINE static inline +#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +#define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; } +#endif +#define ANNOTALYSIS_IGNORE_READS_BEGIN __attribute__ ((ignore_reads_begin)) +#define ANNOTALYSIS_IGNORE_READS_END __attribute__ ((ignore_reads_end)) +#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin)) +#define ANNOTALYSIS_IGNORE_WRITES_END __attribute__ ((ignore_writes_end)) +#define ANNOTALYSIS_UNPROTECTED_READ __attribute__ ((unprotected_read)) + +#else + +#define ANNOTALYSIS_IGNORE_READS_BEGIN +#define ANNOTALYSIS_IGNORE_READS_END +#define ANNOTALYSIS_IGNORE_WRITES_BEGIN +#define ANNOTALYSIS_IGNORE_WRITES_END +#define ANNOTALYSIS_UNPROTECTED_READ + +#endif + /* Use the macros above rather than using these functions directly. 
*/ #ifdef __cplusplus extern "C" { @@ -424,10 +466,19 @@ void AnnotateTraceMemory(const char *file, int line, const volatile void *arg); void AnnotateThreadName(const char *file, int line, const char *name); -void AnnotateIgnoreReadsBegin(const char *file, int line); -void AnnotateIgnoreReadsEnd(const char *file, int line); -void AnnotateIgnoreWritesBegin(const char *file, int line); -void AnnotateIgnoreWritesEnd(const char *file, int line); +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreReadsBegin(const char *file, int line) + ANNOTALYSIS_IGNORE_READS_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreReadsEnd(const char *file, int line) + ANNOTALYSIS_IGNORE_READS_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreWritesBegin(const char *file, int line) + ANNOTALYSIS_IGNORE_WRITES_BEGIN ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +ANNOTALYSIS_STATIC_INLINE +void AnnotateIgnoreWritesEnd(const char *file, int line) + ANNOTALYSIS_IGNORE_WRITES_END ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY +void AnnotateEnableRaceDetection(const char *file, int line, int enable); void AnnotateNoOp(const char *file, int line, const volatile void *arg); void AnnotateFlushState(const char *file, int line); @@ -449,6 +500,19 @@ void AnnotateFlushState(const char *file, int line); */ int RunningOnValgrind(void); +/* ValgrindSlowdown returns: + * 1.0, if (RunningOnValgrind() == 0) + * 50.0, if (RunningOnValgrind() != 0 && getenv("VALGRIND_SLOWDOWN") == NULL) + * atof(getenv("VALGRIND_SLOWDOWN")) otherwise + This function can be used to scale timeout values: + EXAMPLE: + for (;;) { + DoExpensiveBackgroundTask(); + SleepForSeconds(5 * ValgrindSlowdown()); + } + */ +double ValgrindSlowdown(void); + #ifdef __cplusplus } #endif @@ -464,7 +528,8 @@ int RunningOnValgrind(void); one can use ... 
= ANNOTATE_UNPROTECTED_READ(x); */ template <class T> - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) { + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + ANNOTALYSIS_UNPROTECTED_READ { ANNOTATE_IGNORE_READS_BEGIN(); T res = x; ANNOTATE_IGNORE_READS_END(); @@ -490,4 +555,67 @@ int RunningOnValgrind(void); #endif /* DYNAMIC_ANNOTATIONS_ENABLED */ +/* Annotalysis, a GCC based static analyzer, is able to understand and use + some of the dynamic annotations defined in this file. However, dynamic + annotations are usually disabled in the opt mode (to avoid additional + runtime overheads) while Annotalysis only works in the opt mode. + In order for Annotalysis to use these dynamic annotations when they + are disabled, we re-define these annotations here. Note that unlike the + original macro definitions above, these macros are expanded to calls to + static inline functions so that the compiler will be able to remove the + calls after the analysis. */ + +#ifdef ANNOTALYSIS_ONLY + + #undef ANNOTALYSIS_ONLY + + /* Undefine and re-define the macros that the static analyzer understands. 
*/ + #undef ANNOTATE_IGNORE_READS_BEGIN + #define ANNOTATE_IGNORE_READS_BEGIN() \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_READS_END + #define ANNOTATE_IGNORE_READS_END() \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_WRITES_BEGIN + #define ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_WRITES_END + #define ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + + #undef ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do { \ + ANNOTATE_IGNORE_READS_BEGIN(); \ + ANNOTATE_IGNORE_WRITES_BEGIN(); \ + }while(0) \ + + #undef ANNOTATE_IGNORE_READS_AND_WRITES_END + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do { \ + ANNOTATE_IGNORE_WRITES_END(); \ + ANNOTATE_IGNORE_READS_END(); \ + }while(0) \ + + #if defined(__cplusplus) + #undef ANNOTATE_UNPROTECTED_READ + template <class T> + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + __attribute__ ((unprotected_read)) { + ANNOTATE_IGNORE_READS_BEGIN(); + T res = x; + ANNOTATE_IGNORE_READS_END(); + return res; + } + #endif /* __cplusplus */ + +#endif /* ANNOTALYSIS_ONLY */ + +/* Undefine the macros intended only in this file. 
*/ +#undef ANNOTALYSIS_STATIC_INLINE +#undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY + #endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */ diff --git a/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h b/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h index 512805b..79beafa 100644 --- a/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h +++ b/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h @@ -2593,7 +2593,7 @@ struct kernel_statfs { LSS_INLINE _syscall6(void*, mmap2, void*, s, size_t, l, int, p, int, f, int, d, - __off64_t, o) + off_t, o) #endif LSS_INLINE _syscall3(int, _sigaction, int, s, const struct kernel_old_sigaction*, a, diff --git a/third_party/tcmalloc/chromium/src/base/logging.h b/third_party/tcmalloc/chromium/src/base/logging.h index 4d5e30c..b24a030 100644 --- a/third_party/tcmalloc/chromium/src/base/logging.h +++ b/third_party/tcmalloc/chromium/src/base/logging.h @@ -49,16 +49,26 @@ // On some systems (like freebsd), we can't call write() at all in a // global constructor, perhaps because errno hasn't been set up. +// (In windows, we can't call it because it might call malloc.) // Calling the write syscall is safer (it doesn't set errno), so we // prefer that. Note we don't care about errno for logging: we just // do logging on a best-effort basis. -#ifdef HAVE_SYS_SYSCALL_H +#if defined(_MSC_VER) +#define WRITE_TO_STDERR(buf, len) WriteToStderr(buf, len); // in port.cc +#elif defined(HAVE_SYS_SYSCALL_H) #include <sys/syscall.h> #define WRITE_TO_STDERR(buf, len) syscall(SYS_write, STDERR_FILENO, buf, len) #else #define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len) #endif +// MSVC and mingw define their own, safe version of vnsprintf (the +// windows one in broken) in port.cc. Everyone else can use the +// version here. We had to give it a unique name for windows. +#ifndef _WIN32 +# define perftools_vsnprintf vsnprintf +#endif + // We log all messages at this log-level and below. 
// INFO == -1, WARNING == -2, ERROR == -3, FATAL == -4 @@ -188,7 +198,7 @@ inline void LogPrintf(int severity, const char* pat, va_list ap) { // We write directly to the stderr file descriptor and avoid FILE // buffering because that may invoke malloc() char buf[1600]; - vsnprintf(buf, sizeof(buf)-1, pat, ap); + perftools_vsnprintf(buf, sizeof(buf)-1, pat, ap); if (buf[0] != '\0' && buf[strlen(buf)-1] != '\n') { assert(strlen(buf)+1 < sizeof(buf)); strcat(buf, "\n"); @@ -230,6 +240,9 @@ inline void LOG_IF(int lvl, bool cond, const char* pat, ...) { // Like other "raw" routines, these functions are best effort, and // thus don't return error codes (except RawOpenForWriting()). #if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) +#ifndef NOMINMAX +#define NOMINMAX // @#!$& windows +#endif #include <windows.h> typedef HANDLE RawFD; const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE; diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc index 7ca3953a..532c594 100644 --- a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc +++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc @@ -59,7 +59,9 @@ // --------------------------------------------------------------------------- static const int kMaxLevel = 30; -namespace { +// We put this class-only struct in a namespace to avoid polluting the +// global namespace with this struct name (thus risking an ODR violation). +namespace low_level_alloc_internal { // This struct describes one allocated block, or one free block. struct AllocList { struct Header { @@ -79,6 +81,8 @@ namespace { // LLA_SkiplistLevels() }; } +using low_level_alloc_internal::AllocList; + // --------------------------------------------------------------------------- // A trivial skiplist implementation. 
This is used to keep the freelist @@ -208,7 +212,7 @@ static const intptr_t kMagicAllocated = 0x4c833e95; static const intptr_t kMagicUnallocated = ~kMagicAllocated; namespace { - class ArenaLock { + class SCOPED_LOCKABLE ArenaLock { public: explicit ArenaLock(LowLevelAlloc::Arena *arena) EXCLUSIVE_LOCK_FUNCTION(arena->mu) @@ -229,7 +233,7 @@ namespace { this->arena_->mu.Lock(); } ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); } - void Leave() UNLOCK_FUNCTION(arena_->mu) { + void Leave() UNLOCK_FUNCTION() { this->arena_->mu.Unlock(); #if 0 if (this->mask_valid_) { diff --git a/third_party/tcmalloc/chromium/src/base/spinlock.cc b/third_party/tcmalloc/chromium/src/base/spinlock.cc index 48cdc89..1413923 100644 --- a/third_party/tcmalloc/chromium/src/base/spinlock.cc +++ b/third_party/tcmalloc/chromium/src/base/spinlock.cc @@ -32,47 +32,28 @@ */ #include <config.h> -#include <time.h> /* For nanosleep() */ -#ifdef HAVE_SCHED_H -#include <sched.h> /* For sched_yield() */ -#endif -#ifdef HAVE_UNISTD_H -#include <unistd.h> /* For read() */ -#endif -#include <fcntl.h> /* for open(), O_RDONLY */ -#include <string.h> /* for strncmp */ -#include <errno.h> #include "base/spinlock.h" +#include "base/synchronization_profiling.h" +#include "base/spinlock_internal.h" #include "base/cycleclock.h" #include "base/sysinfo.h" /* for NumCPUs() */ -// We can do contention-profiling of SpinLocks, but the code is in -// mutex.cc, which is not always linked in with spinlock. Hence we -// provide this weak definition, which is used if mutex.cc isn't linked in. 
-ATTRIBUTE_WEAK extern void SubmitSpinLockProfileData(const void *, int64); -void SubmitSpinLockProfileData(const void *, int64) {} +// NOTE on the Lock-state values: +// +// kSpinLockFree represents the unlocked state +// kSpinLockHeld represents the locked state with no waiters +// +// Values greater than kSpinLockHeld represent the locked state with waiters, +// where the value is the time the current lock holder had to +// wait before obtaining the lock. The kSpinLockSleeper state is a special +// "locked with waiters" state that indicates that a sleeper needs to +// be woken, but the thread that just released the lock didn't wait. static int adaptive_spin_count = 0; const base::LinkerInitialized SpinLock::LINKER_INITIALIZED = base::LINKER_INITIALIZED; -// The OS-specific header included below must provide two calls: -// Wait until *w becomes zero, atomically set it to 1 and return. -// static void SpinLockWait(volatile Atomic32 *w); -// -// Hint that a thread waiting in SpinLockWait() could now make progress. May -// do nothing. This call may not read or write *w; it must use only the -// address. -// static void SpinLockWake(volatile Atomic32 *w); -#if defined(_WIN32) -#include "base/spinlock_win32-inl.h" -#elif defined(__linux__) -#include "base/spinlock_linux-inl.h" -#else -#include "base/spinlock_posix-inl.h" -#endif - namespace { struct SpinLock_InitHelper { SpinLock_InitHelper() { @@ -91,36 +72,111 @@ static SpinLock_InitHelper init_helper; } // unnamed namespace +// Monitor the lock to see if its value changes within some time period +// (adaptive_spin_count loop iterations). A timestamp indicating +// when the thread initially started waiting for the lock is passed in via +// the initial_wait_timestamp value. The total wait time in cycles for the +// lock is returned in the wait_cycles parameter. The last value read +// from the lock is returned from the method. 
+Atomic32 SpinLock::SpinLoop(int64 initial_wait_timestamp, + Atomic32* wait_cycles) { + int c = adaptive_spin_count; + while (base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree && --c > 0) { + } + Atomic32 spin_loop_wait_cycles = CalculateWaitCycles(initial_wait_timestamp); + Atomic32 lock_value = + base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + spin_loop_wait_cycles); + *wait_cycles = spin_loop_wait_cycles; + return lock_value; +} void SpinLock::SlowLock() { - int c = adaptive_spin_count; + // The lock was not obtained initially, so this thread needs to wait for + // it. Record the current timestamp in the local variable wait_start_time + // so the total wait time can be stored in the lockword once this thread + // obtains the lock. + int64 wait_start_time = CycleClock::Now(); + Atomic32 wait_cycles; + Atomic32 lock_value = SpinLoop(wait_start_time, &wait_cycles); - // Spin a few times in the hope that the lock holder releases the lock - while ((c > 0) && (lockword_ != 0)) { - c--; - } + int lock_wait_call_count = 0; + while (lock_value != kSpinLockFree) { + // If the lock is currently held, but not marked as having a sleeper, mark + // it as having a sleeper. + if (lock_value == kSpinLockHeld) { + // Here, just "mark" that the thread is going to sleep. Don't store the + // lock wait time in the lock as that will cause the current lock + // owner to think it experienced contention. + lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_, + kSpinLockHeld, + kSpinLockSleeper); + if (lock_value == kSpinLockHeld) { + // Successfully transitioned to kSpinLockSleeper. Pass + // kSpinLockSleeper to the SpinLockWait routine to properly indicate + // the last lock_value observed. + lock_value = kSpinLockSleeper; + } else if (lock_value == kSpinLockFree) { + // Lock is free again, so try and aquire it before sleeping. The + // new lock state will be the number of cycles this thread waited if + // this thread obtains the lock. 
+ lock_value = base::subtle::Acquire_CompareAndSwap(&lockword_, + kSpinLockFree, + wait_cycles); + continue; // skip the delay at the end of the loop + } + } - if (lockword_ == 1) { - int32 now = (CycleClock::Now() >> PROFILE_TIMESTAMP_SHIFT); - // Don't loose the lock: make absolutely sure "now" is not zero - now |= 1; - // Atomically replace the value of lockword_ with "now" if - // lockword_ is 1, thereby remembering the first timestamp to - // be recorded. - base::subtle::NoBarrier_CompareAndSwap(&lockword_, 1, now); - // base::subtle::NoBarrier_CompareAndSwap() returns: - // 0: the lock is/was available; nothing stored - // 1: our timestamp was stored - // > 1: an older timestamp is already in lockword_; nothing stored + // Wait for an OS specific delay. + base::internal::SpinLockDelay(&lockword_, lock_value, + ++lock_wait_call_count); + // Spin again after returning from the wait routine to give this thread + // some chance of obtaining the lock. + lock_value = SpinLoop(wait_start_time, &wait_cycles); } - - SpinLockWait(&lockword_); // wait until lock acquired; OS specific } -void SpinLock::SlowUnlock(int64 wait_timestamp) { - SpinLockWake(&lockword_); // wake waiter if necessary; OS specific +// The wait time for contentionz lock profiling must fit into 32 bits. +// However, the lower 32-bits of the cycle counter wrap around too quickly +// with high frequency processors, so a right-shift by 7 is performed to +// quickly divide the cycles by 128. Using these 32 bits, reduces the +// granularity of time measurement to 128 cycles, and loses track +// of wait time for waits greater than 109 seconds on a 5 GHz machine +// [(2^32 cycles/5 Ghz)*128 = 109.95 seconds]. Waits this long should be +// very rare and the reduced granularity should not be an issue given +// processors in the Google fleet operate at a minimum of one billion +// cycles/sec. 
+enum { PROFILE_TIMESTAMP_SHIFT = 7 }; + +void SpinLock::SlowUnlock(uint64 wait_cycles) { + base::internal::SpinLockWake(&lockword_, false); // wake waiter if necessary + + // Collect contentionz profile info, expanding the wait_cycles back out to + // the full value. If wait_cycles is <= kSpinLockSleeper, then no wait + // was actually performed, so don't record the wait time. Note, that the + // CalculateWaitCycles method adds in kSpinLockSleeper cycles + // unconditionally to guarantee the wait time is not kSpinLockFree or + // kSpinLockHeld. The adding in of these small number of cycles may + // overestimate the contention by a slight amount 50% of the time. However, + // if this code tried to correct for that addition by subtracting out the + // kSpinLockSleeper amount that would underestimate the contention slightly + // 50% of the time. Both ways get the wrong answer, so the code + // overestimates to be more conservative. Overestimating also makes the code + // a little simpler. + // + if (wait_cycles > kSpinLockSleeper) { + base::SubmitSpinLockProfileData(this, + wait_cycles << PROFILE_TIMESTAMP_SHIFT); + } +} - // Collect contentionz profile info. Subtract one from wait_timestamp as - // antidote to "now |= 1;" in SlowLock(). - SubmitSpinLockProfileData(this, wait_timestamp - 1); +inline int32 SpinLock::CalculateWaitCycles(int64 wait_start_time) { + int32 wait_cycles = ((CycleClock::Now() - wait_start_time) >> + PROFILE_TIMESTAMP_SHIFT); + // The number of cycles waiting for the lock is used as both the + // wait_cycles and lock value, so it can't be kSpinLockFree or + // kSpinLockHeld. Make sure the value returned is at least + // kSpinLockSleeper. 
+ wait_cycles |= kSpinLockSleeper; + return wait_cycles; } diff --git a/third_party/tcmalloc/chromium/src/base/spinlock.h b/third_party/tcmalloc/chromium/src/base/spinlock.h index 9e633c4..c2be4fd 100644 --- a/third_party/tcmalloc/chromium/src/base/spinlock.h +++ b/third_party/tcmalloc/chromium/src/base/spinlock.h @@ -44,14 +44,14 @@ #define BASE_SPINLOCK_H_ #include <config.h> -#include "base/basictypes.h" #include "base/atomicops.h" +#include "base/basictypes.h" #include "base/dynamic_annotations.h" #include "base/thread_annotations.h" class LOCKABLE SpinLock { public: - SpinLock() : lockword_(0) { } + SpinLock() : lockword_(kSpinLockFree) { } // Special constructor for use with static SpinLock objects. E.g., // @@ -70,18 +70,21 @@ class LOCKABLE SpinLock { // TODO(csilvers): uncomment the annotation when we figure out how to // support this macro with 0 args (see thread_annotations.h) inline void Lock() /*EXCLUSIVE_LOCK_FUNCTION()*/ { - if (Acquire_CompareAndSwap(&lockword_, 0, 1) != 0) { + if (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + kSpinLockHeld) != kSpinLockFree) { SlowLock(); } ANNOTATE_RWLOCK_ACQUIRED(this, 1); } - // Acquire this SpinLock and return true if the acquisition can be - // done without blocking, else return false. If this SpinLock is - // free at the time of the call, TryLock will return true with high - // probability. + // Try to acquire this SpinLock without blocking and return true if the + // acquisition was successful. If the lock was not acquired, false is + // returned. If this SpinLock is free at the time of the call, TryLock + // will return true with high probability. 
inline bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { - bool res = (Acquire_CompareAndSwap(&lockword_, 0, 1) == 0); + bool res = + (base::subtle::Acquire_CompareAndSwap(&lockword_, kSpinLockFree, + kSpinLockHeld) == kSpinLockFree); if (res) { ANNOTATE_RWLOCK_ACQUIRED(this, 1); } @@ -92,47 +95,37 @@ class LOCKABLE SpinLock { // TODO(csilvers): uncomment the annotation when we figure out how to // support this macro with 0 args (see thread_annotations.h) inline void Unlock() /*UNLOCK_FUNCTION()*/ { - // This is defined in mutex.cc. - extern void SubmitSpinLockProfileData(const void *, int64); - - int64 wait_timestamp = static_cast<uint32>(lockword_); + uint64 wait_cycles = + static_cast<uint64>(base::subtle::NoBarrier_Load(&lockword_)); ANNOTATE_RWLOCK_RELEASED(this, 1); - Release_Store(&lockword_, 0); - if (wait_timestamp != 1) { + base::subtle::Release_Store(&lockword_, kSpinLockFree); + if (wait_cycles != kSpinLockHeld) { // Collect contentionz profile info, and speed the wakeup of any waiter. - // The lockword_ value indicates when the waiter started waiting. - SlowUnlock(wait_timestamp); + // The wait_cycles value indicates how long this thread spent waiting + // for the lock. + SlowUnlock(wait_cycles); } } - // Report if we think the lock can be held by this thread. - // When the lock is truly held by the invoking thread - // we will always return true. - // Indended to be used as CHECK(lock.IsHeld()); + // Determine if the lock is held. When the lock is held by the invoking + // thread, true will always be returned. Intended to be used as + // CHECK(lock.IsHeld()). inline bool IsHeld() const { - return lockword_ != 0; + return base::subtle::NoBarrier_Load(&lockword_) != kSpinLockFree; } - // The timestamp for contention lock profiling must fit into 31 bits. - // as lockword_ is 32 bits and we loose an additional low-order bit due - // to the statement "now |= 1" in SlowLock(). 
- // To select 31 bits from the 64-bit cycle counter, we shift right by - // PROFILE_TIMESTAMP_SHIFT = 7. - // Using these 31 bits, we reduce granularity of time measurement to - // 256 cycles, and will loose track of wait time for waits greater than - // 109 seconds on a 5 GHz machine, longer for faster clock cycles. - // Waits this long should be very rare. - enum { PROFILE_TIMESTAMP_SHIFT = 7 }; - static const base::LinkerInitialized LINKER_INITIALIZED; // backwards compat private: - // Lock-state: 0 means unlocked; 1 means locked with no waiters; values - // greater than 1 indicate locked with waiters, where the value is the time - // the first waiter started waiting and is used for contention profiling. + enum { kSpinLockFree = 0 }; + enum { kSpinLockHeld = 1 }; + enum { kSpinLockSleeper = 2 }; + volatile Atomic32 lockword_; void SlowLock(); - void SlowUnlock(int64 wait_timestamp); + void SlowUnlock(uint64 wait_cycles); + Atomic32 SpinLoop(int64 initial_wait_timestamp, Atomic32* wait_cycles); + inline int32 CalculateWaitCycles(int64 wait_start_time); DISALLOW_COPY_AND_ASSIGN(SpinLock); }; diff --git a/third_party/tcmalloc/chromium/src/base/spinlock_internal.cc b/third_party/tcmalloc/chromium/src/base/spinlock_internal.cc new file mode 100644 index 0000000..b5b6ca4 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/spinlock_internal.cc @@ -0,0 +1,77 @@ +/* Copyright (c) 2010, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// The OS-specific header included below must provide two calls: +// base::internal::SpinLockDelay() and base::internal::SpinLockWake(). +// See spinlock_internal.h for the spec of SpinLockWake(). + +// void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) +// SpinLockDelay() generates an apprproate spin delay on iteration "loop" of a +// spin loop on location *w, whose previously observed value was "value". +// SpinLockDelay() may do nothing, may yield the CPU, may sleep a clock tick, +// or may wait for a delay that can be truncated by a call to SpinlockWake(w). +// In all cases, it must return in bounded time even if SpinlockWake() is not +// called. + +#include "base/spinlock_internal.h" + +#if defined(_WIN32) +#include "base/spinlock_win32-inl.h" +#elif defined(__linux__) +#include "base/spinlock_linux-inl.h" +#else +#include "base/spinlock_posix-inl.h" +#endif + +namespace base { +namespace internal { + +// See spinlock_internal.h for spec. 
+int32 SpinLockWait(volatile Atomic32 *w, int n, + const SpinLockWaitTransition trans[]) { + int32 v; + bool done = false; + for (int loop = 0; !done; loop++) { + v = base::subtle::Acquire_Load(w); + int i; + for (i = 0; i != n && v != trans[i].from; i++) { + } + if (i == n) { + SpinLockDelay(w, v, loop); // no matching transition + } else if (trans[i].to == v || // null transition + base::subtle::Acquire_CompareAndSwap(w, v, trans[i].to) == v) { + done = trans[i].done; + } + } + return v; +} + +} // namespace internal +} // namespace base diff --git a/third_party/tcmalloc/chromium/src/base/spinlock_internal.h b/third_party/tcmalloc/chromium/src/base/spinlock_internal.h new file mode 100644 index 0000000..4494260 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/spinlock_internal.h @@ -0,0 +1,64 @@ +/* Copyright (c) 2010, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This file is an internal part spinlock.cc and once.cc + * It may not be used directly by code outside of //base. + */ + +#ifndef BASE_SPINLOCK_INTERNAL_H_ +#define BASE_SPINLOCK_INTERNAL_H_ + +#include <config.h> +#include "base/basictypes.h" +#include "base/atomicops.h" + +namespace base { +namespace internal { + +// SpinLockWait() waits until it can perform one of several transitions from +// "from" to "to". It returns when it performs a transition where done==true. +struct SpinLockWaitTransition { + int32 from; + int32 to; + bool done; +}; + +// Wait until *w can transition from trans[i].from to trans[i].to for some i +// satisfying 0<=i<n && trans[i].done, atomically make the transition, +// then return the old value of *w. Make any other atomic tranistions +// where !trans[i].done, but continue waiting. 
+int32 SpinLockWait(volatile Atomic32 *w, int n, + const SpinLockWaitTransition trans[]); +void SpinLockWake(volatile Atomic32 *w, bool all); +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop); + +} // namespace internal +} // namespace base +#endif diff --git a/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h b/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h index f7b4a41..dc2c6ba 100644 --- a/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h +++ b/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h @@ -28,11 +28,13 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * --- - * This file is a Linux-specific part of spinlock.cc + * This file is a Linux-specific part of spinlock_internal.cc */ #include <sched.h> #include <time.h> +#include <limits.h> +#include "base/linux_syscall_support.h" #define FUTEX_WAIT 0 #define FUTEX_WAKE 1 @@ -58,44 +60,54 @@ static struct InitModule { int x = 0; // futexes are ints, so we can use them only when // that's the same size as the lockword_ in SpinLock. 
+#ifdef __arm__ + // ARM linux doesn't support sys_futex1(void*, int, int, struct timespec*); + have_futex = 0; +#else have_futex = (sizeof (Atomic32) == sizeof (int) && syscall(__NR_futex, &x, FUTEX_WAKE, 1, 0) >= 0); +#endif if (have_futex && syscall(__NR_futex, &x, FUTEX_WAKE | futex_private_flag, 1, 0) < 0) { futex_private_flag = 0; } } } init_module; + } // anonymous namespace -static void SpinLockWait(volatile Atomic32 *w) { - int save_errno = errno; - struct timespec tm; - tm.tv_sec = 0; - if (have_futex) { - int value; - tm.tv_nsec = 1000000; // 1ms; really we're trying to sleep for one kernel - // clock tick - while ((value = base::subtle::Acquire_CompareAndSwap(w, 0, 1)) != 0) { - syscall(__NR_futex, reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), - FUTEX_WAIT | futex_private_flag, - value, reinterpret_cast<struct kernel_timespec *>(&tm)); - } - } else { - tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin - if (base::subtle::NoBarrier_Load(w) != 0) { - sched_yield(); + +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { + if (loop != 0) { + int save_errno = errno; + struct timespec tm; + tm.tv_sec = 0; + if (have_futex) { + tm.tv_nsec = 1000000; // 1ms; really we're trying to sleep for one + // kernel clock tick + } else { + tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin } - while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) { + if (have_futex) { + syscall(__NR_futex, reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), + FUTEX_WAIT | futex_private_flag, + value, reinterpret_cast<struct kernel_timespec *>(&tm)); + } else { nanosleep(&tm, NULL); } + errno = save_errno; } - errno = save_errno; } -static void SpinLockWake(volatile Atomic32 *w) { +void SpinLockWake(volatile Atomic32 *w, bool all) { if (have_futex) { syscall(__NR_futex, reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), FUTEX_WAKE | futex_private_flag, 1, 0); } } + +} // namespace internal +} // 
namespace base diff --git a/third_party/tcmalloc/chromium/src/base/spinlock_posix-inl.h b/third_party/tcmalloc/chromium/src/base/spinlock_posix-inl.h index 0d933c0..d188ebd 100644 --- a/third_party/tcmalloc/chromium/src/base/spinlock_posix-inl.h +++ b/third_party/tcmalloc/chromium/src/base/spinlock_posix-inl.h @@ -28,25 +28,35 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * --- - * This file is a Posix-specific part of spinlock.cc + * This file is a Posix-specific part of spinlock_internal.cc */ -#include <sched.h> -#include <time.h> +#include <config.h> +#include <errno.h> +#ifdef HAVE_SCHED_H +#include <sched.h> /* For sched_yield() */ +#endif +#include <time.h> /* For nanosleep() */ -static void SpinLockWait(volatile Atomic32 *w) { +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { int save_errno = errno; - struct timespec tm; - tm.tv_sec = 0; - tm.tv_nsec = 1000000; - if (base::subtle::NoBarrier_Load(w) != 0) { + if (loop == 0) { + } else if (loop == 1) { sched_yield(); - } - while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) { + } else { + struct timespec tm; + tm.tv_sec = 0; + tm.tv_nsec = 1000000; nanosleep(&tm, NULL); } errno = save_errno; } -static void SpinLockWake(volatile Atomic32 *w) { +void SpinLockWake(volatile Atomic32 *w, bool all) { } + +} // namespace internal +} // namespace base diff --git a/third_party/tcmalloc/chromium/src/base/spinlock_win32-inl.h b/third_party/tcmalloc/chromium/src/base/spinlock_win32-inl.h index 9058939..ee23541 100644 --- a/third_party/tcmalloc/chromium/src/base/spinlock_win32-inl.h +++ b/third_party/tcmalloc/chromium/src/base/spinlock_win32-inl.h @@ -28,20 +28,26 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * --- - * This file is a Win32-specific part of spinlock.cc + * This file is a Win32-specific part of spinlock_internal.cc */ #include <windows.h> -static void SpinLockWait(volatile Atomic32 *w) { - if (base::subtle::NoBarrier_Load(w) != 0) { +namespace base { +namespace internal { + +void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) { + if (loop == 0) { + } else if (loop == 1) { Sleep(0); - } - while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) { + } else { Sleep(1); } } -static void SpinLockWake(volatile Atomic32 *w) { +void SpinLockWake(volatile Atomic32 *w, bool all) { } + +} // namespace internal +} // namespace base diff --git a/third_party/tcmalloc/chromium/src/base/stl_allocator.h b/third_party/tcmalloc/chromium/src/base/stl_allocator.h index 68c4860..22bd4ae 100644 --- a/third_party/tcmalloc/chromium/src/base/stl_allocator.h +++ b/third_party/tcmalloc/chromium/src/base/stl_allocator.h @@ -37,15 +37,15 @@ #include <config.h> +#include <stddef.h> // for std::ptrdiff_t #include <limits> -#include "base/basictypes.h" #include "base/logging.h" // Generic allocator class for STL objects // that uses a given type-less allocator Alloc, which must provide: // static void* Alloc::Allocate(size_t size); -// static void Alloc::Free(void* ptr); +// static void Alloc::Free(void* ptr, size_t size); // // STL_Allocator<T, MyAlloc> provides the same thread-safety // guarantees as MyAlloc. 
@@ -82,7 +82,7 @@ class STL_Allocator { RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate"); return static_cast<T*>(Alloc::Allocate(n * sizeof(T))); } - void deallocate(pointer p, size_type /*n*/) { Alloc::Free(p); } + void deallocate(pointer p, size_type n) { Alloc::Free(p, n * sizeof(T)); } size_type max_size() const { return size_t(-1) / sizeof(T); } diff --git a/third_party/tcmalloc/chromium/src/base/synchronization_profiling.h b/third_party/tcmalloc/chromium/src/base/synchronization_profiling.h new file mode 100644 index 0000000..cf02c21 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/synchronization_profiling.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2010, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Chris Ruemmler + */ + +#ifndef BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_ +#define BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_ + +#include "base/basictypes.h" + +namespace base { + +// We can do contention-profiling of SpinLocks, but the code is in +// mutex.cc, which is not always linked in with spinlock. Hence we +// provide a weak definition, which are used if mutex.cc isn't linked in. + +// Submit the number of cycles the spinlock spent contending. +ATTRIBUTE_WEAK extern void SubmitSpinLockProfileData(const void *, int64); +extern void SubmitSpinLockProfileData(const void *contendedlock, + int64 wait_cycles) {} +} +#endif // BASE_AUXILIARY_SYNCHRONIZATION_PROFILING_H_ diff --git a/third_party/tcmalloc/chromium/src/base/sysinfo.cc b/third_party/tcmalloc/chromium/src/base/sysinfo.cc index adb2072..285630e 100644 --- a/third_party/tcmalloc/chromium/src/base/sysinfo.cc +++ b/third_party/tcmalloc/chromium/src/base/sysinfo.cc @@ -56,6 +56,7 @@ #endif #include "base/sysinfo.h" #include "base/commandlineflags.h" +#include "base/dynamic_annotations.h" // for RunningOnValgrind #include "base/logging.h" #include "base/cycleclock.h" @@ -110,20 +111,23 @@ // 8K), so it's not an ideal solution. 
const char* GetenvBeforeMain(const char* name) { #if defined(HAVE___ENVIRON) // if we have it, it's declared in unistd.h - const int namelen = strlen(name); - for (char** p = __environ; *p; p++) { - if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=') // it's a match - return *p + namelen+1; // point after = + if (__environ) { // can exist but be NULL, if statically linked + const int namelen = strlen(name); + for (char** p = __environ; *p; p++) { + if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=') // it's a match + return *p + namelen+1; // point after = + } + return NULL; } - return NULL; -#elif defined(PLATFORM_WINDOWS) +#endif +#if defined(PLATFORM_WINDOWS) // TODO(mbelshe) - repeated calls to this function will overwrite the // contents of the static buffer. - static char envbuf[1024]; // enough to hold any envvar we care about - if (!GetEnvironmentVariableA(name, envbuf, sizeof(envbuf)-1)) + static char envvar_buf[1024]; // enough to hold any envvar we care about + if (!GetEnvironmentVariableA(name, envvar_buf, sizeof(envvar_buf)-1)) return NULL; - return envbuf; -#else + return envvar_buf; +#endif // static is ok because this function should only be called before // main(), when we're single-threaded. 
static char envbuf[16<<10]; @@ -151,7 +155,6 @@ const char* GetenvBeforeMain(const char* name) { p = endp + 1; } return NULL; // env var never found -#endif } // This takes as an argument an environment-variable name (like @@ -206,7 +209,7 @@ bool GetUniquePathFromEnv(const char* env_name, char* path) { static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous static int cpuinfo_num_cpus = 1; // Conservative guess -static void SleepForMilliseconds(int milliseconds) { +void SleepForMilliseconds(int milliseconds) { #ifdef PLATFORM_WINDOWS _sleep(milliseconds); // Windows's _sleep takes milliseconds argument #else @@ -233,6 +236,29 @@ static int64 EstimateCyclesPerSecond(const int estimate_time_ms) { return guess; } +// ReadIntFromFile is only called on linux and cygwin platforms. +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) +// Helper function for reading an int from a file. Returns true if successful +// and the memory location pointed to by value is set to the value read. +static bool ReadIntFromFile(const char *file, int *value) { + bool ret = false; + int fd = open(file, O_RDONLY); + if (fd != -1) { + char line[1024]; + char* err; + memset(line, '\0', sizeof(line)); + read(fd, line, sizeof(line) - 1); + const int temp_value = strtol(line, &err, 10); + if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { + *value = temp_value; + ret = true; + } + close(fd); + } + return ret; +} +#endif + // WARNING: logging calls back to InitializeSystemInfo() so it must // not invoke any logging code. 
Also, InitializeSystemInfo() can be // called before main() -- in fact it *must* be since already_called @@ -245,31 +271,44 @@ static void InitializeSystemInfo() { if (already_called) return; already_called = true; - // I put in a never-called reference to EstimateCyclesPerSecond() here - // to silence the compiler for OS's that don't need it - if (0) EstimateCyclesPerSecond(0); + bool saw_mhz = false; + + if (RunningOnValgrind()) { + // Valgrind may slow the progress of time artificially (--scale-time=N + // option). We thus can't rely on CPU Mhz info stored in /sys or /proc + // files. Thus, actually measure the cps. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(100); + saw_mhz = true; + } #if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) char line[1024]; char* err; + int freq; + + // If the kernel is exporting the tsc frequency use that. There are issues + // where cpuinfo_max_freq cannot be relied on because the BIOS may be + // exporintg an invalid p-state (on x86) or p-states may be used to put the + // processor in a new mode (turbo mode). Essentially, those frequencies + // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as + // well. + if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)) { + // The value is in kHz (as the file name suggests). For example, on a + // 2GHz warpstation, the file contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; + } // If CPU scaling is in effect, we want to use the *maximum* frequency, // not whatever CPU speed some random processor happens to be using now. 
- bool saw_mhz = false; - const char* pname0 = "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"; - int fd0 = open(pname0, O_RDONLY); - if (fd0 != -1) { - memset(line, '\0', sizeof(line)); - read(fd0, line, sizeof(line)); - const int max_freq = strtol(line, &err, 10); - if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { - // The value is in kHz. For example, on a 2GHz machine, the file - // contains the value "2000000". Historically this file contained no - // newline, but at some point the kernel started appending a newline. - cpuinfo_cycles_per_second = max_freq * 1000.0; - saw_mhz = true; - } - close(fd0); + if (!saw_mhz && + ReadIntFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &freq)) { + // The value is in kHz. For example, on a 2GHz machine, the file + // contains the value "2000000". + cpuinfo_cycles_per_second = freq * 1000.0; + saw_mhz = true; } // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. @@ -277,11 +316,14 @@ static void InitializeSystemInfo() { int fd = open(pname, O_RDONLY); if (fd == -1) { perror(pname); - cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + if (!saw_mhz) { + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } return; // TODO: use generic tester instead? } double bogo_clock = 1.0; + bool saw_bogo = false; int num_cpus = 0; line[0] = line[1] = '\0'; int chars_read = 0; @@ -305,29 +347,38 @@ static void InitializeSystemInfo() { if (newline != NULL) *newline = '\0'; - if (!saw_mhz && strncmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) { + // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only + // accept postive values. Some environments (virtual machines) report zero, + // which would cause infinite looping in WallTime_Init. 
+ if (!saw_mhz && strncasecmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) { const char* freqstr = strchr(line, ':'); if (freqstr) { cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; - if (freqstr[1] != '\0' && *err == '\0') + if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0) saw_mhz = true; } - } else if (strncmp(line, "bogomips", sizeof("bogomips")-1) == 0) { + } else if (strncasecmp(line, "bogomips", sizeof("bogomips")-1) == 0) { const char* freqstr = strchr(line, ':'); - if (freqstr) + if (freqstr) { bogo_clock = strtod(freqstr+1, &err) * 1000000.0; - if (freqstr == NULL || freqstr[1] == '\0' || *err != '\0') - bogo_clock = 1.0; - } else if (strncmp(line, "processor", sizeof("processor")-1) == 0) { + if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0) + saw_bogo = true; + } + } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) { num_cpus++; // count up every time we see an "processor :" entry } } while (chars_read > 0); close(fd); if (!saw_mhz) { - // If we didn't find anything better, we'll use bogomips, but - // we're not happy about it. - cpuinfo_cycles_per_second = bogo_clock; + if (saw_bogo) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } else { + // If we don't even have bogomips, we'll use the slow estimation. + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } } if (cpuinfo_cycles_per_second == 0.0) { cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe @@ -458,7 +509,7 @@ static void ConstructFilename(const char* spec, pid_t pid, char* buf, int buf_size) { CHECK_LT(snprintf(buf, buf_size, spec, - pid ? pid : getpid()), buf_size); + static_cast<int>(pid ? 
pid : getpid())), buf_size); } #endif @@ -771,7 +822,8 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, Buffer::kBufSize); } else { CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, - "/proc/%d/path/%s", pid_, mapinfo->pr_mapname), + "/proc/%d/path/%s", + static_cast<int>(pid_), mapinfo->pr_mapname), Buffer::kBufSize); } ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX); @@ -882,9 +934,10 @@ namespace tcmalloc { // Helper to add the list of mapped shared libraries to a profile. // Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size' -// and return the actual size occupied in 'buf'. +// and return the actual size occupied in 'buf'. We fill wrote_all to true +// if we successfully wrote all proc lines to buf, false else. // We do not provision for 0-terminating 'buf'. -int FillProcSelfMaps(char buf[], int size) { +int FillProcSelfMaps(char buf[], int size, bool* wrote_all) { ProcMapsIterator::Buffer iterbuf; ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" @@ -892,10 +945,17 @@ int FillProcSelfMaps(char buf[], int size) { int64 inode; char *flags, *filename; int bytes_written = 0; + *wrote_all = true; while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { - bytes_written += it.FormatLine(buf + bytes_written, size - bytes_written, - start, end, flags, offset, inode, filename, - 0); + const int line_length = it.FormatLine(buf + bytes_written, + size - bytes_written, + start, end, flags, offset, + inode, filename, 0); + if (line_length == 0) + *wrote_all = false; // failed to write this line out + else + bytes_written += line_length; + } return bytes_written; } diff --git a/third_party/tcmalloc/chromium/src/base/sysinfo.h b/third_party/tcmalloc/chromium/src/base/sysinfo.h index 0bcc1f5..7935855 100644 --- a/third_party/tcmalloc/chromium/src/base/sysinfo.h +++ b/third_party/tcmalloc/chromium/src/base/sysinfo.h @@ -71,6 +71,8 @@ extern bool GetUniquePathFromEnv(const char* 
env_name, char* path); extern int NumCPUs(); +void SleepForMilliseconds(int milliseconds); + // processor cycles per second of each processor. Thread-safe. extern double CyclesPerSecond(void); @@ -226,7 +228,7 @@ class ProcMapsIterator { // Helper routines namespace tcmalloc { -int FillProcSelfMaps(char buf[], int size); +int FillProcSelfMaps(char buf[], int size, bool* wrote_all); void DumpProcSelfMaps(RawFD fd); } diff --git a/third_party/tcmalloc/chromium/src/base/thread_annotations.h b/third_party/tcmalloc/chromium/src/base/thread_annotations.h index ded13d6..f57b299 100644 --- a/third_party/tcmalloc/chromium/src/base/thread_annotations.h +++ b/third_party/tcmalloc/chromium/src/base/thread_annotations.h @@ -45,15 +45,23 @@ #ifndef BASE_THREAD_ANNOTATIONS_H_ #define BASE_THREAD_ANNOTATIONS_H_ -#if defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG)) + +#if defined(__GNUC__) \ + && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) \ + && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG)) +#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x)) +#else +#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op +#endif + // Document if a shared variable/field needs to be protected by a lock. // GUARDED_BY allows the user to specify a particular lock that should be // held when accessing the annotated variable, while GUARDED_VAR only // indicates a shared variable should be guarded (by any lock). GUARDED_VAR // is primarily used when the client cannot express the name of the lock. -#define GUARDED_BY(x) __attribute__ ((guarded_by(x))) -#define GUARDED_VAR __attribute__ ((guarded)) +#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x)) +#define GUARDED_VAR THREAD_ANNOTATION_ATTRIBUTE__(guarded) // Document if the memory location pointed to by a pointer should be guarded // by a lock when dereferencing the pointer. 
Similar to GUARDED_VAR, @@ -63,90 +71,64 @@ // q, which is guarded by mu1, points to a shared memory location that is // guarded by mu2, q should be annotated as follows: // int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2); -#define PT_GUARDED_BY(x) __attribute__ ((point_to_guarded_by(x))) -#define PT_GUARDED_VAR __attribute__ ((point_to_guarded)) +#define PT_GUARDED_BY(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded_by(x)) +#define PT_GUARDED_VAR \ + THREAD_ANNOTATION_ATTRIBUTE__(point_to_guarded) // Document the acquisition order between locks that can be held // simultaneously by a thread. For any two locks that need to be annotated // to establish an acquisition order, only one of them needs the annotation. // (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER // and ACQUIRED_BEFORE.) -#define ACQUIRED_AFTER(...) __attribute__ ((acquired_after(__VA_ARGS__))) -#define ACQUIRED_BEFORE(...) __attribute__ ((acquired_before(__VA_ARGS__))) +#define ACQUIRED_AFTER(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(x)) +#define ACQUIRED_BEFORE(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(x)) // The following three annotations document the lock requirements for // functions/methods. // Document if a function expects certain locks to be held before it is called -#define EXCLUSIVE_LOCKS_REQUIRED(...) \ - __attribute__ ((exclusive_locks_required(__VA_ARGS__))) +#define EXCLUSIVE_LOCKS_REQUIRED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(x)) -#define SHARED_LOCKS_REQUIRED(...) \ - __attribute__ ((shared_locks_required(__VA_ARGS__))) +#define SHARED_LOCKS_REQUIRED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(x)) // Document the locks acquired in the body of the function. These locks // cannot be held when calling this function (as google3's Mutex locks are // non-reentrant). -#define LOCKS_EXCLUDED(...) 
__attribute__ ((locks_excluded(__VA_ARGS__))) +#define LOCKS_EXCLUDED(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(x)) // Document the lock the annotated function returns without acquiring it. -#define LOCK_RETURNED(x) __attribute__ ((lock_returned(x))) +#define LOCK_RETURNED(x) THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x)) // Document if a class/type is a lockable type (such as the Mutex class). -#define LOCKABLE __attribute__ ((lockable)) +#define LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(lockable) // Document if a class is a scoped lockable type (such as the MutexLock class). -#define SCOPED_LOCKABLE __attribute__ ((scoped_lockable)) +#define SCOPED_LOCKABLE THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable) // The following annotations specify lock and unlock primitives. -#define EXCLUSIVE_LOCK_FUNCTION(...) \ - __attribute__ ((exclusive_lock(__VA_ARGS__))) +#define EXCLUSIVE_LOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock(x)) -#define SHARED_LOCK_FUNCTION(...) \ - __attribute__ ((shared_lock(__VA_ARGS__))) +#define SHARED_LOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_lock(x)) -#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \ - __attribute__ ((exclusive_trylock(__VA_ARGS__))) +#define EXCLUSIVE_TRYLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock(x)) -#define SHARED_TRYLOCK_FUNCTION(...) \ - __attribute__ ((shared_trylock(__VA_ARGS__))) +#define SHARED_TRYLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock(x)) -#define UNLOCK_FUNCTION(...) __attribute__ ((unlock(__VA_ARGS__))) +#define UNLOCK_FUNCTION(x) \ + THREAD_ANNOTATION_ATTRIBUTE__(unlock(x)) // An escape hatch for thread safety analysis to ignore the annotated function. -#define NO_THREAD_SAFETY_ANALYSIS __attribute__ ((no_thread_safety_analysis)) - - -#else - -// When the compiler is not GCC, these annotations are simply no-ops. 
- -// NOTE: in theory, the macros that take "arg" below *could* take -// multiple arguments, but in practice so far they only take one. -// Since not all non-gcc compilers support ... -- notably MSVC 7.1 -- -// I just hard-code in a single arg. If this assumption ever breaks, -// we can change it back to "...", or handle it some other way. - -#define GUARDED_BY(x) // no-op -#define GUARDED_VAR // no-op -#define PT_GUARDED_BY(x) // no-op -#define PT_GUARDED_VAR // no-op -#define ACQUIRED_AFTER(arg) // no-op -#define ACQUIRED_BEFORE(arg) // no-op -#define EXCLUSIVE_LOCKS_REQUIRED(arg) // no-op -#define SHARED_LOCKS_REQUIRED(arg) // no-op -#define LOCKS_EXCLUDED(arg) // no-op -#define LOCK_RETURNED(x) // no-op -#define LOCKABLE // no-op -#define SCOPED_LOCKABLE // no-op -#define EXCLUSIVE_LOCK_FUNCTION(arg) // no-op -#define SHARED_LOCK_FUNCTION(arg) // no-op -#define EXCLUSIVE_TRYLOCK_FUNCTION(arg) // no-op -#define SHARED_TRYLOCK_FUNCTION(arg) // no-op -#define UNLOCK_FUNCTION(arg) // no-op -#define NO_THREAD_SAFETY_ANALYSIS // no-op - -#endif // defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) - // && !defined(SWIG) +#define NO_THREAD_SAFETY_ANALYSIS \ + THREAD_ANNOTATION_ATTRIBUTE__(no_thread_safety_analysis) #endif // BASE_THREAD_ANNOTATIONS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/vdso_support.cc b/third_party/tcmalloc/chromium/src/base/vdso_support.cc index d1c374c..444be26 100644 --- a/third_party/tcmalloc/chromium/src/base/vdso_support.cc +++ b/third_party/tcmalloc/chromium/src/base/vdso_support.cc @@ -40,6 +40,7 @@ #ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h #include <fcntl.h> +#include <stddef.h> // for std::ptrdiff_t #include "base/atomicops.h" // for MemoryBarrier #include "base/logging.h" @@ -207,6 +208,10 @@ void VDSOSupport::ElfMemImage::Init(const void *base) { if (!base) { return; } + const intptr_t base_as_uintptr_t = reinterpret_cast<uintptr_t>(base); + // Fake VDSO has low bit set. 
+ const bool fake_vdso = ((base_as_uintptr_t & 1) != 0); + base = reinterpret_cast<const void *>(base_as_uintptr_t & ~1); const char *const base_as_char = reinterpret_cast<const char *>(base); if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { @@ -266,17 +271,6 @@ void VDSOSupport::ElfMemImage::Init(const void *base) { ElfW(Dyn) *dynamic_entry = reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + relocation); - bool fake_vdso = false; // Assume we are dealing with the real VDSO. - for (ElfW(Dyn) *de = dynamic_entry; de->d_tag != DT_NULL; ++de) { - ElfW(Sxword) tag = de->d_tag; - if (tag == DT_PLTGOT || tag == DT_RELA || tag == DT_JMPREL || - tag == DT_NEEDED || tag == DT_RPATH || tag == DT_VERNEED || - tag == DT_INIT || tag == DT_FINI) { - /* Real vdso can not reasonably have any of the above entries. */ - fake_vdso = true; - break; - } - } for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { ElfW(Xword) value = dynamic_entry->d_un.d_val; if (fake_vdso) { @@ -395,7 +389,7 @@ const void *VDSOSupport::Init() { } // Subtle: this code runs outside of any locks; prevent compiler // from assigning to getcpu_fn_ more than once. - MemoryBarrier(); + base::subtle::MemoryBarrier(); getcpu_fn_ = fn; return vdso_base_; } diff --git a/third_party/tcmalloc/chromium/src/base/vdso_support.h b/third_party/tcmalloc/chromium/src/base/vdso_support.h index c47b3c5..131646a 100644 --- a/third_party/tcmalloc/chromium/src/base/vdso_support.h +++ b/third_party/tcmalloc/chromium/src/base/vdso_support.h @@ -30,6 +30,7 @@ #ifdef HAVE_FEATURES_H #include <features.h> // for __GLIBC__ #endif +#include "base/basictypes.h" // Maybe one day we can rewrite this file not to require the elf // symbol extensions in glibc, but for right now we need them. 
@@ -39,7 +40,6 @@ #include <stdlib.h> // for NULL #include <link.h> // for ElfW -#include "base/basictypes.h" namespace base { @@ -64,7 +64,7 @@ class VDSOSupport { // Supports iteration over all dynamic symbols. class SymbolIterator { public: - friend struct VDSOSupport; + friend class VDSOSupport; const SymbolInfo *operator->() const; const SymbolInfo &operator*() const; SymbolIterator& operator++(); @@ -147,6 +147,10 @@ class VDSOSupport { // kInvalidBase => value hasn't been determined yet. // 0 => there is no VDSO. // else => vma of VDSO Elf{32,64}_Ehdr. + // + // When testing with mock VDSO, low bit is set. + // The low bit is always available because vdso_base_ is + // page-aligned. static const void *vdso_base_; // NOLINT on 'long' because these routines mimic kernel api. diff --git a/third_party/tcmalloc/chromium/src/central_freelist.cc b/third_party/tcmalloc/chromium/src/central_freelist.cc index 674ff9b..6b3be06 100644 --- a/third_party/tcmalloc/chromium/src/central_freelist.cc +++ b/third_party/tcmalloc/chromium/src/central_freelist.cc @@ -33,8 +33,10 @@ #include "config.h" #include "central_freelist.h" -#include "linked_list.h" -#include "static_vars.h" +#include "internal_logging.h" // for ASSERT, MESSAGE +#include "linked_list.h" // for SLL_Next, SLL_Push, etc +#include "page_heap.h" // for PageHeap +#include "static_vars.h" // for Static namespace tcmalloc { @@ -44,7 +46,12 @@ void CentralFreeList::Init(size_t cl) { tcmalloc::DLL_Init(&nonempty_); counter_ = 0; - cache_size_ = 1; +#ifdef TCMALLOC_SMALL_BUT_SLOW + // Disable the transfer cache for the small footprint case. + cache_size_ = 0; +#else + cache_size_ = 16; +#endif used_slots_ = 0; ASSERT(cache_size_ <= kNumTransferEntries); } @@ -57,9 +64,22 @@ void CentralFreeList::ReleaseListToSpans(void* start) { } } -void CentralFreeList::ReleaseToSpans(void* object) { +// MapObjectToSpan should logically be part of ReleaseToSpans. But +// this triggers an optimization bug in gcc 4.5.0. 
Moving to a +// separate function, and making sure that function isn't inlined, +// seems to fix the problem. It also should be fixed for gcc 4.5.1. +static +#if __GNUC__ == 4 && __GNUC_MINOR__ == 5 && __GNUC_PATCHLEVEL__ == 0 +__attribute__ ((noinline)) +#endif +Span* MapObjectToSpan(void* object) { const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; Span* span = Static::pageheap()->GetDescriptor(p); + return span; +} + +void CentralFreeList::ReleaseToSpans(void* object) { + Span* span = MapObjectToSpan(object); ASSERT(span != NULL); ASSERT(span->refcount > 0); @@ -129,8 +149,14 @@ bool CentralFreeList::MakeCacheSpace() { if (EvictRandomSizeClass(size_class_, false) || EvictRandomSizeClass(size_class_, true)) { // Succeeded in evicting, we're going to make our cache larger. - cache_size_++; - return true; + // However, we may have dropped and re-acquired the lock in + // EvictRandomSizeClass (via ShrinkCache and the LockInverter), so the + // cache_size may have changed. Therefore, check and verify that it is + // still OK to increase the cache_size. + if (cache_size_ < kNumTransferEntries) { + cache_size_++; + return true; + } } return false; } diff --git a/third_party/tcmalloc/chromium/src/central_freelist.h b/third_party/tcmalloc/chromium/src/central_freelist.h index 2e6a31b..e09210a 100644 --- a/third_party/tcmalloc/chromium/src/central_freelist.h +++ b/third_party/tcmalloc/chromium/src/central_freelist.h @@ -34,8 +34,12 @@ #define TCMALLOC_CENTRAL_FREELIST_H_ #include "config.h" -#include "base/thread_annotations.h" +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for int32_t +#endif #include "base/spinlock.h" +#include "base/thread_annotations.h" #include "common.h" #include "span.h" @@ -78,7 +82,12 @@ class CentralFreeList { // number of TCEntries across size classes is fixed. Currently each size // class is initially given one TCEntry which also means that the maximum any // one class can have is kNumClasses. 
+#ifdef TCMALLOC_SMALL_BUT_SLOW + // For the small memory model, the transfer cache is not used. + static const int kNumTransferEntries = 0; +#else static const int kNumTransferEntries = kNumClasses; +#endif // REQUIRES: lock_ is held // Remove object from cache and return. diff --git a/third_party/tcmalloc/chromium/src/common.cc b/third_party/tcmalloc/chromium/src/common.cc index 2142f49..b92e988 100644 --- a/third_party/tcmalloc/chromium/src/common.cc +++ b/third_party/tcmalloc/chromium/src/common.cc @@ -31,9 +31,8 @@ // Author: Sanjay Ghemawat <opensource@google.com> #include "config.h" -#include "system-alloc.h" -#include "config.h" #include "common.h" +#include "system-alloc.h" namespace tcmalloc { @@ -53,6 +52,24 @@ static inline int LgFloor(size_t n) { return log; } +int AlignmentForSize(size_t size) { + int alignment = kAlignment; + if (size >= 2048) { + // Cap alignment at 256 for large sizes. + alignment = 256; + } else if (size >= 128) { + // Space wasted due to alignment is at most 1/8, i.e., 12.5%. + alignment = (1 << LgFloor(size)) / 8; + } else if (size >= 16) { + // We need an alignment of at least 16 bytes to satisfy + // requirements for some SSE types. + alignment = 16; + } + CHECK_CONDITION(size < 16 || alignment >= 16); + CHECK_CONDITION((alignment & (alignment - 1)) == 0); + return alignment; +} + int SizeMap::NumMoveSize(size_t size) { if (size == 0) return 0; // Use approx 64k transfers between thread and central caches. @@ -93,19 +110,7 @@ void SizeMap::Init() { int lg = LgFloor(size); if (lg > last_lg) { // Increase alignment every so often to reduce number of size classes. - if (size >= 2048) { - // Cap alignment at 256 for large sizes - alignment = 256; - } else if (size >= 128) { - // Space wasted due to alignment is at most 1/8, i.e., 12.5%. - alignment = size / 8; - } else if (size >= 16) { - // We need an alignment of at least 16 bytes to satisfy - // requirements for some SSE types. 
- alignment = 16; - } - CHECK_CONDITION(size < 16 || alignment >= 16); - CHECK_CONDITION((alignment & (alignment - 1)) == 0); + alignment = AlignmentForSize(size); last_lg = lg; } CHECK_CONDITION((size % alignment) == 0); diff --git a/third_party/tcmalloc/chromium/src/common.h b/third_party/tcmalloc/chromium/src/common.h index 53a0a0b..a3df8de 100644 --- a/third_party/tcmalloc/chromium/src/common.h +++ b/third_party/tcmalloc/chromium/src/common.h @@ -36,13 +36,11 @@ #define TCMALLOC_COMMON_H_ #include "config.h" -#include <stddef.h> +#include <stddef.h> // for size_t #ifdef HAVE_STDINT_H -#include <stdint.h> +#include <stdint.h> // for uintptr_t, uint64_t #endif -#include <stdarg.h> -#include "base/commandlineflags.h" -#include "internal_logging.h" +#include "internal_logging.h" // for ASSERT, etc // Type that can hold a page number typedef uintptr_t PageID; @@ -54,14 +52,51 @@ typedef uintptr_t Length; // Configuration //------------------------------------------------------------------- -// Not all possible combinations of the following parameters make -// sense. In particular, if kMaxSize increases, you may have to -// increase kNumClasses as well. +// Using large pages speeds up the execution at a cost of larger memory use. +// Deallocation may speed up by a factor as the page map gets 8x smaller, so +// lookups in the page map result in fewer L2 cache misses, which translates to +// speedup for application/platform combinations with high L2 cache pressure. +// As the number of size classes increases with large pages, we increase +// the thread cache allowance to avoid passing more free ranges to and from +// central lists. Also, larger pages are less likely to get freed. +// These two factors cause a bounded increase in memory use. 
+ +#if defined(TCMALLOC_LARGE_PAGES) +static const size_t kPageShift = 15; +static const size_t kNumClasses = 95; +static const size_t kMaxThreadCacheSize = 4 << 20; +#else static const size_t kPageShift = 12; +static const size_t kNumClasses = 61; +static const size_t kMaxThreadCacheSize = 2 << 20; +#endif + static const size_t kPageSize = 1 << kPageShift; static const size_t kMaxSize = 8u * kPageSize; static const size_t kAlignment = 8; -static const size_t kNumClasses = 61; +// For all span-lengths < kMaxPages we keep an exact-size list. +static const size_t kMaxPages = 1 << (20 - kPageShift); + +// Default bound on the total amount of thread caches. +#ifdef TCMALLOC_SMALL_BUT_SLOW +// Make the overall thread cache no bigger than that of a single thread +// for the small memory footprint case. +static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; +#else +static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize; +#endif + +// Lower bound on the per-thread cache sizes +static const size_t kMinThreadCacheSize = kMaxSize * 2; + +// The number of bytes one ThreadCache will steal from another when +// the first ThreadCache is forced to Scavenge(), delaying the +// next call to Scavenge for this thread. +static const size_t kStealAmount = 1 << 16; + +// The number of times that a deallocation can cause a freelist to +// go over its max_length() before shrinking max_length(). +static const int kMaxOverages = 3; // Maximum length we allow a per-thread free-list to have before we // move objects from it into the corresponding central free-list. We @@ -72,6 +107,17 @@ static const int kMaxDynamicFreeListLength = 8192; static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; +#if defined __x86_64__ +// All current and planned x86_64 processors only look at the lower 48 bits +// in virtual to physical address translation. The top 16 are thus unused. 
+// TODO(rus): Under what operating systems can we increase it safely to 17? +// This lets us use smaller page maps. On first allocation, a 36-bit page map +// uses only 96 KB instead of the 4.5 MB used by a 52-bit page map. +static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48); +#else +static const int kAddressBits = 8 * sizeof(void*); +#endif + namespace tcmalloc { // Convert byte size into pages. This won't overflow, but may return @@ -81,6 +127,10 @@ inline Length pages(size_t bytes) { ((bytes & (kPageSize - 1)) > 0 ? 1 : 0); } +// For larger allocation sizes, we use larger memory alignments to +// reduce the number of size classes. +int AlignmentForSize(size_t size); + // Size-class information + mapping class SizeMap { private: @@ -114,8 +164,10 @@ class SizeMap { // ... // 32768 (32768 + 127 + (120<<7)) / 128 376 static const int kMaxSmallSize = 1024; - unsigned char class_array_[377]; - + static const size_t kClassArraySize = + (((1 << kPageShift) * 8u + 127 + (120 << 7)) >> 7) + 1; + unsigned char class_array_[kClassArraySize]; + // Compute index of the class_array[] entry for a given size static inline int ClassIndex(int s) { ASSERT(0 <= s); diff --git a/third_party/tcmalloc/chromium/src/config.h.in b/third_party/tcmalloc/chromium/src/config.h.in index 49bbf0d..5ba784e 100644 --- a/third_party/tcmalloc/chromium/src/config.h.in +++ b/third_party/tcmalloc/chromium/src/config.h.in @@ -1,5 +1,10 @@ /* src/config.h.in. Generated from configure.ac by autoheader. */ + +#ifndef GOOGLE_PERFTOOLS_CONFIG_H_ +#define GOOGLE_PERFTOOLS_CONFIG_H_ + + /* Define to 1 if compiler supports __builtin_stack_pointer */ #undef HAVE_BUILTIN_STACK_POINTER @@ -72,6 +77,9 @@ /* Define to 1 if you have the <malloc.h> header file. */ #undef HAVE_MALLOC_H +/* Define to 1 if you have the <malloc/malloc.h> header file. */ +#undef HAVE_MALLOC_MALLOC_H + /* Define to 1 if you have the <memory.h> header file. 
*/ #undef HAVE_MEMORY_H @@ -114,6 +122,15 @@ /* Define to 1 if the system has the type `struct mallinfo'. */ #undef HAVE_STRUCT_MALLINFO +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#undef HAVE_SYS_CDEFS_H + +/* Define to 1 if you have the <sys/malloc.h> header file. */ +#undef HAVE_SYS_MALLOC_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + /* Define to 1 if you have the <sys/prctl.h> header file. */ #undef HAVE_SYS_PRCTL_H @@ -129,6 +146,9 @@ /* Define to 1 if you have the <sys/syscall.h> header file. */ #undef HAVE_SYS_SYSCALL_H +/* Define to 1 if you have the <sys/time.h> header file. */ +#undef HAVE_SYS_TIME_H + /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H @@ -168,6 +188,10 @@ /* Define to 1 if int32_t is equivalent to intptr_t */ #undef INT32_EQUALS_INTPTR +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + /* Define to 1 if your C compiler doesn't accept -c and -o together. */ #undef NO_MINUS_C_MINUS_O @@ -240,3 +264,4 @@ #include "windows/mingw.h" #endif +#endif /* #ifndef GOOGLE_PERFTOOLS_CONFIG_H_ */ diff --git a/third_party/tcmalloc/chromium/src/config_freebsd.h b/third_party/tcmalloc/chromium/src/config_freebsd.h index 6aed305..fbb917f 100644 --- a/third_party/tcmalloc/chromium/src/config_freebsd.h +++ b/third_party/tcmalloc/chromium/src/config_freebsd.h @@ -73,6 +73,9 @@ /* Define to 1 if you have the <malloc.h> header file. */ /* #undef HAVE_MALLOC_H */ +/* Define to 1 if you have the <malloc/malloc.h> header file. */ +#undef HAVE_MALLOC_MALLOC_H + /* Define to 1 if you have the <memory.h> header file. */ #define HAVE_MEMORY_H 1 @@ -115,6 +118,15 @@ /* Define to 1 if the system has the type `struct mallinfo'. */ /* #undef HAVE_STRUCT_MALLINFO */ +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#define HAVE_SYS_CDEFS_H 1 + +/* Define to 1 if you have the <sys/malloc.h> header file. 
*/ +#undef HAVE_SYS_MALLOC_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#define HAVE_SYS_PARAM_H 1 + /* Define to 1 if you have the <sys/prctl.h> header file. */ /* #undef HAVE_SYS_PRCTL_H */ @@ -130,6 +142,9 @@ /* Define to 1 if you have the <sys/syscall.h> header file. */ #define HAVE_SYS_SYSCALL_H 1 +/* Define to 1 if you have the <sys/time.h> header file. */ +#define HAVE_SYS_TIME_H 1 + /* Define to 1 if you have the <sys/types.h> header file. */ #define HAVE_SYS_TYPES_H 1 @@ -171,6 +186,10 @@ #define INT32_EQUALS_INTPTR 1 #endif +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + /* Define to 1 if your C compiler doesn't accept -c and -o together. */ /* #undef NO_MINUS_C_MINUS_O */ @@ -184,7 +203,7 @@ #define PACKAGE_NAME "google-perftools" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "google-perftools 1.5" +#define PACKAGE_STRING "google-perftools 1.7" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "google-perftools" @@ -193,7 +212,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.5" +#define PACKAGE_VERSION "1.7" /* How to access the PC from a struct ucontext */ #if defined(__i386__) @@ -240,7 +259,7 @@ #define STL_NAMESPACE std /* Version number of package */ -#define VERSION "1.5" +#define VERSION "1.7" /* C99 says: define this to get the PRI... macros from stdint.h */ #ifndef __STDC_FORMAT_MACROS diff --git a/third_party/tcmalloc/chromium/src/config_linux.h b/third_party/tcmalloc/chromium/src/config_linux.h index 9786b3e..25c8da8 100644 --- a/third_party/tcmalloc/chromium/src/config_linux.h +++ b/third_party/tcmalloc/chromium/src/config_linux.h @@ -7,6 +7,9 @@ /* Define to 1 if you have the <conflict-signal.h> header file. */ /* #undef HAVE_CONFLICT_SIGNAL_H */ +/* Define to 1 if you have the <cygwin/signal.h> header file. 
*/ +#undef HAVE_CYGWIN_SIGNAL_H + /* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. */ #define HAVE_DECL_CFREE 1 @@ -70,6 +73,9 @@ /* Define to 1 if you have the <malloc.h> header file. */ #define HAVE_MALLOC_H 1 +/* Define to 1 if you have the <malloc/malloc.h> header file. */ +#undef HAVE_MALLOC_MALLOC_H + /* Define to 1 if you have the <memory.h> header file. */ #define HAVE_MEMORY_H 1 @@ -112,6 +118,15 @@ /* Define to 1 if the system has the type `struct mallinfo'. */ #define HAVE_STRUCT_MALLINFO 1 +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#define HAVE_SYS_CDEFS_H 1 + +/* Define to 1 if you have the <sys/malloc.h> header file. */ +#undef HAVE_SYS_MALLOC_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#define HAVE_SYS_PARAM_H 1 + /* Define to 1 if you have the <sys/prctl.h> header file. */ #define HAVE_SYS_PRCTL_H 1 @@ -127,9 +142,15 @@ /* Define to 1 if you have the <sys/syscall.h> header file. */ #define HAVE_SYS_SYSCALL_H 1 +/* Define to 1 if you have the <sys/time.h> header file. */ +#define HAVE_SYS_TIME_H 1 + /* Define to 1 if you have the <sys/types.h> header file. */ #define HAVE_SYS_TYPES_H 1 +/* <sys/ucontext.h> is broken on redhat 7 */ +#undef HAVE_SYS_UCONTEXT_H + /* Define to 1 if you have the <sys/wait.h> header file. */ #define HAVE_SYS_WAIT_H 1 @@ -151,6 +172,9 @@ /* define if your compiler has __attribute__ */ #define HAVE___ATTRIBUTE__ 1 +/* Define to 1 if compiler supports __environ */ +#undef HAVE___ENVIRON + /* Define to 1 if the system has the type `__int64'. */ /* #undef HAVE___INT64 */ @@ -160,6 +184,10 @@ /* Define to 1 if int32_t is equivalent to intptr_t */ /* #undef INT32_EQUALS_INTPTR */ +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + /* Define to 1 if your C compiler doesn't accept -c and -o together. 
*/ /* #undef NO_MINUS_C_MINUS_O */ @@ -173,13 +201,16 @@ #define PACKAGE_NAME "google-perftools" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "google-perftools 1.4" +#define PACKAGE_STRING "google-perftools 1.7" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "google-perftools" +/* Define to the home page for this package. */ +#undef PACKAGE_URL + /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.4" +#define PACKAGE_VERSION "1.7" /* How to access the PC from a struct ucontext */ #define PC_FROM_UCONTEXT uc_mcontext.gregs[REG_RIP] @@ -211,7 +242,7 @@ #define STL_NAMESPACE std /* Version number of package */ -#define VERSION "1.4" +#define VERSION "1.7" /* C99 says: define this to get the PRI... macros from stdint.h */ #ifndef __STDC_FORMAT_MACROS @@ -228,4 +259,3 @@ #ifdef __MINGW32__ #include "windows/mingw.h" #endif - diff --git a/third_party/tcmalloc/chromium/src/config_win.h b/third_party/tcmalloc/chromium/src/config_win.h index 236bd6b..db4c518 100644 --- a/third_party/tcmalloc/chromium/src/config_win.h +++ b/third_party/tcmalloc/chromium/src/config_win.h @@ -29,7 +29,7 @@ #define HASH_SET_H <hash_set> /* Define to 1 if your libc has a snprintf implementation */ -#define HAVE_SNPRINTF +#undef HAVE_SNPRINTF /* Define to 1 if compiler supports __builtin_stack_pointer */ #undef HAVE_BUILTIN_STACK_POINTER @@ -37,6 +37,9 @@ /* Define to 1 if you have the <conflict-signal.h> header file. */ #undef HAVE_CONFLICT_SIGNAL_H +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +#undef HAVE_CYGWIN_SIGNAL_H + /* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. */ #undef HAVE_DECL_CFREE @@ -73,6 +76,9 @@ /* Define to 1 if you have the <fcntl.h> header file. */ #undef HAVE_FCNTL_H +/* Define to 1 if you have the <features.h> header file. */ +#undef HAVE_FEATURES_H + /* Define to 1 if you have the `geteuid' function. 
*/ #undef HAVE_GETEUID @@ -103,6 +109,9 @@ /* Define to 1 if you have the <malloc.h> header file. */ #undef HAVE_MALLOC_H +/* Define to 1 if you have the <malloc/malloc.h> header file. */ +#undef HAVE_MALLOC_MALLOC_H + /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H @@ -112,6 +121,9 @@ /* define if the compiler implements namespaces */ #define HAVE_NAMESPACES 1 +/* Define to 1 if you have the <poll.h> header file. */ +#undef HAVE_POLL_H + /* define if libc has program_invocation_name */ #undef HAVE_PROGRAM_INVOCATION_NAME @@ -123,6 +135,8 @@ /* Define to 1 if you have the `sbrk' function. */ #undef HAVE_SBRK +/* Define to 1 if you have the <sched.h> header file. */ +#undef HAVE_SCHED_H /* Define to 1 if you have the <stdint.h> header file. */ #undef HAVE_STDINT_H @@ -139,6 +153,15 @@ /* Define to 1 if the system has the type `struct mallinfo'. */ #undef HAVE_STRUCT_MALLINFO +/* Define to 1 if you have the <sys/cdefs.h> header file. */ +#undef HAVE_SYS_CDEFS_H + +/* Define to 1 if you have the <sys/malloc.h> header file. */ +#undef HAVE_SYS_MALLOC_H + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + /* Define to 1 if you have the <sys/prctl.h> header file. */ #undef HAVE_SYS_PRCTL_H @@ -154,9 +177,15 @@ /* Define to 1 if you have the <sys/syscall.h> header file. */ #undef HAVE_SYS_SYSCALL_H +/* Define to 1 if you have the <sys/time.h> header file. */ +#undef HAVE_SYS_TIME_H + /* Define to 1 if you have the <sys/types.h> header file. */ #define HAVE_SYS_TYPES_H 1 +/* <sys/ucontext.h> is broken on redhat 7 */ +#undef HAVE_SYS_UCONTEXT_H + /* Define to 1 if you have the <sys/wait.h> header file. */ #undef HAVE_SYS_WAIT_H @@ -175,6 +204,9 @@ /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ +/* Define to 1 if compiler supports __environ */ +#undef HAVE___ENVIRON + /* Define to 1 if the system has the type `__int64'. 
*/ #define HAVE___INT64 1 @@ -184,6 +216,10 @@ /* Define to 1 if int32_t is equivalent to intptr_t */ #undef INT32_EQUALS_INTPTR +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + /* Define to 1 if your C compiler doesn't accept -c and -o together. */ #undef NO_MINUS_C_MINUS_O @@ -202,6 +238,9 @@ /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME +/* Define to the home page for this package. */ +#undef PACKAGE_URL + /* Define to the version of this package. */ #undef PACKAGE_VERSION diff --git a/third_party/tcmalloc/chromium/src/debugallocation.cc b/third_party/tcmalloc/chromium/src/debugallocation.cc index 949fbe9..9de927a 100644 --- a/third_party/tcmalloc/chromium/src/debugallocation.cc +++ b/third_party/tcmalloc/chromium/src/debugallocation.cc @@ -31,8 +31,16 @@ // Author: Urs Holzle <opensource@google.com> #include "config.h" -#ifdef HAVE_MALLOC_H -#include <malloc.h> +// We only need malloc.h for struct mallinfo. +#ifdef HAVE_STRUCT_MALLINFO +// Malloc can be in several places on older versions of OS X. 
+# if defined(HAVE_MALLOC_H) +# include <malloc.h> +# elif defined(HAVE_MALLOC_MALLOC_H) +# include <malloc/malloc.h> +# elif defined(HAVE_SYS_MALLOC_H) +# include <sys/malloc.h> +# endif #endif #include <pthread.h> #include <stdio.h> @@ -54,22 +62,19 @@ #include <errno.h> #include <string.h> +#include <google/malloc_extension.h> +#include <google/malloc_hook.h> +#include <google/stacktrace.h> #include "base/commandlineflags.h" #include "base/googleinit.h" #include "base/logging.h" -#include "google/malloc_extension.h" -#include "google/malloc_hook.h" -#include "google/stacktrace.h" +#include "base/spinlock.h" #include "addressmap-inl.h" #include "malloc_hook-inl.h" #include "symbolize.h" -#ifdef TCMALLOC_FOR_DEBUGALLOCATION +#define TCMALLOC_USING_DEBUGALLOCATION #include "tcmalloc.cc" -#else -#include "base/spinlock.h" -// Else we already have a SpinLock defined in tcmalloc/internal_spinlock.h -#endif // __THROW is defined in glibc systems. It means, counter-intuitively, // "This function will never throw an exception." It's an optional @@ -126,48 +131,16 @@ DEFINE_bool(symbolize_stacktrace, static void TracePrintf(int fd, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3))); -// -// GNU has some weird "weak aliasing" thing that permits us to define our -// own malloc(), free(), and realloc() which can use the normal versions of -// of themselves by calling __libc_malloc(), __libc_free(), and -// __libc_realloc(). -// -extern "C" { - extern void* __libc_malloc(size_t size); - extern void __libc_free(void* ptr); - extern void* __libc_realloc(void* ptr, size_t size); - extern void* __libc_calloc(size_t nmemb, size_t size); - extern int __libc_mallopt(int cmd, int value); -#ifdef HAVE_STRUCT_MALLINFO - extern struct mallinfo __libc_mallinfo(void); -#endif -} - -// Define the malloc/free/mallopt/mallinfo implementations -// we will be working on top of. 
-// TODO(csilvers): provide debugallocation on top of libc alloc, -// so this #ifdef might sometimes be false. -#ifdef TCMALLOC_FOR_DEBUGALLOCATION - -// The do_* functions are defined in tcmalloc.cc, +// The do_* functions are defined in tcmalloc/tcmalloc.cc, // which is included before this file -// when TCMALLOC_FOR_DEBUGALLOCATION is defined. -#define BASE_MALLOC_NEW(size) cpp_alloc(size, false) -#define BASE_MALLOC do_malloc_or_cpp_alloc -#define BASE_FREE do_free -#define BASE_MALLOPT do_mallopt -#define BASE_MALLINFO do_mallinfo - -#else - -// We are working on top of standard libc's malloc library -#define BASE_MALLOC_NEW __libc_malloc -#define BASE_MALLOC __libc_malloc -#define BASE_FREE __libc_free -#define BASE_MALLOPT __libc_mallopt -#define BASE_MALLINFO __libc_mallinfo - -#endif +// when TCMALLOC_FOR_DEBUGALLOCATION is defined +#define BASE_MALLOC_NEW(size) cpp_alloc(size, false) +#define BASE_MALLOC do_malloc +#define BASE_FREE do_free +#define BASE_MALLOC_STATS do_malloc_stats +#define BASE_MALLOPT do_mallopt +#define BASE_MALLINFO do_mallinfo +#define BASE_MALLOC_SIZE(ptr) GetSizeWithCallback(ptr, &InvalidGetAllocatedSize) // ========================================================================= // @@ -190,7 +163,7 @@ class FreeQueue { return (q_front_ + 1) % kFreeQueueSize == q_back_; } - void Push(QueueEntry block) { + void Push(const QueueEntry& block) { q_[q_front_] = block; q_front_ = (q_front_ + 1) % kFreeQueueSize; } @@ -273,12 +246,13 @@ class MallocBlock { // NOTE: tcmalloc.cc depends on the value of kMagicDeletedByte // to work around a bug in the pthread library. static const int kMagicDeletedByte = 0xCD; - // An int (type of alloc_type_ below) in a deallocated storage + // A size_t (type of alloc_type_ below) in a deallocated storage // filled with kMagicDeletedByte. 
- static const int kMagicDeletedInt = 0xCDCDCDCD | ((0xCDCDCDCD << 16) << 16); - // Initializer works for 32 and 64 bit ints; + static const size_t kMagicDeletedSizeT = + 0xCDCDCDCD | (((size_t)0xCDCDCDCD << 16) << 16); + // Initializer works for 32 and 64 bit size_ts; // "<< 16 << 16" is to fool gcc from issuing a warning - // when ints are 32 bits. + // when size_ts are 32 bits. // NOTE: on Linux, you can enable malloc debugging support in libc by // setting the environment variable MALLOC_CHECK_ to 1 before you @@ -297,12 +271,17 @@ class MallocBlock { private: // data layout // The four fields size1_,offset_,magic1_,alloc_type_ - // should together occupy a multiple of 8 bytes. + // should together occupy a multiple of 16 bytes. (At the + // moment, sizeof(size_t) == 4 or 8 depending on piii vs + // k8, and 4 of those sum to 16 or 32 bytes). + // This, combined with BASE_MALLOC's alignment guarantees, + // ensures that SSE types can be stored into the returned + // block, at &size2_. size_t size1_; size_t offset_; // normally 0 unless memaligned memory // see comments in memalign() and FromRawPointer(). - int magic1_; - int alloc_type_; + size_t magic1_; + size_t alloc_type_; // here comes the actual data (variable length) // ... // then come the size2_ and magic2_, or a full page of mprotect-ed memory @@ -435,7 +414,7 @@ class MallocBlock { "has been already deallocated (it was allocated with %s)", data_addr(), AllocName(map_type & ~kDeallocatedTypeBit)); } - if (alloc_type_ == kMagicDeletedInt) { + if (alloc_type_ == kMagicDeletedSizeT) { RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " "has been corrupted; or else the object has been already " "deallocated and our memory map has been corrupted", @@ -497,7 +476,7 @@ class MallocBlock { // practical effect is that allocations are limited to 4Gb or so, even if // the address space could take more. 
static size_t max_size_t = ~0; - if (size < 0 || size > max_size_t - sizeof(MallocBlock)) { + if (size > max_size_t - sizeof(MallocBlock)) { RAW_LOG(ERROR, "Massive size passed to malloc: %"PRIuS"", size); return NULL; } @@ -662,24 +641,24 @@ class MallocBlock { reinterpret_cast<void*>( PRINTABLE_PTHREAD(queue_entry.deleter_threadid))); - SymbolTable symbolization_table; - const int num_symbols = queue_entry.num_deleter_pcs; // short alias name - for (int i = 0; i < num_symbols; i++) { + // We don't want to allocate or deallocate memory here, so we use + // placement-new. It's ok that we don't destroy this, since we're + // just going to error-exit below anyway. Union is for alignment. + union { void* alignment; char buf[sizeof(SymbolTable)]; } tablebuf; + SymbolTable* symbolization_table = new (tablebuf.buf) SymbolTable; + for (int i = 0; i < queue_entry.num_deleter_pcs; i++) { // Symbolizes the previous address of pc because pc may be in the // next function. This may happen when the function ends with // a call to a function annotated noreturn (e.g. CHECK). - char* pc = - reinterpret_cast<char*>(queue_entry.deleter_pcs[i]) - 1; - symbolization_table.Add(pc); + char *pc = reinterpret_cast<char*>(queue_entry.deleter_pcs[i]); + symbolization_table->Add(pc - 1); } if (FLAGS_symbolize_stacktrace) - symbolization_table.Symbolize(); - for (int i = 0; i < num_symbols; i++) { - char *pc = - reinterpret_cast<char*>(queue_entry.deleter_pcs[i]) - 1; - TracePrintf(STDERR_FILENO, " @ %"PRIxPTR" %s\n", - reinterpret_cast<uintptr_t>(pc), - symbolization_table.GetSymbol(pc)); + symbolization_table->Symbolize(); + for (int i = 0; i < queue_entry.num_deleter_pcs; i++) { + char *pc = reinterpret_cast<char*>(queue_entry.deleter_pcs[i]); + TracePrintf(STDERR_FILENO, " @ %p %s\n", + pc, symbolization_table->GetSymbol(pc - 1)); } } else { RAW_LOG(ERROR, @@ -701,8 +680,8 @@ class MallocBlock { // Find the header just before client's memory. 
MallocBlock *mb = reinterpret_cast<MallocBlock *>( reinterpret_cast<char *>(p) - data_offset); - // If mb->alloc_type_ is kMagicDeletedInt, we're not an ok pointer. - if (mb->alloc_type_ == kMagicDeletedInt) { + // If mb->alloc_type_ is kMagicDeletedSizeT, we're not an ok pointer. + if (mb->alloc_type_ == kMagicDeletedSizeT) { RAW_LOG(FATAL, "memory allocation bug: object at %p has been already" " deallocated; or else a word before the object has been" " corrupted (memory stomping bug)", p); @@ -976,71 +955,176 @@ static inline void DebugDeallocate(void* ptr, int type) { // ========================================================================= // -// Alloc/free stuff for debug hooks for malloc & friends - -// CAVEAT: The code structure below ensures that MallocHook methods are always -// called from the stack frame of the invoked allocation function. -// heap-checker.cc depends on this to start a stack trace from -// the call to the (de)allocation function. - -// Put all callers of MallocHook::Invoke* in this module into -// ATTRIBUTE_SECTION(google_malloc) section, -// so that MallocHook::GetCallerStackTrace can function accurately: - -extern "C" { - void* malloc(size_t size) __THROW ATTRIBUTE_SECTION(google_malloc); - void free(void* ptr) __THROW ATTRIBUTE_SECTION(google_malloc); - void* realloc(void* ptr, size_t size) __THROW - ATTRIBUTE_SECTION(google_malloc); - void* calloc(size_t nmemb, size_t size) __THROW - ATTRIBUTE_SECTION(google_malloc); - void cfree(void* ptr) __THROW ATTRIBUTE_SECTION(google_malloc); - - void* memalign(size_t __alignment, size_t __size) __THROW - ATTRIBUTE_SECTION(google_malloc); - int posix_memalign(void** ptr, size_t align, size_t size) __THROW - ATTRIBUTE_SECTION(google_malloc); - void* valloc(size_t __size) __THROW - ATTRIBUTE_SECTION(google_malloc); - void* pvalloc(size_t __size) __THROW - ATTRIBUTE_SECTION(google_malloc); +// The following functions may be called via MallocExtension::instance() +// for memory verification and 
statistics. +class DebugMallocImplementation : public TCMallocImplementation { + public: + virtual bool GetNumericProperty(const char* name, size_t* value) { + bool result = TCMallocImplementation::GetNumericProperty(name, value); + if (result && (strcmp(name, "generic.current_allocated_bytes") == 0)) { + // Subtract bytes kept in the free queue + size_t qsize = MallocBlock::FreeQueueSize(); + if (*value >= qsize) { + *value -= qsize; + } + } + return result; + } + + virtual bool VerifyNewMemory(void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType); + return true; + } + + virtual bool VerifyArrayNewMemory(void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType); + return true; + } + + virtual bool VerifyMallocMemory(void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType); + return true; + } + + virtual bool VerifyAllMemory() { + return MallocBlock::CheckEverything(); + } + + virtual bool MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + return MallocBlock::MemoryStats(blocks, total, histogram); + } + + virtual size_t GetAllocatedSize(void* p) { + if (p) { + return MallocBlock::FromRawPointer(p)->data_size(); + } + return 0; + } + virtual size_t GetEstimatedAllocatedSize(size_t size) { + return size; + } + + virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) { + static const char* kDebugFreeQueue = "debug.free_queue"; + + TCMallocImplementation::GetFreeListSizes(v); + + MallocExtension::FreeListInfo i; + i.type = kDebugFreeQueue; + i.min_object_size = 0; + i.max_object_size = numeric_limits<size_t>::max(); + i.total_bytes_free = MallocBlock::FreeQueueSize(); + v->push_back(i); + } + + }; + +static DebugMallocImplementation debug_malloc_implementation; + +REGISTER_MODULE_INITIALIZER(debugallocation, { + // Either we or valgrind will control memory management. We + // register our extension if we're the winner. 
+ if (RunningOnValgrind()) { + // Let Valgrind uses its own malloc (so don't register our extension). + } else { + MallocExtension::Register(&debug_malloc_implementation); + // When the program exits, check all blocks still in the free + // queue for corruption. + atexit(DanglingWriteChecker); + } +}); + +// ========================================================================= // + +// This is mostly the same a cpp_alloc in tcmalloc.cc. +// TODO(csilvers): write a wrapper for new-handler so we don't have to +// copy this code so much. +inline void* debug_cpp_alloc(size_t size, int new_type, bool nothrow) { + for (;;) { + void* p = DebugAllocate(size, new_type); +#ifdef PREANSINEW + return p; +#else + if (p == NULL) { // allocation failed + // Get the current new handler. NB: this function is not + // thread-safe. We make a feeble stab at making it so here, but + // this lock only protects against tcmalloc interfering with + // itself, not with other libraries calling set_new_handler. + std::new_handler nh; + { + SpinLockHolder h(&set_new_handler_lock); + nh = std::set_new_handler(0); + (void) std::set_new_handler(nh); + } +#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + if (nh) { + // Since exceptions are disabled, we don't really know if new_handler + // failed. Assume it will abort if it fails. + (*nh)(); + continue; + } + return 0; +#else + // If no new_handler is established, the allocation failed. + if (!nh) { + if (nothrow) return 0; + throw std::bad_alloc(); + } + // Otherwise, try the new_handler. If it returns, retry the + // allocation. If it throws std::bad_alloc, fail the allocation. + // if it throws something else, don't interfere. 
+ try { + (*nh)(); + } catch (const std::bad_alloc&) { + if (!nothrow) throw; + return p; + } +#endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + } else { // allocation success + return p; + } +#endif // PREANSINEW + } +} + +inline void* do_debug_malloc_or_debug_cpp_alloc(size_t size) { + return tc_new_mode ? debug_cpp_alloc(size, MallocBlock::kMallocType, true) + : DebugAllocate(size, MallocBlock::kMallocType); } -static void *MemalignOverride(size_t align, size_t size, - const void *caller) __THROW - ATTRIBUTE_SECTION(google_malloc); - -void* operator new(size_t size) throw (std::bad_alloc) - ATTRIBUTE_SECTION(google_malloc); -void* operator new(size_t size, const std::nothrow_t&) __THROW - ATTRIBUTE_SECTION(google_malloc); -void operator delete(void* p) __THROW - ATTRIBUTE_SECTION(google_malloc); -void operator delete(void* p, const std::nothrow_t&) __THROW - ATTRIBUTE_SECTION(google_malloc); -void* operator new[](size_t size) throw (std::bad_alloc) - ATTRIBUTE_SECTION(google_malloc); -void* operator new[](size_t size, const std::nothrow_t&) __THROW - ATTRIBUTE_SECTION(google_malloc); -void operator delete[](void* p) __THROW - ATTRIBUTE_SECTION(google_malloc); -void operator delete[](void* p, const std::nothrow_t&) __THROW - ATTRIBUTE_SECTION(google_malloc); - -extern "C" void* malloc(size_t size) __THROW { - void* ptr = DebugAllocate(size, MallocBlock::kMallocType); +// Exported routines + +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW { + void* ptr = do_debug_malloc_or_debug_cpp_alloc(size); MallocHook::InvokeNewHook(ptr, size); return ptr; } -extern "C" void free(void* ptr) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kMallocType); } -extern "C" void* realloc(void* ptr, size_t size) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t count, size_t size) __THROW { + // 
Overflow check + const size_t total_size = count * size; + if (size != 0 && total_size / size != count) return NULL; + + void* block = do_debug_malloc_or_debug_cpp_alloc(total_size); + MallocHook::InvokeNewHook(block, total_size); + if (block) memset(block, 0, total_size); + return block; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) __THROW { if (ptr == NULL) { - ptr = DebugAllocate(size, MallocBlock::kMallocType); + ptr = do_debug_malloc_or_debug_cpp_alloc(size); MallocHook::InvokeNewHook(ptr, size); return ptr; } @@ -1066,20 +1150,59 @@ extern "C" void* realloc(void* ptr, size_t size) __THROW { return p->data_addr(); } -extern "C" void* calloc(size_t count, size_t size) __THROW { - // Overflow check - const size_t total_size = count * size; - if (size != 0 && total_size / size != count) return NULL; +extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { + void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, false); + MallocHook::InvokeNewHook(ptr, size); + if (ptr == NULL) { + RAW_LOG(FATAL, "Unable to allocate %"PRIuS" bytes: new failed.", size); + } + return ptr; +} - void* block = DebugAllocate(total_size, MallocBlock::kMallocType); - MallocHook::InvokeNewHook(block, total_size); - if (block) memset(block, 0, total_size); - return block; +extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW { + void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, true); + MallocHook::InvokeNewHook(ptr, size); + return ptr; } -extern "C" void cfree(void* ptr) __THROW { - MallocHook::InvokeDeleteHook(ptr); - DebugDeallocate(ptr, MallocBlock::kMallocType); +extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kNewType); +} + +// Some STL implementations 
explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). +extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kNewType); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { + void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, false); + MallocHook::InvokeNewHook(ptr, size); + if (ptr == NULL) { + RAW_LOG(FATAL, "Unable to allocate %"PRIuS" bytes: new[] failed.", size); + } + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) + __THROW { + void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, true); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kArrayNewType); +} + +// Some STL implementations explicitly invoke this. +// It is completely equivalent to a normal delete (delete never throws). +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + DebugDeallocate(p, MallocBlock::kArrayNewType); } // Round "value" up to next "alignment" boundary. @@ -1088,6 +1211,7 @@ static intptr_t RoundUp(intptr_t value, intptr_t alignment) { return (value + alignment - 1) & ~(alignment - 1); } +// This is mostly the same as do_memalign in tcmalloc.cc. static void *do_debug_memalign(size_t alignment, size_t size) { // Allocate >= size bytes aligned on "alignment" boundary // "alignment" is a power of two. @@ -1117,83 +1241,10 @@ static void *do_debug_memalign(size_t alignment, size_t size) { return p; } -// Override __libc_memalign in libc on linux boxes. -// They have a bug in libc that causes them (very rarely) to allocate -// with __libc_memalign() yet deallocate with free(). 
-// This function is an exception to the rule of calling MallocHook method -// from the stack frame of the allocation function; -// heap-checker handles this special case explicitly. -static void *MemalignOverride(size_t align, size_t size, - const void *caller) __THROW { - void *p = do_debug_memalign(align, size); - MallocHook::InvokeNewHook(p, size); - return p; -} -void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; - -extern "C" void* memalign(size_t align, size_t size) __THROW { - void *p = do_debug_memalign(align, size); - MallocHook::InvokeNewHook(p, size); - return p; -} - -// Implementation taken from tcmalloc/tcmalloc.cc -extern "C" int posix_memalign(void** result_ptr, - size_t align, size_t size) __THROW { - if (((align % sizeof(void*)) != 0) || - ((align & (align - 1)) != 0) || - (align == 0)) { - return EINVAL; - } - - void* result = do_debug_memalign(align, size); - MallocHook::InvokeNewHook(result, size); - if (result == NULL) { - return ENOMEM; - } else { - *result_ptr = result; - return 0; - } -} - -extern "C" void* valloc(size_t size) __THROW { - // Allocate >= size bytes starting on a page boundary - void *p = do_debug_memalign(getpagesize(), size); - MallocHook::InvokeNewHook(p, size); - return p; -} - -extern "C" void* pvalloc(size_t size) __THROW { - // Round size up to a multiple of pages - // then allocate memory on a page boundary - int pagesize = getpagesize(); - size = RoundUp(size, pagesize); - if (size == 0) { // pvalloc(0) should allocate one page, according to - size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html - } - void *p = do_debug_memalign(pagesize, size); - MallocHook::InvokeNewHook(p, size); - return p; -} - -extern "C" int mallopt(int cmd, int value) __THROW { - return BASE_MALLOPT(cmd, value); -} - -#ifdef HAVE_STRUCT_MALLINFO -extern "C" struct mallinfo mallinfo(void) __THROW { - return BASE_MALLINFO(); -} -#endif - -// 
========================================================================= // - -// Alloc/free stuff for debug operator new & friends - -// This is mostly the same a cpp_alloc in tcmalloc.cc. -inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) { +// This is mostly the same as cpp_memalign in tcmalloc.cc. +static void* debug_cpp_memalign(size_t align, size_t size) { for (;;) { - void* p = DebugAllocate(size, new_type); + void* p = do_debug_memalign(align, size); #ifdef PREANSINEW return p; #else @@ -1218,17 +1269,15 @@ inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) { return 0; #else // If no new_handler is established, the allocation failed. - if (!nh) { - if (nothrow) return 0; - throw std::bad_alloc(); - } + if (!nh) + return 0; + // Otherwise, try the new_handler. If it returns, retry the // allocation. If it throws std::bad_alloc, fail the allocation. // if it throws something else, don't interfere. try { (*nh)(); } catch (const std::bad_alloc&) { - if (!nothrow) throw; return p; } #endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) @@ -1239,171 +1288,96 @@ inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) { } } -void* operator new(size_t size) throw (std::bad_alloc) { - void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, false); - MallocHook::InvokeNewHook(ptr, size); - if (ptr == NULL) { - RAW_LOG(FATAL, "Unable to allocate %"PRIuS" bytes: new failed.", size); - } - return ptr; +inline void* do_debug_memalign_or_debug_cpp_memalign(size_t align, + size_t size) { + return tc_new_mode ? 
debug_cpp_memalign(align, size) + : do_debug_memalign(align, size); } -void* operator new(size_t size, const std::nothrow_t&) __THROW { - void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, true); - MallocHook::InvokeNewHook(ptr, size); - return ptr; +extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, size_t size) __THROW { + void *p = do_debug_memalign_or_debug_cpp_memalign(align, size); + MallocHook::InvokeNewHook(p, size); + return p; } -void operator delete(void* ptr) __THROW { - MallocHook::InvokeDeleteHook(ptr); - DebugDeallocate(ptr, MallocBlock::kNewType); -} +// Implementation taken from tcmalloc/tcmalloc.cc +extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(void** result_ptr, size_t align, size_t size) + __THROW { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } -// Some STL implementations explicitly invoke this. -// It is completely equivalent to a normal delete (delete never throws). -void operator delete(void* ptr, const std::nothrow_t&) __THROW { - MallocHook::InvokeDeleteHook(ptr); - DebugDeallocate(ptr, MallocBlock::kNewType); + void* result = do_debug_memalign_or_debug_cpp_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + if (result == NULL) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } } -// ========================================================================= // - -// Alloc/free stuff for debug operator new[] & friends +extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) __THROW { + // Allocate >= size bytes starting on a page boundary + void *p = do_debug_memalign_or_debug_cpp_memalign(getpagesize(), size); + MallocHook::InvokeNewHook(p, size); + return p; +} -void* operator new[](size_t size) throw (std::bad_alloc) { - void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, false); - MallocHook::InvokeNewHook(ptr, size); - if (ptr == NULL) { - RAW_LOG(FATAL, "Unable to allocate %"PRIuS" bytes: new[] 
failed.", size); +extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) __THROW { + // Round size up to a multiple of pages + // then allocate memory on a page boundary + int pagesize = getpagesize(); + size = RoundUp(size, pagesize); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html } - return ptr; + void *p = do_debug_memalign_or_debug_cpp_memalign(pagesize, size); + MallocHook::InvokeNewHook(p, size); + return p; } -void* operator new[](size_t size, const std::nothrow_t&) __THROW { - void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, true); - MallocHook::InvokeNewHook(ptr, size); - return ptr; +// malloc_stats just falls through to the base implementation. +extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW { + BASE_MALLOC_STATS(); } -void operator delete[](void* ptr) __THROW { - MallocHook::InvokeDeleteHook(ptr); - DebugDeallocate(ptr, MallocBlock::kArrayNewType); +extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW { + return BASE_MALLOPT(cmd, value); } -// Some STL implementations explicitly invoke this. -// It is completely equivalent to a normal delete (delete never throws). -void operator delete[](void* ptr, const std::nothrow_t&) __THROW { - MallocHook::InvokeDeleteHook(ptr); - DebugDeallocate(ptr, MallocBlock::kArrayNewType); +#ifdef HAVE_STRUCT_MALLINFO +extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW { + return BASE_MALLINFO(); } - -// ========================================================================= // - -// The following functions may be called via MallocExtension::instance() -// for memory verification and statistics. 
-#ifdef TCMALLOC_FOR_DEBUGALLOCATION -// Inherit from tcmalloc's version -typedef TCMallocImplementation ParentImplementation; -#else -// Inherit from default version -typedef MallocExtension ParentImplementation; #endif -class DebugMallocImplementation : public ParentImplementation { - public: - virtual bool GetNumericProperty(const char* name, size_t* value) { - bool result = ParentImplementation::GetNumericProperty(name, value); - if (result && (strcmp(name, "generic.current_allocated_bytes") == 0)) { - // Subtract bytes kept in the free queue - size_t qsize = MallocBlock::FreeQueueSize(); - if (*value >= qsize) { - *value -= qsize; - } - } - return result; - } - - virtual bool VerifyNewMemory(void* p) { - if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType); - return true; - } - - virtual bool VerifyArrayNewMemory(void* p) { - if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType); - return true; - } - - virtual bool VerifyMallocMemory(void* p) { - if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType); - return true; - } - - virtual bool VerifyAllMemory() { - return MallocBlock::CheckEverything(); - } - - virtual bool MallocMemoryStats(int* blocks, size_t* total, - int histogram[kMallocHistogramSize]) { - return MallocBlock::MemoryStats(blocks, total, histogram); - } - - virtual size_t GetAllocatedSize(void* p) { - if (p) { - return MallocBlock::FromRawPointer(p)->data_size(); - } +extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW { + if (!ptr) { return 0; } - virtual size_t GetEstimatedAllocatedSize(size_t size) { - return size; - } - }; - -static DebugMallocImplementation debug_malloc_implementation; - -REGISTER_MODULE_INITIALIZER(debugallocation, { - // Either we or valgrind will control memory management. We - // register our extension if we're the winner. - if (RunningOnValgrind()) { - // Let Valgrind uses its own malloc (so don't register our extension). 
- } else { - MallocExtension::Register(&debug_malloc_implementation); - // When the program exits, check all blocks still in the free - // queue for corruption. - atexit(DanglingWriteChecker); - } -}); - -#ifdef TCMALLOC_FOR_DEBUGALLOCATION - -// Redefine malloc_stats to use tcmalloc's implementation: -extern "C" void malloc_stats(void) __THROW { - do_malloc_stats(); + MallocBlock* mb = MallocBlock::FromRawPointer(ptr); + // This is just to make sure we actually own mb (and ptr). We don't + // use the actual value, just the 'exception' it raises on error. + (void)BASE_MALLOC_SIZE(mb); + return mb->data_size(); } -// Some library routines on RedHat 9 allocate memory using malloc() -// and free it using __libc_free() (or vice-versa). Since we provide -// our own implementations of malloc/free using tcmalloc.cc, -// we need to make sure that the __libc_XXX variants -// also point to the same implementations. -// -// Note: this might not override __libc_XXX calls withing libc itself, -// but it can be important for other libraries that mention these functions -// or when this code is LD_PRELOAD-ed. -// TODO: In case these __libc_* definitions do not actually matter, -// they should go away from here and from tcmalloc/tcmalloc.cc. -// -extern "C" { - void* __libc_malloc(size_t size) { return malloc(size); } - void __libc_free(void* ptr) { free(ptr); } - void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } - void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); } - void __libc_cfree(void* ptr) { cfree(ptr); } - void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); } - void* __libc_valloc(size_t size) { return valloc(size); } - void* __libc_pvalloc(size_t size) { return pvalloc(size); } - int __posix_memalign(void** r, size_t a, size_t s) { - return posix_memalign(r, a, s); - } -} +// Override __libc_memalign in libc on linux boxes. 
+// They have a bug in libc that causes them (very rarely) to allocate +// with __libc_memalign() yet deallocate with free(). +// This function is an exception to the rule of calling MallocHook method +// from the stack frame of the allocation function; +// heap-checker handles this special case explicitly. +static void *MemalignOverride(size_t align, size_t size, const void *caller) + __THROW ATTRIBUTE_SECTION(google_malloc); -#endif // #ifdef TCMALLOC_FOR_DEBUGALLOCATION +static void *MemalignOverride(size_t align, size_t size, const void *caller) + __THROW { + void *p = do_debug_memalign_or_debug_cpp_memalign(align, size); + MallocHook::InvokeNewHook(p, size); + return p; +} +void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; diff --git a/third_party/tcmalloc/chromium/src/google/heap-checker.h b/third_party/tcmalloc/chromium/src/google/heap-checker.h index c0ee8a8..f46f353 100644 --- a/third_party/tcmalloc/chromium/src/google/heap-checker.h +++ b/third_party/tcmalloc/chromium/src/google/heap-checker.h @@ -136,7 +136,7 @@ class PERFTOOLS_DLL_DECL HeapLeakChecker { bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); } // These forms are obsolete; use NoLeaks() instead. - // TODO(csilvers): mark with ATTRIBUTE_DEPRECATED. + // TODO(csilvers): mark as DEPRECATED. 
bool QuickNoLeaks() { return NoLeaks(); } bool BriefNoLeaks() { return NoLeaks(); } bool SameHeap() { return NoLeaks(); } diff --git a/third_party/tcmalloc/chromium/src/google/malloc_extension.h b/third_party/tcmalloc/chromium/src/google/malloc_extension.h index fc272c9..0e15c04 100644 --- a/third_party/tcmalloc/chromium/src/google/malloc_extension.h +++ b/third_party/tcmalloc/chromium/src/google/malloc_extension.h @@ -50,6 +50,7 @@ #include <stdint.h> #endif #include <string> +#include <vector> // Annoying stuff for windows -- makes sure clients can import these functions #ifndef PERFTOOLS_DLL_DECL @@ -69,6 +70,22 @@ namespace base { struct MallocRange; } +// Interface to a pluggable system allocator. +class SysAllocator { + public: + SysAllocator() { + } + virtual ~SysAllocator(); + + // Allocates "size"-byte of memory from system aligned with "alignment". + // Returns NULL if failed. Otherwise, the returned pointer p up to and + // including (p + actual_size -1) have been allocated. + virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0; + + // Notification that command-line flags have been initialized. + virtual void FlagsInitialized() = 0; +}; + // The default implementations of the following routines do nothing. // All implementations should be thread-safe; the current one // (TCMallocImplementation) is. @@ -101,13 +118,23 @@ class PERFTOOLS_DLL_DECL MallocExtension { // Outputs to "writer" a sample of live objects and the stack traces // that allocated these objects. The format of the returned output // is equivalent to the output of the heap profiler and can - // therefore be passed to "pprof". + // therefore be passed to "pprof". This function is equivalent to + // ReadStackTraces. The main difference is that this function returns + // serialized data appropriately formatted for use by the pprof tool. + // NOTE: by default, tcmalloc does not do any heap sampling, and this + // function will always return an empty sample. 
To get useful + // data from GetHeapSample, you must also set the environment + // variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288. virtual void GetHeapSample(MallocExtensionWriter* writer); // Outputs to "writer" the stack traces that caused growth in the // address space size. The format of the returned output is // equivalent to the output of the heap profiler and can therefore - // be passed to "pprof". + // be passed to "pprof". This function is equivalent to + // ReadHeapGrowthStackTraces. The main difference is that this function + // returns serialized data appropriately formatted for use by the + // pprof tool. (This does not depend on, or require, + // TCMALLOC_SAMPLE_PARAMETER.) virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer); // Invokes func(arg, range) for every controlled memory @@ -145,21 +172,22 @@ class PERFTOOLS_DLL_DECL MallocExtension { // Number of bytes used across all thread caches. // This property is not writable. // - // "tcmalloc.slack_bytes" - // Number of bytes allocated from system, but not currently in - // use by malloced objects. I.e., bytes available for - // allocation without needing more bytes from system. It is - // the sum of pageheap_free_bytes and pageheap_unmapped_bytes. - // This property is not writable. - // // "tcmalloc.pageheap_free_bytes" - // Number of bytes in free, mapped pages in pageheap - // This property is not writable. + // Number of bytes in free, mapped pages in page heap. These + // bytes can be used to fulfill allocation requests. They + // always count towards virtual memory usage, and unless the + // underlying memory is swapped out by the OS, they also count + // towards physical memory usage. This property is not writable. // // "tcmalloc.pageheap_unmapped_bytes" - // Number of bytes in free, unmapped pages in pageheap - // This property is not writable. - // + // Number of bytes in free, unmapped pages in page heap. 
+ // These are bytes that have been released back to the OS, + // possibly by one of the MallocExtension "Release" calls. + // They can be used to fulfill allocation requests, but + // typically incur a page fault. They always count towards + // virtual memory usage, and depending on the OS, typically + // do not count towards physical memory usage. This property + // is not writable. // ------------------------------------------------------------------- // Get the named "property"'s value. Returns true if the property @@ -194,6 +222,27 @@ class PERFTOOLS_DLL_DECL MallocExtension { // Most malloc implementations ignore this routine. virtual void MarkThreadBusy(); + // Gets the system allocator used by the malloc extension instance. Returns + // NULL for malloc implementations that do not support pluggable system + // allocators. + virtual SysAllocator* GetSystemAllocator(); + + // Sets the system allocator to the specified. + // + // Users could register their own system allocators for malloc implementation + // that supports pluggable system allocators, such as TCMalloc, by doing: + // alloc = new MyOwnSysAllocator(); + // MallocExtension::instance()->SetSystemAllocator(alloc); + // It's up to users whether to fall back (recommended) to the default + // system allocator (use GetSystemAllocator() above) or not. The caller is + // responsible to any necessary locking. + // See tcmalloc/system-alloc.h for the interface and + // tcmalloc/memfs_malloc.cc for the examples. + // + // It's a no-op for malloc implementations that do not support pluggable + // system allocators. + virtual void SetSystemAllocator(SysAllocator *a); + // Try to release num_bytes of free memory back to the operating // system for reuse. Use this extension with caution -- to get this // memory back may require faulting pages back in by the OS, and @@ -218,6 +267,7 @@ class PERFTOOLS_DLL_DECL MallocExtension { // SIZE bytes may reserve more bytes, but will never reserve less. 
// (Currently only implemented in tcmalloc, other implementations // always return SIZE.) + // This is equivalent to malloc_good_size() in OS X. virtual size_t GetEstimatedAllocatedSize(size_t size); // Returns the actual number N of bytes reserved by tcmalloc for the @@ -231,6 +281,8 @@ class PERFTOOLS_DLL_DECL MallocExtension { // from that -- and should not have been freed yet. p may be NULL. // (Currently only implemented in tcmalloc; other implementations // will return 0.) + // This is equivalent to malloc_size() in OS X, malloc_usable_size() + // in glibc, and _msize() for windows. virtual size_t GetAllocatedSize(void* p); // The current malloc implementation. Always non-NULL. @@ -240,7 +292,45 @@ class PERFTOOLS_DLL_DECL MallocExtension { // malloc implementation during initialization. static void Register(MallocExtension* implementation); - protected: + // Returns detailed information about malloc's freelists. For each list, + // return a FreeListInfo: + struct FreeListInfo { + size_t min_object_size; + size_t max_object_size; + size_t total_bytes_free; + const char* type; + }; + // Each item in the vector refers to a different freelist. The lists + // are identified by the range of allocations that objects in the + // list can satisfy ([min_object_size, max_object_size]) and the + // type of freelist (see below). The current size of the list is + // returned in total_bytes_free (which count against a processes + // resident and virtual size). + // + // Currently supported types are: + // + // "tcmalloc.page{_unmapped}" - tcmalloc's page heap. An entry for each size + // class in the page heap is returned. Bytes in "page_unmapped" + // are no longer backed by physical memory and do not count against + // the resident size of a process. + // + // "tcmalloc.large{_unmapped}" - tcmalloc's list of objects larger + // than the largest page heap size class. Only one "large" + // entry is returned. 
There is no upper-bound on the size + // of objects in the large free list; this call returns + // kint64max for max_object_size. Bytes in + // "large_unmapped" are no longer backed by physical memory + // and do not count against the resident size of a process. + // + // "tcmalloc.central" - tcmalloc's central free-list. One entry per + // size-class is returned. Never unmapped. + // + // "debug.free_queue" - free objects queued by the debug allocator + // and not returned to tcmalloc. + // + // "tcmalloc.thread" - tcmalloc's per-thread caches. Never unmapped. + virtual void GetFreeListSizes(std::vector<FreeListInfo>* v); + // Get a list of stack traces of sampled allocation points. Returns // a pointer to a "new[]-ed" result array, and stores the sample // period in "sample_period". diff --git a/third_party/tcmalloc/chromium/src/google/malloc_hook.h b/third_party/tcmalloc/chromium/src/google/malloc_hook.h index 0bc0f73..f5575f1 100644 --- a/third_party/tcmalloc/chromium/src/google/malloc_hook.h +++ b/third_party/tcmalloc/chromium/src/google/malloc_hook.h @@ -30,38 +30,38 @@ // --- // Author: Sanjay Ghemawat // -// Some of our malloc implementations can invoke the following hooks -// whenever memory is allocated or deallocated. If the hooks are -// NULL, they are not invoked. MallocHook is thread-safe, and things -// you do before calling SetFooHook(MyHook) are visible to any -// resulting calls to MyHook. Hooks must be thread-safe, and if you -// write: +// Some of our malloc implementations can invoke the following hooks whenever +// memory is allocated or deallocated. MallocHook is thread-safe, and things +// you do before calling AddFooHook(MyHook) are visible to any resulting calls +// to MyHook. Hooks must be thread-safe. If you write: // -// MallocHook::NewHook old_new_hook_ = NULL; -// ... 
-// old_new_hook_ = MallocHook::SetNewHook(&MyNewHook); +// CHECK(MallocHook::AddNewHook(&MyNewHook)); // -// old_new_hook_ could still be NULL the first couple times MyNewHook -// is called. +// MyNewHook will be invoked in subsequent calls in the current thread, but +// there are no guarantees on when it might be invoked in other threads. +// +// There are a limited number of slots available for each hook type. Add*Hook +// will return false if there are no slots available. Remove*Hook will return +// false if the given hook was not already installed. +// +// The order in which individual hooks are called in Invoke*Hook is undefined. +// +// It is safe for a hook to remove itself within Invoke*Hook and add other +// hooks. Any hooks added inside a hook invocation (for the same hook type) +// will not be invoked for the current invocation. // // One important user of these hooks is the heap profiler. // -// CAVEAT: If you add new MallocHook::Invoke* calls (not for chaining hooks), -// then those calls must be directly in the code of the (de)allocation -// function that is provided to the user and that function must have -// an ATTRIBUTE_SECTION(malloc_hook) attribute. +// CAVEAT: If you add new MallocHook::Invoke* calls then those calls must be +// directly in the code of the (de)allocation function that is provided to the +// user and that function must have an ATTRIBUTE_SECTION(malloc_hook) attribute. // -// Note: Get*Hook() and Invoke*Hook() functions are defined in -// malloc_hook-inl.h. If you need to get or invoke a hook (which you -// shouldn't unless you're part of tcmalloc), be sure to #include -// malloc_hook-inl.h in addition to malloc_hook.h. +// Note: the Invoke*Hook() functions are defined in malloc_hook-inl.h. If you +// need to invoke a hook (which you shouldn't unless you're part of tcmalloc), +// be sure to #include malloc_hook-inl.h in addition to malloc_hook.h. 
// // NOTE FOR C USERS: If you want to use malloc_hook functionality from // a C program, #include malloc_hook_c.h instead of this file. -// -// TODO(csilvers): support a non-inlined function called -// Assert*HookIs()? This is the context in which I normally see -// Get*Hook() called in non-tcmalloc code. #ifndef _MALLOC_HOOK_H_ #define _MALLOC_HOOK_H_ @@ -82,27 +82,31 @@ extern "C" { #endif // Note: malloc_hook_c.h defines MallocHook_*Hook and -// MallocHook_Set*Hook. The version of these inside the MallocHook -// class are defined in terms of the malloc_hook_c version. See -// malloc_hook_c.h for details of these types/functions. +// MallocHook_{Add,Remove}*Hook. The version of these inside the MallocHook +// class are defined in terms of the malloc_hook_c version. See malloc_hook_c.h +// for details of these types/functions. class PERFTOOLS_DLL_DECL MallocHook { public: // The NewHook is invoked whenever an object is allocated. // It may be passed NULL if the allocator returned NULL. typedef MallocHook_NewHook NewHook; - inline static NewHook GetNewHook(); - inline static NewHook SetNewHook(NewHook hook) { - return MallocHook_SetNewHook(hook); + inline static bool AddNewHook(NewHook hook) { + return MallocHook_AddNewHook(hook); + } + inline static bool RemoveNewHook(NewHook hook) { + return MallocHook_RemoveNewHook(hook); } inline static void InvokeNewHook(const void* p, size_t s); // The DeleteHook is invoked whenever an object is deallocated. // It may be passed NULL if the caller is trying to delete NULL. 
typedef MallocHook_DeleteHook DeleteHook; - inline static DeleteHook GetDeleteHook(); - inline static DeleteHook SetDeleteHook(DeleteHook hook) { - return MallocHook_SetDeleteHook(hook); + inline static bool AddDeleteHook(DeleteHook hook) { + return MallocHook_AddDeleteHook(hook); + } + inline static bool RemoveDeleteHook(DeleteHook hook) { + return MallocHook_RemoveDeleteHook(hook); } inline static void InvokeDeleteHook(const void* p); @@ -111,9 +115,11 @@ class PERFTOOLS_DLL_DECL MallocHook { // in memory limited contexts, to catch allocations that will exceed // a memory limit, and take outside actions to increase that limit. typedef MallocHook_PreMmapHook PreMmapHook; - inline static PreMmapHook GetPreMmapHook(); - inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) { - return MallocHook_SetPreMmapHook(hook); + inline static bool AddPreMmapHook(PreMmapHook hook) { + return MallocHook_AddPreMmapHook(hook); + } + inline static bool RemovePreMmapHook(PreMmapHook hook) { + return MallocHook_RemovePreMmapHook(hook); } inline static void InvokePreMmapHook(const void* start, size_t size, @@ -122,12 +128,40 @@ class PERFTOOLS_DLL_DECL MallocHook { int fd, off_t offset); + // The MmapReplacement is invoked after the PreMmapHook but before + // the call is actually made. The MmapReplacement should return true + // if it handled the call, or false if it is still necessary to + // call mmap/mmap64. + // This should be used only by experts, and users must be be + // extremely careful to avoid recursive calls to mmap. The replacement + // should be async signal safe. + // Only one MmapReplacement is supported. After setting an MmapReplacement + // you must call RemoveMmapReplacement before calling SetMmapReplacement + // again. 
+ typedef MallocHook_MmapReplacement MmapReplacement; + inline static bool SetMmapReplacement(MmapReplacement hook) { + return MallocHook_SetMmapReplacement(hook); + } + inline static bool RemoveMmapReplacement(MmapReplacement hook) { + return MallocHook_RemoveMmapReplacement(hook); + } + inline static bool InvokeMmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); + + // The MmapHook is invoked whenever a region of memory is mapped. // It may be passed MAP_FAILED if the mmap failed. typedef MallocHook_MmapHook MmapHook; - inline static MmapHook GetMmapHook(); - inline static MmapHook SetMmapHook(MmapHook hook) { - return MallocHook_SetMmapHook(hook); + inline static bool AddMmapHook(MmapHook hook) { + return MallocHook_AddMmapHook(hook); + } + inline static bool RemoveMmapHook(MmapHook hook) { + return MallocHook_RemoveMmapHook(hook); } inline static void InvokeMmapHook(const void* result, const void* start, @@ -137,19 +171,43 @@ class PERFTOOLS_DLL_DECL MallocHook { int fd, off_t offset); + // The MunmapReplacement is invoked with munmap arguments just before + // the call is actually made. The MunmapReplacement should return true + // if it handled the call, or false if it is still necessary to + // call munmap. + // This should be used only by experts. The replacement should be + // async signal safe. + // Only one MunmapReplacement is supported. After setting an + // MunmapReplacement you must call RemoveMunmapReplacement before + // calling SetMunmapReplacement again. 
+ typedef MallocHook_MunmapReplacement MunmapReplacement; + inline static bool SetMunmapReplacement(MunmapReplacement hook) { + return MallocHook_SetMunmapReplacement(hook); + } + inline static bool RemoveMunmapReplacement(MunmapReplacement hook) { + return MallocHook_RemoveMunmapReplacement(hook); + } + inline static bool InvokeMunmapReplacement(const void* p, + size_t size, + int* result); + // The MunmapHook is invoked whenever a region of memory is unmapped. typedef MallocHook_MunmapHook MunmapHook; - inline static MunmapHook GetMunmapHook(); - inline static MunmapHook SetMunmapHook(MunmapHook hook) { - return MallocHook_SetMunmapHook(hook); + inline static bool AddMunmapHook(MunmapHook hook) { + return MallocHook_AddMunmapHook(hook); + } + inline static bool RemoveMunmapHook(MunmapHook hook) { + return MallocHook_RemoveMunmapHook(hook); } inline static void InvokeMunmapHook(const void* p, size_t size); // The MremapHook is invoked whenever a region of memory is remapped. typedef MallocHook_MremapHook MremapHook; - inline static MremapHook GetMremapHook(); - inline static MremapHook SetMremapHook(MremapHook hook) { - return MallocHook_SetMremapHook(hook); + inline static bool AddMremapHook(MremapHook hook) { + return MallocHook_AddMremapHook(hook); + } + inline static bool RemoveMremapHook(MremapHook hook) { + return MallocHook_RemoveMremapHook(hook); } inline static void InvokeMremapHook(const void* result, const void* old_addr, @@ -165,9 +223,11 @@ class PERFTOOLS_DLL_DECL MallocHook { // to catch allocations that will exceed the limit and take outside // actions to increase such a limit. 
typedef MallocHook_PreSbrkHook PreSbrkHook; - inline static PreSbrkHook GetPreSbrkHook(); - inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) { - return MallocHook_SetPreSbrkHook(hook); + inline static bool AddPreSbrkHook(PreSbrkHook hook) { + return MallocHook_AddPreSbrkHook(hook); + } + inline static bool RemovePreSbrkHook(PreSbrkHook hook) { + return MallocHook_RemovePreSbrkHook(hook); } inline static void InvokePreSbrkHook(std::ptrdiff_t increment); @@ -176,9 +236,11 @@ class PERFTOOLS_DLL_DECL MallocHook { // to get the top of the memory stack, and is not actually a // memory-allocation call. typedef MallocHook_SbrkHook SbrkHook; - inline static SbrkHook GetSbrkHook(); - inline static SbrkHook SetSbrkHook(SbrkHook hook) { - return MallocHook_SetSbrkHook(hook); + inline static bool AddSbrkHook(SbrkHook hook) { + return MallocHook_AddSbrkHook(hook); + } + inline static bool RemoveSbrkHook(SbrkHook hook) { + return MallocHook_RemoveSbrkHook(hook); } inline static void InvokeSbrkHook(const void* result, std::ptrdiff_t increment); @@ -194,9 +256,90 @@ class PERFTOOLS_DLL_DECL MallocHook { // Unhooked versions of mmap() and munmap(). These should be used // only by experts, since they bypass heapchecking, etc. + // Note: These do not run hooks, but they still use the MmapReplacement + // and MunmapReplacement. static void* UnhookedMMap(void *start, size_t length, int prot, int flags, int fd, off_t offset); static int UnhookedMUnmap(void *start, size_t length); + + // The following are DEPRECATED. 
+ inline static NewHook GetNewHook(); + inline static NewHook SetNewHook(NewHook hook) { + return MallocHook_SetNewHook(hook); + } + + inline static DeleteHook GetDeleteHook(); + inline static DeleteHook SetDeleteHook(DeleteHook hook) { + return MallocHook_SetDeleteHook(hook); + } + + inline static PreMmapHook GetPreMmapHook(); + inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) { + return MallocHook_SetPreMmapHook(hook); + } + + inline static MmapHook GetMmapHook(); + inline static MmapHook SetMmapHook(MmapHook hook) { + return MallocHook_SetMmapHook(hook); + } + + inline static MunmapHook GetMunmapHook(); + inline static MunmapHook SetMunmapHook(MunmapHook hook) { + return MallocHook_SetMunmapHook(hook); + } + + inline static MremapHook GetMremapHook(); + inline static MremapHook SetMremapHook(MremapHook hook) { + return MallocHook_SetMremapHook(hook); + } + + inline static PreSbrkHook GetPreSbrkHook(); + inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) { + return MallocHook_SetPreSbrkHook(hook); + } + + inline static SbrkHook GetSbrkHook(); + inline static SbrkHook SetSbrkHook(SbrkHook hook) { + return MallocHook_SetSbrkHook(hook); + } + // End of DEPRECATED methods. + + private: + // Slow path versions of Invoke*Hook. 
+ static void InvokeNewHookSlow(const void* p, size_t s); + static void InvokeDeleteHookSlow(const void* p); + static void InvokePreMmapHookSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + static void InvokeMmapHookSlow(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + static bool InvokeMmapReplacementSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); + static void InvokeMunmapHookSlow(const void* p, size_t size); + static bool InvokeMunmapReplacementSlow(const void* p, + size_t size, + int* result); + static void InvokeMremapHookSlow(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); + static void InvokePreSbrkHookSlow(std::ptrdiff_t increment); + static void InvokeSbrkHookSlow(const void* result, std::ptrdiff_t increment); }; #endif /* _MALLOC_HOOK_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/malloc_hook_c.h b/third_party/tcmalloc/chromium/src/google/malloc_hook_c.h index 0f63e0b..b8478c1 100644 --- a/third_party/tcmalloc/chromium/src/google/malloc_hook_c.h +++ b/third_party/tcmalloc/chromium/src/google/malloc_hook_c.h @@ -40,21 +40,44 @@ #include <stddef.h> #include <sys/types.h> +/* Annoying stuff for windows; makes sure clients can import these functions */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + /* Get the current stack trace. Try to skip all routines up to and * and including the caller of MallocHook::Invoke*. * Use "skip_count" (similarly to GetStackTrace from stacktrace.h) * as a hint about how many routines to skip if better information * is not available. 
*/ +PERFTOOLS_DLL_DECL int MallocHook_GetCallerStackTrace(void** result, int max_depth, int skip_count); +/* The MallocHook_{Add,Remove}*Hook functions return 1 on success and 0 on + * failure. + */ typedef void (*MallocHook_NewHook)(const void* ptr, size_t size); -MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_AddNewHook(MallocHook_NewHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveNewHook(MallocHook_NewHook hook); typedef void (*MallocHook_DeleteHook)(const void* ptr); -MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook); typedef void (*MallocHook_PreMmapHook)(const void *start, size_t size, @@ -62,7 +85,10 @@ typedef void (*MallocHook_PreMmapHook)(const void *start, int flags, int fd, off_t offset); -MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook); typedef void (*MallocHook_MmapHook)(const void* result, const void* start, @@ -71,10 +97,32 @@ typedef void (*MallocHook_MmapHook)(const void* result, int flags, int fd, off_t offset); -MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_AddMmapHook(MallocHook_MmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook); + +typedef int (*MallocHook_MmapReplacement)(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result); +int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook); +int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook); typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size); -MallocHook_MunmapHook 
MallocHook_SetMunmapHook(MallocHook_MunmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook); + +typedef int (*MallocHook_MunmapReplacement)(const void* ptr, + size_t size, + int* result); +int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook); +int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook); typedef void (*MallocHook_MremapHook)(const void* result, const void* old_addr, @@ -82,12 +130,44 @@ typedef void (*MallocHook_MremapHook)(const void* result, size_t new_size, int flags, const void* new_addr); -MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_AddMremapHook(MallocHook_MremapHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook); typedef void (*MallocHook_PreSbrkHook)(std::ptrdiff_t increment); -MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook); typedef void (*MallocHook_SbrkHook)(const void* result, std::ptrdiff_t increment); +PERFTOOLS_DLL_DECL +int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook); +PERFTOOLS_DLL_DECL +int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook); + +/* The following are DEPRECATED. 
*/ +PERFTOOLS_DLL_DECL +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook); +PERFTOOLS_DLL_DECL +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook); +PERFTOOLS_DLL_DECL +MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook); +PERFTOOLS_DLL_DECL +MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook); +PERFTOOLS_DLL_DECL MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook); +/* End of DEPRECATED functions. */ + +#ifdef __cplusplus +} // extern "C" +#endif #endif /* _MALLOC_HOOK_C_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/profiler.h b/third_party/tcmalloc/chromium/src/google/profiler.h index 74b936f..07323e4 100644 --- a/third_party/tcmalloc/chromium/src/google/profiler.h +++ b/third_party/tcmalloc/chromium/src/google/profiler.h @@ -155,7 +155,7 @@ struct ProfilerState { int enabled; /* Is profiling currently enabled? */ time_t start_time; /* If enabled, when was profiling started? 
*/ char profile_name[1024]; /* Name of profile file being written, or '\0' */ - int samples_gathered; /* Number of samples gatheered to far (or 0) */ + int samples_gathered; /* Number of samples gathered so far (or 0) */ }; PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state); diff --git a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in index fbb70ab..c887559 100644 --- a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in +++ b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in @@ -35,6 +35,11 @@ #ifndef TCMALLOC_TCMALLOC_H_ #define TCMALLOC_TCMALLOC_H_ +#include <stddef.h> // for size_t +#ifdef HAVE_SYS_CDEFS_H +#include <sys/cdefs.h> // where glibc defines __THROW +#endif + // __THROW is defined in glibc systems. It means, counter-intuitively, // "This function will never throw an exception." It's an optional // optimization tool, but we may need to use it to match glibc prototypes. @@ -60,7 +65,9 @@ #endif #ifdef __cplusplus -#include <new> // for std::nothrow_t +namespace std { +struct nothrow_t; +} extern "C" { #endif @@ -89,6 +96,13 @@ extern "C" { PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW; #endif + // This is an alias for MallocExtension::instance()->GetAllocatedSize(). + // It is equivalent to + // OS X: malloc_size() + // glibc: malloc_usable_size() + // Windows: _msize() + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW; + #ifdef __cplusplus PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); diff --git a/third_party/tcmalloc/chromium/src/heap-checker-bcad.cc b/third_party/tcmalloc/chromium/src/heap-checker-bcad.cc index 87d3d87..82a3109 100644 --- a/third_party/tcmalloc/chromium/src/heap-checker-bcad.cc +++ b/third_party/tcmalloc/chromium/src/heap-checker-bcad.cc @@ -51,7 +51,6 @@ // sure this file is not optimized out by the linker. 
bool heap_leak_checker_bcad_variable; -extern void HeapLeakChecker_BeforeConstructors(); // in heap-checker.cc extern void HeapLeakChecker_AfterDestructors(); // in heap-checker.cc // A helper class to ensure that some components of heap leak checking @@ -61,7 +60,12 @@ class HeapLeakCheckerGlobalPrePost { public: HeapLeakCheckerGlobalPrePost() { if (count_ == 0) { - HeapLeakChecker_BeforeConstructors(); + // The 'new int' will ensure that we have run an initial malloc + // hook, which will set up the heap checker via + // MallocHook_InitAtFirstAllocation_HeapLeakChecker. See malloc_hook.cc. + // This is done in this roundabout fashion in order to avoid self-deadlock + // if we directly called HeapLeakChecker_BeforeConstructors here. + delete new int; // This needs to be called before the first allocation of an STL // object, but after libc is done setting up threads (because it // calls setenv, which requires a thread-aware errno). By diff --git a/third_party/tcmalloc/chromium/src/heap-checker.cc b/third_party/tcmalloc/chromium/src/heap-checker.cc index a04be8f..5d39a47 100644 --- a/third_party/tcmalloc/chromium/src/heap-checker.cc +++ b/third_party/tcmalloc/chromium/src/heap-checker.cc @@ -106,6 +106,28 @@ using std::max; using std::less; using std::char_traits; +// If current process is being ptrace()d, 'TracerPid' in /proc/self/status +// will be non-zero. +static bool IsDebuggerAttached(void) { // only works under linux, probably + char buf[256]; // TracerPid comes relatively earlier in status output + int fd = open("/proc/self/status", O_RDONLY); + if (fd == -1) { + return false; // Can't tell for sure. 
+ } + const int len = read(fd, buf, sizeof(buf)); + bool rc = false; + if (len > 0) { + const char *const kTracerPid = "TracerPid:\t"; + buf[len - 1] = '\0'; + const char *p = strstr(buf, kTracerPid); + if (p != NULL) { + rc = (strncmp(p + strlen(kTracerPid), "0\n", 2) != 0); + } + } + close(fd); + return rc; +} + // This is the default if you don't link in -lprofiler extern "C" { ATTRIBUTE_WEAK PERFTOOLS_DLL_DECL bool ProfilingIsEnabledForAllThreads(); @@ -163,12 +185,6 @@ DEFINE_bool(heap_check_test_pointer_alignment, // use 1 if any alignment is ok. // heap_check_test_pointer_alignment flag guides if we try the value of 1. // The larger it can be, the lesser is the chance of missing real leaks. -// -// sizeof(void)* is correct. However gold (the new linker) has a bug where it -// sometimes places global pointers on 4-byte boundaries, even when pointers -// are 8 bytes long. While we are fixing the linker, degrade to 4-byte -// alignment on all targets. http://b/1226481 -// static const size_t kPointerSourceAlignment = sizeof(void*); DEFINE_int32(heap_check_pointer_source_alignment, EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT", @@ -304,6 +320,9 @@ class HeapLeakChecker::Allocator { if (p) alloc_count_ -= 1; LowLevelAlloc::Free(p); } + static void Free(void* p, size_t /* n */) { + Free(p); + } // destruct, free, and make *p to be NULL template<typename T> static void DeleteAndNull(T** p) { (*p)->~T(); @@ -795,7 +814,7 @@ void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, // pthread_setspecific (which can be the only pointer to a heap object). IsLibraryNamed(library, "/libdl") || // library loaders leak some "system" heap that we don't care about - IsLibraryNamed(library, "/libcrypto") + IsLibraryNamed(library, "/libcrypto") || // Sometimes libcrypto of OpenSSH is compiled with -fomit-frame-pointer // (any library can be, of course, but this one often is because speed // is so important for making crypto usable). 
We ignore all its @@ -803,6 +822,10 @@ void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, // to ignore allocations done in files/symbols that match // "default_malloc_ex|default_realloc_ex" // but that doesn't work when the end-result binary is stripped. + IsLibraryNamed(library, "/libjvm") || + // JVM has a lot of leaks we don't care about. + IsLibraryNamed(library, "/libzip") + // The JVM leaks java.util.zip.Inflater after loading classes. ) { depth = 1; // only disable allocation calls directly from the library code } else if (IsLibraryNamed(library, "/ld") @@ -859,6 +882,8 @@ HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked( int64 inode; char *permissions, *filename; bool saw_shared_lib = false; + bool saw_nonzero_inode = false; + bool saw_shared_lib_with_nonzero_inode = false; while (it.Next(&start_address, &end_address, &permissions, &file_offset, &inode, &filename)) { if (start_address >= end_address) { @@ -874,10 +899,25 @@ HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked( // do things in this loop. continue; } - // Determine if any shared libraries are present. - if (inode != 0 && strstr(filename, "lib") && strstr(filename, ".so")) { + // Determine if any shared libraries are present (this is the same + // list of extensions as is found in pprof). We want to ignore + // 'fake' libraries with inode 0 when determining. However, some + // systems don't share inodes via /proc, so we turn off this check + // if we don't see any evidence that we're getting inode info. + if (inode != 0) { + saw_nonzero_inode = true; + } + if ((strstr(filename, "lib") && strstr(filename, ".so")) || + strstr(filename, ".dll") || + // not all .dylib filenames start with lib. .dylib is big enough + // that we are unlikely to get false matches just checking that. 
+ strstr(filename, ".dylib") || strstr(filename, ".bundle")) { saw_shared_lib = true; + if (inode != 0) { + saw_shared_lib_with_nonzero_inode = true; + } } + switch (proc_maps_task) { case DISABLE_LIBRARY_ALLOCS: // All lines starting like @@ -895,6 +935,12 @@ HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked( RAW_CHECK(0, ""); } } + // If /proc/self/maps is reporting inodes properly (we saw a + // non-zero inode), then we only say we saw a shared lib if we saw a + // 'real' one, with a non-zero inode. + if (saw_nonzero_inode) { + saw_shared_lib = saw_shared_lib_with_nonzero_inode; + } if (!saw_shared_lib) { RAW_LOG(ERROR, "No shared libs detected. Will likely report false leak " "positives for statically linked executables."); @@ -962,7 +1008,8 @@ static enum { // specially via self_thread_stack, not here: if (thread_pids[i] == self_thread_pid) continue; RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]); -#if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER) +#if (defined(__i386__) || defined(__x86_64)) && \ + defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER) i386_regs thread_regs; #define sys_ptrace(r, p, a, d) syscall(SYS_ptrace, (r), (p), (a), (d)) // We use sys_ptrace to avoid thread locking @@ -1633,6 +1680,13 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { return true; } + // Update global_region_caller_ranges. They may need to change since + // e.g. initialization because shared libraries might have been loaded or + // unloaded. 
+ Allocator::DeleteAndNullIfNot(&global_region_caller_ranges); + ProcMapsResult pm_result = UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS); + RAW_CHECK(pm_result == PROC_MAPS_USED, ""); + // Keep track of number of internally allocated objects so we // can detect leaks in the heap-leak-checket itself const int initial_allocs = Allocator::alloc_count(); @@ -1645,18 +1699,6 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { MemoryRegionMap::LockHolder ml; int a_local_var; // Use our stack ptr to make stack data live: - // Sanity check that nobody is messing with the hooks we need: - // Important to have it here: else we can misteriously SIGSEGV - // in IgnoreLiveObjectsLocked inside ListAllProcessThreads's callback - // by looking into a region that got unmapped w/o our knowledge. - MemoryRegionMap::CheckMallocHooks(); - if (MallocHook::GetNewHook() != NewHook || - MallocHook::GetDeleteHook() != DeleteHook) { - RAW_LOG(FATAL, "Had our new/delete MallocHook-s replaced. " - "Are you using another MallocHook client? " - "Use --heap_check=\"\" to avoid this conflict."); - } - // Make the heap profile, other threads are locked out. HeapProfileTable::Snapshot* base = reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_); @@ -1833,9 +1875,6 @@ void HeapCleaner::RunHeapCleanups() { if (!FLAGS_heap_check_after_destructors) DoMainHeapCheck(); } -// defined below -static int GetCommandLineFrom(const char* file, char* cmdline, int size); - static bool internal_init_start_has_run = false; // Called exactly once, before main() (but hopefully just before). @@ -1859,29 +1898,20 @@ static bool internal_init_start_has_run = false; // turns out we do not need checking in the end; can stop profiling TurnItselfOffLocked(); return; + } else if (RunningOnValgrind()) { + // There is no point in trying -- we'll just fail. 
+ RAW_LOG(WARNING, "Can't run under Valgrind; will turn itself off"); + TurnItselfOffLocked(); + return; } } // Changing this to false can be useful when debugging heap-checker itself: - if (!FLAGS_heap_check_run_under_gdb) { - // See if heap checker should turn itself off because we are - // running under gdb (to avoid conflicts over ptrace-ing rights): - char name_buf[15+15]; - snprintf(name_buf, sizeof(name_buf), - "/proc/%d/cmdline", static_cast<int>(getppid())); - char cmdline[1024*8]; // /proc/*/cmdline is at most 4Kb anyway usually - int size = GetCommandLineFrom(name_buf, cmdline, sizeof(cmdline)-1); - cmdline[size] = '\0'; - // look for "gdb" in the executable's name: - const char* last = strrchr(cmdline, '/'); - if (last) last += 1; - else last = cmdline; - if (strncmp(last, "gdb", 3) == 0) { - RAW_LOG(WARNING, "We seem to be running under gdb; will turn itself off"); - SpinLockHolder l(&heap_checker_lock); - TurnItselfOffLocked(); - return; - } + if (!FLAGS_heap_check_run_under_gdb && IsDebuggerAttached()) { + RAW_LOG(WARNING, "Someone is ptrace()ing us; will turn itself off"); + SpinLockHolder l(&heap_checker_lock); + TurnItselfOffLocked(); + return; } { SpinLockHolder l(&heap_checker_lock); @@ -2084,98 +2114,15 @@ void HeapLeakChecker::CancelGlobalCheck() { } } -//---------------------------------------------------------------------- -// HeapLeakChecker global constructor/destructor ordering components -//---------------------------------------------------------------------- - -static bool in_initial_malloc_hook = false; - -#ifdef HAVE___ATTRIBUTE__ // we need __attribute__((weak)) for this to work -#define INSTALLED_INITIAL_MALLOC_HOOKS - -void HeapLeakChecker_BeforeConstructors(); // below - -// Helper for InitialMallocHook_* below -static inline void InitHeapLeakCheckerFromMallocHook() { - { SpinLockHolder l(&heap_checker_lock); - RAW_CHECK(!in_initial_malloc_hook, - "Something did not reset initial MallocHook-s"); - in_initial_malloc_hook = true; - 
} - // Initialize heap checker on the very first allocation/mmap/sbrk call: - HeapLeakChecker_BeforeConstructors(); - { SpinLockHolder l(&heap_checker_lock); - in_initial_malloc_hook = false; - } -} - -// These will owerwrite the weak definitions in malloc_hook.cc: - -// Important to have this to catch the first allocation call from the binary: -extern void InitialMallocHook_New(const void* ptr, size_t size) { - InitHeapLeakCheckerFromMallocHook(); - // record this first allocation as well (if we need to): - MallocHook::InvokeNewHook(ptr, size); -} - -// Important to have this to catch the first mmap call (say from tcmalloc): -extern void InitialMallocHook_MMap(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset) { - InitHeapLeakCheckerFromMallocHook(); - // record this first mmap as well (if we need to): - MallocHook::InvokeMmapHook( - result, start, size, protection, flags, fd, offset); -} - -// Important to have this to catch the first sbrk call (say from tcmalloc): -extern void InitialMallocHook_Sbrk(const void* result, std::ptrdiff_t increment) { - InitHeapLeakCheckerFromMallocHook(); - // record this first sbrk as well (if we need to): - MallocHook::InvokeSbrkHook(result, increment); -} - -// static -void CancelInitialMallocHooks() { - if (MallocHook::GetNewHook() == InitialMallocHook_New) { - MallocHook::SetNewHook(NULL); - } - RAW_DCHECK(MallocHook::GetNewHook() == NULL, ""); - if (MallocHook::GetMmapHook() == InitialMallocHook_MMap) { - MallocHook::SetMmapHook(NULL); - } - RAW_DCHECK(MallocHook::GetMmapHook() == NULL, ""); - if (MallocHook::GetSbrkHook() == InitialMallocHook_Sbrk) { - MallocHook::SetSbrkHook(NULL); - } - RAW_DCHECK(MallocHook::GetSbrkHook() == NULL, ""); -} - -#else - -// static -void CancelInitialMallocHooks() {} - -#endif - // static void HeapLeakChecker::BeforeConstructorsLocked() { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_CHECK(!constructor_heap_profiling, 
"BeforeConstructorsLocked called multiple times"); - CancelInitialMallocHooks(); // Set hooks early to crash if 'new' gets called before we make heap_profile, // and make sure no other hooks existed: - if (MallocHook::SetNewHook(NewHook) != NULL || - MallocHook::SetDeleteHook(DeleteHook) != NULL) { - RAW_LOG(FATAL, "Had other new/delete MallocHook-s set. " - "Somehow leak checker got activated " - "after something else have set up these hooks."); - } + RAW_CHECK(MallocHook::AddNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::AddDeleteHook(&DeleteHook), ""); constructor_heap_profiling = true; MemoryRegionMap::Init(1); // Set up MemoryRegionMap with (at least) one caller stack frame to record @@ -2198,12 +2145,9 @@ void HeapLeakChecker::TurnItselfOffLocked() { RAW_CHECK(heap_checker_on, ""); RAW_VLOG(heap_checker_info_level, "Turning perftools heap leak checking off"); heap_checker_on = false; - // Unset our hooks checking they were the ones set: - if (MallocHook::SetNewHook(NULL) != NewHook || - MallocHook::SetDeleteHook(NULL) != DeleteHook) { - RAW_LOG(FATAL, "Had our new/delete MallocHook-s replaced. " - "Are you using another MallocHook client?"); - } + // Unset our hooks checking they were set: + RAW_CHECK(MallocHook::RemoveNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::RemoveDeleteHook(&DeleteHook), ""); Allocator::DeleteAndNull(&heap_profile); // free our optional global data: Allocator::DeleteAndNullIfNot(&ignored_objects); @@ -2215,57 +2159,13 @@ void HeapLeakChecker::TurnItselfOffLocked() { RAW_CHECK(!heap_checker_on, ""); } -// Read in the command line from 'file' into 'cmdline' and return the size read -// 'size' is the space available in 'cmdline'. -// We need this because we don't yet have argv/argc. -// CAVEAT: 'file' (some /proc/*/cmdline) usually contains the command line -// already truncated (to 4K on Linux). -// Arguments in cmdline will be '\0'-terminated, -// the first one will be the binary's name. 
-static int GetCommandLineFrom(const char* file, char* cmdline, int size) { - // This routine is only used to check if we're running under gdb, so - // it's ok if this #if fails and the routine is a no-op. - // - // This function is called before memory allocation hooks are set up - // so we must not have any memory allocations in it. We use syscall - // versions of open/read/close here because we don't trust the non-syscall - // versions: they might 'accidentally' cause a memory allocation. - // Here's a real-life problem scenario we had: - // 1) A program LD_PRELOADed a library called list_file_used.a - // 2) list_file_used intercepted open/read/close and called dlsym() - // 3) dlsym() called pthread_setspecific() which called malloc(). - // This malloced memory is 'hidden' from the heap-checker. By - // definition, this thread-local data is live, and everything it points - // to is live (not a memory leak) as well. But because this memory - // was hidden from the heap-checker, everything it points to was - // taken to be orphaned, and therefore, a memory leak. -#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) - // Use a win32 call to get the command line. - const char* command_line = ::GetCommandLine(); - strncpy(cmdline, command_line, size); - cmdline[size - 1] = '\0'; - return strlen(cmdline); -#elif defined(HAVE_SYS_SYSCALL_H) - int fd = syscall(SYS_open, file, O_RDONLY); - int result = 0; - if (fd >= 0) { - ssize_t r; - while ((r = syscall(SYS_read, fd, cmdline + result, size)) > 0) { - result += r; - size -= r; - } - syscall(SYS_close, fd); - } - return result; -#else - return 0; -#endif -} - extern bool heap_leak_checker_bcad_variable; // in heap-checker-bcad.cc static bool has_called_before_constructors = false; +// TODO(maxim): inline this function with +// MallocHook_InitAtFirstAllocation_HeapLeakChecker, and also rename +// HeapLeakChecker::BeforeConstructorsLocked. 
void HeapLeakChecker_BeforeConstructors() { SpinLockHolder l(&heap_checker_lock); // We can be called from several places: the first mmap/sbrk/alloc call @@ -2304,11 +2204,19 @@ void HeapLeakChecker_BeforeConstructors() { #endif if (need_heap_check) { HeapLeakChecker::BeforeConstructorsLocked(); - } else { // cancel our initial hooks - CancelInitialMallocHooks(); } } +// This function overrides the weak function defined in malloc_hook.cc and +// called by one of the initial malloc hooks (malloc_hook.cc) when the very +// first memory allocation or an mmap/sbrk happens. This ensures that +// HeapLeakChecker is initialized and installs all its hooks early enough to +// track absolutely all memory allocations and all memory region acquisitions +// via mmap and sbrk. +extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() { + HeapLeakChecker_BeforeConstructors(); +} + // This function is executed after all global object destructors run. void HeapLeakChecker_AfterDestructors() { { SpinLockHolder l(&heap_checker_lock); diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.cc b/third_party/tcmalloc/chromium/src/heap-profile-table.cc index ecaf75f..6d75c4a 100644 --- a/third_party/tcmalloc/chromium/src/heap-profile-table.cc +++ b/third_party/tcmalloc/chromium/src/heap-profile-table.cc @@ -342,7 +342,8 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const { // any gaps. Whew! int map_length = snprintf(buf, size, "%s", kProcSelfMapsHeader); if (map_length < 0 || map_length >= size) return 0; - map_length += FillProcSelfMaps(buf + map_length, size - map_length); + bool dummy; // "wrote_all" -- did /proc/self/maps fit in its entirety? 
+ map_length += FillProcSelfMaps(buf + map_length, size - map_length, &dummy); RAW_DCHECK(map_length <= size, ""); char* const map_start = buf + size - map_length; // move to end memmove(map_start, buf, map_length); diff --git a/third_party/tcmalloc/chromium/src/heap-profiler.cc b/third_party/tcmalloc/chromium/src/heap-profiler.cc index 4eb46e2..cb34f01 100644 --- a/third_party/tcmalloc/chromium/src/heap-profiler.cc +++ b/third_party/tcmalloc/chromium/src/heap-profiler.cc @@ -33,7 +33,7 @@ // TODO: Log large allocations #include <config.h> - +#include <stddef.h> #include <stdio.h> #include <stdlib.h> #ifdef HAVE_UNISTD_H @@ -182,7 +182,6 @@ enum AddOrRemove { ADD, REMOVE }; static void AddRemoveMMapDataLocked(AddOrRemove mode) { RAW_DCHECK(heap_lock.IsHeld(), ""); if (!FLAGS_mmap_profile || !is_on) return; - if (!FLAGS_mmap_log) MemoryRegionMap::CheckMallocHooks(); // MemoryRegionMap maintained all the data we need for all // mmap-like allocations, so we just use it here: MemoryRegionMap::LockHolder l; @@ -210,6 +209,7 @@ static char* DoGetHeapProfileLocked(char* buf, int buflen) { int bytes_written = 0; if (is_on) { HeapProfileTable::Stats const stats = heap_profile->total(); + (void)stats; // avoid an unused-variable warning in non-debug mode. AddRemoveMMapDataLocked(ADD); bytes_written = heap_profile->FillOrderedProfile(buf, buflen - 1); // FillOrderedProfile should not reduce the set of active mmap-ed regions, @@ -244,15 +244,6 @@ static void DumpProfileLocked(const char* reason) { if (filename_prefix == NULL) return; // we do not yet need dumping - if (FLAGS_only_mmap_profile == false) { - if (MallocHook::GetNewHook() != NewHook || - MallocHook::GetDeleteHook() != DeleteHook) { - RAW_LOG(FATAL, "Had our new/delete MallocHook-s replaced. " - "Are you using another MallocHook client? " - "Do not use --heap_profile=... 
to avoid this conflict."); - } - } - dumping = true; // Make file name @@ -371,12 +362,6 @@ static void RawInfoStackDumper(const char* message, void*) { } #endif -// Saved MemoryRegionMap's hooks to daisy-chain calls to. -MallocHook::MmapHook saved_mmap_hook = NULL; -MallocHook::MremapHook saved_mremap_hook = NULL; -MallocHook::MunmapHook saved_munmap_hook = NULL; -MallocHook::SbrkHook saved_sbrk_hook = NULL; - static void MmapHook(const void* result, const void* start, size_t size, int prot, int flags, int fd, off_t offset) { if (FLAGS_mmap_log) { // log it @@ -392,11 +377,6 @@ static void MmapHook(const void* result, const void* start, size_t size, DumpStackTrace(1, RawInfoStackDumper, NULL); #endif } - if (saved_mmap_hook) { - // Call MemoryRegionMap's hook: it will record needed info about the mmap - // for us w/o deadlocks: - (*saved_mmap_hook)(result, start, size, prot, flags, fd, offset); - } } static void MremapHook(const void* result, const void* old_addr, @@ -416,9 +396,6 @@ static void MremapHook(const void* result, const void* old_addr, DumpStackTrace(1, RawInfoStackDumper, NULL); #endif } - if (saved_mremap_hook) { // call MemoryRegionMap's hook - (*saved_mremap_hook)(result, old_addr, old_size, new_size, flags, new_addr); - } } static void MunmapHook(const void* ptr, size_t size) { @@ -432,9 +409,6 @@ static void MunmapHook(const void* ptr, size_t size) { DumpStackTrace(1, RawInfoStackDumper, NULL); #endif } - if (saved_munmap_hook) { // call MemoryRegionMap's hook - (*saved_munmap_hook)(ptr, size); - } } static void SbrkHook(const void* result, std::ptrdiff_t increment) { @@ -445,9 +419,6 @@ static void SbrkHook(const void* result, std::ptrdiff_t increment) { DumpStackTrace(1, RawInfoStackDumper, NULL); #endif } - if (saved_sbrk_hook) { // call MemoryRegionMap's hook - (*saved_sbrk_hook)(result, increment); - } } //---------------------------------------------------------------------- @@ -478,12 +449,11 @@ extern "C" void HeapProfilerStart(const 
char* prefix) { } if (FLAGS_mmap_log) { - // Install our hooks to do the logging - // and maybe save MemoryRegionMap's hooks to call: - saved_mmap_hook = MallocHook::SetMmapHook(MmapHook); - saved_mremap_hook = MallocHook::SetMremapHook(MremapHook); - saved_munmap_hook = MallocHook::SetMunmapHook(MunmapHook); - saved_sbrk_hook = MallocHook::SetSbrkHook(SbrkHook); + // Install our hooks to do the logging: + RAW_CHECK(MallocHook::AddMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::AddMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::AddMunmapHook(&MunmapHook), ""); + RAW_CHECK(MallocHook::AddSbrkHook(&SbrkHook), ""); } heap_profiler_memory = @@ -506,14 +476,9 @@ extern "C" void HeapProfilerStart(const char* prefix) { // sequence of profiles. if (FLAGS_only_mmap_profile == false) { - // Now set the hooks that capture new/delete and malloc/free - // and check that these are the only hooks: - if (MallocHook::SetNewHook(NewHook) != NULL || - MallocHook::SetDeleteHook(DeleteHook) != NULL) { - RAW_LOG(FATAL, "Had other new/delete MallocHook-s set. " - "Are you using the heap leak checker? " - "Use --heap_check=\"\" to avoid this conflict."); - } + // Now set the hooks that capture new/delete and malloc/free. + RAW_CHECK(MallocHook::AddNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::AddDeleteHook(&DeleteHook), ""); } // Copy filename prefix @@ -535,24 +500,16 @@ extern "C" void HeapProfilerStop() { if (!is_on) return; if (FLAGS_only_mmap_profile == false) { - // Unset our new/delete hooks, checking they were the ones set: - if (MallocHook::SetNewHook(NULL) != NewHook || - MallocHook::SetDeleteHook(NULL) != DeleteHook) { - RAW_LOG(FATAL, "Had our new/delete MallocHook-s replaced. " - "Are you using another MallocHook client? " - "Do not use --heap_profile=... 
to avoid this conflict."); - } + // Unset our new/delete hooks, checking they were set: + RAW_CHECK(MallocHook::RemoveNewHook(&NewHook), ""); + RAW_CHECK(MallocHook::RemoveDeleteHook(&DeleteHook), ""); } if (FLAGS_mmap_log) { - // Restore mmap/sbrk hooks, checking that our hooks were the ones set: - if (MallocHook::SetMmapHook(saved_mmap_hook) != MmapHook || - MallocHook::SetMremapHook(saved_mremap_hook) != MremapHook || - MallocHook::SetMunmapHook(saved_munmap_hook) != MunmapHook || - MallocHook::SetSbrkHook(saved_sbrk_hook) != SbrkHook) { - RAW_LOG(FATAL, "Had our mmap/mremap/munmap/sbrk MallocHook-s replaced. " - "Are you using another MallocHook client? " - "Do not use --heap_profile=... to avoid this conflict."); - } + // Restore mmap/sbrk hooks, checking that our hooks were set: + RAW_CHECK(MallocHook::RemoveMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::RemoveMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::RemoveSbrkHook(&SbrkHook), ""); + RAW_CHECK(MallocHook::RemoveMunmapHook(&MunmapHook), ""); } // free profile diff --git a/third_party/tcmalloc/chromium/src/internal_logging.cc b/third_party/tcmalloc/chromium/src/internal_logging.cc index ea8e56f1..4c90190 100644 --- a/third_party/tcmalloc/chromium/src/internal_logging.cc +++ b/third_party/tcmalloc/chromium/src/internal_logging.cc @@ -31,14 +31,18 @@ // Sanjay Ghemawat <opensource@google.com> #include <config.h> -#include <stdio.h> -#include <stdarg.h> +#include "internal_logging.h" +#include <stdarg.h> // for va_end, va_start +#include <stdio.h> // for vsnprintf, va_list, etc +#include <stdlib.h> // for abort +#include <string.h> // for strlen, memcpy #ifdef HAVE_UNISTD_H #include <unistd.h> // for write() #endif -#include <string.h> + #include <google/malloc_extension.h> -#include "internal_logging.h" +#include "base/logging.h" // for perftools_vsnprintf +#include "base/spinlock.h" // for SpinLockHolder, SpinLock static const int kLogBufSize = 800; @@ -50,7 +54,7 @@ void TCMalloc_MESSAGE(const 
char* filename, if (n < kLogBufSize) { va_list ap; va_start(ap, format); - vsnprintf(buf + n, kLogBufSize - n, format, ap); + perftools_vsnprintf(buf + n, kLogBufSize - n, format, ap); va_end(ap); } write(STDERR_FILENO, buf, strlen(buf)); @@ -66,7 +70,7 @@ static void TCMalloc_CRASH_internal(bool dump_stats, char buf[kLogBufSize]; const int n = snprintf(buf, sizeof(buf), "%s:%d] ", filename, line_number); if (n < kLogBufSize) { - vsnprintf(buf + n, kLogBufSize - n, format, ap); + perftools_vsnprintf(buf + n, kLogBufSize - n, format, ap); } write(STDERR_FILENO, buf, strlen(buf)); if (dump_stats) { @@ -99,7 +103,7 @@ void TCMalloc_Printer::printf(const char* format, ...) { if (left_ > 0) { va_list ap; va_start(ap, format); - const int r = vsnprintf(buf_, left_, format, ap); + const int r = perftools_vsnprintf(buf_, left_, format, ap); va_end(ap); if (r < 0) { // Perhaps an old glibc that returns -1 on truncation? diff --git a/third_party/tcmalloc/chromium/src/internal_logging.h b/third_party/tcmalloc/chromium/src/internal_logging.h index 0cb9ba2..ce4a516 100644 --- a/third_party/tcmalloc/chromium/src/internal_logging.h +++ b/third_party/tcmalloc/chromium/src/internal_logging.h @@ -36,10 +36,7 @@ #define TCMALLOC_INTERNAL_LOGGING_H_ #include <config.h> -#include <stdlib.h> // for abort() -#ifdef HAVE_UNISTD_H -#include <unistd.h> // for write() -#endif +#include <stddef.h> // for size_t //------------------------------------------------------------------- // Utility routines diff --git a/third_party/tcmalloc/chromium/src/linked_list.h b/third_party/tcmalloc/chromium/src/linked_list.h index 638174b..4b0af1b 100644 --- a/third_party/tcmalloc/chromium/src/linked_list.h +++ b/third_party/tcmalloc/chromium/src/linked_list.h @@ -36,6 +36,8 @@ #ifndef TCMALLOC_LINKED_LIST_H_ #define TCMALLOC_LINKED_LIST_H_ +#include <stddef.h> + namespace tcmalloc { inline void *SLL_Next(void *t) { diff --git a/third_party/tcmalloc/chromium/src/malloc_extension.cc 
b/third_party/tcmalloc/chromium/src/malloc_extension.cc index c2f8b54..e9a0da7 100644 --- a/third_party/tcmalloc/chromium/src/malloc_extension.cc +++ b/third_party/tcmalloc/chromium/src/malloc_extension.cc @@ -32,7 +32,6 @@ #include <config.h> #include <assert.h> -#include <stdio.h> #include <string.h> #include <stdio.h> #if defined HAVE_STDINT_H @@ -52,6 +51,7 @@ #include "maybe_threads.h" using STL_NAMESPACE::string; +using STL_NAMESPACE::vector; static void DumpAddressMap(string* result) { *result += "\nMAPPED_LIBRARIES:\n"; @@ -59,9 +59,11 @@ static void DumpAddressMap(string* result) { const size_t old_resultlen = result->size(); for (int amap_size = 10240; amap_size < 10000000; amap_size *= 2) { result->resize(old_resultlen + amap_size); + bool wrote_all = false; const int bytes_written = - tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size); - if (bytes_written < amap_size - 1) { // we fit! + tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size, + &wrote_all); + if (wrote_all) { // we fit! 
(*result)[old_resultlen + bytes_written] = '\0'; result->resize(old_resultlen + bytes_written); return; @@ -99,6 +101,9 @@ void MallocExtension::Initialize() { #endif /* __GLIBC__ */ } +// SysAllocator implementation +SysAllocator::~SysAllocator() {} + // Default implementation -- does nothing MallocExtension::~MallocExtension() { } bool MallocExtension::VerifyAllMemory() { return true; } @@ -143,6 +148,14 @@ void MallocExtension::MarkThreadBusy() { // Default implementation does nothing } +SysAllocator* MallocExtension::GetSystemAllocator() { + return NULL; +} + +void MallocExtension::SetSystemAllocator(SysAllocator *a) { + // Default implementation does nothing +} + void MallocExtension::ReleaseToSystem(size_t num_bytes) { // Default implementation does nothing } @@ -167,6 +180,11 @@ size_t MallocExtension::GetAllocatedSize(void* p) { return 0; } +void MallocExtension::GetFreeListSizes( + vector<MallocExtension::FreeListInfo>* v) { + v->clear(); +} + // The current malloc extension object. static pthread_once_t module_init = PTHREAD_ONCE_INIT; diff --git a/third_party/tcmalloc/chromium/src/malloc_hook-inl.h b/third_party/tcmalloc/chromium/src/malloc_hook-inl.h index daadf7f..e7bfd61 100644 --- a/third_party/tcmalloc/chromium/src/malloc_hook-inl.h +++ b/third_party/tcmalloc/chromium/src/malloc_hook-inl.h @@ -45,6 +45,7 @@ namespace base { namespace internal { +// The following (implementation) code is DEPRECATED. // A simple atomic pointer class that can be initialized by the linker // when you define a namespace-scope variable as: // @@ -70,8 +71,17 @@ class AtomicPtr { // Sets the contained value to new_val and returns the old value, // atomically, with acquire and release semantics. + // This is a full-barrier instruction. PtrT Exchange(PtrT new_val); + // Atomically executes: + // result = data_ + // if (data_ == old_val) + // data_ = new_val; + // return result; + // This is a full-barrier instruction. 
+ PtrT CompareAndSwap(PtrT old_val, PtrT new_val); + // Not private so that the class is an aggregate and can be // initialized by the linker. Don't access this directly. AtomicWord data_; @@ -86,27 +96,92 @@ extern AtomicPtr<MallocHook::MunmapHook> munmap_hook_; extern AtomicPtr<MallocHook::MremapHook> mremap_hook_; extern AtomicPtr<MallocHook::PreSbrkHook> presbrk_hook_; extern AtomicPtr<MallocHook::SbrkHook> sbrk_hook_; +// End DEPRECATED code. + +// Maximum of 7 hooks means that HookList is 8 words. +static const int kHookListMaxValues = 7; + +// HookList: a class that provides synchronized insertions and removals and +// lockless traversal. Most of the implementation is in malloc_hook.cc. +template <typename T> +struct HookList { + COMPILE_ASSERT(sizeof(T) <= sizeof(AtomicWord), T_should_fit_in_AtomicWord); + + // Adds value to the list. Note that duplicates are allowed. Thread-safe and + // blocking (acquires hooklist_spinlock). Returns true on success; false + // otherwise (failures include invalid value and no space left). + bool Add(T value); + + // Removes the first entry matching value from the list. Thread-safe and + // blocking (acquires hooklist_spinlock). Returns true on success; false + // otherwise (failures include invalid value and no value found). + bool Remove(T value); + + // Store up to n values of the list in output_array, and return the number of + // elements stored. Thread-safe and non-blocking. This is fast (one memory + // access) if the list is empty. + int Traverse(T* output_array, int n) const; + + // Fast inline implementation for fast path of Invoke*Hook. + bool empty() const { + return base::subtle::Acquire_Load(&priv_end) == 0; + } + + // This internal data is not private so that the class is an aggregate and can + // be initialized by the linker. Don't access this directly. Use the + // INIT_HOOK_LIST macro in malloc_hook.cc. + + // One more than the index of the last valid element in priv_data. 
During + // 'Remove' this may be past the last valid element in priv_data, but + // subsequent values will be 0. + AtomicWord priv_end; + AtomicWord priv_data[kHookListMaxValues]; +}; + +extern HookList<MallocHook::NewHook> new_hooks_; +extern HookList<MallocHook::DeleteHook> delete_hooks_; +extern HookList<MallocHook::PreMmapHook> premmap_hooks_; +extern HookList<MallocHook::MmapHook> mmap_hooks_; +extern HookList<MallocHook::MmapReplacement> mmap_replacement_; +extern HookList<MallocHook::MunmapHook> munmap_hooks_; +extern HookList<MallocHook::MunmapReplacement> munmap_replacement_; +extern HookList<MallocHook::MremapHook> mremap_hooks_; +extern HookList<MallocHook::PreSbrkHook> presbrk_hooks_; +extern HookList<MallocHook::SbrkHook> sbrk_hooks_; } } // namespace base::internal +// The following method is DEPRECATED inline MallocHook::NewHook MallocHook::GetNewHook() { return base::internal::new_hook_.Get(); } inline void MallocHook::InvokeNewHook(const void* p, size_t s) { + if (!base::internal::new_hooks_.empty()) { + InvokeNewHookSlow(p, s); + } + // The following code is DEPRECATED. MallocHook::NewHook hook = MallocHook::GetNewHook(); if (hook != NULL) (*hook)(p, s); + // End DEPRECATED code. } +// The following method is DEPRECATED inline MallocHook::DeleteHook MallocHook::GetDeleteHook() { return base::internal::delete_hook_.Get(); } inline void MallocHook::InvokeDeleteHook(const void* p) { + if (!base::internal::delete_hooks_.empty()) { + InvokeDeleteHookSlow(p); + } + // The following code is DEPRECATED. MallocHook::DeleteHook hook = MallocHook::GetDeleteHook(); if (hook != NULL) (*hook)(p); + // End DEPRECATED code. 
} +// The following method is DEPRECATED inline MallocHook::PreMmapHook MallocHook::GetPreMmapHook() { return base::internal::premmap_hook_.Get(); } @@ -117,12 +192,18 @@ inline void MallocHook::InvokePreMmapHook(const void* start, int flags, int fd, off_t offset) { + if (!base::internal::premmap_hooks_.empty()) { + InvokePreMmapHookSlow(start, size, protection, flags, fd, offset); + } + // The following code is DEPRECATED. MallocHook::PreMmapHook hook = MallocHook::GetPreMmapHook(); if (hook != NULL) (*hook)(start, size, protection, flags, fd, offset); + // End DEPRECATED code. } +// The following method is DEPRECATED inline MallocHook::MmapHook MallocHook::GetMmapHook() { return base::internal::mmap_hook_.Get(); } @@ -134,22 +215,58 @@ inline void MallocHook::InvokeMmapHook(const void* result, int flags, int fd, off_t offset) { + if (!base::internal::mmap_hooks_.empty()) { + InvokeMmapHookSlow(result, start, size, protection, flags, fd, offset); + } + // The following code is DEPRECATED. MallocHook::MmapHook hook = MallocHook::GetMmapHook(); if (hook != NULL) (*hook)(result, start, size, protection, flags, fd, offset); + // End DEPRECATED code. } +inline bool MallocHook::InvokeMmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + if (!base::internal::mmap_replacement_.empty()) { + return InvokeMmapReplacementSlow(start, size, + protection, flags, + fd, offset, + result); + } + return false; +} + +// The following method is DEPRECATED inline MallocHook::MunmapHook MallocHook::GetMunmapHook() { return base::internal::munmap_hook_.Get(); } inline void MallocHook::InvokeMunmapHook(const void* p, size_t size) { + if (!base::internal::munmap_hooks_.empty()) { + InvokeMunmapHookSlow(p, size); + } + // The following code is DEPRECATED. MallocHook::MunmapHook hook = MallocHook::GetMunmapHook(); if (hook != NULL) (*hook)(p, size); + // End DEPRECATED code. 
} +inline bool MallocHook::InvokeMunmapReplacement( + const void* p, size_t size, int* result) { + if (!base::internal::mmap_replacement_.empty()) { + return InvokeMunmapReplacementSlow(p, size, result); + } + return false; +} + +// The following method is DEPRECATED inline MallocHook::MremapHook MallocHook::GetMremapHook() { return base::internal::mremap_hook_.Get(); } @@ -160,29 +277,46 @@ inline void MallocHook::InvokeMremapHook(const void* result, size_t new_size, int flags, const void* new_addr) { + if (!base::internal::mremap_hooks_.empty()) { + InvokeMremapHookSlow(result, old_addr, old_size, new_size, flags, new_addr); + } + // The following code is DEPRECATED. MallocHook::MremapHook hook = MallocHook::GetMremapHook(); if (hook != NULL) (*hook)(result, old_addr, old_size, new_size, flags, new_addr); + // End DEPRECATED code. } +// The following method is DEPRECATED inline MallocHook::PreSbrkHook MallocHook::GetPreSbrkHook() { return base::internal::presbrk_hook_.Get(); } inline void MallocHook::InvokePreSbrkHook(std::ptrdiff_t increment) { + if (!base::internal::presbrk_hooks_.empty() && increment != 0) { + InvokePreSbrkHookSlow(increment); + } + // The following code is DEPRECATED. MallocHook::PreSbrkHook hook = MallocHook::GetPreSbrkHook(); if (hook != NULL && increment != 0) (*hook)(increment); + // End DEPRECATED code. } +// The following method is DEPRECATED inline MallocHook::SbrkHook MallocHook::GetSbrkHook() { return base::internal::sbrk_hook_.Get(); } inline void MallocHook::InvokeSbrkHook(const void* result, std::ptrdiff_t increment) { + if (!base::internal::sbrk_hooks_.empty() && increment != 0) { + InvokeSbrkHookSlow(result, increment); + } + // The following code is DEPRECATED. MallocHook::SbrkHook hook = MallocHook::GetSbrkHook(); if (hook != NULL && increment != 0) (*hook)(result, increment); + // End DEPRECATED code. 
} #endif /* _MALLOC_HOOK_INL_H_ */ diff --git a/third_party/tcmalloc/chromium/src/malloc_hook.cc b/third_party/tcmalloc/chromium/src/malloc_hook.cc index dfb34ce..f6af7d8 100644 --- a/third_party/tcmalloc/chromium/src/malloc_hook.cc +++ b/third_party/tcmalloc/chromium/src/malloc_hook.cc @@ -41,9 +41,15 @@ # undef mremap #endif +#include <stddef.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif #include <algorithm> #include "base/basictypes.h" #include "base/logging.h" +#include "base/spinlock.h" +#include "maybe_threads.h" #include "malloc_hook-inl.h" #include <google/malloc_hook.h> @@ -66,48 +72,93 @@ using std::copy; -// Declarations of three default weak hook functions, that can be overridden by -// linking-in a strong definition (as heap-checker.cc does) -// -// These default hooks let some other library we link in -// to define strong versions of InitialMallocHook_New, InitialMallocHook_MMap, -// InitialMallocHook_PreMMap, InitialMallocHook_PreSbrk, and -// InitialMallocHook_Sbrk to have a chance to hook into the very first -// invocation of an allocation function call, mmap, or sbrk. +// Declaration of default weak initialization function, that can be overridden +// by linking-in a strong definition (as heap-checker.cc does). This is +// extern "C" so that it doesn't trigger gold's --detect-odr-violations warning, +// which only looks at C++ symbols. // -// These functions are declared here as weak, and defined later, rather than a -// more straightforward simple weak definition, as a workround for an icc -// compiler issue ((Intel reference 290819). This issue causes icc to resolve -// weak symbols too early, at compile rather than link time. By declaring it -// (weak) here, then defining it below after its use, we can avoid the problem. +// This function is declared here as weak, and defined later, rather than a more +// straightforward simple weak definition, as a workround for an icc compiler +// issue ((Intel reference 290819). 
This issue causes icc to resolve weak +// symbols too early, at compile rather than link time. By declaring it (weak) +// here, then defining it below after its use, we can avoid the problem. +extern "C" { +ATTRIBUTE_WEAK void MallocHook_InitAtFirstAllocation_HeapLeakChecker(); +} + +namespace { + +void RemoveInitialHooksAndCallInitializers(); // below. + +pthread_once_t once = PTHREAD_ONCE_INIT; + +// These hooks are installed in MallocHook as the only initial hooks. The first +// hook that is called will run RemoveInitialHooksAndCallInitializers (see the +// definition below) and then redispatch to any malloc hooks installed by +// RemoveInitialHooksAndCallInitializers. // -ATTRIBUTE_WEAK -extern void InitialMallocHook_New(const void* ptr, size_t size); +// Note(llib): there is a possibility of a race in the event that there are +// multiple threads running before the first allocation. This is pretty +// difficult to achieve, but if it is then multiple threads may concurrently do +// allocations. The first caller will call +// RemoveInitialHooksAndCallInitializers via one of the initial hooks. A +// concurrent allocation may, depending on timing either: +// * still have its initial malloc hook installed, run that and block on waiting +// for the first caller to finish its call to +// RemoveInitialHooksAndCallInitializers, and proceed normally. +// * occur some time during the RemoveInitialHooksAndCallInitializers call, at +// which point there could be no initial hooks and the subsequent hooks that +// are about to be set up by RemoveInitialHooksAndCallInitializers haven't +// been installed yet. I think the worst we can get is that some allocations +// will not get reported to some hooks set by the initializers called from +// RemoveInitialHooksAndCallInitializers. 
+ +void InitialNewHook(const void* ptr, size_t size) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokeNewHook(ptr, size); +} -ATTRIBUTE_WEAK -extern void InitialMallocHook_PreMMap(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); +void InitialPreMMapHook(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokePreMmapHook(start, size, protection, flags, fd, offset); +} -ATTRIBUTE_WEAK -extern void InitialMallocHook_MMap(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset); +void InitialPreSbrkHook(std::ptrdiff_t increment) { + perftools_pthread_once(&once, &RemoveInitialHooksAndCallInitializers); + MallocHook::InvokePreSbrkHook(increment); +} -ATTRIBUTE_WEAK -extern void InitialMallocHook_PreSbrk(std::ptrdiff_t increment); +// This function is called at most once by one of the above initial malloc +// hooks. It removes all initial hooks and initializes all other clients that +// want to get control at the very first memory allocation. The initializers +// may assume that the initial malloc hooks have been removed. The initializers +// may set up malloc hooks and allocate memory. +void RemoveInitialHooksAndCallInitializers() { + RAW_CHECK(MallocHook::RemoveNewHook(&InitialNewHook), ""); + RAW_CHECK(MallocHook::RemovePreMmapHook(&InitialPreMMapHook), ""); + RAW_CHECK(MallocHook::RemovePreSbrkHook(&InitialPreSbrkHook), ""); + + // HeapLeakChecker is currently the only module that needs to get control on + // the first memory allocation, but one can add other modules by following the + // same weak/strong function pattern. 
+ MallocHook_InitAtFirstAllocation_HeapLeakChecker(); +} + +} // namespace -ATTRIBUTE_WEAK -extern void InitialMallocHook_Sbrk(const void* result, std::ptrdiff_t increment); +// Weak default initialization function that must go after its use. +extern "C" void MallocHook_InitAtFirstAllocation_HeapLeakChecker() { + // Do nothing. +} namespace base { namespace internal { + +// The code below is DEPRECATED. template<typename PtrT> PtrT AtomicPtr<PtrT>::Exchange(PtrT new_val) { base::subtle::MemoryBarrier(); // Release semantics. @@ -123,22 +174,139 @@ PtrT AtomicPtr<PtrT>::Exchange(PtrT new_val) { return old_val; } -AtomicPtr<MallocHook::NewHook> new_hook_ = { - reinterpret_cast<AtomicWord>(InitialMallocHook_New) }; +template<typename PtrT> +PtrT AtomicPtr<PtrT>::CompareAndSwap(PtrT old_val, PtrT new_val) { + base::subtle::MemoryBarrier(); // Release semantics. + PtrT retval = reinterpret_cast<PtrT>(static_cast<AtomicWord>( + base::subtle::NoBarrier_CompareAndSwap( + &data_, + reinterpret_cast<AtomicWord>(old_val), + reinterpret_cast<AtomicWord>(new_val)))); + base::subtle::MemoryBarrier(); // And acquire semantics. 
+ return retval; +} + +AtomicPtr<MallocHook::NewHook> new_hook_ = { 0 }; AtomicPtr<MallocHook::DeleteHook> delete_hook_ = { 0 }; -AtomicPtr<MallocHook::PreMmapHook> premmap_hook_ = { - reinterpret_cast<AtomicWord>(InitialMallocHook_PreMMap) }; -AtomicPtr<MallocHook::MmapHook> mmap_hook_ = { - reinterpret_cast<AtomicWord>(InitialMallocHook_MMap) }; +AtomicPtr<MallocHook::PreMmapHook> premmap_hook_ = { 0 }; +AtomicPtr<MallocHook::MmapHook> mmap_hook_ = { 0 }; AtomicPtr<MallocHook::MunmapHook> munmap_hook_ = { 0 }; AtomicPtr<MallocHook::MremapHook> mremap_hook_ = { 0 }; -AtomicPtr<MallocHook::PreSbrkHook> presbrk_hook_ = { - reinterpret_cast<AtomicWord>(InitialMallocHook_PreSbrk) }; -AtomicPtr<MallocHook::SbrkHook> sbrk_hook_ = { - reinterpret_cast<AtomicWord>(InitialMallocHook_Sbrk) }; +AtomicPtr<MallocHook::PreSbrkHook> presbrk_hook_ = { 0 }; +AtomicPtr<MallocHook::SbrkHook> sbrk_hook_ = { 0 }; +// End of DEPRECATED code section. + +// This lock is shared between all implementations of HookList::Add & Remove. +// The potential for contention is very small. This needs to be a SpinLock and +// not a Mutex since it's possible for Mutex locking to allocate memory (e.g., +// per-thread allocation in debug builds), which could cause infinite recursion. +static SpinLock hooklist_spinlock(base::LINKER_INITIALIZED); + +template <typename T> +bool HookList<T>::Add(T value_as_t) { + // Note: we need to check this _before_ reinterpret_cast, since + // reinterpret_cast may include random junk from memory. + if (value_as_t == 0) { + return false; + } + AtomicWord value = reinterpret_cast<const AtomicWord&>(value_as_t); + if (value == 0) { + // This should not actually happen, but just to be sure... + return false; + } + SpinLockHolder l(&hooklist_spinlock); + // Find the first slot in data that is 0. 
+ int index = 0; + while ((index < kHookListMaxValues) && + (base::subtle::NoBarrier_Load(&priv_data[index]) != 0)) { + ++index; + } + if (index == kHookListMaxValues) { + return false; + } + AtomicWord prev_num_hooks = base::subtle::Acquire_Load(&priv_end); + base::subtle::Release_Store(&priv_data[index], value); + if (prev_num_hooks <= index) { + base::subtle::Release_Store(&priv_end, index + 1); + } + return true; +} + +template <typename T> +bool HookList<T>::Remove(T value_as_t) { + if (value_as_t == 0) { + return false; + } + SpinLockHolder l(&hooklist_spinlock); + AtomicWord hooks_end = base::subtle::Acquire_Load(&priv_end); + int index = 0; + // Note: we need to cast back to T since T may be smaller than AtomicWord. + while (index < hooks_end && value_as_t != reinterpret_cast<T>( + base::subtle::Acquire_Load(&priv_data[index]))) { + ++index; + } + if (index == hooks_end) { + return false; + } + base::subtle::Release_Store(&priv_data[index], 0); + if (hooks_end == index + 1) { + // Adjust hooks_end down to the lowest possible value. + hooks_end = index; + while ((hooks_end > 0) && + (base::subtle::Acquire_Load(&priv_data[hooks_end - 1]) == 0)) { + --hooks_end; + } + base::subtle::Release_Store(&priv_end, hooks_end); + } + return true; +} + +template <typename T> +int HookList<T>::Traverse(T* output_array, int n) const { + AtomicWord hooks_end = base::subtle::Acquire_Load(&priv_end); + int actual_hooks_end = 0; + for (int i = 0; i < hooks_end && n > 0; ++i) { + AtomicWord data = base::subtle::Acquire_Load(&priv_data[i]); + if (data != 0) { + *output_array++ = reinterpret_cast<const T&>(data); + ++actual_hooks_end; + --n; + } + } + return actual_hooks_end; +} + +// Initialize a HookList (optionally with the given initial_value in index 0). +#define INIT_HOOK_LIST { 0 } +#define INIT_HOOK_LIST_WITH_VALUE(initial_value) \ + { 1, { reinterpret_cast<AtomicWord>(initial_value) } } + +// Explicit instantiation for malloc_hook_test.cc. 
This ensures all the methods +// are instantiated. +template class HookList<MallocHook::NewHook>; + +HookList<MallocHook::NewHook> new_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(&InitialNewHook); +HookList<MallocHook::DeleteHook> delete_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::PreMmapHook> premmap_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(&InitialPreMMapHook); +HookList<MallocHook::MmapHook> mmap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::MunmapHook> munmap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::MremapHook> mremap_hooks_ = INIT_HOOK_LIST; +HookList<MallocHook::PreSbrkHook> presbrk_hooks_ = + INIT_HOOK_LIST_WITH_VALUE(InitialPreSbrkHook); +HookList<MallocHook::SbrkHook> sbrk_hooks_ = INIT_HOOK_LIST; + +// These lists contain either 0 or 1 hooks. +HookList<MallocHook::MmapReplacement> mmap_replacement_ = { 0 }; +HookList<MallocHook::MunmapReplacement> munmap_replacement_ = { 0 }; + +#undef INIT_HOOK_LIST_WITH_VALUE +#undef INIT_HOOK_LIST } } // namespace base::internal +// The code below is DEPRECATED. using base::internal::new_hook_; using base::internal::delete_hook_; using base::internal::premmap_hook_; @@ -147,109 +315,290 @@ using base::internal::munmap_hook_; using base::internal::mremap_hook_; using base::internal::presbrk_hook_; using base::internal::sbrk_hook_; - +// End of DEPRECATED code section. + +using base::internal::kHookListMaxValues; +using base::internal::new_hooks_; +using base::internal::delete_hooks_; +using base::internal::premmap_hooks_; +using base::internal::mmap_hooks_; +using base::internal::mmap_replacement_; +using base::internal::munmap_hooks_; +using base::internal::munmap_replacement_; +using base::internal::mremap_hooks_; +using base::internal::presbrk_hooks_; +using base::internal::sbrk_hooks_; // These are available as C bindings as well as C++, hence their // definition outside the MallocHook class. 
extern "C" +int MallocHook_AddNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "AddNewHook(%p)", hook); + return new_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "RemoveNewHook(%p)", hook); + return new_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "AddDeleteHook(%p)", hook); + return delete_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "RemoveDeleteHook(%p)", hook); + return delete_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "AddPreMmapHook(%p)", hook); + return premmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "RemovePreMmapHook(%p)", hook); + return premmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook) { + RAW_VLOG(10, "SetMmapReplacement(%p)", hook); + // NOTE this is a best effort CHECK. Concurrent sets could succeed since + // this test is outside of the Add spin lock. 
+ RAW_CHECK(mmap_replacement_.empty(), "Only one MMapReplacement is allowed."); + return mmap_replacement_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook) { + RAW_VLOG(10, "RemoveMmapReplacement(%p)", hook); + return mmap_replacement_.Remove(hook); +} + +extern "C" +int MallocHook_AddMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "AddMmapHook(%p)", hook); + return mmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "RemoveMmapHook(%p)", hook); + return mmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "AddMunmapHook(%p)", hook); + return munmap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "RemoveMunmapHook(%p)", hook); + return munmap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook) { + RAW_VLOG(10, "SetMunmapReplacement(%p)", hook); + // NOTE this is a best effort CHECK. Concurrent sets could succeed since + // this test is outside of the Add spin lock. 
+ RAW_CHECK(munmap_replacement_.empty(), + "Only one MunmapReplacement is allowed."); + return munmap_replacement_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook) { + RAW_VLOG(10, "RemoveMunmapReplacement(%p)", hook); + return munmap_replacement_.Remove(hook); +} + +extern "C" +int MallocHook_AddMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "AddMremapHook(%p)", hook); + return mremap_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "RemoveMremapHook(%p)", hook); + return mremap_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "AddPreSbrkHook(%p)", hook); + return presbrk_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "RemovePreSbrkHook(%p)", hook); + return presbrk_hooks_.Remove(hook); +} + +extern "C" +int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "AddSbrkHook(%p)", hook); + return sbrk_hooks_.Add(hook); +} + +extern "C" +int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "RemoveSbrkHook(%p)", hook); + return sbrk_hooks_.Remove(hook); +} + +// The code below is DEPRECATED. 
+extern "C" MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook) { + RAW_VLOG(10, "SetNewHook(%p)", hook); return new_hook_.Exchange(hook); } extern "C" MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook) { + RAW_VLOG(10, "SetDeleteHook(%p)", hook); return delete_hook_.Exchange(hook); } extern "C" MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook) { + RAW_VLOG(10, "SetPreMmapHook(%p)", hook); return premmap_hook_.Exchange(hook); } extern "C" MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook) { + RAW_VLOG(10, "SetMmapHook(%p)", hook); return mmap_hook_.Exchange(hook); } extern "C" MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook) { + RAW_VLOG(10, "SetMunmapHook(%p)", hook); return munmap_hook_.Exchange(hook); } extern "C" MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook) { + RAW_VLOG(10, "SetMremapHook(%p)", hook); return mremap_hook_.Exchange(hook); } extern "C" MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook) { + RAW_VLOG(10, "SetPreSbrkHook(%p)", hook); return presbrk_hook_.Exchange(hook); } extern "C" MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook) { + RAW_VLOG(10, "SetSbrkHook(%p)", hook); return sbrk_hook_.Exchange(hook); } +// End of DEPRECATED code section. + +// Note: embedding the function calls inside the traversal of HookList would be +// very confusing, as it is legal for a hook to remove itself and add other +// hooks. Doing traversal first, and then calling the hooks ensures we only +// call the hooks registered at the start. +#define INVOKE_HOOKS(HookType, hook_list, args) do { \ + HookType hooks[kHookListMaxValues]; \ + int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ + for (int i = 0; i < num_hooks; ++i) { \ + (*hooks[i])args; \ + } \ + } while (0) + +// There should only be one replacement. 
Return the result of the first +// one, or false if there is none. +#define INVOKE_REPLACEMENT(HookType, hook_list, args) do { \ + HookType hooks[kHookListMaxValues]; \ + int num_hooks = hook_list.Traverse(hooks, kHookListMaxValues); \ + return (num_hooks > 0 && (*hooks[0])args); \ + } while (0) + + +void MallocHook::InvokeNewHookSlow(const void* p, size_t s) { + INVOKE_HOOKS(NewHook, new_hooks_, (p, s)); +} +void MallocHook::InvokeDeleteHookSlow(const void* p) { + INVOKE_HOOKS(DeleteHook, delete_hooks_, (p)); +} -// The definitions of weak default malloc hooks (New, MMap, and Sbrk) -// that self deinstall on their first call. This is entirely for -// efficiency: the default version of these functions will be called a -// maximum of one time. If these functions were a no-op instead, they'd -// be called every time, costing an extra function call per malloc. -// -// However, this 'delete self' isn't safe in general -- it's possible -// that this function will be called via a daisy chain. That is, -// someone else might do -// old_hook = MallocHook::SetNewHook(&myhook); -// void myhook(void* ptr, size_t size) { -// do_my_stuff(); -// old_hook(ptr, size); // daisy-chain the hooks -// } -// If old_hook is InitialMallocHook_New(), then this is broken code! -- -// after the first run it'll deregister not only InitialMallocHook_New() -// but also myhook. To protect against that, InitialMallocHook_New() -// makes sure it's the 'top-level' hook before doing the deregistration. -// This means the daisy-chain case will be less efficient because the -// hook will be called, and do an if check, for every new. Alas. -// TODO(csilvers): add support for removing a hook from the middle of a chain. 
- -void InitialMallocHook_New(const void* ptr, size_t size) { - if (MallocHook::GetNewHook() == &InitialMallocHook_New) - MallocHook::SetNewHook(NULL); -} - -void InitialMallocHook_PreMMap(const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset) { - if (MallocHook::GetPreMmapHook() == &InitialMallocHook_PreMMap) - MallocHook::SetPreMmapHook(NULL); +void MallocHook::InvokePreMmapHookSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + INVOKE_HOOKS(PreMmapHook, premmap_hooks_, (start, size, protection, flags, fd, + offset)); } -void InitialMallocHook_MMap(const void* result, - const void* start, - size_t size, - int protection, - int flags, - int fd, - off_t offset) { - if (MallocHook::GetMmapHook() == &InitialMallocHook_MMap) - MallocHook::SetMmapHook(NULL); +void MallocHook::InvokeMmapHookSlow(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + INVOKE_HOOKS(MmapHook, mmap_hooks_, (result, start, size, protection, flags, + fd, offset)); } -void InitialMallocHook_PreSbrk(std::ptrdiff_t increment) { - if (MallocHook::GetPreSbrkHook() == &InitialMallocHook_PreSbrk) - MallocHook::SetPreSbrkHook(NULL); +bool MallocHook::InvokeMmapReplacementSlow(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + INVOKE_REPLACEMENT(MmapReplacement, mmap_replacement_, + (start, size, protection, flags, fd, offset, result)); +} + +void MallocHook::InvokeMunmapHookSlow(const void* p, size_t s) { + INVOKE_HOOKS(MunmapHook, munmap_hooks_, (p, s)); +} + +bool MallocHook::InvokeMunmapReplacementSlow(const void* p, + size_t s, + int* result) { + INVOKE_REPLACEMENT(MunmapReplacement, munmap_replacement_, (p, s, result)); +} + +void MallocHook::InvokeMremapHookSlow(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr) { + 
INVOKE_HOOKS(MremapHook, mremap_hooks_, (result, old_addr, old_size, new_size, + flags, new_addr)); } -void InitialMallocHook_Sbrk(const void* result, std::ptrdiff_t increment) { - if (MallocHook::GetSbrkHook() == &InitialMallocHook_Sbrk) - MallocHook::SetSbrkHook(NULL); +void MallocHook::InvokePreSbrkHookSlow(std::ptrdiff_t increment) { + INVOKE_HOOKS(PreSbrkHook, presbrk_hooks_, (increment)); } +void MallocHook::InvokeSbrkHookSlow(const void* result, std::ptrdiff_t increment) { + INVOKE_HOOKS(SbrkHook, sbrk_hooks_, (result, increment)); +} + +#undef INVOKE_HOOKS + DEFINE_ATTRIBUTE_SECTION_VARS(google_malloc); DECLARE_ATTRIBUTE_SECTION_VARS(google_malloc); // actual functions are in debugallocation.cc or tcmalloc.cc @@ -398,7 +747,8 @@ static inline void* do_mmap64(void *start, size_t length, } result = (void *)syscall(SYS_mmap2, - start, length, prot, flags, fd, offset / pagesize); + start, length, prot, flags, fd, + (off_t) (offset / pagesize)); if (result != MAP_FAILED || errno != ENOSYS) goto out; // We don't have mmap2() after all - don't bother trying it in future @@ -456,7 +806,11 @@ extern "C" { extern "C" void* mmap64(void *start, size_t length, int prot, int flags, int fd, __off64_t offset) __THROW { MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); - void *result = do_mmap64(start, length, prot, flags, fd, offset); + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, offset); + } MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); return result; } @@ -466,8 +820,12 @@ extern "C" void* mmap64(void *start, size_t length, int prot, int flags, extern "C" void* mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) __THROW { MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); - void *result = do_mmap64(start, length, prot, flags, fd, - static_cast<size_t>(offset)); 
// avoid sign extension + void *result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, + static_cast<size_t>(offset)); // avoid sign extension + } MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); return result; } @@ -476,7 +834,11 @@ extern "C" void* mmap(void *start, size_t length, int prot, int flags, extern "C" int munmap(void* start, size_t length) __THROW { MallocHook::InvokeMunmapHook(start, length); - return syscall(SYS_munmap, start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = syscall(SYS_munmap, start, length); + } + return result; } extern "C" void* mremap(void* old_addr, size_t old_size, size_t new_size, @@ -503,11 +865,20 @@ extern "C" void* sbrk(std::ptrdiff_t increment) __THROW { /*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { - return do_mmap64(start, length, prot, flags, fd, offset); + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = do_mmap64(start, length, prot, flags, fd, offset); + } + return result; } /*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { - return sys_munmap(start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = sys_munmap(start, length); + } + return result; } #else // defined(__linux) && @@ -515,11 +886,20 @@ extern "C" void* sbrk(std::ptrdiff_t increment) __THROW { /*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, int flags, int fd, off_t offset) { - return mmap(start, length, prot, flags, fd, offset); + void* result; + if (!MallocHook::InvokeMmapReplacement( + start, length, prot, flags, fd, offset, &result)) { + result = mmap(start, length, prot, flags, fd, offset); + } + return result; } /*static*/int 
MallocHook::UnhookedMUnmap(void *start, size_t length) { - return munmap(start, length); + int result; + if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) { + result = munmap(start, length); + } + return result; } #endif // defined(__linux) && diff --git a/third_party/tcmalloc/chromium/src/memfs_malloc.cc b/third_party/tcmalloc/chromium/src/memfs_malloc.cc index 9df4cad..3fb55a4 100644 --- a/third_party/tcmalloc/chromium/src/memfs_malloc.cc +++ b/third_party/tcmalloc/chromium/src/memfs_malloc.cc @@ -38,16 +38,23 @@ #ifdef __linux #include <config.h> -#include <errno.h> -#include <fcntl.h> -#include <unistd.h> -#include <inttypes.h> -#include <sys/mman.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/vfs.h> // for statfs +#include <errno.h> // for errno, EINVAL +#include <inttypes.h> // for PRId64 +#include <limits.h> // for PATH_MAX +#include <stddef.h> // for size_t, NULL +#ifdef HAVE_STDINT_H +#include <stdint.h> // for int64_t, uintptr_t +#endif +#include <stdio.h> // for snprintf +#include <stdlib.h> // for mkstemp +#include <string.h> // for strerror +#include <sys/mman.h> // for mmap, MAP_FAILED, etc +#include <sys/statfs.h> // for fstatfs, statfs +#include <unistd.h> // for ftruncate, off_t, unlink +#include <new> // for operator new #include <string> +#include <google/malloc_extension.h> #include "base/basictypes.h" #include "base/googleinit.h" #include "base/sysinfo.h" @@ -71,55 +78,75 @@ DEFINE_bool(memfs_malloc_abort_on_fail, DEFINE_bool(memfs_malloc_ignore_mmap_fail, EnvToBool("TCMALLOC_MEMFS_IGNORE_MMAP_FAIL", false), "Ignore failures from mmap"); +DEFINE_bool(memfs_malloc_map_private, + EnvToBool("TCMALLOC_MEMFS_MAP_PRIVATE", false), + "Use MAP_PRIVATE with mmap"); // Hugetlbfs based allocator for tcmalloc class HugetlbSysAllocator: public SysAllocator { public: - HugetlbSysAllocator(int fd, int page_size) - : big_page_size_(page_size), - hugetlb_fd_(fd), - hugetlb_base_(0) { + explicit 
HugetlbSysAllocator(SysAllocator* fallback) + : failed_(true), // Unusable until FlagsInitialized() is called + big_page_size_(0), + hugetlb_fd_(-1), + hugetlb_base_(0), + fallback_(fallback) { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); - void DumpStats(TCMalloc_Printer* printer); + void FlagsInitialized(); + bool failed_; // Whether failed to allocate memory. private: + void* AllocInternal(size_t size, size_t *actual_size, size_t alignment); + int64 big_page_size_; - int hugetlb_fd_; // file descriptor for hugetlb + int hugetlb_fd_; // file descriptor for hugetlb off_t hugetlb_base_; -}; -void HugetlbSysAllocator::DumpStats(TCMalloc_Printer* printer) { - printer->printf("HugetlbSysAllocator: failed_=%d allocated=%"PRId64"\n", - failed_, static_cast<int64_t>(hugetlb_base_)); -} + SysAllocator* fallback_; // Default system allocator to fall back to. +}; +static char hugetlb_space[sizeof(HugetlbSysAllocator)]; // No locking needed here since we assume that tcmalloc calls // us with an internal lock held (see tcmalloc/system-alloc.cc). void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { - - // don't go any further if we haven't opened the backing file - if (hugetlb_fd_ == -1) { - return NULL; + if (failed_) { + return fallback_->Alloc(size, actual_size, alignment); } // We don't respond to allocation requests smaller than big_page_size_ unless - // the caller is willing to take more than they asked for. + // the caller is ok to take more than they asked for. Used by MetaDataAlloc. if (actual_size == NULL && size < big_page_size_) { - return NULL; + return fallback_->Alloc(size, actual_size, alignment); } // Enforce huge page alignment. Be careful to deal with overflow. 
- if (alignment < big_page_size_) alignment = big_page_size_; - size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + size_t new_alignment = alignment; + if (new_alignment < big_page_size_) new_alignment = big_page_size_; + size_t aligned_size = ((size + new_alignment - 1) / + new_alignment) * new_alignment; if (aligned_size < size) { - return NULL; + return fallback_->Alloc(size, actual_size, alignment); + } + + void* result = AllocInternal(aligned_size, actual_size, new_alignment); + if (result != NULL) { + return result; + } + TCMalloc_MESSAGE(__FILE__, __LINE__, + "HugetlbSysAllocator: failed_=%d allocated=%"PRId64"\n", + failed_, static_cast<int64_t>(hugetlb_base_)); + if (FLAGS_memfs_malloc_abort_on_fail) { + CRASH("memfs_malloc_abort_on_fail is set\n"); } - size = aligned_size; + return fallback_->Alloc(size, actual_size, alignment); +} +void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size, + size_t alignment) { // Ask for extra memory if alignment > pagesize size_t extra = 0; if (alignment > big_page_size_) { @@ -139,9 +166,6 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, " too large while %"PRId64" bytes remain\n", size, static_cast<int64_t>(limit - hugetlb_base_)); } - if (FLAGS_memfs_malloc_abort_on_fail) { - CRASH("memfs_malloc_abort_on_fail is set\n"); - } return NULL; } @@ -152,9 +176,6 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, TCMalloc_MESSAGE(__FILE__, __LINE__, "ftruncate failed: %s\n", strerror(errno)); failed_ = true; - if (FLAGS_memfs_malloc_abort_on_fail) { - CRASH("memfs_malloc_abort_on_fail is set\n"); - } return NULL; } @@ -162,16 +183,15 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, // size + alignment < (1<<NBITS). 
// and extra <= alignment // therefore size + extra < (1<<NBITS) - void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, - MAP_SHARED, hugetlb_fd_, hugetlb_base_); + void *result; + result = mmap(0, size + extra, PROT_WRITE|PROT_READ, + FLAGS_memfs_malloc_map_private ? MAP_PRIVATE : MAP_SHARED, + hugetlb_fd_, hugetlb_base_); if (result == reinterpret_cast<void*>(MAP_FAILED)) { if (!FLAGS_memfs_malloc_ignore_mmap_fail) { TCMalloc_MESSAGE(__FILE__, __LINE__, "mmap of size %"PRIuS" failed: %s\n", size + extra, strerror(errno)); failed_ = true; - if (FLAGS_memfs_malloc_abort_on_fail) { - CRASH("memfs_malloc_abort_on_fail is set\n"); - } } return NULL; } @@ -192,7 +212,7 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, return reinterpret_cast<void*>(ptr); } -static void InitSystemAllocator() { +void HugetlbSysAllocator::FlagsInitialized() { if (FLAGS_memfs_malloc_path.length()) { char path[PATH_MAX]; int rc = snprintf(path, sizeof(path), "%s.XXXXXX", @@ -223,12 +243,18 @@ static void InitSystemAllocator() { } int64 page_size = sfs.f_bsize; - SysAllocator *alloc = new HugetlbSysAllocator(hugetlb_fd, page_size); - // Register ourselves with tcmalloc - RegisterSystemAllocator(alloc, 0); + hugetlb_fd_ = hugetlb_fd; + big_page_size_ = page_size; + failed_ = false; } } +static void InitSystemAllocator() { + SysAllocator *alloc = MallocExtension::instance()->GetSystemAllocator(); + HugetlbSysAllocator *hugetlb = new (hugetlb_space) HugetlbSysAllocator(alloc); + MallocExtension::instance()->SetSystemAllocator(hugetlb); +} + REGISTER_MODULE_INITIALIZER(memfs_malloc, { InitSystemAllocator(); }); #endif /* ifdef __linux */ diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.cc b/third_party/tcmalloc/chromium/src/memory_region_map.cc index 60071a0..31c3bc2 100644 --- a/third_party/tcmalloc/chromium/src/memory_region_map.cc +++ b/third_party/tcmalloc/chromium/src/memory_region_map.cc @@ -111,13 +111,13 @@ #ifdef HAVE_PTHREAD #include 
<pthread.h> // for pthread_t, pthread_self() #endif +#include <stddef.h> #include <algorithm> #include <set> #include "memory_region_map.h" -#include "base/linux_syscall_support.h" #include "base/logging.h" #include "base/low_level_alloc.h" #include "malloc_hook-inl.h" @@ -195,15 +195,11 @@ void MemoryRegionMap::Init(int max_stack_depth) { RAW_VLOG(10, "MemoryRegionMap Init increment done"); return; } - // Set our hooks and make sure no other hooks existed: - if (MallocHook::SetMmapHook(MmapHook) != NULL || - MallocHook::SetMremapHook(MremapHook) != NULL || - MallocHook::SetSbrkHook(SbrkHook) != NULL || - MallocHook::SetMunmapHook(MunmapHook) != NULL) { - RAW_LOG(FATAL, "Had other mmap/mremap/munmap/sbrk MallocHook-s set. " - "Make sure only one of MemoryRegionMap and the other " - "client is active."); - } + // Set our hooks and make sure they were installed: + RAW_CHECK(MallocHook::AddMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::AddMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::AddSbrkHook(&SbrkHook), ""); + RAW_CHECK(MallocHook::AddMunmapHook(&MunmapHook), ""); // We need to set recursive_insert since the NewArena call itself // will already do some allocations with mmap which our hooks will catch // recursive_insert allows us to buffer info about these mmap calls. 
@@ -230,11 +226,10 @@ bool MemoryRegionMap::Shutdown() { RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done"); return true; } - CheckMallocHooks(); // we assume no other hooks - MallocHook::SetMmapHook(NULL); - MallocHook::SetMremapHook(NULL); - MallocHook::SetSbrkHook(NULL); - MallocHook::SetMunmapHook(NULL); + RAW_CHECK(MallocHook::RemoveMmapHook(&MmapHook), ""); + RAW_CHECK(MallocHook::RemoveMremapHook(&MremapHook), ""); + RAW_CHECK(MallocHook::RemoveSbrkHook(&SbrkHook), ""); + RAW_CHECK(MallocHook::RemoveMunmapHook(&MunmapHook), ""); if (regions_) regions_->~RegionSet(); regions_ = NULL; bool deleted_arena = LowLevelAlloc::DeleteArena(arena_); @@ -248,15 +243,6 @@ bool MemoryRegionMap::Shutdown() { return deleted_arena; } -void MemoryRegionMap::CheckMallocHooks() { - if (MallocHook::GetMmapHook() != MmapHook || - MallocHook::GetMunmapHook() != MunmapHook || - MallocHook::GetMremapHook() != MremapHook || - MallocHook::GetSbrkHook() != SbrkHook) { - RAW_LOG(FATAL, "Our mmap/mremap/munmap/sbrk MallocHook-s got changed."); - } -} - // Invariants (once libpthread_initialized is true): // * While lock_ is not held, recursion_count_ is 0 (and // lock_owner_tid_ is the previous owner, but we don't rely on diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.h b/third_party/tcmalloc/chromium/src/memory_region_map.h index 6b38b25..09561ce 100644 --- a/third_party/tcmalloc/chromium/src/memory_region_map.h +++ b/third_party/tcmalloc/chromium/src/memory_region_map.h @@ -39,6 +39,7 @@ #ifdef HAVE_PTHREAD #include <pthread.h> #endif +#include <stddef.h> #include <set> #include "base/stl_allocator.h" #include "base/spinlock.h" @@ -98,10 +99,6 @@ class MemoryRegionMap { // the number of Init() calls. static bool Shutdown(); - // Check that our hooks are still in place and crash if not. - // No need for locking. - static void CheckMallocHooks(); - // Locks to protect our internal data structures. // These also protect use of arena_ if our Init() has been done. 
// The lock is recursive. @@ -231,7 +228,7 @@ class MemoryRegionMap { static void *Allocate(size_t n) { return LowLevelAlloc::AllocWithArena(n, arena_); } - static void Free(const void *p) { + static void Free(const void *p, size_t /* n */) { LowLevelAlloc::Free(const_cast<void*>(p)); } }; @@ -260,7 +257,6 @@ class MemoryRegionMap { union RegionSetRep; private: - // representation =========================================================== // Counter of clients of this module that have called Init(). diff --git a/third_party/tcmalloc/chromium/src/packed-cache-inl.h b/third_party/tcmalloc/chromium/src/packed-cache-inl.h index 9d2cfe3..77f42b6 100644 --- a/third_party/tcmalloc/chromium/src/packed-cache-inl.h +++ b/third_party/tcmalloc/chromium/src/packed-cache-inl.h @@ -112,10 +112,11 @@ #define TCMALLOC_PACKED_CACHE_INL_H_ #include "config.h" +#include <stddef.h> // for size_t #ifdef HAVE_STDINT_H -#include <stdint.h> +#include <stdint.h> // for uintptr_t #endif -#include "base/basictypes.h" // for COMPILE_ASSERT +#include "base/basictypes.h" #include "internal_logging.h" // A safe way of doing "(1 << n) - 1" -- without worrying about overflow @@ -134,7 +135,12 @@ class PackedCache { public: typedef uintptr_t K; typedef size_t V; +#ifdef TCMALLOC_SMALL_BUT_SLOW + // Decrease the size map cache if running in the small memory mode. 
static const int kHashbits = 12; +#else + static const int kHashbits = 16; +#endif static const int kValuebits = 7; static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T); diff --git a/third_party/tcmalloc/chromium/src/page_heap.cc b/third_party/tcmalloc/chromium/src/page_heap.cc index 31130e9..83ff892 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.cc +++ b/third_party/tcmalloc/chromium/src/page_heap.cc @@ -31,10 +31,16 @@ // Author: Sanjay Ghemawat <opensource@google.com> #include <config.h> -#include "page_heap.h" - -#include "static_vars.h" -#include "system-alloc.h" +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // for PRIuPTR +#endif +#include <google/malloc_extension.h> // for MallocRange, etc +#include "base/basictypes.h" +#include "base/commandlineflags.h" +#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static +#include "system-alloc.h" // for TCMalloc_SystemAlloc, etc DEFINE_double(tcmalloc_release_rate, EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0), @@ -61,7 +67,7 @@ PageHeap::PageHeap() } } -Span* PageHeap::New(Length n) { +Span* PageHeap::SearchFreeAndLargeLists(Length n) { ASSERT(Check()); ASSERT(n > 0); @@ -79,20 +85,26 @@ Span* PageHeap::New(Length n) { ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST); return Carve(ll->next, n); } - // Still no luck, so keep looking in larger classes. } + // No luck in free lists, our last chance is in a larger class. + return AllocLarge(n); // May be NULL +} - Span* result = AllocLarge(n); - if (result != NULL) return result; +Span* PageHeap::New(Length n) { + ASSERT(Check()); + ASSERT(n > 0); - // Grow the heap and try again + Span* result = SearchFreeAndLargeLists(n); + if (result != NULL) + return result; + + // Grow the heap and try again. 
if (!GrowHeap(n)) { ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); ASSERT(Check()); return NULL; } - - return AllocLarge(n); + return SearchFreeAndLargeLists(n); } Span* PageHeap::AllocLarge(Length n) { @@ -391,14 +403,43 @@ void PageHeap::RegisterSizeClass(Span* span, size_t sc) { } } -static double MB(uint64_t bytes) { +static double MiB(uint64_t bytes) { return bytes / 1048576.0; } -static double PagesToMB(uint64_t pages) { +static double PagesToMiB(uint64_t pages) { return (pages << kPageShift) / 1048576.0; } +void PageHeap::GetClassSizes(int64 class_sizes_normal[kMaxPages], + int64 class_sizes_returned[kMaxPages], + int64* normal_pages_in_spans, + int64* returned_pages_in_spans) { + + for (int s = 0; s < kMaxPages; s++) { + if (class_sizes_normal != NULL) { + class_sizes_normal[s] = DLL_Length(&free_[s].normal); + } + if (class_sizes_returned != NULL) { + class_sizes_returned[s] = DLL_Length(&free_[s].returned); + } + } + + if (normal_pages_in_spans != NULL) { + *normal_pages_in_spans = 0; + for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { + *normal_pages_in_spans += s->length;; + } + } + + if (returned_pages_in_spans != NULL) { + *returned_pages_in_spans = 0; + for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { + *returned_pages_in_spans += s->length; + } + } +} + void PageHeap::Dump(TCMalloc_Printer* out) { int nonempty_sizes = 0; for (int s = 0; s < kMaxPages; s++) { @@ -407,8 +448,9 @@ void PageHeap::Dump(TCMalloc_Printer* out) { } } out->printf("------------------------------------------------\n"); - out->printf("PageHeap: %d sizes; %6.1f MB free; %6.1f MB unmapped\n", - nonempty_sizes, MB(stats_.free_bytes), MB(stats_.unmapped_bytes)); + out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n", + nonempty_sizes, MiB(stats_.free_bytes), + MiB(stats_.unmapped_bytes)); out->printf("------------------------------------------------\n"); uint64_t total_normal = 0; uint64_t 
total_returned = 0; @@ -420,14 +462,14 @@ void PageHeap::Dump(TCMalloc_Printer* out) { uint64_t r_pages = s * r_length; total_normal += n_pages; total_returned += r_pages; - out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum" - "; unmapped: %6.1f MB; %6.1f MB cum\n", + out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", s, (n_length + r_length), - PagesToMB(n_pages + r_pages), - PagesToMB(total_normal + total_returned), - PagesToMB(r_pages), - PagesToMB(total_returned)); + PagesToMiB(n_pages + r_pages), + PagesToMiB(total_normal + total_returned), + PagesToMiB(r_pages), + PagesToMiB(total_returned)); } } @@ -437,27 +479,27 @@ void PageHeap::Dump(TCMalloc_Printer* out) { int r_spans = 0; out->printf("Normal large spans:\n"); for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { - out->printf(" [ %6" PRIuPTR " pages ] %6.1f MB\n", - s->length, PagesToMB(s->length)); + out->printf(" [ %6" PRIuPTR " pages ] %6.1f MiB\n", + s->length, PagesToMiB(s->length)); n_pages += s->length; n_spans++; } out->printf("Unmapped large spans:\n"); for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { - out->printf(" [ %6" PRIuPTR " pages ] %6.1f MB\n", - s->length, PagesToMB(s->length)); + out->printf(" [ %6" PRIuPTR " pages ] %6.1f MiB\n", + s->length, PagesToMiB(s->length)); r_pages += s->length; r_spans++; } total_normal += n_pages; total_returned += r_pages; - out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum" - "; unmapped: %6.1f MB; %6.1f MB cum\n", + out->printf(">255 large * %6u spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", (n_spans + r_spans), - PagesToMB(n_pages + r_pages), - PagesToMB(total_normal + total_returned), - PagesToMB(r_pages), - PagesToMB(total_returned)); + PagesToMiB(n_pages + r_pages), + PagesToMiB(total_normal + total_returned), + PagesToMiB(r_pages), + PagesToMiB(total_returned)); } bool PageHeap::GetNextRange(PageID 
start, base::MallocRange* r) { diff --git a/third_party/tcmalloc/chromium/src/page_heap.h b/third_party/tcmalloc/chromium/src/page_heap.h index 52acedb..ab7a541 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.h +++ b/third_party/tcmalloc/chromium/src/page_heap.h @@ -34,7 +34,12 @@ #define TCMALLOC_PAGE_HEAP_H_ #include <config.h> +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint64_t, int64_t, uint16_t +#endif #include <google/malloc_extension.h> +#include "base/basictypes.h" #include "common.h" #include "packed-cache-inl.h" #include "pagemap.h" @@ -50,6 +55,8 @@ // This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if // you're porting to a system where you really can't get a stacktrace. +// Because we control the definition of GetStackTrace, all clients of +// GetStackTrace should #include us rather than stacktrace.h. #ifdef NO_TCMALLOC_SAMPLES // We use #define so code compiles even if you #include stacktrace.h somehow. # define GetStackTrace(stack, depth, skip) (0) @@ -57,6 +64,11 @@ # include <google/stacktrace.h> #endif +class TCMalloc_Printer; +namespace base { +struct MallocRange; +} + namespace tcmalloc { // ------------------------------------------------------------------------- @@ -150,6 +162,10 @@ class PERFTOOLS_DLL_DECL PageHeap { }; inline Stats stats() const { return stats_; } + void GetClassSizes(int64 class_sizes_normal[kMaxPages], + int64 class_sizes_returned[kMaxPages], + int64* normal_pages_in_spans, + int64* returned_pages_in_spans); bool Check(); // Like Check() but does some more comprehensive checking. @@ -186,11 +202,8 @@ class PERFTOOLS_DLL_DECL PageHeap { // should keep this value big because various incarnations of Linux // have small limits on the number of mmap() regions per // address-space. - static const int kMinSystemAlloc = 1 << (20 - kPageShift); - - // For all span-lengths < kMaxPages we keep an exact-size list. 
- // REQUIRED: kMaxPages >= kMinSystemAlloc; - static const size_t kMaxPages = kMinSystemAlloc; + // REQUIRED: kMinSystemAlloc <= kMaxPages; + static const int kMinSystemAlloc = kMaxPages; // Never delay scavenging for more than the following number of // deallocated pages. With 4K pages, this comes to 4GB of @@ -204,8 +217,8 @@ class PERFTOOLS_DLL_DECL PageHeap { static const int kDefaultReleaseDelay = 1 << 12; // Pick the appropriate map and cache types based on pointer size - typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; - typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; + typedef MapSelector<kAddressBits>::Type PageMap; + typedef MapSelector<kAddressBits>::CacheType PageMapCache; PageMap pagemap_; mutable PageMapCache pagemap_cache_; @@ -225,6 +238,8 @@ class PERFTOOLS_DLL_DECL PageHeap { // Statistics on system, free, and unmapped bytes Stats stats_; + Span* SearchFreeAndLargeLists(Length n); + bool GrowHeap(Length n); // REQUIRES: span->length >= n diff --git a/third_party/tcmalloc/chromium/src/page_heap_allocator.h b/third_party/tcmalloc/chromium/src/page_heap_allocator.h index 3f75939..bcff8b3 100644 --- a/third_party/tcmalloc/chromium/src/page_heap_allocator.h +++ b/third_party/tcmalloc/chromium/src/page_heap_allocator.h @@ -33,6 +33,11 @@ #ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ #define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ +#include <stddef.h> // for NULL, size_t + +#include "common.h" // for MetaDataAlloc +#include "internal_logging.h" // for ASSERT, CRASH + namespace tcmalloc { // Simple allocator for objects of a specified type. 
External locking diff --git a/third_party/tcmalloc/chromium/src/pagemap.h b/third_party/tcmalloc/chromium/src/pagemap.h index c8540f7..0db01c4 100644 --- a/third_party/tcmalloc/chromium/src/pagemap.h +++ b/third_party/tcmalloc/chromium/src/pagemap.h @@ -46,6 +46,9 @@ #define TCMALLOC_PAGEMAP_H_ #include "config.h" + +#include <stddef.h> // for NULL, size_t +#include <string.h> // for memset #if defined HAVE_STDINT_H #include <stdint.h> #elif defined HAVE_INTTYPES_H @@ -59,7 +62,7 @@ #include "common.h" #endif -#include "internal_logging.h" +#include "internal_logging.h" // for ASSERT // Single-level array template <int BITS> diff --git a/third_party/tcmalloc/chromium/src/pprof b/third_party/tcmalloc/chromium/src/pprof index d70ee30..03bafa4 100755 --- a/third_party/tcmalloc/chromium/src/pprof +++ b/third_party/tcmalloc/chromium/src/pprof @@ -72,7 +72,7 @@ use strict; use warnings; use Getopt::Long; -my $PPROF_VERSION = "1.5"; +my $PPROF_VERSION = "1.7"; # These are the object tools we use which can come from a # user-specified location using --tools, from the PPROF_TOOLS @@ -89,6 +89,7 @@ my %obj_tool_map = ( ); my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local my $GV = "gv"; +my $EVINCE = "evince"; # could also be xpdf or perhaps acroread my $KCACHEGRIND = "kcachegrind"; my $PS2PDF = "ps2pdf"; # These are used for dynamic profiles @@ -103,9 +104,16 @@ my $GROWTH_PAGE = "/pprof/growth"; my $CONTENTION_PAGE = "/pprof/contention"; my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile"; # must support "?seconds=#" my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; +# These are the web pages that can be named on the command line. +# All the alternatives must begin with /. +my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . 
+ "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . + "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; + # default binary name my $UNKNOWN_BINARY = "(unknown)"; @@ -114,6 +122,11 @@ my $UNKNOWN_BINARY = "(unknown)"; # 64-bit profiles. To err on the safe size, default to 64-bit here: my $address_length = 16; +my $dev_null = "/dev/null"; +if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for + $dev_null = "nul"; +} + # A list of paths to search for shared object files my @prefix_list = (); @@ -142,7 +155,7 @@ pprof [options] <profile> The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, - or /pprof/filteredprofile. + $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. For instance: "pprof http://myserver.com:80$HEAP_PAGE". If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). pprof --symbols <program> @@ -174,6 +187,7 @@ Output type: --text Generate text report --callgrind Generate callgrind format to stdout --gv Generate Postscript and display + --evince Generate PDF and display --web Generate SVG and display --list=<regexp> Generate source listing of matching routines --disasm=<regexp> Generate disassembly of matching routines @@ -202,6 +216,7 @@ Call-graph Options: --nodecount=<n> Show at most so many nodes [default=80] --nodefraction=<f> Hide nodes below <f>*total [default=.005] --edgefraction=<f> Hide edges below <f>*total [default=.001] + --maxdegree=<n> Max incoming/outgoing edges per node [default=8] --focus=<regexp> Focus on nodes matching <regexp> --ignore=<regexp> Ignore nodes matching <regexp> --scale=<n> Set GV scaling [default=0] @@ -209,7 +224,7 @@ Call-graph Options: (i.e. direct leak generators) more visible Miscellaneous: - --tools=<prefix> Prefix for object tool pathnames + --tools=<prefix or binary:fullpath>[,...] 
\$PATH for object tool pathnames --test Run unit tests --help This message --version Version information @@ -298,6 +313,7 @@ sub Init() { $main::opt_disasm = ""; $main::opt_symbols = 0; $main::opt_gv = 0; + $main::opt_evince = 0; $main::opt_web = 0; $main::opt_dot = 0; $main::opt_ps = 0; @@ -309,6 +325,7 @@ sub Init() { $main::opt_nodecount = 80; $main::opt_nodefraction = 0.005; $main::opt_edgefraction = 0.001; + $main::opt_maxdegree = 8; $main::opt_focus = ''; $main::opt_ignore = ''; $main::opt_scale = 0; @@ -366,6 +383,7 @@ sub Init() { "disasm=s" => \$main::opt_disasm, "symbols!" => \$main::opt_symbols, "gv!" => \$main::opt_gv, + "evince!" => \$main::opt_evince, "web!" => \$main::opt_web, "dot!" => \$main::opt_dot, "ps!" => \$main::opt_ps, @@ -377,6 +395,7 @@ sub Init() { "nodecount=i" => \$main::opt_nodecount, "nodefraction=f" => \$main::opt_nodefraction, "edgefraction=f" => \$main::opt_edgefraction, + "maxdegree=i" => \$main::opt_maxdegree, "focus=s" => \$main::opt_focus, "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, @@ -446,6 +465,7 @@ sub Init() { ($main::opt_disasm eq '' ? 0 : 1) + ($main::opt_symbols == 0 ? 0 : 1) + $main::opt_gv + + $main::opt_evince + $main::opt_web + $main::opt_dot + $main::opt_ps + @@ -588,6 +608,10 @@ sub Main() { } elsif ($main::use_symbol_page) { $symbols = FetchSymbols($pcs); } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. 
$symbols = ExtractSymbols($libs, $pcs); } @@ -617,7 +641,7 @@ sub Main() { # Print if (!$main::opt_interactive) { if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total); + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); } elsif ($main::opt_list) { PrintListing($libs, $flat, $cumulative, $main::opt_list); } elsif ($main::opt_text) { @@ -627,7 +651,7 @@ sub Main() { if ($total != 0) { printf("Total: %s %s\n", Unparse($total), Units()); } - PrintText($symbols, $flat, $cumulative, $total, -1); + PrintText($symbols, $flat, $cumulative, -1); } elsif ($main::opt_raw) { PrintSymbolizedProfile($symbols, $profile, $main::prog); } elsif ($main::opt_callgrind) { @@ -636,6 +660,8 @@ sub Main() { if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { if ($main::opt_gv) { RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); } elsif ($main::opt_web) { my $tmp = TempName($main::next_tmpfile, "svg"); RunWeb($tmp); @@ -684,7 +710,7 @@ sub ReadlineMightFail { sub RunGV { my $fname = shift; my $bg = shift; # "" or " &" if we should run in background - if (!system("$GV --version >/dev/null 2>&1")) { + if (!system("$GV --version >$dev_null 2>&1")) { # Options using double dash are supported by this gv version. # Also, turn on noantialias to better handle bug in gv for # postscript files with large dimensions. @@ -698,6 +724,12 @@ sub RunGV { } } +sub RunEvince { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + system("$EVINCE " . $fname . 
$bg); +} + sub RunWeb { my $fname = shift; print STDERR "Loading web page file:///$fname\n"; @@ -718,10 +750,8 @@ sub RunWeb { "firefox", ); foreach my $b (@alt) { - if (-f $b) { - if (system($b, $fname) == 0) { - return; - } + if (system($b, $fname) == 0) { + return; } } @@ -797,6 +827,7 @@ sub InteractiveCommand { $main::opt_disasm = 0; $main::opt_list = 0; $main::opt_gv = 0; + $main::opt_evince = 0; $main::opt_cum = 0; if (m/^\s*(text|top)(\d*)\s*(.*)/) { @@ -815,7 +846,7 @@ sub InteractiveCommand { my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintText($symbols, $flat, $cumulative, $total, $line_limit); + PrintText($symbols, $flat, $cumulative, $line_limit); return 1; } if (m/^\s*callgrind\s*([^ \n]*)/) { @@ -867,14 +898,17 @@ sub InteractiveCommand { my $flat = FlatProfile($reduced); my $cumulative = CumulativeProfile($reduced); - PrintDisassembly($libs, $flat, $cumulative, $routine, $total); + PrintDisassembly($libs, $flat, $cumulative, $routine); return 1; } - if (m/^\s*(gv|web)\s*(.*)/) { + if (m/^\s*(gv|web|evince)\s*(.*)/) { $main::opt_gv = 0; + $main::opt_evince = 0; $main::opt_web = 0; if ($1 eq "gv") { $main::opt_gv = 1; + } elsif ($1 eq "evince") { + $main::opt_evince = 1; } elsif ($1 eq "web") { $main::opt_web = 1; } @@ -894,6 +928,8 @@ sub InteractiveCommand { if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { if ($main::opt_gv) { RunGV(TempName($main::next_tmpfile, "ps"), " &"); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); } elsif ($main::opt_web) { RunWeb(TempName($main::next_tmpfile, "svg")); } @@ -1106,9 +1142,10 @@ sub PrintText { my $symbols = shift; my $flat = shift; my $cumulative = shift; - my $total = shift; my $line_limit = shift; + my $total = TotalProfile($flat); + # Which profile to sort by? my $s = $main::opt_cum ? 
$cumulative : $flat; @@ -1183,7 +1220,8 @@ sub PrintDisassembly { my $flat = shift; my $cumulative = shift; my $disasm_opts = shift; - my $total = shift; + + my $total = TotalProfile($flat); foreach my $lib (@{$libs}) { my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); @@ -1677,6 +1715,8 @@ sub PrintDot { my $output; if ($main::opt_gv) { $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); + } elsif ($main::opt_evince) { + $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf"); } elsif ($main::opt_ps) { $output = "| $DOT -Tps2"; } elsif ($main::opt_pdf) { @@ -1737,7 +1777,7 @@ sub PrintDot { if ($f != $c) { $extra = sprintf("\\rof %s (%s)", Unparse($c), - Percent($c, $overall_total)); + Percent($c, $local_total)); } my $style = ""; if ($main::opt_heapcheck) { @@ -1756,7 +1796,7 @@ sub PrintDot { $node{$a}, $sym, Unparse($f), - Percent($f, $overall_total), + Percent($f, $local_total), $extra, $fs, $style, @@ -1784,12 +1824,38 @@ sub PrintDot { } } - # Print edges - foreach my $e (keys(%edge)) { + # Print edges (process in order of decreasing counts) + my %indegree = (); # Number of incoming edges added per node so far + my %outdegree = (); # Number of outgoing edges added per node so far + foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { my @x = split(/\001/, $e); $n = $edge{$e}; - if (abs($n) > $edgelimit) { + # Initialize degree of kept incoming and outgoing edges if necessary + my $src = $x[0]; + my $dst = $x[1]; + if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } + if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } + + my $keep; + if ($indegree{$dst} == 0) { + # Keep edge if needed for reachability + $keep = 1; + } elsif (abs($n) <= $edgelimit) { + # Drop if we are below --edgefraction + $keep = 0; + } elsif ($outdegree{$src} >= $main::opt_maxdegree || + $indegree{$dst} >= $main::opt_maxdegree) { + # Keep limited number of in/out edges per node + $keep = 0; + } else { + $keep = 1; + } 
+ + if ($keep) { + $outdegree{$src}++; + $indegree{$dst}++; + # Compute line width based on edge count my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); if ($fraction > 1) { $fraction = 1; } @@ -2127,6 +2193,19 @@ function handleMouseUp(evt) { EOF } +# Return a small number that identifies the argument. +# Multiple calls with the same argument will return the same number. +# Calls with different arguments will return different numbers. +sub ShortIdFor { + my $key = shift; + my $id = $main::uniqueid{$key}; + if (!defined($id)) { + $id = keys(%main::uniqueid) + 1; + $main::uniqueid{$key} = $id; + } + return $id; +} + # Translate a stack of addresses into a stack of symbols sub TranslateStack { my $symbols = shift; @@ -2164,6 +2243,15 @@ sub TranslateStack { if ($j > 2) { $func = "$func (inline)"; } + + # Do not merge nodes corresponding to Callback::Run since that + # causes confusing cycles in dot display. Instead, we synthesize + # a unique name for this frame per caller. + if ($func =~ m/Callback.*::Run$/) { + my $caller = ($i > 0) ? $addrs[$i-1] : 0; + $func = "Run#" . ShortIdFor($caller); + } + if ($main::opt_addresses) { push(@result, "$a $func $fileline"); } elsif ($main::opt_lines) { @@ -2407,7 +2495,16 @@ sub RemoveUninterestingFrames { # old code out of the system. 
$skip_regexp = "TCMalloc|^tcmalloc::"; } elsif ($main::profile_type eq 'contention') { - foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') { + foreach my $vname ('base::RecordLockProfileData', + 'base::SubmitMutexProfileData', + 'base::SubmitSpinLockProfileData', + 'Mutex::Unlock', + 'Mutex::UnlockSlow', + 'Mutex::ReaderUnlock', + 'MutexLock::~MutexLock', + 'SpinLock::Unlock', + 'SpinLock::SlowUnlock', + 'SpinLockHolder::~SpinLockHolder') { $skip{$vname} = 1; } } elsif ($main::profile_type eq 'cpu') { @@ -2704,32 +2801,44 @@ sub CheckSymbolPage { sub IsProfileURL { my $profile_name = shift; - my ($host, $port, $prefix, $path) = ParseProfileURL($profile_name); - return defined($host) and defined($port) and defined($path); + if (-f $profile_name) { + printf STDERR "Using local file $profile_name.\n"; + return 0; + } + return 1; } sub ParseProfileURL { my $profile_name = shift; - if (defined($profile_name) && - $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|(.*?)($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) { - # $7 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after - # the hostname, as long as that everything is the empty string, - # a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc. - # So "$7 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "". - return ($2, $3, $6, $7 || $5); - } - return (); + + if (!defined($profile_name) || $profile_name eq "") { + return (); + } + + # Split profile URL - matches all non-empty strings, so no test. + $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; + + my $proto = $1 || "http://"; + my $hostport = $2; + my $prefix = $3; + my $profile = $4 || "/"; + + my $host = $hostport; + $host =~ s/:.*//; + + my $baseurl = "$proto$hostport$prefix"; + return ($host, $baseurl, $profile); } # We fetch symbols from the first profile argument. 
sub SymbolPageURL { - my ($host, $port, $prefix, $path) = ParseProfileURL($main::pfile_args[0]); - return "http://$host:$port$prefix$SYMBOL_PAGE"; + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + return "$baseURL$SYMBOL_PAGE"; } sub FetchProgramName() { - my ($host, $port, $prefix, $path) = ParseProfileURL($main::pfile_args[0]); - my $url = "http://$host:$port$prefix$PROGRAM_NAME_PAGE"; + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + my $url = "$baseURL$PROGRAM_NAME_PAGE"; my $command_line = "$URL_FETCHER '$url'"; open(CMDLINE, "$command_line |") or error($command_line); my $cmdline = <CMDLINE>; @@ -2880,10 +2989,10 @@ sub BaseName { sub MakeProfileBaseName { my ($binary_name, $profile_name) = @_; - my ($host, $port, $prefix, $path) = ParseProfileURL($profile_name); + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); my $binary_shortname = BaseName($binary_name); - return sprintf("%s.%s.%s-port%s", - $binary_shortname, $main::op_time, $host, $port); + return sprintf("%s.%s.%s", + $binary_shortname, $main::op_time, $host); } sub FetchDynamicProfile { @@ -2895,7 +3004,7 @@ sub FetchDynamicProfile { if (!IsProfileURL($profile_name)) { return $profile_name; } else { - my ($host, $port, $prefix, $path) = ParseProfileURL($profile_name); + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); if ($path eq "" || $path eq "/") { # Missing type specifier defaults to cpu-profile $path = $PROFILE_PAGE; @@ -2903,33 +3012,26 @@ sub FetchDynamicProfile { my $profile_file = MakeProfileBaseName($binary_name, $profile_name); - my $url; + my $url = "$baseURL$path"; my $fetch_timeout = undef; - if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) { - if ($path =~ m/$PROFILE_PAGE/) { - $url = sprintf("http://$host:$port$prefix$path?seconds=%d", - $main::opt_seconds); + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { + if ($path =~ m/[?]/) { + $url .= "&"; } else { - if ($profile_name =~ m/[?]/) { - 
$profile_name .= "&" - } else { - $profile_name .= "?" - } - $url = sprintf("http://$profile_name" . "seconds=%d", - $main::opt_seconds); + $url .= "?"; } + $url .= sprintf("seconds=%d", $main::opt_seconds); $fetch_timeout = $main::opt_seconds * 1.01 + 60; } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; - $profile_file .= "$suffix"; - $url = "http://$host:$port$prefix$path"; + $profile_file .= $suffix; } my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof"); - if (!(-d $profile_dir)) { + if (! -d $profile_dir) { mkdir($profile_dir) || die("Unable to create profile directory $profile_dir: $!\n"); } @@ -2942,13 +3044,13 @@ sub FetchDynamicProfile { my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout); my $cmd = "$fetcher '$url' > '$tmp_profile'"; - if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){ + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; if ($encourage_patience) { print STDERR "Be patient...\n"; } } else { - print STDERR "Fetching $path profile from $host:$port to\n ${real_profile}\n"; + print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; } (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); @@ -3034,6 +3136,7 @@ BEGIN { stride => 512 * 1024, # must be a multiple of bitsize/8 slots => [], unpack_code => "", # N for big-endian, V for little + perl_is_64bit => 1, # matters if profile is 64-bit }; bless $self, $class; # Let unittests adjust the stride @@ -3057,17 +3160,15 @@ BEGIN { } @$slots = unpack($self->{unpack_code} . "*", $str); } else { - # If we're a 64-bit profile, make sure we're a 64-bit-capable + # If we're a 64-bit profile, check if we're a 64-bit-capable # perl. 
Otherwise, each slot will be represented as a float # instead of an int64, losing precision and making all the - # 64-bit addresses right. We *could* try to handle this with - # software emulation of 64-bit ints, but that's added complexity - # for no clear benefit (yet). We use 'Q' to test for 64-bit-ness; - # perl docs say it's only available on 64-bit perl systems. + # 64-bit addresses wrong. We won't complain yet, but will + # later if we ever see a value that doesn't fit in 32 bits. my $has_q = 0; eval { $has_q = pack("Q", "1") ? 1 : 1; }; if (!$has_q) { - ::error("$fname: need a 64-bit perl to process this 64-bit profile.\n"); + $self->{perl_is_64bit} = 0; } read($self->{file}, $str, 8); if (substr($str, 4, 4) eq chr(0)x4) { @@ -3103,11 +3204,17 @@ BEGIN { # TODO(csilvers): if this is a 32-bit perl, the math below # could end up in a too-large int, which perl will promote # to a double, losing necessary precision. Deal with that. - if ($self->{unpack_code} eq 'V') { # little-endian - push(@b64_values, $b32_values[$i] + $b32_values[$i+1] * (2**32)); - } else { - push(@b64_values, $b32_values[$i] * (2**32) + $b32_values[$i+1]); - } + # Right now, we just die. + my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); + if ($self->{unpack_code} eq 'N') { # big-endian + ($lo, $hi) = ($hi, $lo); + } + my $value = $lo + $hi * (2**32); + if (!$self->{perl_is_64bit} && # check value is exactly represented + (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { + ::error("Need a 64-bit perl to process this 64-bit profile.\n"); + } + push(@b64_values, $value); } @$slots = @b64_values; } @@ -3136,24 +3243,47 @@ BEGIN { } } -# Return the next line from the profile file, assuming it's a text -# line (which in this case means, doesn't start with a NUL byte). If -# it's not a text line, return "". At EOF, return undef, like perl does. -# Input file should be in binmode. 
-sub ReadProfileLine { +# Reads the top, 'header' section of a profile, and returns the last +# line of the header, commonly called a 'header line'. The header +# section of a profile consists of zero or more 'command' lines that +# are instructions to pprof, which pprof executes when reading the +# header. All 'command' lines start with a %. After the command +# lines is the 'header line', which is a profile-specific line that +# indicates what type of profile it is, and perhaps other global +# information about the profile. For instance, here's a header line +# for a heap profile: +# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile +# For historical reasons, the CPU profile does not contain a text- +# readable header line. If the profile looks like a CPU profile, +# this function returns "". If no header line could be found, this +# function returns undef. +# +# The following commands are recognized: +# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' +# +# The input file should be in binmode. +sub ReadProfileHeader { local *PROFILE = shift; my $firstchar = ""; my $line = ""; read(PROFILE, $firstchar, 1); - seek(PROFILE, -1, 1); # unread the firstchar - if ($firstchar eq "\0") { + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar !~ /[[:print:]]/) { # is not a text character return ""; } - $line = <PROFILE>; - if (defined($line)) { + while (defined($line = <PROFILE>)) { $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /^%warn\s+(.*)/) { # 'warn' command + # Note this matches both '%warn blah\n' and '%warn\n'. + print STDERR "WARNING: $1\n"; # print the rest of the line + } elsif ($line =~ /^%/) { + print STDERR "Ignoring unknown command from profile header: $line"; + } else { + # End of commands, must be the header line. 
+ return $line; + } } - return $line; + return undef; # got to EOF without seeing a header line } sub IsSymbolizedProfileFile { @@ -3164,7 +3294,7 @@ sub IsSymbolizedProfileFile { # Check if the file contains a symbol-section marker. open(TFILE, "<$file_name"); binmode TFILE; - my $firstline = ReadProfileLine(*TFILE); + my $firstline = ReadProfileHeader(*TFILE); close(TFILE); if (!$firstline) { return 0; @@ -3184,14 +3314,7 @@ sub IsSymbolizedProfileFile { sub ReadProfile { my $prog = shift; my $fname = shift; - - if (IsSymbolizedProfileFile($fname) && !$main::use_symbolized_profile) { - # we have both a binary and symbolized profiles, abort - usage("Symbolized profile '$fname' cannot be used with a binary arg. " . - "Try again without passing '$prog'."); - } - - $main::profile_type = ''; + my $result; # return value $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash my $contention_marker = $&; @@ -3208,40 +3331,45 @@ sub ReadProfile { # whole firstline, since it may be gigabytes(!) of data. open(PROFILE, "<$fname") || error("$fname: $!\n"); binmode PROFILE; # New perls do UTF-8 processing - my $header = ReadProfileLine(*PROFILE); + my $header = ReadProfileHeader(*PROFILE); if (!defined($header)) { # means "at EOF" error("Profile is empty.\n"); } my $symbols; if ($header =~ m/^--- *$symbol_marker/o) { + # Verify that the user asked for a symbolized profile + if (!$main::use_symbolized_profile) { + # we have both a binary and symbolized profiles, abort + error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . + "a binary arg. Try again without passing\n $prog\n"); + } # Read the symbol section of the symbolized profile file. $symbols = ReadSymbols(*PROFILE{IO}); # Read the next line to get the header for the remaining profile. 
- $header = ReadProfileLine(*PROFILE) || ""; + $header = ReadProfileHeader(*PROFILE) || ""; } - my $result; - + $main::profile_type = ''; if ($header =~ m/^heap profile:.*$growth_marker/o) { $main::profile_type = 'growth'; - $result = ReadHeapProfile($prog, $fname, $header); + $result = ReadHeapProfile($prog, *PROFILE, $header); } elsif ($header =~ m/^heap profile:/) { $main::profile_type = 'heap'; - $result = ReadHeapProfile($prog, $fname, $header); + $result = ReadHeapProfile($prog, *PROFILE, $header); } elsif ($header =~ m/^--- *$contention_marker/o) { $main::profile_type = 'contention'; - $result = ReadSynchProfile($prog, $fname); + $result = ReadSynchProfile($prog, *PROFILE); } elsif ($header =~ m/^--- *Stacks:/) { print STDERR "Old format contention profile: mistakenly reports " . "condition variable signals as lock contentions.\n"; $main::profile_type = 'contention'; - $result = ReadSynchProfile($prog, $fname); + $result = ReadSynchProfile($prog, *PROFILE); } elsif ($header =~ m/^--- *$profile_marker/) { # the binary cpu profile data starts immediately after this line $main::profile_type = 'cpu'; - $result = ReadCPUProfile($prog, $fname); + $result = ReadCPUProfile($prog, $fname, *PROFILE); } else { if (defined($symbols)) { # a symbolized profile contains a format we don't recognize, bail out @@ -3249,9 +3377,11 @@ sub ReadProfile { } # no ascii header present -- must be a CPU profile $main::profile_type = 'cpu'; - $result = ReadCPUProfile($prog, $fname); + $result = ReadCPUProfile($prog, $fname, *PROFILE); } + close(PROFILE); + # if we got symbols along with the profile, return those as well if (defined($symbols)) { $result->{symbols} = $symbols; @@ -3290,7 +3420,8 @@ sub FixCallerAddresses { # CPU profile reader sub ReadCPUProfile { my $prog = shift; - my $fname = shift; + my $fname = shift; # just used for logging + local *PROFILE = shift; my $version; my $period; my $i; @@ -3357,7 +3488,6 @@ sub ReadCPUProfile { my $map = ''; seek(PROFILE, $i * 4, 0); 
read(PROFILE, $map, (stat PROFILE)[7]); - close(PROFILE); my $r = {}; $r->{version} = $version; @@ -3371,7 +3501,7 @@ sub ReadCPUProfile { sub ReadHeapProfile { my $prog = shift; - my $fname = shift; + local *PROFILE = shift; my $header = shift; my $index = 1; @@ -3513,16 +3643,18 @@ sub ReadHeapProfile { # The sampling frequency is the rate of a Poisson process. # This means that the probability of sampling an allocation of # size X with sampling rate Y is 1 - exp(-X/Y) - my $ratio; - $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - my $scale_factor; - $scale_factor = 1/(1 - exp(-$ratio)); - $n1 *= $scale_factor; - $s1 *= $scale_factor; - $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - $scale_factor = 1/(1 - exp(-$ratio)); - $n2 *= $scale_factor; - $s2 *= $scale_factor; + if ($n1 != 0) { + my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + } + if ($n2 != 0) { + my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } } else { # Remote-heap version 1 my $ratio; @@ -3554,7 +3686,9 @@ sub ReadHeapProfile { } sub ReadSynchProfile { - my ($prog, $fname, $header) = @_; + my $prog = shift; + local *PROFILE = shift; + my $header = shift; my $map = ''; my $profile = {}; @@ -3629,7 +3763,6 @@ sub ReadSynchProfile { $map .= $line; } } - close PROFILE; if (!$seen_clockrate) { printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", @@ -4073,9 +4206,15 @@ sub ExtractSymbols { my $symbols = {}; - # Map each PC value to the containing library - my %seen = (); - foreach my $lib (@{$libs}) { + # Map each PC value to the containing library. To make this faster, + # we sort libraries by their starting pc value (highest first), and + # advance through the libraries as we advance the pc. 
Sometimes the + # addresses of libraries may overlap with the addresses of the main + # binary, so to make sure the libraries 'win', we iterate over the + # libraries in reverse order (which assumes the binary doesn't start + # in the middle of a library, which seems a fair assumption). + my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings + foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { my $libname = $lib->[0]; my $start = $lib->[1]; my $finish = $lib->[2]; @@ -4083,12 +4222,21 @@ sub ExtractSymbols { # Get list of pcs that belong in this library. my $contained = []; - foreach my $pc (keys(%{$pcset})) { - if (!$seen{$pc} && ($pc ge $start) && ($pc le $finish)) { - $seen{$pc} = 1; - push(@{$contained}, $pc); - } - } + my ($start_pc_index, $finish_pc_index); + # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. + for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; + $finish_pc_index--) { + last if $pcs[$finish_pc_index - 1] le $finish; + } + # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. + for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; + $start_pc_index--) { + last if $pcs[$start_pc_index - 1] lt $start; + } + # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, + # in case there are overlaps in libraries and the main binary. + @{$contained} = splice(@pcs, $start_pc_index, + $finish_pc_index - $start_pc_index); # Map to symbols MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); } @@ -4118,7 +4266,7 @@ sub MapToSymbols { # If "addr2line" isn't installed on the system at all, just use # nm to get what info we can (function names, but not line numbers). 
- if (system("$addr2line --help >/dev/null 2>&1") != 0) { + if (system("$addr2line --help >$dev_null 2>&1") != 0) { MapSymbolsWithNM($image, $offset, $pclist, $symbols); return; } @@ -4136,7 +4284,7 @@ sub MapToSymbols { if (defined($sep_address)) { # Only add " -i" to addr2line if the binary supports it. # addr2line --help returns 0, but not if it sees an unknown flag first. - if (system("$cmd -i --help >/dev/null 2>&1") == 0) { + if (system("$cmd -i --help >$dev_null 2>&1") == 0) { $cmd .= " -i"; } else { $sep_address = undef; # no need for sep_address if we don't support -i @@ -4282,8 +4430,16 @@ sub ConfigureObjTools { # predictably return error status in prod. (-e $prog_file) || error("$prog_file does not exist.\n"); - # Follow symlinks (at least for systems where "file" supports that) - my $file_type = `/usr/bin/file -L $prog_file 2>/dev/null || /usr/bin/file $prog_file`; + my $file_type = undef; + if (-e "/usr/bin/file") { + # Follow symlinks (at least for systems where "file" supports that). + $file_type = `/usr/bin/file -L $prog_file 2>$dev_null || /usr/bin/file $prog_file`; + } elsif ($^O == "MSWin32") { + $file_type = "MS Windows"; + } else { + print STDERR "WARNING: Can't determine the file type of $prog_file"; + } + if ($file_type =~ /64-bit/) { # Change $address_length to 16 if the program file is ELF 64-bit. # We can't detect this from many (most?) heap or lock contention @@ -4322,18 +4478,27 @@ sub ConfigureTool { my $tool = shift; my $path; - if ($main::opt_tools ne "") { - # Use a prefix specified by the --tools option... - $path = $main::opt_tools . $tool; - if (!-x $path) { - error("No '$tool' found with prefix specified by --tools $main::opt_tools\n"); + # --tools (or $PPROF_TOOLS) is a comma separated list, where each + # item is either a) a pathname prefix, or b) a map of the form + # <tool>:<path>. First we look for an entry of type (b) for our + # tool. If one is found, we use it. 
Otherwise, we consider all the + # pathname prefixes in turn, until one yields an existing file. If + # none does, we use a default path. + my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || ""; + if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) { + $path = $2; + # TODO(csilvers): sanity-check that $path exists? Hard if it's relative. + } elsif ($tools ne '') { + foreach my $prefix (split(',', $tools)) { + next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list + if (-x $prefix . $tool) { + $path = $prefix . $tool; + last; + } } - } elsif (exists $ENV{"PPROF_TOOLS"} && - $ENV{"PPROF_TOOLS"} ne "") { - #... or specified with the PPROF_TOOLS environment variable... - $path = $ENV{"PPROF_TOOLS"} . $tool; - if (!-x $path) { - error("No '$tool' found with prefix specified by PPROF_TOOLS=$ENV{PPROF_TOOLS}\n"); + if (!$path) { + error("No '$tool' found with prefix specified by " . + "--tools (or \$PPROF_TOOLS) '$tools'\n"); } } else { # ... otherwise use the version that exists in the same directory as @@ -4486,16 +4651,16 @@ sub GetProcedureBoundaries { # --demangle and -f. my $demangle_flag = ""; my $cppfilt_flag = ""; - if (system("$nm --demangle $image >/dev/null 2>&1") == 0) { + if (system("$nm --demangle $image >$dev_null 2>&1") == 0) { # In this mode, we do "nm --demangle <foo>" $demangle_flag = "--demangle"; $cppfilt_flag = ""; - } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) { + } elsif (system("$cppfilt $image >$dev_null 2>&1") == 0) { # In this mode, we do "nm <foo> | c++filt" $cppfilt_flag = " | $cppfilt"; }; my $flatten_flag = ""; - if (system("$nm -f $image >/dev/null 2>&1") == 0) { + if (system("$nm -f $image >$dev_null 2>&1") == 0) { $flatten_flag = "-f"; } @@ -4503,11 +4668,11 @@ sub GetProcedureBoundaries { # -D to at least get *exported* symbols. If we can't use --demangle, # we use c++filt instead, if it exists on this system. my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . 
- " $image 2>/dev/null $cppfilt_flag", + " $image 2>$dev_null $cppfilt_flag", "$nm -D -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", + " $image 2>$dev_null $cppfilt_flag", # 6nm is for Go binaries - "6nm $image 2>/dev/null | sort", + "6nm $image 2>$dev_null | sort", ); # If the executable is an MS Windows PDB-format executable, we'll @@ -4516,7 +4681,7 @@ sub GetProcedureBoundaries { # PDB-format executables can apparently include dwarf .o files. if (exists $obj_tool_map{"nm_pdb"}) { my $nm_pdb = $obj_tool_map{"nm_pdb"}; - push(@nm_commands, "$nm_pdb --demangle $image 2>/dev/null"); + push(@nm_commands, "$nm_pdb --demangle $image 2>$dev_null"); } foreach my $nm_command (@nm_commands) { diff --git a/third_party/tcmalloc/chromium/src/profiler.cc b/third_party/tcmalloc/chromium/src/profiler.cc index 3ac51d4..38fbb93 100644 --- a/third_party/tcmalloc/chromium/src/profiler.cc +++ b/third_party/tcmalloc/chromium/src/profiler.cc @@ -111,7 +111,7 @@ class CpuProfiler { int (*filter_)(void*); void* filter_arg_; - // Opague token returned by the profile handler. To be used when calling + // Opaque token returned by the profile handler. To be used when calling // ProfileHandlerUnregisterCallback. ProfileHandlerToken* prof_handler_token_; diff --git a/third_party/tcmalloc/chromium/src/raw_printer.cc b/third_party/tcmalloc/chromium/src/raw_printer.cc index 019555a..730d6e2 100644 --- a/third_party/tcmalloc/chromium/src/raw_printer.cc +++ b/third_party/tcmalloc/chromium/src/raw_printer.cc @@ -54,7 +54,7 @@ void RawPrinter::Printf(const char* format, ...) { int avail = limit_ - ptr_; // We pass avail+1 to vsnprintf() since that routine needs room // to store the trailing \0. - const int r = vsnprintf(ptr_, avail+1, format, ap); + const int r = perftools_vsnprintf(ptr_, avail+1, format, ap); va_end(ap); if (r < 0) { // Perhaps an old glibc that returns -1 on truncation? 
diff --git a/third_party/tcmalloc/chromium/src/sampler.cc b/third_party/tcmalloc/chromium/src/sampler.cc index bdf8779..0ea6df1 100644 --- a/third_party/tcmalloc/chromium/src/sampler.cc +++ b/third_party/tcmalloc/chromium/src/sampler.cc @@ -35,14 +35,15 @@ #include "sampler.h" #include <algorithm> // For min() -#include <cmath> +#include <math.h> +#include "base/commandlineflags.h" using std::min; // The approximate gap in bytes between sampling actions. // I.e., we take one sample approximately once every // tcmalloc_sample_parameter bytes of allocation -// i.e. about once every 512KB. +// i.e. about once every 512KB if value is 1<<19. #ifdef NO_TCMALLOC_SAMPLES DEFINE_int64(tcmalloc_sample_parameter, 0, "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); @@ -50,8 +51,7 @@ DEFINE_int64(tcmalloc_sample_parameter, 0, DEFINE_int64(tcmalloc_sample_parameter, EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 0), "The approximate gap in bytes between sampling actions. " - "This must be between 1 and 1<<58."); -// Note: there are other places in this file where the number 19 occurs. 
+ "This must be between 1 and 2^58."); #endif namespace tcmalloc { diff --git a/third_party/tcmalloc/chromium/src/sampler.h b/third_party/tcmalloc/chromium/src/sampler.h index fa9e554..8e67fb0 100644 --- a/third_party/tcmalloc/chromium/src/sampler.h +++ b/third_party/tcmalloc/chromium/src/sampler.h @@ -36,8 +36,13 @@ #define TCMALLOC_SAMPLER_H_ #include "config.h" -#include "common.h" -#include "static_vars.h" +#include <stddef.h> // for size_t +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint64_t, uint32_t, int32_t +#endif +#include <string.h> // for memcpy +#include "base/basictypes.h" // for ASSERT +#include "internal_logging.h" // for ASSERT namespace tcmalloc { diff --git a/third_party/tcmalloc/chromium/src/span.cc b/third_party/tcmalloc/chromium/src/span.cc index ca0bab3..426a6bd 100644 --- a/third_party/tcmalloc/chromium/src/span.cc +++ b/third_party/tcmalloc/chromium/src/span.cc @@ -33,11 +33,11 @@ #include <config.h> #include "span.h" -#ifdef HAVE_INTTYPES_H -#include <inttypes.h> -#endif +#include <string.h> // for NULL, memset -#include "static_vars.h" +#include "internal_logging.h" // for ASSERT +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static namespace tcmalloc { diff --git a/third_party/tcmalloc/chromium/src/stack_trace_table.cc b/third_party/tcmalloc/chromium/src/stack_trace_table.cc index 6672af9..faeca6b 100644 --- a/third_party/tcmalloc/chromium/src/stack_trace_table.cc +++ b/third_party/tcmalloc/chromium/src/stack_trace_table.cc @@ -31,10 +31,13 @@ // Author: Andrew Fikes #include <config.h> -#include "base/spinlock.h" -#include "common.h" -#include "static_vars.h" #include "stack_trace_table.h" +#include <string.h> // for NULL, memset +#include "base/spinlock.h" // for SpinLockHolder +#include "common.h" // for StackTrace +#include "internal_logging.h" // for MESSAGE, ASSERT +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "static_vars.h" // for Static namespace tcmalloc 
{ diff --git a/third_party/tcmalloc/chromium/src/stack_trace_table.h b/third_party/tcmalloc/chromium/src/stack_trace_table.h index 799571a..26d21c1 100644 --- a/third_party/tcmalloc/chromium/src/stack_trace_table.h +++ b/third_party/tcmalloc/chromium/src/stack_trace_table.h @@ -36,6 +36,9 @@ #define TCMALLOC_STACK_TRACE_TABLE_H_ #include <config.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif #include "common.h" namespace tcmalloc { diff --git a/third_party/tcmalloc/chromium/src/static_vars.cc b/third_party/tcmalloc/chromium/src/static_vars.cc index 18d5146..2ca132e 100644 --- a/third_party/tcmalloc/chromium/src/static_vars.cc +++ b/third_party/tcmalloc/chromium/src/static_vars.cc @@ -31,7 +31,10 @@ // Author: Ken Ashcraft <opensource@google.com> #include "static_vars.h" -#include "sampler.h" // for the init function +#include <stddef.h> // for NULL +#include <new> // for operator new +#include "internal_logging.h" // for CHECK_CONDITION +#include "sampler.h" // for Sampler namespace tcmalloc { diff --git a/third_party/tcmalloc/chromium/src/symbolize.h b/third_party/tcmalloc/chromium/src/symbolize.h index e80cc49..da07037 100644 --- a/third_party/tcmalloc/chromium/src/symbolize.h +++ b/third_party/tcmalloc/chromium/src/symbolize.h @@ -38,6 +38,7 @@ #include <stddef.h> #include <stdint.h> // for uintptr_t #endif +#include <stddef.h> // for NULL #include <map> using std::map; diff --git a/third_party/tcmalloc/chromium/src/system-alloc.cc b/third_party/tcmalloc/chromium/src/system-alloc.cc index 0f41ac2..5053981 100644 --- a/third_party/tcmalloc/chromium/src/system-alloc.cc +++ b/third_party/tcmalloc/chromium/src/system-alloc.cc @@ -31,26 +31,29 @@ // Author: Sanjay Ghemawat #include <config.h> +#include <errno.h> // for EAGAIN, errno +#include <fcntl.h> // for open, O_RDWR +#include <stddef.h> // for size_t, NULL, ptrdiff_t #if defined HAVE_STDINT_H -#include <stdint.h> +#include <stdint.h> // for uintptr_t, intptr_t #elif defined 
HAVE_INTTYPES_H #include <inttypes.h> #else #include <sys/types.h> #endif -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#include <fcntl.h> // for open() #ifdef HAVE_MMAP -#include <sys/mman.h> +#include <sys/mman.h> // for munmap, mmap, MADV_DONTNEED, etc #endif -#include <errno.h> -#include "system-alloc.h" -#include "internal_logging.h" -#include "base/logging.h" +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for sbrk, getpagesize, off_t +#endif +#include <new> // for operator new +#include <google/malloc_extension.h> +#include "base/basictypes.h" #include "base/commandlineflags.h" -#include "base/spinlock.h" +#include "base/spinlock.h" // for SpinLockHolder, SpinLock, etc +#include "common.h" +#include "internal_logging.h" // On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old // form of the name instead. @@ -73,6 +76,24 @@ static const bool kDebugMode = false; static const bool kDebugMode = true; #endif +// Anonymous namespace to avoid name conflicts on "CheckAddressBits". +namespace { + +// Check that no bit is set at position ADDRESS_BITS or higher. +template <int ADDRESS_BITS> bool CheckAddressBits(uintptr_t ptr) { + return (ptr >> ADDRESS_BITS) == 0; +} + +// Specialize for the bit width of a pointer to avoid undefined shift. +template <> bool CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) { + return true; +} + +} // Anonymous namespace to avoid name conflicts on "CheckAddressBits". + +COMPILE_ASSERT(kAddressBits <= 8 * sizeof(void*), + address_bits_larger_than_pointer_size); + // Structure for discovering alignment union MemoryAligner { void* p; @@ -87,8 +108,10 @@ static SpinLock spinlock(SpinLock::LINKER_INITIALIZED); static size_t pagesize = 0; #endif -// Configuration parameters. +// The current system allocator +SysAllocator* sys_alloc = NULL; +// Configuration parameters. DEFINE_int32(malloc_devmem_start, EnvToInt("TCMALLOC_DEVMEM_START", 0), "Physical memory starting location in MB for /dev/mem allocation." 
@@ -110,7 +133,7 @@ public: SbrkSysAllocator() : SysAllocator() { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); - void DumpStats(TCMalloc_Printer* printer); + void FlagsInitialized() {} }; static char sbrk_space[sizeof(SbrkSysAllocator)]; @@ -119,7 +142,7 @@ public: MmapSysAllocator() : SysAllocator() { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); - void DumpStats(TCMalloc_Printer* printer); + void FlagsInitialized() {} }; static char mmap_space[sizeof(MmapSysAllocator)]; @@ -128,30 +151,41 @@ public: DevMemSysAllocator() : SysAllocator() { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); - void DumpStats(TCMalloc_Printer* printer); + void FlagsInitialized() {} }; -static char devmem_space[sizeof(DevMemSysAllocator)]; - -static const int kStaticAllocators = 3; -// kMaxDynamicAllocators + kStaticAllocators; -static const int kMaxAllocators = 5; -static SysAllocator *allocators[kMaxAllocators]; -bool RegisterSystemAllocator(SysAllocator *a, int priority) { - SpinLockHolder lock_holder(&spinlock); +class DefaultSysAllocator : public SysAllocator { + public: + DefaultSysAllocator() : SysAllocator() { + for (int i = 0; i < kMaxAllocators; i++) { + failed_[i] = true; + allocs_[i] = NULL; + } + } + void SetChildAllocator(SysAllocator* alloc, unsigned int index, + const char* name) { + if (index < kMaxAllocators && alloc != NULL) { + allocs_[index] = alloc; + failed_[index] = false; + } + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + void FlagsInitialized() {} - // No two allocators should have a priority conflict, since the order - // is determined at compile time. 
- CHECK_CONDITION(allocators[priority] == NULL); - allocators[priority] = a; - return true; -} + private: + static const int kMaxAllocators = 2; + bool failed_[kMaxAllocators]; + SysAllocator* allocs_[kMaxAllocators]; + const char* names_[kMaxAllocators]; +}; +static char default_space[sizeof(DefaultSysAllocator)]; +static const char sbrk_name[] = "SbrkSysAllocator"; +static const char mmap_name[] = "MmapSysAllocator"; void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { #ifndef HAVE_SBRK - failed_ = true; return NULL; #else // Check if we should use sbrk allocation. @@ -186,13 +220,11 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, // http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc // Without this check, sbrk may succeed when it ought to fail.) if (reinterpret_cast<intptr_t>(sbrk(0)) + size < size) { - failed_ = true; return NULL; } void* result = sbrk(size); if (result == reinterpret_cast<void*>(-1)) { - failed_ = true; return NULL; } @@ -212,7 +244,6 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, // that we can find an aligned region within it. result = sbrk(size + alignment - 1); if (result == reinterpret_cast<void*>(-1)) { - failed_ = true; return NULL; } ptr = reinterpret_cast<uintptr_t>(result); @@ -223,14 +254,9 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, #endif // HAVE_SBRK } -void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) { - printer->printf("SbrkSysAllocator: failed_=%d\n", failed_); -} - void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { #ifndef HAVE_MMAP - failed_ = true; return NULL; #else // Check if we should use mmap allocation. 
@@ -273,7 +299,6 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (result == reinterpret_cast<void*>(MAP_FAILED)) { - failed_ = true; return NULL; } @@ -297,14 +322,9 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, #endif // HAVE_MMAP } -void MmapSysAllocator::DumpStats(TCMalloc_Printer* printer) { - printer->printf("MmapSysAllocator: failed_=%d\n", failed_); -} - void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { #ifndef HAVE_MMAP - failed_ = true; return NULL; #else static bool initialized = false; @@ -325,7 +345,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, if (!initialized) { physmem_fd = open("/dev/mem", O_RDWR); if (physmem_fd < 0) { - failed_ = true; return NULL; } physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL; @@ -357,7 +376,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, // check to see if we have any memory left if (physmem_limit != 0 && ((size + extra) > (physmem_limit - physmem_base))) { - failed_ = true; return NULL; } @@ -368,7 +386,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, MAP_SHARED, physmem_fd, physmem_base); if (result == reinterpret_cast<void*>(MAP_FAILED)) { - failed_ = true; return NULL; } uintptr_t ptr = reinterpret_cast<uintptr_t>(result); @@ -394,15 +411,30 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, #endif // HAVE_MMAP } -void DevMemSysAllocator::DumpStats(TCMalloc_Printer* printer) { - printer->printf("DevMemSysAllocator: failed_=%d\n", failed_); +void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + for (int i = 0; i < kMaxAllocators; i++) { + if (!failed_[i] && allocs_[i] != NULL) { + void* result = allocs_[i]->Alloc(size, actual_size, alignment); + if (result != NULL) { + return result; + } + TCMalloc_MESSAGE(__FILE__, 
__LINE__, "%s failed.\n", names_[i]); + failed_[i] = true; + } + } + // After both failed, reset "failed_" to false so that a single failed + // allocation won't make the allocator never work again. + for (int i = 0; i < kMaxAllocators; i++) { + failed_[i] = false; + } + return NULL; } static bool system_alloc_inited = false; void InitSystemAllocators(void) { - // This determines the order in which system allocators are called - int i = kMaxDynamicAllocators; - allocators[i++] = new (devmem_space) DevMemSysAllocator(); + MmapSysAllocator *mmap = new (mmap_space) MmapSysAllocator(); + SbrkSysAllocator *sbrk = new (sbrk_space) SbrkSysAllocator(); // In 64-bit debug mode, place the mmap allocator first since it // allocates pointers that do not fit in 32 bits and therefore gives @@ -411,13 +443,15 @@ void InitSystemAllocators(void) { // likely to look like pointers and therefore the conservative gc in // the heap-checker is less likely to misinterpret a number as a // pointer). + DefaultSysAllocator *sdef = new (default_space) DefaultSysAllocator(); if (kDebugMode && sizeof(void*) > 4) { - allocators[i++] = new (mmap_space) MmapSysAllocator(); - allocators[i++] = new (sbrk_space) SbrkSysAllocator(); + sdef->SetChildAllocator(mmap, 0, mmap_name); + sdef->SetChildAllocator(sbrk, 1, sbrk_name); } else { - allocators[i++] = new (sbrk_space) SbrkSysAllocator(); - allocators[i++] = new (mmap_space) MmapSysAllocator(); + sdef->SetChildAllocator(sbrk, 0, sbrk_name); + sdef->SetChildAllocator(mmap, 1, mmap_name); } + sys_alloc = sdef; } void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, @@ -435,26 +469,17 @@ void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, // Enforce minimum alignment if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner); - // Try twice, once avoiding allocators that failed before, and once - // more trying all allocators even if they failed before. 
- for (int i = 0; i < 2; i++) { - for (int j = 0; j < kMaxAllocators; j++) { - SysAllocator *a = allocators[j]; - if (a == NULL) continue; - if (a->usable_ && !a->failed_) { - void* result = a->Alloc(size, actual_size, alignment); - if (result != NULL) return result; - } - } - - // nothing worked - reset failed_ flags and try again - for (int j = 0; j < kMaxAllocators; j++) { - SysAllocator *a = allocators[j]; - if (a == NULL) continue; - a->failed_ = false; + void* result = sys_alloc->Alloc(size, actual_size, alignment); + if (result != NULL) { + if (actual_size) { + CheckAddressBits<kAddressBits>( + reinterpret_cast<uintptr_t>(result) + *actual_size - 1); + } else { + CheckAddressBits<kAddressBits>( + reinterpret_cast<uintptr_t>(result) + size - 1); } } - return NULL; + return result; } void TCMalloc_SystemRelease(void* start, size_t length) { @@ -498,13 +523,3 @@ void TCMalloc_SystemCommit(void* start, size_t length) { // such that they need to be re-committed before they can be used by the // application. } - -void DumpSystemAllocatorStats(TCMalloc_Printer* printer) { - for (int j = 0; j < kMaxAllocators; j++) { - SysAllocator *a = allocators[j]; - if (a == NULL) continue; - if (a->usable_) { - a->DumpStats(printer); - } - } -} diff --git a/third_party/tcmalloc/chromium/src/system-alloc.h b/third_party/tcmalloc/chromium/src/system-alloc.h index 8d982ef..17e098a 100644 --- a/third_party/tcmalloc/chromium/src/system-alloc.h +++ b/third_party/tcmalloc/chromium/src/system-alloc.h @@ -37,7 +37,9 @@ #define TCMALLOC_SYSTEM_ALLOC_H_ #include <config.h> -#include "internal_logging.h" +#include <stddef.h> // for size_t + +class SysAllocator; // REQUIRES: "alignment" is a power of two or "0" to indicate default alignment // @@ -75,49 +77,7 @@ extern void TCMalloc_SystemRelease(void* start, size_t length); // function to fail. extern void TCMalloc_SystemCommit(void* start, size_t length); -// Interface to a pluggable system allocator. 
-class SysAllocator { - public: - SysAllocator() - : usable_(true), - failed_(false) { - }; - virtual ~SysAllocator() {}; - - virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0; - - // Populate the map with whatever properties the specified allocator finds - // useful for debugging (such as number of bytes allocated and whether the - // allocator has failed). The callee is responsible for any necessary - // locking (and avoiding deadlock). - virtual void DumpStats(TCMalloc_Printer* printer) = 0; - - // So the allocator can be turned off at compile time - bool usable_; - - // Did this allocator fail? If so, we don't need to retry more than twice. - bool failed_; -}; - -// Register a new system allocator. The priority determines the order in -// which the allocators will be invoked. Allocators with numerically lower -// priority are tried first. To keep things simple, the priority of various -// allocators is known at compile time. -// -// Valid range of priorities: [0, kMaxDynamicAllocators) -// -// Please note that we can't use complex data structures and cause -// recursive calls to malloc within this function. So all data structures -// are statically allocated. -// -// Returns true on success. Does nothing on failure. -extern PERFTOOLS_DLL_DECL bool RegisterSystemAllocator(SysAllocator *allocator, - int priority); - -// Number of SysAllocators known to call RegisterSystemAllocator -static const int kMaxDynamicAllocators = 2; - -// Retrieve the current state of various system allocators. -extern PERFTOOLS_DLL_DECL void DumpSystemAllocatorStats(TCMalloc_Printer* printer); +// The current system allocator. 
+extern PERFTOOLS_DLL_DECL SysAllocator* sys_alloc; #endif /* TCMALLOC_SYSTEM_ALLOC_H_ */ diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc index f0f6aa2..5cfa634 100644 --- a/third_party/tcmalloc/chromium/src/tcmalloc.cc +++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc @@ -86,10 +86,16 @@ // * allocation of a reasonably complicated struct // goes from about 1100 ns to about 300 ns. -#include <config.h> -#include <new> -#include <stdio.h> -#include <stddef.h> +#include "config.h" +#include <google/tcmalloc.h> + +#include <errno.h> // for ENOMEM, EINVAL, errno +#ifdef HAVE_SYS_CDEFS_H +#include <sys/cdefs.h> // for __THROW +#endif +#ifdef HAVE_FEATURES_H +#include <features.h> // for __GLIBC__ +#endif #if defined HAVE_STDINT_H #include <stdint.h> #elif defined HAVE_INTTYPES_H @@ -97,46 +103,59 @@ #else #include <sys/types.h> #endif -#if defined(HAVE_MALLOC_H) && defined(HAVE_STRUCT_MALLINFO) -#include <malloc.h> // for struct mallinfo -#endif -#include <string.h> -#ifdef HAVE_PTHREAD -#include <pthread.h> -#endif +#include <stddef.h> // for size_t, NULL +#include <stdlib.h> // for getenv +#include <string.h> // for strcmp, memset, strlen, etc #ifdef HAVE_UNISTD_H -#include <unistd.h> +#include <unistd.h> // for getpagesize, write, etc #endif -#include <errno.h> -#include <stdarg.h> -#include <algorithm> -#include <google/tcmalloc.h> -#include "base/commandlineflags.h" -#include "base/basictypes.h" // gets us PRIu64 -#include "base/sysinfo.h" -#include "base/spinlock.h" -#include "common.h" -#include "malloc_hook-inl.h" -#include <google/malloc_hook.h> +#include <algorithm> // for max, min +#include <limits> // for numeric_limits +#include <new> // for nothrow_t (ptr only), etc +#include <vector> // for vector + #include <google/malloc_extension.h> -#include "central_freelist.h" -#include "internal_logging.h" -#include "linked_list.h" -#include "maybe_threads.h" -#include "page_heap.h" -#include "pagemap.h" 
-#include "span.h" -#include "static_vars.h" -#include "system-alloc.h" -#include "tcmalloc_guard.h" -#include "thread_cache.h" +#include <google/malloc_hook.h> // for MallocHook +#include "base/basictypes.h" // for int64 +#include "base/commandlineflags.h" // for RegisterFlagValidator, etc +#include "base/dynamic_annotations.h" // for RunningOnValgrind +#include "base/spinlock.h" // for SpinLockHolder +#include "central_freelist.h" // for CentralFreeListPadded +#include "common.h" // for StackTrace, kPageShift, etc +#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc +#include "linked_list.h" // for SLL_SetNext +#include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc +#include "page_heap.h" // for PageHeap, PageHeap::Stats +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "span.h" // for Span, DLL_Prepend, etc +#include "stack_trace_table.h" // for StackTraceTable +#include "static_vars.h" // for Static +#include "system-alloc.h" // for DumpSystemAllocatorStats, etc +#include "tcmalloc_guard.h" // for TCMallocGuard +#include "thread_cache.h" // for ThreadCache + +// We only need malloc.h for struct mallinfo. +#ifdef HAVE_STRUCT_MALLINFO +// Malloc can be in several places on older versions of OS X. 
+# if defined(HAVE_MALLOC_H) +# include <malloc.h> +# elif defined(HAVE_SYS_MALLOC_H) +# include <sys/malloc.h> +# elif defined(HAVE_MALLOC_MALLOC_H) +# include <malloc/malloc.h> +# endif +#endif #if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS) # define WIN32_DO_PATCHING 1 #endif -using std::max; +using STL_NAMESPACE::max; +using STL_NAMESPACE::numeric_limits; +using STL_NAMESPACE::vector; +using tcmalloc::AlignmentForSize; using tcmalloc::PageHeap; +using tcmalloc::PageHeapAllocator; using tcmalloc::SizeMap; using tcmalloc::Span; using tcmalloc::StackTrace; @@ -217,7 +236,7 @@ extern "C" { ATTRIBUTE_SECTION(google_malloc); int tc_mallopt(int cmd, int value) __THROW ATTRIBUTE_SECTION(google_malloc); -#ifdef HAVE_STRUCT_MALLINFO // struct mallinfo isn't defined on freebsd +#ifdef HAVE_STRUCT_MALLINFO struct mallinfo tc_mallinfo(void) __THROW ATTRIBUTE_SECTION(google_malloc); #endif @@ -237,12 +256,21 @@ extern "C" { void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); // Surprisingly, standard C++ library implementations use a - // nothrow-delete internally. See, eg: + // nothrow-delete internally. See, eg: // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); + + // Some non-standard extensions that we support. + + // This is equivalent to + // OS X: malloc_size() + // glibc: malloc_usable_size() + // Windows: _msize() + size_t tc_malloc_size(void* p) __THROW + ATTRIBUTE_SECTION(google_malloc); } // extern "C" // Override the libc functions to prefer our own instead. 
This comes @@ -250,7 +278,7 @@ extern "C" { // exception: in windows, by default, we patch our code into these // functions (via src/windows/patch_function.cc) rather than override // them. In that case, we don't want to do this overriding here. -#if !defined(WIN32_DO_PATCHING) && !defined(TCMALLOC_FOR_DEBUGALLOCATION) +#if !defined(WIN32_DO_PATCHING) // TODO(mbelshe): Turn off TCMalloc's symbols for libc. We do that // elsewhere. @@ -290,7 +318,8 @@ extern "C" { #ifdef HAVE_STRUCT_MALLINFO struct mallinfo mallinfo(void) __THROW ALIAS("tc_mallinfo"); #endif - size_t malloc_usable_size(void* ptr) __THROW ALIAS("tc_malloc_usable_size"); + size_t malloc_size(void* p) __THROW ALIAS("tc_malloc_size"); + size_t malloc_usable_size(void* p) __THROW ALIAS("tc_malloc_size"); } // extern "C" #else // #if defined(__GNUC__) && !defined(__MACH__) // Portable wrappers @@ -327,9 +356,8 @@ extern "C" { #ifdef HAVE_STRUCT_MALLINFO struct mallinfo mallinfo(void) __THROW { return tc_mallinfo(); } #endif - size_t malloc_usable_size(void* p) __THROW { - return tc_malloc_usable_size(p); - } + size_t malloc_size(void* p) __THROW { return tc_malloc_size(p); } + size_t malloc_usable_size(void* p) __THROW { return tc_malloc_size(p); } } // extern "C" #endif // #if defined(__GNUC__) @@ -397,7 +425,7 @@ void (*__free_hook)(void* ptr, const void* caller) = tc_ptmalloc_free_hook; #endif // #ifndef _WIN32 #undef ALIAS -#endif // #ifndef(WIN32_DO_PATCHING) && ndef(TCMALLOC_FOR_DEBUGALLOCATION) +#endif // #ifndef(WIN32_DO_PATCHING) // ----------------------- IMPLEMENTATION ------------------------------- @@ -464,22 +492,68 @@ static void DumpStats(TCMalloc_Printer* out, int level) { uint64_t class_count[kNumClasses]; ExtractStats(&stats, (level >= 2 ? 
class_count : NULL)); - static const double MB = 1048576.0; - - const uint64_t bytes_in_use = stats.pageheap.system_bytes - - stats.pageheap.free_bytes - - stats.pageheap.unmapped_bytes - - stats.central_bytes - - stats.transfer_bytes - - stats.thread_bytes; - - out->printf("WASTE: %7.1f MB committed but not used\n" - "WASTE: %7.1f MB bytes committed, %7.1f MB bytes in use\n" - "WASTE: committed/used ratio of %f\n", - (stats.pageheap.committed_bytes - bytes_in_use) / MB, - stats.pageheap.committed_bytes / MB, - bytes_in_use / MB, - stats.pageheap.committed_bytes / static_cast<double>(bytes_in_use)); + static const double MiB = 1048576.0; + + const uint64_t virtual_memory_used = (stats.pageheap.system_bytes + + stats.metadata_bytes); + const uint64_t physical_memory_used = (virtual_memory_used + - stats.pageheap.unmapped_bytes); + const uint64_t bytes_in_use_by_app = (physical_memory_used + - stats.metadata_bytes + - stats.pageheap.free_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.thread_bytes); + + out->printf( + "WASTE: %7.1f MiB committed but not used\n" + "WASTE: %7.1f MiB bytes committed, %7.1f MiB bytes in use\n" + "WASTE: committed/used ratio of %f\n", + (stats.pageheap.committed_bytes - bytes_in_use_by_app) / MiB, + stats.pageheap.committed_bytes / MiB, + bytes_in_use_by_app / MiB, + stats.pageheap.committed_bytes / static_cast<double>(bytes_in_use_by_app) + ); +#ifdef TCMALLOC_SMALL_BUT_SLOW + out->printf( + "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); +#endif + out->printf( + "------------------------------------------------\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes committed\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in 
thread cache freelists\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n" + "MALLOC:\n" + "MALLOC: %12" PRIu64 " Spans in use\n" + "MALLOC: %12" PRIu64 " Thread heaps in use\n" + "MALLOC: %12" PRIu64 " Tcmalloc page size\n" + "------------------------------------------------\n" + "Call ReleaseFreeMemory() to release freelist memory to the OS" + " (via madvise()).\n" + "Bytes released to the OS take up virtual address space" + " but no physical memory.\n", + bytes_in_use_by_app, bytes_in_use_by_app / MiB, + stats.pageheap.committed_bytes, stats.pageheap.committed_bytes / MiB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, + stats.central_bytes, stats.central_bytes / MiB, + stats.transfer_bytes, stats.transfer_bytes / MiB, + stats.thread_bytes, stats.thread_bytes / MiB, + stats.metadata_bytes, stats.metadata_bytes / MiB, + physical_memory_used, physical_memory_used / MiB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB, + virtual_memory_used, virtual_memory_used / MiB, + uint64_t(Static::span_allocator()->inuse()), + uint64_t(ThreadCache::HeapsInUse()), + uint64_t(kPageSize)); if (level >= 2) { out->printf("------------------------------------------------\n"); @@ -492,45 +566,17 @@ static void DumpStats(TCMalloc_Printer* out, int level) { class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); cumulative += class_bytes; out->printf("class %3d [ %8" PRIuS " bytes ] : " - "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n", + "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n", cl, Static::sizemap()->ByteSizeForClass(cl), class_count[cl], - class_bytes / MB, - cumulative / MB); + class_bytes / MiB, + cumulative / MiB); } } SpinLockHolder 
h(Static::pageheap_lock()); Static::pageheap()->Dump(out); - - out->printf("------------------------------------------------\n"); - DumpSystemAllocatorStats(out); } - - out->printf("------------------------------------------------\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Heap size\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes committed\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes in use by application\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in page heap\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes unmapped in page heap\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in central cache\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in transfer cache\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in thread caches\n" - "MALLOC: %12" PRIu64 " Spans in use\n" - "MALLOC: %12" PRIu64 " Thread heaps in use\n" - "MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n" - "------------------------------------------------\n", - stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB, - stats.pageheap.committed_bytes, stats.pageheap.committed_bytes / MB, - bytes_in_use, bytes_in_use / MB, - stats.pageheap.free_bytes, stats.pageheap.free_bytes / MB, - stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MB, - stats.central_bytes, stats.central_bytes / MB, - stats.transfer_bytes, stats.transfer_bytes / MB, - stats.thread_bytes, stats.thread_bytes / MB, - uint64_t(Static::span_allocator()->inuse()), - uint64_t(ThreadCache::HeapsInUse()), - stats.metadata_bytes, stats.metadata_bytes / MB); } static void PrintStats(int level) { @@ -644,6 +690,22 @@ class TCMallocImplementation : public MallocExtension { } } + // We may print an extra, tcmalloc-specific warning message here. 
+ virtual void GetHeapSample(MallocExtensionWriter* writer) { + if (FLAGS_tcmalloc_sample_parameter == 0) { + const char* const kWarningMsg = + "%warn\n" + "%warn This heap profile does not have any data in it, because\n" + "%warn the application was run with heap sampling turned off.\n" + "%warn To get useful data from GetHeapSample(), you must\n" + "%warn set the environment variable TCMALLOC_SAMPLE_PARAMETER to\n" + "%warn a positive sampling period, such as 524288.\n" + "%warn\n"; + writer->append(kWarningMsg, strlen(kWarningMsg)); + } + MallocExtension::GetHeapSample(writer); + } + virtual void** ReadStackTraces(int* sample_period) { tcmalloc::StackTraceTable table; { @@ -688,9 +750,8 @@ class TCMallocImplementation : public MallocExtension { } if (strcmp(name, "tcmalloc.slack_bytes") == 0) { - // We assume that bytes in the page heap are not fragmented too - // badly, and are therefore available for allocation without - // growing the pageheap system byte count. + // Kept for backwards compatibility. Now defined externally as: + // pageheap_free_bytes + pageheap_unmapped_bytes. SpinLockHolder l(Static::pageheap_lock()); PageHeap::Stats stats = Static::pageheap()->stats(); *value = stats.free_bytes + stats.unmapped_bytes; @@ -743,6 +804,16 @@ class TCMallocImplementation : public MallocExtension { virtual void MarkThreadBusy(); // Implemented below + virtual SysAllocator* GetSystemAllocator() { + SpinLockHolder h(Static::pageheap_lock()); + return sys_alloc; + } + + virtual void SetSystemAllocator(SysAllocator* alloc) { + SpinLockHolder h(Static::pageheap_lock()); + sys_alloc = alloc; + } + virtual void ReleaseToSystem(size_t num_bytes) { SpinLockHolder h(Static::pageheap_lock()); if (num_bytes <= extra_bytes_released_) { @@ -789,6 +860,99 @@ class TCMallocImplementation : public MallocExtension { // unnamed namespace, we need to move the definition below it in the // file. 
virtual size_t GetAllocatedSize(void* ptr); + + virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) { + static const char* kCentralCacheType = "tcmalloc.central"; + static const char* kTransferCacheType = "tcmalloc.transfer"; + static const char* kThreadCacheType = "tcmalloc.thread"; + static const char* kPageHeapType = "tcmalloc.page"; + static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped"; + static const char* kLargeSpanType = "tcmalloc.large"; + static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped"; + + v->clear(); + + // central class information + int64 prev_class_size = 0; + for (int cl = 1; cl < kNumClasses; ++cl) { + size_t class_size = Static::sizemap()->ByteSizeForClass(cl); + MallocExtension::FreeListInfo i; + i.min_object_size = prev_class_size + 1; + i.max_object_size = class_size; + i.total_bytes_free = + Static::central_cache()[cl].length() * class_size; + i.type = kCentralCacheType; + v->push_back(i); + + // transfer cache + i.total_bytes_free = + Static::central_cache()[cl].tc_length() * class_size; + i.type = kTransferCacheType; + v->push_back(i); + + prev_class_size = Static::sizemap()->ByteSizeForClass(cl); + } + + // Add stats from per-thread heaps + uint64_t class_count[kNumClasses]; + memset(class_count, 0, sizeof(class_count)); + { + SpinLockHolder h(Static::pageheap_lock()); + uint64_t thread_bytes = 0; + ThreadCache::GetThreadStats(&thread_bytes, class_count); + } + + prev_class_size = 0; + for (int cl = 1; cl < kNumClasses; ++cl) { + MallocExtension::FreeListInfo i; + i.min_object_size = prev_class_size + 1; + i.max_object_size = Static::sizemap()->ByteSizeForClass(cl); + i.total_bytes_free = + class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); + i.type = kThreadCacheType; + v->push_back(i); + } + + // append page heap info + int64 page_count_normal[kMaxPages]; + int64 page_count_returned[kMaxPages]; + int64 span_count_normal; + int64 span_count_returned; + { + SpinLockHolder 
h(Static::pageheap_lock()); + Static::pageheap()->GetClassSizes(page_count_normal, + page_count_returned, + &span_count_normal, + &span_count_returned); + } + + // spans: mapped + MallocExtension::FreeListInfo span_info; + span_info.type = kLargeSpanType; + span_info.max_object_size = (numeric_limits<size_t>::max)(); + span_info.min_object_size = kMaxPages << kPageShift; + span_info.total_bytes_free = span_count_normal << kPageShift; + v->push_back(span_info); + + // spans: unmapped + span_info.type = kLargeUnmappedSpanType; + span_info.total_bytes_free = span_count_returned << kPageShift; + v->push_back(span_info); + + for (int s = 1; s < kMaxPages; s++) { + MallocExtension::FreeListInfo i; + i.max_object_size = (s << kPageShift); + i.min_object_size = ((s - 1) << kPageShift); + + i.type = kPageHeapType; + i.total_bytes_free = (s << kPageShift) * page_count_normal[s]; + v->push_back(i); + + i.type = kPageHeapUnmappedType; + i.total_bytes_free = (s << kPageShift) * page_count_returned[s]; + v->push_back(i); + } + } }; // The constructor allocates an object to ensure that initialization @@ -818,8 +982,8 @@ TCMallocGuard::TCMallocGuard() { ThreadCache::InitTSD(); tc_free(tc_malloc(1)); // Either we, or debugallocation.cc, or valgrind will control memory - // management. We register our extension if we're the winner. -#ifdef TCMALLOC_FOR_DEBUGALLOCATION + // management. We register our extension if we're the winner. +#ifdef TCMALLOC_USING_DEBUGALLOCATION // Let debugallocation register its extension. #else if (RunningOnValgrind()) { @@ -887,7 +1051,6 @@ static void* DoSampledAllocation(size_t size) { // Sampling failed because of lack of memory return span; } - *stack = tmp; span->sample = 1; span->objects = stack; @@ -896,6 +1059,8 @@ static void* DoSampledAllocation(size_t size) { return SpanToMallocResult(span); } +namespace { + // Copy of FLAGS_tcmalloc_large_alloc_report_threshold with // automatic increases factored in. 
static int64_t large_alloc_threshold = @@ -919,8 +1084,6 @@ static void ReportLargeAlloc(Length num_pages, void* result) { write(STDERR_FILENO, buffer, strlen(buffer)); } -namespace { - inline void* cpp_alloc(size_t size, bool nothrow); inline void* do_malloc(size_t size); @@ -944,7 +1107,7 @@ inline bool should_report_large(Length num_pages) { const int64 threshold = large_alloc_threshold; if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { // Increase the threshold by 1/8 every time we generate a report. - // We cap the threshold at 8GB to avoid overflow problems. + // We cap the threshold at 8GiB to avoid overflow problems. large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 ? threshold + threshold/8 : 8ll<<30); return true; @@ -959,6 +1122,7 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) { Length num_pages = tcmalloc::pages(size); size = num_pages << kPageShift; + if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { result = DoSampledAllocation(size); @@ -1064,9 +1228,9 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) { ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); ASSERT(span != NULL && span->start == p); if (span->sample) { + StackTrace* st = reinterpret_cast<StackTrace*>(span->objects); tcmalloc::DLL_Remove(span); - Static::stacktrace_allocator()->Delete( - reinterpret_cast<StackTrace*>(span->objects)); + Static::stacktrace_allocator()->Delete(st); span->objects = NULL; } Static::pageheap()->Delete(span); @@ -1156,11 +1320,11 @@ inline void* do_realloc(void* old_ptr, size_t new_size) { // For use by exported routines below that want specific alignments // -// Note: this code can be slow, and can significantly fragment memory. -// The expectation is that memalign/posix_memalign/valloc/pvalloc will -// not be invoked very often. This requirement simplifies our -// implementation and allows us to tune for expected allocation -// patterns. 
+// Note: this code can be slow for alignments > 16, and can +// significantly fragment memory. The expectation is that +// memalign/posix_memalign/valloc/pvalloc will not be invoked very +// often. This requirement simplifies our implementation and allows +// us to tune for expected allocation patterns. void* do_memalign(size_t align, size_t size) { ASSERT((align & (align - 1)) == 0); ASSERT(align > 0); @@ -1168,6 +1332,13 @@ void* do_memalign(size_t align, size_t size) { AddRoomForMark(&size); if (size + align < size) return NULL; // Overflow + // Fall back to malloc if we would already align this memory access properly. + if (align <= AlignmentForSize(size)) { + void* p = do_malloc(size); + ASSERT((reinterpret_cast<uintptr_t>(p) % align) == 0); + return p; + } + if (Static::pageheap() == NULL) ThreadCache::InitModule(); // Allocate at least one byte to avoid boundary conditions below @@ -1240,7 +1411,7 @@ inline int do_mallopt(int cmd, int value) { return 1; // Indicates error } -#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance +#ifdef HAVE_STRUCT_MALLINFO inline struct mallinfo do_mallinfo() { TCMallocStats stats; ExtractStats(&stats, NULL); @@ -1266,7 +1437,7 @@ inline struct mallinfo do_mallinfo() { return info; } -#endif // #ifndef HAVE_STRUCT_MALLINFO +#endif // HAVE_STRUCT_MALLINFO static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED); @@ -1390,6 +1561,19 @@ extern "C" PERFTOOLS_DLL_DECL const char* tc_version( return TC_VERSION_STRING; } +// This function behaves similarly to MSVC's _set_new_mode. +// If flag is 0 (default), calls to malloc will behave normally. +// If flag is 1, calls to malloc will behave like calls to new, +// and the std_new_handler will be invoked on failure. +// Returns the previous mode. 
+extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW { + int old_mode = tc_new_mode; + tc_new_mode = flag; + return old_mode; +} + +#ifndef TCMALLOC_USING_DEBUGALLOCATION // debugallocation.cc defines its own + // CAVEAT: The code structure below ensures that MallocHook methods are always // called from the stack frame of the invoked allocation function. // heap-checker.cc depends on this to start a stack trace from @@ -1474,7 +1658,8 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { return p; } -extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) + __THROW { void* p = cpp_alloc(size, true); MallocHook::InvokeNewHook(p, size); return p; @@ -1551,21 +1736,10 @@ extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW { } #endif -extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_usable_size(void* ptr) __THROW { +extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW { return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize); } -// This function behaves similarly to MSVC's _set_new_mode. -// If flag is 0 (default), calls to malloc will behave normally. -// If flag is 1, calls to malloc will behave like calls to new, -// and the std_new_handler will be invoked on failure. -// Returns the previous mode. -extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW { - int old_mode = tc_new_mode; - tc_new_mode = flag; - return old_mode; -} - // Override __libc_memalign in libc on linux boxes specially. // They have a bug in libc that causes them to (very rarely) allocate @@ -1574,7 +1748,6 @@ extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW { // This function is an exception to the rule of calling MallocHook method // from the stack frame of the allocation function; // heap-checker handles this special case explicitly. 
-#ifndef TCMALLOC_FOR_DEBUGALLOCATION static void *MemalignOverride(size_t align, size_t size, const void *caller) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1585,7 +1758,7 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) return result; } void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; -#endif // #ifndef TCMALLOC_FOR_DEBUGALLOCATION +#endif // TCMALLOC_USING_DEBUGALLOCATION // ---Double free() debugging implementation ----------------------------------- // We will put a mark at the extreme end of each allocation block. We make diff --git a/third_party/tcmalloc/chromium/src/tests/current_allocated_bytes_test.cc b/third_party/tcmalloc/chromium/src/tests/current_allocated_bytes_test.cc new file mode 100644 index 0000000..8188e7b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/current_allocated_bytes_test.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2011, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// --- +// +// Author: Craig Silverstein + +// This tests the accounting done by tcmalloc. When we allocate and +// free a small buffer, the number of bytes used by the application +// before the alloc+free should match the number of bytes used after. +// However, the internal data structures used by tcmalloc will be +// quite different -- new spans will have been allocated, etc. This +// is, thus, a simple test that we account properly for the internal +// data structures, so that we report the actual application-used +// bytes properly. + +#include "config_for_unittests.h" +#include <stdlib.h> +#include <stdio.h> +#include <google/malloc_extension.h> +#include "base/logging.h" + +const char kCurrent[] = "generic.current_allocated_bytes"; + +int main() { + // We don't do accounting right when using debugallocation.cc, so + // turn off the test then. TODO(csilvers): get this working too. 
+#ifdef NDEBUG + size_t before_bytes, after_bytes; + MallocExtension::instance()->GetNumericProperty(kCurrent, &before_bytes); + free(malloc(200)); + MallocExtension::instance()->GetNumericProperty(kCurrent, &after_bytes); + + CHECK_EQ(before_bytes, after_bytes); +#endif + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc index c482187..07b8604 100644 --- a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc @@ -213,6 +213,17 @@ TEST(DebugAllocationTest, DanglingWriteAtExitTest) { *x = old_x_value; // restore x so that the test can exit successfully. } +TEST(DebugAllocationTest, StackTraceWithDanglingWriteAtExitTest) { + int *x = new int; + delete x; + int old_x_value = *x; + *x = 1; + // verify that we also get a stack trace when we have a dangling write. + // The " @ " is part of the stack trace output. + IF_DEBUG_EXPECT_DEATH(exit(0), " @ .*main"); + *x = old_x_value; // restore x so that the test can exit successfully. +} + static size_t CurrentlyAllocatedBytes() { size_t value; CHECK(MallocExtension::instance()->GetNumericProperty( @@ -259,27 +270,24 @@ TEST(DebugAllocationTest, GetAllocatedSizeTest) { } TEST(DebugAllocationTest, HugeAlloc) { - const size_t kTooBig = ~static_cast<size_t>(0); + // This must not be a const variable so it doesn't form an + // integral-constant-expression which can be *statically* rejected by the + // compiler as too large for the allocation. + size_t kTooBig = ~static_cast<size_t>(0); void* a = NULL; - char* b = NULL; #ifndef NDEBUG a = malloc(kTooBig); EXPECT_EQ(NULL, a); - b = NULL; - IF_DEBUG_EXPECT_DEATH(b = new char[kTooBig], - "Unable to allocate.*new\\[\\] failed\\."); - EXPECT_EQ(NULL, b); // kAlsoTooBig is small enough not to get caught by debugallocation's check, - // but will still fall through to tcmalloc's check. 
- const size_t kAlsoTooBig = kTooBig - 1024; + // but will still fall through to tcmalloc's check. This must also be + // a non-const variable. See kTooBig for more details. + size_t kAlsoTooBig = kTooBig - 1024; a = malloc(kAlsoTooBig); EXPECT_EQ(NULL, a); - IF_DEBUG_EXPECT_DEATH(b = new char[kAlsoTooBig], "Unable to allocate.*new failed"); - EXPECT_EQ(NULL, b); #endif } diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.sh b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.sh index 2568d54..faa6c79 100644 --- a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.sh +++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.sh @@ -52,21 +52,38 @@ num_failures=0 # Increments num_failures if the death test does not succeed. OneDeathTest() { "$DEBUGALLOCATION_TEST" "$1" 2>&1 | { - read regex_line - regex=`expr "$regex_line" : "Expected regex:\(.*\)"` - test -z "$regex" && echo "done" # no regex line, not a death-case - grep "$regex" >/dev/null 2>&1 # pass the rest of the lines through grep - } || num_failures=`expr $num_failures + 1` + regex_line='dummy' + # Normally the regex_line is the first line of output, but not + # always (if tcmalloc itself does any logging to stderr). + while test -n "$regex_line"; do + read regex_line + regex=`expr "$regex_line" : "Expected regex:\(.*\)"` + test -n "$regex" && break # found the regex line + done + test -z "$regex" && echo "done" || grep "$regex" 2>&1 + } } death_test_num=0 # which death test to run -while test -z `OneDeathTest "$death_test_num"`; do - echo "Done with death test $death_test_num" +while :; do # same as 'while true', but more portable + echo -n "Running death test $death_test_num..." + output="`OneDeathTest $death_test_num`" + case $output in + # Empty string means grep didn't find anything. 
+ "") echo "FAILED"; num_failures=`expr $num_failures + 1`;; + "done"*) echo "done with death tests"; break;; + # Any other string means grep found something, like it ought to. + *) echo "OK";; + esac death_test_num=`expr $death_test_num + 1` done # Test the non-death parts of the test too -if ! "$DEBUGALLOCATION_TEST"; then +echo -n "Running non-death tests..." +if "$DEBUGALLOCATION_TEST"; then + echo "OK" +else + echo "FAILED" num_failures=`expr $num_failures + 1` fi diff --git a/third_party/tcmalloc/chromium/src/tests/frag_unittest.cc b/third_party/tcmalloc/chromium/src/tests/frag_unittest.cc index 08494b4..5ba02bd 100644 --- a/third_party/tcmalloc/chromium/src/tests/frag_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/frag_unittest.cc @@ -44,13 +44,16 @@ #endif #include <vector> #include "base/logging.h" +#include "common.h" #include <google/malloc_extension.h> using std::vector; int main(int argc, char** argv) { - static const int kAllocSize = 36<<10; // Bigger than tcmalloc page size - static const int kTotalAlloc = 400 << 20; // Allocate 400MB in total + // Make kAllocSize one page larger than the maximum small object size. + static const int kAllocSize = kMaxSize + kPageSize; + // Allocate 400MB in total. + static const int kTotalAlloc = 400 << 20; static const int kAllocIterations = kTotalAlloc / kAllocSize; // Allocate lots of objects @@ -59,6 +62,11 @@ int main(int argc, char** argv) { saved[i] = new char[kAllocSize]; } + // Check the current "slack". + size_t slack_before; + MallocExtension::instance()->GetNumericProperty("tcmalloc.slack_bytes", + &slack_before); + // Free alternating ones to fragment heap size_t free_bytes = 0; for (int i = 0; i < saved.size(); i += 2) { @@ -66,10 +74,13 @@ int main(int argc, char** argv) { free_bytes += kAllocSize; } - // Check that slack is within 10% of expected - size_t slack; + // Check that slack delta is within 10% of expected. 
+ size_t slack_after; MallocExtension::instance()->GetNumericProperty("tcmalloc.slack_bytes", - &slack); + &slack_after); + CHECK_GE(slack_after, slack_before); + size_t slack = slack_after - slack_before; + CHECK_GT(double(slack), 0.9*free_bytes); CHECK_LT(double(slack), 1.1*free_bytes); diff --git a/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.cc b/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.cc index 3f4e7f1..404c9f1 100644 --- a/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.cc @@ -76,11 +76,6 @@ #include <sys/mman.h> #endif #include <fcntl.h> // for open(), close() -// FreeBSD has malloc.h, but complains if you use it -#if defined(HAVE_MALLOC_H) && !defined(__FreeBSD__) -#include <malloc.h> -#endif - #ifdef HAVE_EXECINFO_H #include <execinfo.h> // backtrace #endif @@ -91,15 +86,17 @@ #include <pwd.h> #endif +#include <algorithm> #include <iostream> // for cout #include <iomanip> // for hex -#include <set> -#include <map> #include <list> +#include <map> #include <memory> -#include <vector> +#include <set> #include <string> +#include <vector> +#include "base/commandlineflags.h" #include "base/googleinit.h" #include "base/logging.h" #include "base/commandlineflags.h" @@ -291,7 +288,8 @@ static void Use(T** foo) { // Arbitrary value, but not such that xor'ing with it is likely // to map one valid pointer to another valid pointer: -static const uintptr_t kHideMask = 0xF03A5F7B; +static const uintptr_t kHideMask = + static_cast<uintptr_t>(0xF03A5F7BF03A5F7BLL); // Helpers to hide a pointer from live data traversal. 
// We just xor the pointer so that (with high probability) @@ -683,7 +681,7 @@ static void ScopedDisabledLeaks() { HeapLeakChecker::Disabler disabler; AllocHidden(3 * sizeof(int)); TransLeaks(); - malloc(10); // Direct leak + (void)malloc(10); // Direct leak } // have different disabled leaks diff --git a/third_party/tcmalloc/chromium/src/tests/low_level_alloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/low_level_alloc_unittest.cc index f98f8a5..4228e12 100644 --- a/third_party/tcmalloc/chromium/src/tests/low_level_alloc_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/low_level_alloc_unittest.cc @@ -146,17 +146,12 @@ static void Test(bool use_new_arena, bool call_malloc_hook, int n) { // used for counting allocates and frees static int32 allocates; static int32 frees; -static MallocHook::NewHook old_alloc_hook; -static MallocHook::DeleteHook old_free_hook; // called on each alloc if kCallMallocHook specified static void AllocHook(const void *p, size_t size) { if (using_low_level_alloc) { allocates++; } - if (old_alloc_hook != 0) { - (*old_alloc_hook)(p, size); - } } // called on each free if kCallMallocHook specified @@ -164,9 +159,6 @@ static void FreeHook(const void *p) { if (using_low_level_alloc) { frees++; } - if (old_free_hook != 0) { - (*old_free_hook)(p); - } } int main(int argc, char *argv[]) { @@ -177,8 +169,8 @@ int main(int argc, char *argv[]) { return 1; } - old_alloc_hook = MallocHook::SetNewHook(AllocHook); - old_free_hook = MallocHook::SetDeleteHook(FreeHook); + CHECK(MallocHook::AddNewHook(&AllocHook)); + CHECK(MallocHook::AddDeleteHook(&FreeHook)); CHECK_EQ(allocates, 0); CHECK_EQ(frees, 0); Test(false, false, 50000); @@ -198,7 +190,7 @@ int main(int argc, char *argv[]) { } } printf("\nPASS\n"); - CHECK_EQ(MallocHook::SetNewHook(old_alloc_hook), AllocHook); - CHECK_EQ(MallocHook::SetDeleteHook(old_free_hook), FreeHook); + CHECK(MallocHook::RemoveNewHook(&AllocHook)); + CHECK(MallocHook::RemoveDeleteHook(&FreeHook)); return 0; } 
diff --git a/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c b/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c index b6319a1..e384b76 100644 --- a/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c +++ b/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c @@ -72,8 +72,12 @@ void TestMallocHook(void) { } #endif - MallocHook_SetNewHook(&TestNewHook); - MallocHook_SetDeleteHook(&TestDeleteHook); + if (!MallocHook_AddNewHook(&TestNewHook)) { + FAIL("Failed to add new hook"); + } + if (!MallocHook_AddDeleteHook(&TestDeleteHook)) { + FAIL("Failed to add delete hook"); + } free(malloc(10)); free(malloc(20)); if (g_new_hook_calls != 2) { @@ -82,6 +86,12 @@ void TestMallocHook(void) { if (g_delete_hook_calls != 2) { FAIL("Wrong number of calls to the delete hook"); } + if (!MallocHook_RemoveNewHook(&TestNewHook)) { + FAIL("Failed to remove new hook"); + } + if (!MallocHook_RemoveDeleteHook(&TestDeleteHook)) { + FAIL("Failed to remove delete hook"); + } } void TestMallocExtension(void) { diff --git a/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc b/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc index ef76766..0bd85ad 100644 --- a/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc @@ -39,6 +39,8 @@ #include <google/malloc_extension.h> #include <google/malloc_extension_c.h> +using STL_NAMESPACE::vector; + int main(int argc, char** argv) { void* a = malloc(1000); diff --git a/third_party/tcmalloc/chromium/src/tests/malloc_hook_test.cc b/third_party/tcmalloc/chromium/src/tests/malloc_hook_test.cc new file mode 100644 index 0000000..dc65b68 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/malloc_hook_test.cc @@ -0,0 +1,345 @@ +// Copyright (c) 2011, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// ---- +// Author: llib@google.com (Bill Clarke) + +#include "config_for_unittests.h" +#include <assert.h> +#include <stdio.h> +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <algorithm> +#include <string> +#include <vector> +#include <google/malloc_hook.h> +#include "malloc_hook-inl.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "base/sysinfo.h" +#include "tests/testutil.h" + +namespace { + +using std::string; +using std::vector; + +vector<void (*)()> g_testlist; // the tests to run + +#define TEST(a, b) \ + struct Test_##a##_##b { \ + Test_##a##_##b() { g_testlist.push_back(&Run); } \ + static void Run(); \ + }; \ + static Test_##a##_##b g_test_##a##_##b; \ + void Test_##a##_##b::Run() + + +static int RUN_ALL_TESTS() { + vector<void (*)()>::const_iterator it; + for (it = g_testlist.begin(); it != g_testlist.end(); ++it) { + (*it)(); // The test will error-exit if there's a problem. + } + fprintf(stderr, "\nPassed %d tests\n\nPASS\n", + static_cast<int>(g_testlist.size())); + return 0; +} + +using base::internal::kHookListMaxValues; + +// Since HookList is a template and is defined in malloc_hook.cc, we can only +// use an instantiation of it from malloc_hook.cc. We then reinterpret those +// values as integers for testing. 
+typedef base::internal::HookList<MallocHook::NewHook> TestHookList; + +int TestHookList_Traverse(const TestHookList& list, int* output_array, int n) { + MallocHook::NewHook values_as_hooks[kHookListMaxValues]; + int result = list.Traverse(values_as_hooks, std::min(n, kHookListMaxValues)); + for (int i = 0; i < result; ++i) { + output_array[i] = reinterpret_cast<const int&>(values_as_hooks[i]); + } + return result; +} + +bool TestHookList_Add(TestHookList* list, int val) { + return list->Add(reinterpret_cast<MallocHook::NewHook>(val)); +} + +bool TestHookList_Remove(TestHookList* list, int val) { + return list->Remove(reinterpret_cast<MallocHook::NewHook>(val)); +} + +// Note that this is almost the same as INIT_HOOK_LIST in malloc_hook.cc without +// the cast. +#define INIT_HOOK_LIST(initial_value) { 1, { initial_value } } + +TEST(HookListTest, InitialValueExists) { + TestHookList list = INIT_HOOK_LIST(69); + int values[2] = { 0, 0 }; + EXPECT_EQ(1, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(69, values[0]); + EXPECT_EQ(1, list.priv_end); +} + +TEST(HookListTest, CanRemoveInitialValue) { + TestHookList list = INIT_HOOK_LIST(69); + ASSERT_TRUE(TestHookList_Remove(&list, 69)); + EXPECT_EQ(0, list.priv_end); + + int values[2] = { 0, 0 }; + EXPECT_EQ(0, TestHookList_Traverse(list, values, 2)); +} + +TEST(HookListTest, AddAppends) { + TestHookList list = INIT_HOOK_LIST(69); + ASSERT_TRUE(TestHookList_Add(&list, 42)); + EXPECT_EQ(2, list.priv_end); + + int values[2] = { 0, 0 }; + EXPECT_EQ(2, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(69, values[0]); + EXPECT_EQ(42, values[1]); +} + +TEST(HookListTest, RemoveWorksAndWillClearSize) { + TestHookList list = INIT_HOOK_LIST(69); + ASSERT_TRUE(TestHookList_Add(&list, 42)); + + ASSERT_TRUE(TestHookList_Remove(&list, 69)); + EXPECT_EQ(2, list.priv_end); + + int values[2] = { 0, 0 }; + EXPECT_EQ(1, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(42, values[0]); + + ASSERT_TRUE(TestHookList_Remove(&list, 
42)); + EXPECT_EQ(0, list.priv_end); + EXPECT_EQ(0, TestHookList_Traverse(list, values, 2)); +} + +TEST(HookListTest, AddPrependsAfterRemove) { + TestHookList list = INIT_HOOK_LIST(69); + ASSERT_TRUE(TestHookList_Add(&list, 42)); + + ASSERT_TRUE(TestHookList_Remove(&list, 69)); + EXPECT_EQ(2, list.priv_end); + + ASSERT_TRUE(TestHookList_Add(&list, 7)); + EXPECT_EQ(2, list.priv_end); + + int values[2] = { 0, 0 }; + EXPECT_EQ(2, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(7, values[0]); + EXPECT_EQ(42, values[1]); +} + +TEST(HookListTest, InvalidAddRejected) { + TestHookList list = INIT_HOOK_LIST(69); + EXPECT_FALSE(TestHookList_Add(&list, 0)); + + int values[2] = { 0, 0 }; + EXPECT_EQ(1, TestHookList_Traverse(list, values, 2)); + EXPECT_EQ(69, values[0]); + EXPECT_EQ(1, list.priv_end); +} + +TEST(HookListTest, FillUpTheList) { + TestHookList list = INIT_HOOK_LIST(69); + int num_inserts = 0; + while (TestHookList_Add(&list, ++num_inserts)) + ; + EXPECT_EQ(kHookListMaxValues, num_inserts); + EXPECT_EQ(kHookListMaxValues, list.priv_end); + + int values[kHookListMaxValues + 1]; + EXPECT_EQ(kHookListMaxValues, TestHookList_Traverse(list, values, + kHookListMaxValues)); + EXPECT_EQ(69, values[0]); + for (int i = 1; i < kHookListMaxValues; ++i) { + EXPECT_EQ(i, values[i]); + } +} + +void MultithreadedTestThread(TestHookList* list, int shift, + int thread_num) { + string message; + char buf[64]; + for (int i = 1; i < 1000; ++i) { + // In each loop, we insert a unique value, check it exists, remove it, and + // check it doesn't exist. We also record some stats to log at the end of + // each thread. Each insertion location and the length of the list is + // non-deterministic (except for the very first one, over all threads, and + // after the very last one the list should be empty). + int value = (i << shift) + thread_num; + EXPECT_TRUE(TestHookList_Add(list, value)); + sched_yield(); // Ensure some more interleaving. 
+ int values[kHookListMaxValues + 1]; + int num_values = TestHookList_Traverse(*list, values, kHookListMaxValues); + EXPECT_LT(0, num_values); + int value_index; + for (value_index = 0; + value_index < num_values && values[value_index] != value; + ++value_index) + ; + EXPECT_LT(value_index, num_values); // Should have found value. + snprintf(buf, sizeof(buf), "[%d/%d; ", value_index, num_values); + message += buf; + sched_yield(); + EXPECT_TRUE(TestHookList_Remove(list, value)); + sched_yield(); + num_values = TestHookList_Traverse(*list, values, kHookListMaxValues); + for (value_index = 0; + value_index < num_values && values[value_index] != value; + ++value_index) + ; + EXPECT_EQ(value_index, num_values); // Should not have found value. + snprintf(buf, sizeof(buf), "%d]", num_values); + message += buf; + sched_yield(); + } + fprintf(stderr, "thread %d: %s\n", thread_num, message.c_str()); +} + +static volatile int num_threads_remaining; +static TestHookList list = INIT_HOOK_LIST(69); +static SpinLock threadcount_lock; + +void MultithreadedTestThreadRunner(int thread_num) { + // Wait for all threads to start running. + { + SpinLockHolder h(&threadcount_lock); + assert(num_threads_remaining > 0); + --num_threads_remaining; + + // We should use condvars and the like, but for this test, we'll + // go simple and busy-wait. + while (num_threads_remaining > 0) { + threadcount_lock.Unlock(); + SleepForMilliseconds(100); + threadcount_lock.Lock(); + } + } + + // shift is the smallest number such that (1<<shift) > kHookListMaxValues + int shift = 0; + for (int i = kHookListMaxValues; i > 0; i >>= 1) + shift += 1; + + MultithreadedTestThread(&list, shift, thread_num); +} + + +TEST(HookListTest, MultithreadedTest) { + ASSERT_TRUE(TestHookList_Remove(&list, 69)); + ASSERT_EQ(0, list.priv_end); + + // Run kHookListMaxValues thread, each running MultithreadedTestThread. + // First, we need to set up the rest of the globals. 
+ num_threads_remaining = kHookListMaxValues; // a global var + RunManyThreadsWithId(&MultithreadedTestThreadRunner, num_threads_remaining, + 1 << 15); + + int values[kHookListMaxValues + 1]; + EXPECT_EQ(0, TestHookList_Traverse(list, values, kHookListMaxValues)); + EXPECT_EQ(0, list.priv_end); +} + +#ifdef HAVE_MMAP +int mmap_calls = 0; +int mmap_matching_calls = 0; +int munmap_calls = 0; +int munmap_matching_calls = 0; +const int kMmapMagicFd = 1; +void* const kMmapMagicPointer = reinterpret_cast<void*>(1); + +int MmapReplacement(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset, + void** result) { + ++mmap_calls; + if (fd == kMmapMagicFd) { + ++mmap_matching_calls; + *result = kMmapMagicPointer; + return true; + } + return false; +} + +int MunmapReplacement(const void* ptr, size_t size, int* result) { + ++munmap_calls; + if (ptr == kMmapMagicPointer) { + ++munmap_matching_calls; + *result = 0; + return true; + } + return false; +} + +TEST(MallocMookTest, MmapReplacements) { + mmap_calls = mmap_matching_calls = munmap_calls = munmap_matching_calls = 0; + MallocHook::SetMmapReplacement(&MmapReplacement); + MallocHook::SetMunmapReplacement(&MunmapReplacement); + EXPECT_EQ(kMmapMagicPointer, mmap(NULL, 1, PROT_READ, MAP_PRIVATE, + kMmapMagicFd, 0)); + EXPECT_EQ(1, mmap_matching_calls); + + char* ptr = reinterpret_cast<char*>( + mmap(NULL, 1, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0)); + EXPECT_EQ(2, mmap_calls); + EXPECT_EQ(1, mmap_matching_calls); + ASSERT_NE(MAP_FAILED, ptr); + *ptr = 'a'; + + EXPECT_EQ(0, munmap(kMmapMagicPointer, 1)); + EXPECT_EQ(1, munmap_calls); + EXPECT_EQ(1, munmap_matching_calls); + + EXPECT_EQ(0, munmap(ptr, 1)); + EXPECT_EQ(2, munmap_calls); + EXPECT_EQ(1, munmap_matching_calls); + + // The DEATH test below is flaky, because we've just munmapped the memory, + // making it available for mmap()ing again. 
There is no guarantee that it + // will stay unmapped, and in fact it gets reused ~10% of the time. + // If the area is reused, then not only we don't die, but we also corrupt + // whoever owns that memory now. + // EXPECT_DEATH(*ptr = 'a', "SIGSEGV"); +} +#endif // #ifdef HAVE_MMAP + +} // namespace + +int main(int argc, char** argv) { + return RUN_ALL_TESTS(); +} diff --git a/third_party/tcmalloc/chromium/src/tests/memalign_unittest.cc b/third_party/tcmalloc/chromium/src/tests/memalign_unittest.cc index d5b60db..b354bb4 100644 --- a/third_party/tcmalloc/chromium/src/tests/memalign_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/memalign_unittest.cc @@ -49,6 +49,7 @@ #include <unistd.h> // for getpagesize() #endif #include "tcmalloc.h" // must come early, to pick up posix_memalign +#include <assert.h> #include <stdlib.h> // defines posix_memalign #include <stdio.h> // for the printf at the end #ifdef HAVE_STDINT_H @@ -57,8 +58,13 @@ #ifdef HAVE_UNISTD_H #include <unistd.h> // for getpagesize() #endif -#ifdef HAVE_MALLOC_H -#include <malloc.h> +// Malloc can be in several places on older versions of OS X. +#if defined(HAVE_MALLOC_H) +#include <malloc.h> // for memalign() and valloc() +#elif defined(HAVE_MALLOC_MALLOC_H) +#include <malloc/malloc.h> +#elif defined(HAVE_SYS_MALLOC_H) +#include <sys/malloc.h> #endif #include "base/basictypes.h" #include "base/logging.h" diff --git a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc index 9120b78..9f5f3c87 100644 --- a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc @@ -1,11 +1,11 @@ // Copyright 2009 Google Inc. All Rights Reserved. 
// Author: fikes@google.com (Andrew Fikes) -#include <stdio.h> #include "config_for_unittests.h" +#include "page_heap.h" +#include <stdio.h> #include "base/logging.h" #include "common.h" -#include "page_heap.h" namespace { diff --git a/third_party/tcmalloc/chromium/src/tests/realloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/realloc_unittest.cc index 20edb50..4267421 100644 --- a/third_party/tcmalloc/chromium/src/tests/realloc_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/realloc_unittest.cc @@ -33,13 +33,16 @@ // Test realloc() functionality #include "config_for_unittests.h" +#include <assert.h> // for assert #include <stdio.h> -#include <stdlib.h> -#include <algorithm> // for min() +#include <stddef.h> // for size_t, NULL +#include <stdlib.h> // for free, malloc, realloc +#include <algorithm> // for min #include "base/logging.h" using std::min; + // Fill a buffer of the specified size with a predetermined pattern static void Fill(unsigned char* buffer, int n) { for (int i = 0; i < n; i++) { diff --git a/third_party/tcmalloc/chromium/src/tests/sampler_test.cc b/third_party/tcmalloc/chromium/src/tests/sampler_test.cc index fca10ac..31c87cd 100644 --- a/third_party/tcmalloc/chromium/src/tests/sampler_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/sampler_test.cc @@ -87,7 +87,7 @@ static std::string StringPrintf(const char* format, ...) { char buf[256]; // should be big enough for all logging va_list ap; va_start(ap, format); - vsnprintf(buf, sizeof(buf), format, ap); + perftools_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); return buf; } @@ -647,6 +647,11 @@ TEST(Sample, size_of_class) { LOG(INFO) << "Size of Sampler object is: " << sizeof(sampler); } +// Make sure sampling is enabled, or the tests won't work right. 
+DECLARE_int64(tcmalloc_sample_parameter); + int main(int argc, char **argv) { + if (FLAGS_tcmalloc_sample_parameter == 0) + FLAGS_tcmalloc_sample_parameter = 524288; return RUN_ALL_TESTS(); } diff --git a/third_party/tcmalloc/chromium/src/tests/sampling_test.cc b/third_party/tcmalloc/chromium/src/tests/sampling_test.cc index b75e70e..c1bd693 100644 --- a/third_party/tcmalloc/chromium/src/tests/sampling_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/sampling_test.cc @@ -45,6 +45,8 @@ using std::string; +extern "C" void* AllocateAllocate() ATTRIBUTE_NOINLINE; + extern "C" void* AllocateAllocate() { // The VLOG's are mostly to discourage inlining VLOG(1, "Allocating some more"); diff --git a/third_party/tcmalloc/chromium/src/tests/sampling_test.sh b/third_party/tcmalloc/chromium/src/tests/sampling_test.sh index 8c96bc1..2a58426 100644 --- a/third_party/tcmalloc/chromium/src/tests/sampling_test.sh +++ b/third_party/tcmalloc/chromium/src/tests/sampling_test.sh @@ -81,13 +81,13 @@ mkdir "$OUTDIR" || die "Unable to create $OUTDIR" echo "Testing heap output..." "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \ - | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ + | grep '[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" echo "OK" echo "Testing growth output..." 
"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \ - | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ + | grep '[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" echo "OK" diff --git a/third_party/tcmalloc/chromium/src/tests/system-alloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/system-alloc_unittest.cc index a160a34..c006425 100644 --- a/third_party/tcmalloc/chromium/src/tests/system-alloc_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/system-alloc_unittest.cc @@ -31,6 +31,7 @@ // Author: Arun Sharma #include "config_for_unittests.h" +#include "system-alloc.h" #include <stdio.h> #if defined HAVE_STDINT_H #include <stdint.h> // to get uintptr_t @@ -38,8 +39,10 @@ #include <inttypes.h> // another place uintptr_t might be defined #endif #include <sys/types.h> -#include "base/logging.h" -#include "system-alloc.h" +#include <algorithm> +#include "base/logging.h" // for Check_GEImpl, Check_LTImpl, etc +#include <google/malloc_extension.h> // for MallocExtension::instance +#include "common.h" // for kAddressBits class ArraySysAllocator : public SysAllocator { public: @@ -53,6 +56,11 @@ public: void* Alloc(size_t size, size_t *actual_size, size_t alignment) { invoked_ = true; + + if (size > kArraySize) { + return NULL; + } + void *result = &array_[ptr_]; uintptr_t ptr = reinterpret_cast<uintptr_t>(result); @@ -73,8 +81,9 @@ public: return reinterpret_cast<void *>(ptr); } - void DumpStats(TCMalloc_Printer* printer) { + void DumpStats() { } + void FlagsInitialized() {} private: static const int kArraySize = 8 * 1024 * 1024; @@ -87,7 +96,7 @@ const int ArraySysAllocator::kArraySize; ArraySysAllocator a; static void TestBasicInvoked() { - RegisterSystemAllocator(&a, 0); + MallocExtension::instance()->SetSystemAllocator(&a); // An allocation size that is likely to trigger the system allocator. // XXX: this is implementation specific. 
@@ -98,8 +107,31 @@ static void TestBasicInvoked() { CHECK(a.invoked_); } +#if 0 // could port this to various OSs, but won't bother for now +TEST(AddressBits, CpuVirtualBits) { + // Check that kAddressBits is as least as large as either the number of bits + // in a pointer or as the number of virtual bits handled by the processor. + // To be effective this test must be run on each processor model. + const int kPointerBits = 8 * sizeof(void*); + const int kImplementedVirtualBits = NumImplementedVirtualBits(); + + CHECK_GE(kAddressBits, min(kImplementedVirtualBits, kPointerBits)); +} +#endif + +static void TestBasicRetryFailTest() { + // Check with the allocator still works after a failed allocation. + void* p = malloc(1ULL << 50); // Asking for 1P ram + CHECK(p == NULL); + + char* q = new char[1024]; + CHECK(q != NULL); + delete [] q; +} + int main(int argc, char** argv) { TestBasicInvoked(); + TestBasicRetryFailTest(); printf("PASS\n"); return 0; diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_large_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_large_unittest.cc index 260ac29..ad3482e 100644 --- a/third_party/tcmalloc/chromium/src/tests/tcmalloc_large_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_large_unittest.cc @@ -35,12 +35,12 @@ // For 32 bits, this means allocations near 2^32 bytes and 2^31 bytes. // For 64 bits, this means allocations near 2^64 bytes and 2^63 bytes. 
-#include <stddef.h> -#include <stdlib.h> +#include <stddef.h> // for size_t, NULL +#include <stdlib.h> // for malloc, free, realloc #include <stdio.h> -#include <set> +#include <set> // for set, etc -#include "base/logging.h" +#include "base/logging.h" // for operator<<, CHECK, etc using std::set; diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc index 6b2ec26..b430460 100644 --- a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc @@ -100,17 +100,12 @@ # define cfree free // don't bother to try to test these obsolete fns # define valloc malloc # define pvalloc malloc -# ifdef PERFTOOLS_NO_ALIGNED_MALLOC -# define _aligned_malloc(size, alignment) malloc(size) -# else -# include <malloc.h> // for _aligned_malloc -# endif -# define memalign(alignment, size) _aligned_malloc(size, alignment) -// Assume if we fail, it's because of out-of-memory. -// Note, this isn't a perfect analogue: we don't enforce constraints on "align" +// I'd like to map posix_memalign to _aligned_malloc, but _aligned_malloc +// must be paired with _aligned_free (not normal free), which is too +// invasive a change to how we allocate memory here. So just bail # include <errno.h> -# define posix_memalign(pptr, align, size) \ - ((*(pptr)=_aligned_malloc(size, align)) ? 0 : ENOMEM) +# define memalign(alignment, size) malloc(size) +# define posix_memalign(pptr, align, size) ((*(pptr)=malloc(size)) ? 0 : ENOMEM) #endif // On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old @@ -126,6 +121,7 @@ using std::string; DECLARE_double(tcmalloc_release_rate); DECLARE_int32(max_free_queue_size); // in debugallocation.cc +DECLARE_int64(tcmalloc_sample_parameter); namespace testing { @@ -559,6 +555,13 @@ static void TestCalloc(size_t n, size_t s, bool ok) { // direction doesn't cause us to allocate new memory. 
static void TestRealloc() { #ifndef DEBUGALLOCATION // debug alloc doesn't try to minimize reallocs + // When sampling, we always allocate in units of page-size, which + // makes reallocs of small sizes do extra work (thus, failing these + // checks). Since sampling is random, we turn off sampling to make + // sure that doesn't happen to us here. + const int64 old_sample_parameter = FLAGS_tcmalloc_sample_parameter; + FLAGS_tcmalloc_sample_parameter = 0; // turn off sampling + int start_sizes[] = { 100, 1000, 10000, 100000 }; int deltas[] = { 1, -2, 4, -8, 16, -32, 64, -128 }; @@ -566,7 +569,7 @@ static void TestRealloc() { void* p = malloc(start_sizes[s]); CHECK(p); // The larger the start-size, the larger the non-reallocing delta. - for (int d = 0; d < s*2; ++d) { + for (int d = 0; d < (s+1) * 2; ++d) { void* new_p = realloc(p, start_sizes[s] + deltas[d]); CHECK(p == new_p); // realloc should not allocate new memory } @@ -577,6 +580,7 @@ static void TestRealloc() { } free(p); } + FLAGS_tcmalloc_sample_parameter = old_sample_parameter; #endif } @@ -688,14 +692,13 @@ static void TestNothrowNew(void* (*func)(size_t, const std::nothrow_t&)) { CHECK_GT(g_##hook_type##_calls, 0); \ g_##hook_type##_calls = 0; /* reset for next call */ \ } \ - static MallocHook::hook_type g_old_##hook_type; \ static void Set##hook_type() { \ - g_old_##hook_type = MallocHook::Set##hook_type( \ - (MallocHook::hook_type)&IncrementCallsTo##hook_type); \ + CHECK(MallocHook::Add##hook_type( \ + (MallocHook::hook_type)&IncrementCallsTo##hook_type)); \ } \ static void Reset##hook_type() { \ - CHECK_EQ(MallocHook::Set##hook_type(g_old_##hook_type), \ - (MallocHook::hook_type)&IncrementCallsTo##hook_type); \ + CHECK(MallocHook::Remove##hook_type( \ + (MallocHook::hook_type)&IncrementCallsTo##hook_type)); \ } // We do one for each hook typedef in malloc_hook.h @@ -717,11 +720,9 @@ static void TestAlignmentForSize(int size) { CHECK((p % sizeof(double)) == 0); // Must have 16-byte alignment for large 
enough objects -#ifndef DEBUGALLOCATION // debug allocation doesn't need to align like this if (size >= 16) { CHECK((p % 16) == 0); } -#endif } for (int i = 0; i < kNum; i++) { free(ptrs[i]); @@ -763,7 +764,15 @@ static void RangeCallback(void* arg, const base::MallocRange* r) { RangeCallbackState* state = reinterpret_cast<RangeCallbackState*>(arg); if (state->ptr >= r->address && state->ptr < r->address + r->length) { - CHECK_EQ(r->type, state->expected_type); + if (state->expected_type == base::MallocRange::FREE) { + // We are expecting r->type == FREE, but ReleaseMemory + // may have already moved us to UNMAPPED state instead (this happens in + // approximately 0.1% of executions). Accept either state. + CHECK(r->type == base::MallocRange::FREE || + r->type == base::MallocRange::UNMAPPED); + } else { + CHECK_EQ(r->type, state->expected_type); + } CHECK_GE(r->length, state->min_size); state->matched = true; } @@ -869,7 +878,10 @@ static void TestReleaseToSystem() { #endif // #ifndef DEBUGALLOCATION } -bool g_no_memory = false; +// On MSVC10, in release mode, the optimizer convinces itself +// g_no_memory is never changed (I guess it doesn't realize OnNoMemory +// might be called). Work around this by setting the var volatile. 
+volatile bool g_no_memory = false; std::new_handler g_old_handler = NULL; static void OnNoMemory() { g_no_memory = true; @@ -997,70 +1009,108 @@ static int RunAllTests(int argc, char** argv) { SetDeleteHook(); // ditto void* p1 = malloc(10); + CHECK(p1 != NULL); // force use of this variable VerifyNewHookWasCalled(); + // Also test the non-standard tc_malloc_size + size_t actual_p1_size = tc_malloc_size(p1); + CHECK_GE(actual_p1_size, 10); + CHECK_LT(actual_p1_size, 100000); // a reasonable upper-bound, I think free(p1); VerifyDeleteHookWasCalled(); + p1 = calloc(10, 2); + CHECK(p1 != NULL); VerifyNewHookWasCalled(); - p1 = realloc(p1, 30); + // We make sure we realloc to a big size, since some systems (OS + // X) will notice if the realloced size continues to fit into the + // malloc-block and make this a noop if so. + p1 = realloc(p1, 30000); + CHECK(p1 != NULL); VerifyNewHookWasCalled(); VerifyDeleteHookWasCalled(); cfree(p1); // synonym for free VerifyDeleteHookWasCalled(); CHECK_EQ(posix_memalign(&p1, sizeof(p1), 40), 0); + CHECK(p1 != NULL); VerifyNewHookWasCalled(); free(p1); VerifyDeleteHookWasCalled(); p1 = memalign(sizeof(p1) * 2, 50); + CHECK(p1 != NULL); VerifyNewHookWasCalled(); free(p1); VerifyDeleteHookWasCalled(); + // Windows has _aligned_malloc. Let's test that that's captured too. 
+#if (defined(_MSC_VER) || defined(__MINGW32__)) && !defined(PERFTOOLS_NO_ALIGNED_MALLOC) + p1 = _aligned_malloc(sizeof(p1) * 2, 64); + VerifyNewHookWasCalled(); + _aligned_free(p1); + VerifyDeleteHookWasCalled(); +#endif + p1 = valloc(60); + CHECK(p1 != NULL); VerifyNewHookWasCalled(); free(p1); VerifyDeleteHookWasCalled(); p1 = pvalloc(70); + CHECK(p1 != NULL); VerifyNewHookWasCalled(); free(p1); VerifyDeleteHookWasCalled(); char* p2 = new char; + CHECK(p2 != NULL); VerifyNewHookWasCalled(); delete p2; VerifyDeleteHookWasCalled(); p2 = new char[100]; + CHECK(p2 != NULL); VerifyNewHookWasCalled(); delete[] p2; VerifyDeleteHookWasCalled(); p2 = new(std::nothrow) char; + CHECK(p2 != NULL); VerifyNewHookWasCalled(); delete p2; VerifyDeleteHookWasCalled(); p2 = new(std::nothrow) char[100]; + CHECK(p2 != NULL); VerifyNewHookWasCalled(); delete[] p2; VerifyDeleteHookWasCalled(); // Another way of calling operator new p2 = static_cast<char*>(::operator new(100)); + CHECK(p2 != NULL); VerifyNewHookWasCalled(); ::operator delete(p2); VerifyDeleteHookWasCalled(); // Try to call nothrow's delete too. Compilers use this. p2 = static_cast<char*>(::operator new(100, std::nothrow)); + CHECK(p2 != NULL); VerifyNewHookWasCalled(); ::operator delete(p2, std::nothrow); VerifyDeleteHookWasCalled(); + // Try strdup(), which the system allocates but we must free. If + // all goes well, libc will use our malloc! + p2 = strdup("test"); + CHECK(p2 != NULL); + VerifyNewHookWasCalled(); + free(p2); + VerifyDeleteHookWasCalled(); + + // Test mmap too: both anonymous mmap and mmap of a file // Note that for right now we only override mmap on linux // systems, so those are the only ones for which we check. 
@@ -1072,8 +1122,10 @@ static int RunAllTests(int argc, char** argv) { int size = 8192*2; p1 = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + CHECK(p1 != NULL); VerifyMmapHookWasCalled(); p1 = mremap(p1, size, size/2, 0); + CHECK(p1 != NULL); VerifyMremapHookWasCalled(); size /= 2; munmap(p1, size); @@ -1082,6 +1134,7 @@ static int RunAllTests(int argc, char** argv) { int fd = open("/dev/zero", O_RDONLY); CHECK_GE(fd, 0); // make sure the open succeeded p1 = mmap(NULL, 8192, PROT_READ, MAP_SHARED, fd, 0); + CHECK(p1 != NULL); VerifyMmapHookWasCalled(); munmap(p1, 8192); VerifyMunmapHookWasCalled(); @@ -1100,11 +1153,14 @@ static int RunAllTests(int argc, char** argv) { #if defined(HAVE_SBRK) && defined(__linux) && \ (defined(__i386__) || defined(__x86_64__)) p1 = sbrk(8192); + CHECK(p1 != NULL); VerifySbrkHookWasCalled(); p1 = sbrk(-8192); + CHECK(p1 != NULL); VerifySbrkHookWasCalled(); // However, sbrk hook should *not* be called with sbrk(0) p1 = sbrk(0); + CHECK(p1 != NULL); CHECK_EQ(g_SbrkHook_calls, 0); #else // this is just to quiet the compiler: make sure all fns are called IncrementCallsToSbrkHook(); diff --git a/third_party/tcmalloc/chromium/src/tests/testutil.cc b/third_party/tcmalloc/chromium/src/tests/testutil.cc index f2b8592..745de99 100644 --- a/third_party/tcmalloc/chromium/src/tests/testutil.cc +++ b/third_party/tcmalloc/chromium/src/tests/testutil.cc @@ -80,7 +80,7 @@ struct FunctionAndId { int id; }; -#if defined(NO_THREADS) || !(defined(HAVE_PTHREADS) || defined(_WIN32)) +#if defined(NO_THREADS) || !(defined(HAVE_PTHREAD) || defined(_WIN32)) extern "C" void RunThread(void (*fn)()) { (*fn)(); diff --git a/third_party/tcmalloc/chromium/src/thread_cache.cc b/third_party/tcmalloc/chromium/src/thread_cache.cc index 64f4deb..b00e3b4 100644 --- a/third_party/tcmalloc/chromium/src/thread_cache.cc +++ b/third_party/tcmalloc/chromium/src/thread_cache.cc @@ -31,18 +31,20 @@ // Author: Ken Ashcraft <opensource@google.com> 
#include <config.h> -#ifdef HAVE_INTTYPES_H -#include <inttypes.h> -#endif -#include <algorithm> // for min and max #include "thread_cache.h" +#include <string.h> // for memcpy +#include <algorithm> // for max, min +#include "base/commandlineflags.h" // for SpinLockHolder +#include "base/spinlock.h" // for SpinLockHolder +#include "central_freelist.h" // for CentralFreeListPadded #include "maybe_threads.h" using std::min; using std::max; DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, - EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", 16<<20), + EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", + kDefaultOverallThreadCacheSize), "Bound on the total amount of bytes allocated to " "thread caches. This bound is not strict, so it is possible " "for the cache to go over this bound in certain circumstances. "); @@ -311,16 +313,6 @@ void ThreadCache::InitTSD() { ASSERT(!tsd_inited_); perftools_pthread_key_create(&heap_key_, DestroyThreadCache); tsd_inited_ = true; - - // We may have used a fake pthread_t for the main thread. Fix it. - pthread_t zero; - memset(&zero, 0, sizeof(zero)); - SpinLockHolder h(Static::pageheap_lock()); - for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { - if (h->tid_ == zero) { - h->tid_ = pthread_self(); - } - } } ThreadCache* ThreadCache::CreateCacheIfNecessary() { @@ -328,14 +320,17 @@ ThreadCache* ThreadCache::CreateCacheIfNecessary() { ThreadCache* heap = NULL; { SpinLockHolder h(Static::pageheap_lock()); - - // Early on in glibc's life, we cannot even call pthread_self() - pthread_t me; - if (!tsd_inited_) { - memset(&me, 0, sizeof(me)); - } else { - me = pthread_self(); - } + // On very old libc's, this call may crash if it happens too + // early. No libc using NPTL should be affected. 
If there + // is a crash here, we could use code (on linux, at least) + // to detect NPTL vs LinuxThreads: + // http://www.redhat.com/archives/phil-list/2003-April/msg00038.html + // If we detect not-NPTL, we could execute the old code from + // http://google-perftools.googlecode.com/svn/tags/google-perftools-1.7/src/thread_cache.cc + // that avoids calling pthread_self too early. The problem with + // that code is it caused a race condition when tcmalloc is linked + // in statically and other libraries spawn threads before main. + const pthread_t me = pthread_self(); // This may be a recursive malloc call from pthread_setspecific() // In that case, the heap for this thread has already been created diff --git a/third_party/tcmalloc/chromium/src/thread_cache.h b/third_party/tcmalloc/chromium/src/thread_cache.h index 1165447..1742d5b 100644 --- a/third_party/tcmalloc/chromium/src/thread_cache.h +++ b/third_party/tcmalloc/chromium/src/thread_cache.h @@ -35,8 +35,13 @@ #include <config.h> #ifdef HAVE_PTHREAD -#include <pthread.h> +#include <pthread.h> // for pthread_t, pthread_key_t #endif +#include <stddef.h> // for size_t, NULL +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uint32_t, uint64_t +#endif +#include <sys/types.h> // for ssize_t #include "common.h" #include "linked_list.h" #include "maybe_threads.h" @@ -44,6 +49,13 @@ #include "sampler.h" #include "static_vars.h" +#include "common.h" // for SizeMap, kMaxSize, etc +#include "internal_logging.h" // for ASSERT, etc +#include "linked_list.h" // for SLL_Pop, SLL_PopRange, etc +#include "page_heap_allocator.h" // for PageHeapAllocator +#include "sampler.h" // for Sampler +#include "static_vars.h" // for Static + namespace tcmalloc { // Even if we have support for thread-local storage in the compiler @@ -63,9 +75,6 @@ inline bool KernelSupportsTLS() { class ThreadCache { public: - // Default bound on the total amount of thread caches. 
- static const size_t kDefaultOverallThreadCacheSize = 16 << 20; - // All ThreadCache objects are kept in a linked list (for stats collection) ThreadCache* next_; ThreadCache* prev_; @@ -213,19 +222,6 @@ class ThreadCache { } }; - // The number of bytes one ThreadCache will steal from another when - // the first ThreadCache is forced to Scavenge(), delaying the - // next call to Scavenge for this thread. - static const size_t kStealAmount = 1 << 16; - - // Lower and upper bounds on the per-thread cache sizes - static const size_t kMinThreadCacheSize = kMaxSize * 2; //kStealAmount; - static const size_t kMaxThreadCacheSize = 2 << 20; - - // The number of times that a deallocation can cause a freelist to - // go over its max_length() before shrinking max_length(). - static const int kMaxOverages = 3; - // Gets and returns an object from the central cache, and, if possible, // also adds some objects of that size class to this thread cache. void* FetchFromCentralCache(size_t cl, size_t byte_size); diff --git a/third_party/tcmalloc/chromium/src/windows/config.h b/third_party/tcmalloc/chromium/src/windows/config.h index b5d9bb6..1d93c4f 100644 --- a/third_party/tcmalloc/chromium/src/windows/config.h +++ b/third_party/tcmalloc/chromium/src/windows/config.h @@ -92,7 +92,7 @@ #undef HAVE_LINUX_PTRACE_H /* Define to 1 if you have the <malloc.h> header file. */ -#undef HAVE_MALLOC_H +#define HAVE_MALLOC_H 1 /* Define to 1 if you have the <memory.h> header file. */ #undef HAVE_MEMORY_H @@ -136,6 +136,9 @@ /* Define to 1 if the system has the type `struct mallinfo'. */ #undef HAVE_STRUCT_MALLINFO +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + /* Define to 1 if you have the <sys/prctl.h> header file. */ #undef HAVE_SYS_PRCTL_H @@ -154,7 +157,7 @@ /* Define to 1 if you have the <sys/types.h> header file. */ #define HAVE_SYS_TYPES_H 1 -/* Define to 1 if you have the <sys/ucontext.h> header file. 
*/ +/* <sys/ucontext.h> is broken on redhat 7 */ #undef HAVE_SYS_UCONTEXT_H /* Define to 1 if you have the <sys/wait.h> header file. */ @@ -172,6 +175,9 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H +/* Define to 1 if you have the <valgrind.h> header file. */ +#undef HAVE_VALGRIND_H + /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ @@ -187,6 +193,10 @@ /* Define to 1 if int32_t is equivalent to intptr_t */ #undef INT32_EQUALS_INTPTR +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#undef LT_OBJDIR + /* Define to 1 if your C compiler doesn't accept -c and -o together. */ #undef NO_MINUS_C_MINUS_O @@ -200,7 +210,7 @@ #define PACKAGE_NAME "google-perftools" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "google-perftools 1.4" +#define PACKAGE_STRING "google-perftools 1.7" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "google-perftools" @@ -209,7 +219,7 @@ #undef PACKAGE_URL /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.4" +#define PACKAGE_VERSION "1.7" /* How to access the PC from a struct ucontext */ #undef PC_FROM_UCONTEXT diff --git a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h index 663b7f9..f6c17f5 100644 --- a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h +++ b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h @@ -35,12 +35,6 @@ #ifndef TCMALLOC_TCMALLOC_H_ #define TCMALLOC_TCMALLOC_H_ -// Define the version number so folks can check against it -#define TC_VERSION_MAJOR 1 -#define TC_VERSION_MINOR 4 -#define TC_VERSION_PATCH "" -#define TC_VERSION_STRING "google-perftools 1.4" - // __THROW is defined in glibc systems. It means, counter-intuitively, // "This function will never throw an exception." 
It's an optional // optimization tool, but we may need to use it to match glibc prototypes. @@ -48,6 +42,11 @@ # define __THROW /* __THROW is just an optimization, so ok to make it "" */ #endif +// Define the version number so folks can check against it +#define TC_VERSION_MAJOR 1 +#define TC_VERSION_MINOR 7 +#define TC_VERSION_PATCH "" +#define TC_VERSION_STRING "google-perftools 1.7" #include <stdlib.h> // for struct mallinfo, if it's defined @@ -90,6 +89,13 @@ extern "C" { PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW; #endif + // This is an alias for MallocExtension::instance()->GetAllocatedSize(). + // It is equivalent to + // OS X: malloc_size() + // glibc: malloc_usable_size() + // Windows: _msize() + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW; + #ifdef __cplusplus PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); diff --git a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h.in b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h.in new file mode 100644 index 0000000..a031b35 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h.in @@ -0,0 +1,116 @@ +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h.in file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. 
+#ifndef __THROW /* I guess we're not on a glibc system */ +# define __THROW /* __THROW is just an optimization, so ok to make it "" */ +#endif + +// Define the version number so folks can check against it +#define TC_VERSION_MAJOR @TC_VERSION_MAJOR@ +#define TC_VERSION_MINOR @TC_VERSION_MINOR@ +#define TC_VERSION_PATCH "@TC_VERSION_PATCH@" +#define TC_VERSION_STRING "google-perftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@" + +#include <stdlib.h> // for struct mallinfo, if it's defined + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +#include <new> // for std::nothrow_t + +extern "C" { +#endif + // Returns a human-readable version string. If major, minor, + // and/or patch are not NULL, they are set to the major version, + // minor version, and patch-code (a string, usually ""). 
+ PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) __THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) __THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) __THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) __THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) __THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) __THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) __THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW; +#if 0 + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW; +#endif + + // This is an alias for MallocExtension::instance()->GetAllocatedSize(). + // It is equivalent to + // OS X: malloc_size() + // glibc: malloc_usable_size() + // Windows: _msize() + PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW; + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) __THROW; +} +#endif + +#endif // #ifndef TCMALLOC_TCMALLOC_H_ diff --git a/third_party/tcmalloc/chromium/src/windows/mingw.h b/third_party/tcmalloc/chromium/src/windows/mingw.h index 
e69b5da..747b285 100644 --- a/third_party/tcmalloc/chromium/src/windows/mingw.h +++ b/third_party/tcmalloc/chromium/src/windows/mingw.h @@ -45,10 +45,23 @@ # define PERFTOOLS_NO_ALIGNED_MALLOC 1 #endif +// This must be defined before the windows.h is included. We need at +// least 0x0400 for mutex.h to have access to TryLock, and at least +// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. +// (This latter is an optimization we could take out if need be.) +#ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0501 +#endif + #include "windows/port.h" #define HAVE_SNPRINTF 1 +// Some mingw distributions have a pthreads wrapper, but it doesn't +// work as well as native windows spinlocks (at least for us). So +// pretend the pthreads wrapper doesn't exist, even when it does. +#undef HAVE_PTHREAD + #endif /* __MINGW32__ */ #endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */ diff --git a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc index deb841b..f837e7a 100644 --- a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc +++ b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc @@ -122,6 +122,11 @@ const char kMangledDeleteArrayNothrow[] = "??_V@YAXPAXABUnothrow_t@std@@@Z"; extern "C" PERFTOOLS_DLL_DECL void _tcmalloc(); void _tcmalloc() { } +// This is the version needed for windows x64, which has a different +// decoration scheme which doesn't auto-add a leading underscore. 
+extern "C" PERFTOOLS_DLL_DECL void __tcmalloc(); +void __tcmalloc() { } + namespace { // most everything here is in an unnamed namespace typedef void (*GenericFnPtr)(); @@ -175,7 +180,7 @@ class LibcInfo { kNew, kNewArray, kDelete, kDeleteArray, kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow, // These are windows-only functions from malloc.h - k_Msize, k_Expand, k_Aligned_malloc, k_Aligned_free, + k_Msize, k_Expand, kNumFunctions }; @@ -274,12 +279,12 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo { const std::nothrow_t&) __THROW; static size_t Perftools__msize(void *ptr) __THROW; static void* Perftools__expand(void *ptr, size_t size) __THROW; - static void* Perftools__aligned_malloc(size_t size, size_t alignment) __THROW; - static void Perftools__aligned_free(void *ptr) __THROW; // malloc.h also defines these functions: + // _aligned_malloc, _aligned_free, // _recalloc, _aligned_offset_malloc, _aligned_realloc, _aligned_recalloc // _aligned_offset_realloc, _aligned_offset_recalloc, _malloca, _freea // But they seem pretty obscure, and I'm fine not overriding them for now. + // It may be they all call into malloc/free anyway. }; // This is a subset of MODDULEENTRY32, that we need for patching. @@ -300,10 +305,19 @@ struct ModuleEntryCopy { ModuleEntryCopy(const MODULEINFO& mi) { this->modBaseAddr = mi.lpBaseOfDll; this->modBaseSize = mi.SizeOfImage; - for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) - rgProcAddresses[i] = (GenericFnPtr)::GetProcAddress( + LPVOID modEndAddr = (char*)mi.lpBaseOfDll + mi.SizeOfImage; + for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) { + FARPROC target = ::GetProcAddress( reinterpret_cast<const HMODULE>(mi.lpBaseOfDll), LibcInfo::function_name(i)); + // Sometimes a DLL forwards a function to a function in another + // DLL. We don't want to patch those forwarded functions -- + // they'll get patched when the other DLL is processed. 
+ if (target >= modBaseAddr && target < modEndAddr) + rgProcAddresses[i] = (GenericFnPtr)target; + else + rgProcAddresses[i] = (GenericFnPtr)NULL; + } } }; @@ -390,7 +404,7 @@ const char* const LibcInfo::function_name_[] = { NULL, // kMangledNewArrayNothrow, NULL, // kMangledDeleteNothrow, NULL, // kMangledDeleteArrayNothrow, - "_msize", "_expand", "_aligned_malloc", "_aligned_free", + "_msize", "_expand", }; // For mingw, I can't patch the new/delete here, because the @@ -421,14 +435,6 @@ const GenericFnPtr LibcInfo::static_fn_[] = { #endif (GenericFnPtr)&::_msize, (GenericFnPtr)&::_expand, -#ifdef PERFTOOLS_NO_ALIGNED_MALLOC // for older versions of mingw - // _aligned_malloc isn't always available in mingw, so don't try to patch. - (GenericFnPtr)NULL, - (GenericFnPtr)NULL, -#else - (GenericFnPtr)&::_aligned_malloc, - (GenericFnPtr)&::_aligned_free, -#endif }; template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = { @@ -451,8 +457,6 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = { (GenericFnPtr)&Perftools_deletearray_nothrow, (GenericFnPtr)&Perftools__msize, (GenericFnPtr)&Perftools__expand, - (GenericFnPtr)&Perftools__aligned_malloc, - (GenericFnPtr)&Perftools__aligned_free, }; /*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = { @@ -908,21 +912,6 @@ void* LibcInfoWithPatchFunctions<T>::Perftools__expand(void *ptr, return NULL; } -template<int T> -void* LibcInfoWithPatchFunctions<T>::Perftools__aligned_malloc(size_t size, - size_t alignment) - __THROW { - void* result = do_memalign_or_cpp_memalign(alignment, size); - MallocHook::InvokeNewHook(result, size); - return result; -} - -template<int T> -void LibcInfoWithPatchFunctions<T>::Perftools__aligned_free(void *ptr) __THROW { - MallocHook::InvokeDeleteHook(ptr); - do_free_with_callback(ptr, (void (*)(void*))origstub_fn_[k_Aligned_free]); -} - LPVOID WINAPI WindowsInfo::Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags, DWORD_PTR dwBytes) { LPVOID 
result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD_PTR)) diff --git a/third_party/tcmalloc/chromium/src/windows/port.cc b/third_party/tcmalloc/chromium/src/windows/port.cc index 59b0417..e77468c 100644 --- a/third_party/tcmalloc/chromium/src/windows/port.cc +++ b/third_party/tcmalloc/chromium/src/windows/port.cc @@ -35,6 +35,7 @@ # error You should only be including windows/port.cc in a windows environment! #endif +#define NOMINMAX // so std::max, below, compiles correctly #include <config.h> #include <string.h> // for strlen(), memset(), memcmp() #include <assert.h> @@ -43,29 +44,12 @@ #include "port.h" #include "base/logging.h" #include "base/spinlock.h" +#include "internal_logging.h" #include "system-alloc.h" // ----------------------------------------------------------------------- // Basic libraries -// These call the windows _vsnprintf, but always NUL-terminate. -int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) { - if (size == 0) // not even room for a \0? - return -1; // not what C99 says to do, but what windows does - str[size-1] = '\0'; - return _vsnprintf(str, size-1, format, ap); -} - -#ifndef HAVE_SNPRINTF -int snprintf(char *str, size_t size, const char *format, ...) { - va_list ap; - va_start(ap, format); - const int r = vsnprintf(str, size, format, ap); - va_end(ap); - return r; -} -#endif - int getpagesize() { static int pagesize = 0; if (pagesize == 0) { @@ -82,9 +66,22 @@ extern "C" PERFTOOLS_DLL_DECL void* __sbrk(std::ptrdiff_t increment) { return NULL; } +// We need to write to 'stderr' without having windows allocate memory. +// The safest way is via a low-level call like WriteConsoleA(). But +// even then we need to be sure to print in small bursts so as to not +// require memory allocation. 
+extern "C" PERFTOOLS_DLL_DECL void WriteToStderr(const char* buf, int len) { + // Looks like windows allocates for writes of >80 bytes + for (int i = 0; i < len; i += 80) { + write(STDERR_FILENO, buf + i, std::min(80, len - i)); + } +} + + // ----------------------------------------------------------------------- // Threads code +// Declared (not extern "C") in thread_cache.h bool CheckIfKernelSupportsTLS() { // TODO(csilvers): return true (all win's since win95, at least, support this) return false; @@ -105,9 +102,15 @@ bool CheckIfKernelSupportsTLS() { // Force a reference to p_thread_callback_tcmalloc and p_process_term_tcmalloc // to prevent whole program optimization from discarding the variables. #ifdef _MSC_VER +#if defined(_M_IX86) #pragma comment(linker, "/INCLUDE:__tls_used") #pragma comment(linker, "/INCLUDE:_p_thread_callback_tcmalloc") #pragma comment(linker, "/INCLUDE:_p_process_term_tcmalloc") +#elif defined(_M_X64) +#pragma comment(linker, "/INCLUDE:_tls_used") +#pragma comment(linker, "/INCLUDE:p_thread_callback_tcmalloc") +#pragma comment(linker, "/INCLUDE:p_process_term_tcmalloc") +#endif #endif // When destr_fn eventually runs, it's supposed to take as its @@ -173,7 +176,7 @@ BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) { #endif // #ifdef _MSC_VER -pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)) { +extern "C" pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)) { // Semantics are: we create a new key, and then promise to call // destr_fn with TlsGetValue(key) when the thread is destroyed // (as long as TlsGetValue(key) is not NULL). @@ -187,10 +190,38 @@ pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)) { return key; } +// NOTE: this is Win2K and later. For Win98 we could use a CRITICAL_SECTION... +extern "C" int perftools_pthread_once(pthread_once_t *once_control, + void (*init_routine)(void)) { + // Try for a fast path first. Note: this should be an acquire semantics read. 
+ // It is on x86 and x64, where Windows runs. + if (*once_control != 1) { + while (true) { + switch (InterlockedCompareExchange(once_control, 2, 0)) { + case 0: + init_routine(); + InterlockedExchange(once_control, 1); + return 0; + case 1: + // The initializer has already been executed + return 0; + default: + // The initializer is being processed by another thread + SwitchToThread(); + } + } + } + return 0; +} + // ----------------------------------------------------------------------- // These functions replace system-alloc.cc +// The current system allocator. Because we don't link with system-alloc.cc, +// we need to define our own. +SysAllocator* sys_alloc = NULL; + // This is mostly like MmapSysAllocator::Alloc, except it does these weird // munmap's in the middle of the page, which is forbidden in windows. extern void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, diff --git a/third_party/tcmalloc/chromium/src/windows/port.h b/third_party/tcmalloc/chromium/src/windows/port.h index 66745d1..0faba01 100644 --- a/third_party/tcmalloc/chromium/src/windows/port.h +++ b/third_party/tcmalloc/chromium/src/windows/port.h @@ -40,8 +40,8 @@ #ifndef GOOGLE_BASE_WINDOWS_H_ #define GOOGLE_BASE_WINDOWS_H_ -// You should never include this file directly, but always include it -// from either config.h (MSVC) or mingw.h (MinGW/msys). +/* You should never include this file directly, but always include it + from either config.h (MSVC) or mingw.h (MinGW/msys). 
*/ #if !defined(GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_) && \ !defined(GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_) # error "port.h should only be included from config.h or mingw.h" @@ -54,21 +54,45 @@ #endif #include <windows.h> #include <io.h> /* because we so often use open/close/etc */ +#include <direct.h> /* for _getcwd */ #include <process.h> /* for _getpid */ +#include <limits.h> /* for PATH_MAX */ #include <stdarg.h> /* for va_list */ #include <stdio.h> /* need this to override stdio's (v)snprintf */ - -// 4018: signed/unsigned mismatch is common (and ok for signed_i < unsigned_i) -// 4244: otherwise we get problems when substracting two size_t's to an int -// 4288: VC++7 gets confused when a var is defined in a loop and then after it -// 4267: too many false positives for "conversion gives possible data loss" -// 4290: it's ok windows ignores the "throw" directive -// 4996: Yes, we're ok using "unsafe" functions like vsnprintf and getenv() +#include <sys/types.h> /* for _off_t */ +#include <assert.h> +#include <stdlib.h> /* for rand, srand, _strtoxxx */ + +/* + * 4018: signed/unsigned mismatch is common (and ok for signed_i < unsigned_i) + * 4244: otherwise we get problems when substracting two size_t's to an int + * 4288: VC++7 gets confused when a var is defined in a loop and then after it + * 4267: too many false positives for "conversion gives possible data loss" + * 4290: it's ok windows ignores the "throw" directive + * 4996: Yes, we're ok using "unsafe" functions like vsnprintf and getenv() + */ #ifdef _MSC_VER #pragma warning(disable:4018 4244 4288 4267 4290 4996) #endif -// ----------------------------------- BASIC TYPES +#ifndef __cplusplus +/* MSVC does not support C99 */ +# if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# define inline __inline +# else +# define inline static +# endif +# endif +#endif + +#ifdef __cplusplus +# define EXTERN_C extern "C" +#else +# define EXTERN_C extern +#endif + +/* 
----------------------------------- BASIC TYPES */ #ifndef HAVE_STDINT_H #ifndef HAVE___INT64 /* we need to have all the __intX names */ @@ -83,53 +107,78 @@ typedef unsigned __int8 uint8_t; typedef unsigned __int16 uint16_t; typedef unsigned __int32 uint32_t; typedef unsigned __int64 uint64_t; -#endif // #ifndef HAVE_STDINT_H +#endif /* #ifndef HAVE_STDINT_H */ -// I guess MSVC's <types.h> doesn't include ssize_t by default? +/* I guess MSVC's <types.h> doesn't include ssize_t by default? */ #ifdef _MSC_VER typedef intptr_t ssize_t; #endif -// ----------------------------------- THREADS +/* ----------------------------------- THREADS */ -#ifndef HAVE_PTHREAD // not true for MSVC, but may be true for MSYS +#ifndef HAVE_PTHREAD /* not true for MSVC, but may be true for MSYS */ typedef DWORD pthread_t; typedef DWORD pthread_key_t; typedef LONG pthread_once_t; -enum { PTHREAD_ONCE_INIT = 0 }; // important that this be 0! for SpinLock -#define pthread_self GetCurrentThreadId -#define pthread_equal(pthread_t_1, pthread_t_2) ((pthread_t_1)==(pthread_t_2)) +enum { PTHREAD_ONCE_INIT = 0 }; /* important that this be 0! 
for SpinLock */ + +inline pthread_t pthread_self(void) { + return GetCurrentThreadId(); +} #ifdef __cplusplus -// This replaces maybe_threads.{h,cc} -extern pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)); // in port.cc -#define perftools_pthread_key_create(pkey, destr_fn) \ - *(pkey) = PthreadKeyCreate(destr_fn) +inline bool pthread_equal(pthread_t left, pthread_t right) { + return left == right; +} + +/* This replaces maybe_threads.{h,cc} */ +EXTERN_C pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)); /* port.cc */ + +inline int perftools_pthread_key_create(pthread_key_t *pkey, + void (*destructor)(void*)) { + pthread_key_t key = PthreadKeyCreate(destructor); + if (key != TLS_OUT_OF_INDEXES) { + *(pkey) = key; + return 0; + } else { + return GetLastError(); + } +} + inline void* perftools_pthread_getspecific(DWORD key) { DWORD err = GetLastError(); void* rv = TlsGetValue(key); if (err) SetLastError(err); return rv; } -#define perftools_pthread_setspecific(key, val) \ - TlsSetValue((key), (val)) -// NOTE: this is Win2K and later. For Win98 we could use a CRITICAL_SECTION... -#define perftools_pthread_once(once, init) do { \ - if (InterlockedCompareExchange(once, 1, 0) == 0) (init)(); \ -} while (0) -#endif // __cplusplus -#endif // HAVE_PTHREAD - -// __declspec(thread) isn't usable in a dll opened via LoadLibrary(). -// But it doesn't work to LoadLibrary() us anyway, because of all the -// things we need to do before main()! So this kind of TLS is safe for us. + +inline int perftools_pthread_setspecific(pthread_key_t key, const void *value) { + if (TlsSetValue(key, (LPVOID)value)) + return 0; + else + return GetLastError(); +} + +EXTERN_C int perftools_pthread_once(pthread_once_t *once_control, + void (*init_routine)(void)); + +#endif /* __cplusplus */ +#endif /* HAVE_PTHREAD */ + +/* + * __declspec(thread) isn't usable in a dll opened via LoadLibrary(). 
+ * But it doesn't work to LoadLibrary() us anyway, because of all the + * things we need to do before main()! So this kind of TLS is safe for us. + */ #define __thread __declspec(thread) -// This code is obsolete, but I keep it around in case we are ever in -// an environment where we can't or don't want to use google spinlocks -// (from base/spinlock.{h,cc}). In that case, uncommenting this out, -// and removing spinlock.cc from the build, should be enough to revert -// back to using native spinlocks. +/* + * This code is obsolete, but I keep it around in case we are ever in + * an environment where we can't or don't want to use google spinlocks + * (from base/spinlock.{h,cc}). In that case, uncommenting this out, + * and removing spinlock.cc from the build, should be enough to revert + * back to using native spinlocks. + */ #if 0 // Windows uses a spinlock internally for its mutexes, making our life easy! // However, the Windows spinlock must always be initialized, making life hard, @@ -197,51 +246,80 @@ class SpinLockHolder { // Acquires a spinlock for as long as the scope lasts // This keeps us from using base/spinlock.h's implementation of SpinLock. 
#define BASE_SPINLOCK_H_ 1 -#endif // #if 0 - -// This replaces testutil.{h,cc} -extern PERFTOOLS_DLL_DECL void RunInThread(void (*fn)()); -extern PERFTOOLS_DLL_DECL void RunManyInThread(void (*fn)(), int count); -extern PERFTOOLS_DLL_DECL void RunManyInThreadWithId(void (*fn)(int), int count, - int stacksize); +#endif /* #if 0 */ +/* ----------------------------------- MMAP and other memory allocation */ -// ----------------------------------- MMAP and other memory allocation - -#ifndef HAVE_MMAP // not true for MSVC, but may be true for msys +#ifndef HAVE_MMAP /* not true for MSVC, but may be true for msys */ #define MAP_FAILED 0 -#define MREMAP_FIXED 2 // the value in linux, though it doesn't really matter -// These, when combined with the mmap invariants below, yield the proper action +#define MREMAP_FIXED 2 /* the value in linux, though it doesn't really matter */ +/* These, when combined with the mmap invariants below, yield the proper action */ #define PROT_READ PAGE_READWRITE #define PROT_WRITE PAGE_READWRITE #define MAP_ANONYMOUS MEM_RESERVE #define MAP_PRIVATE MEM_COMMIT -#define MAP_SHARED MEM_RESERVE // value of this #define is 100% arbitrary +#define MAP_SHARED MEM_RESERVE /* value of this #define is 100% arbitrary */ + +#if __STDC__ +typedef _off_t off_t; +#endif + +/* VirtualAlloc only replaces for mmap when certain invariants are kept. */ +inline void *mmap(void *addr, size_t length, int prot, int flags, + int fd, off_t offset) { + if (addr == NULL && fd == -1 && offset == 0 && + prot == (PROT_READ|PROT_WRITE) && flags == (MAP_PRIVATE|MAP_ANONYMOUS)) { + return VirtualAlloc(0, length, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + } else { + return NULL; + } +} -// VirtualAlloc is only a replacement for mmap when certain invariants are kept -#define mmap(start, length, prot, flags, fd, offset) \ - ( (start) == NULL && (fd) == -1 && (offset) == 0 && \ - (prot) == (PROT_READ|PROT_WRITE) && (flags) == (MAP_PRIVATE|MAP_ANONYMOUS)\ - ? 
VirtualAlloc(0, length, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE) \ - : NULL ) +inline int munmap(void *addr, size_t length) { + return VirtualFree(addr, 0, MEM_RELEASE) ? 0 : -1; +} +#endif /* HAVE_MMAP */ -#define munmap(start, length) (VirtualFree(start, 0, MEM_RELEASE) ? 0 : -1) -#endif // HAVE_MMAP +/* We could maybe use VirtualAlloc for sbrk as well, but no need */ +inline void *sbrk(intptr_t increment) { + // sbrk returns -1 on failure + return (void*)-1; +} -// We could maybe use VirtualAlloc for sbrk as well, but no need -#define sbrk(increment) ( (void*)-1 ) // sbrk returns -1 on failure +/* ----------------------------------- STRING ROUTINES */ -// ----------------------------------- STRING ROUTINES +/* + * We can't just use _vsnprintf and _snprintf as drop-in-replacements, + * because they don't always NUL-terminate. :-( We also can't use the + * name vsnprintf, since windows defines that (but not snprintf (!)). + */ +#if defined(_MSC_VER) && _MSC_VER >= 1400 +/* We can use safe CRT functions, which the required functionality */ +inline int perftools_vsnprintf(char *str, size_t size, const char *format, + va_list ap) { + return vsnprintf_s(str, size, _TRUNCATE, format, ap); +} +#else +inline int perftools_vsnprintf(char *str, size_t size, const char *format, + va_list ap) { + if (size == 0) /* not even room for a \0? */ + return -1; /* not what C99 says to do, but what windows does */ + str[size-1] = '\0'; + return _vsnprintf(str, size-1, format, ap); +} +#endif -// We can't just use _vsnprintf and _snprintf as drop-in-replacements, -// because they don't always NUL-terminate. :-( We also can't use the -// name vsnprintf, since windows defines that (but not snprintf (!)). 
-extern PERFTOOLS_DLL_DECL int snprintf(char *str, size_t size, - const char *format, ...); -extern PERFTOOLS_DLL_DECL int safe_vsnprintf(char *str, size_t size, - const char *format, va_list ap); -#define vsnprintf(str, size, format, ap) safe_vsnprintf(str, size, format, ap) +#ifndef HAVE_SNPRINTF +inline int snprintf(char *str, size_t size, const char *format, ...) { + va_list ap; + int r; + va_start(ap, format); + r = perftools_vsnprintf(str, size, format, ap); + va_end(ap); + return r; +} +#endif #define PRIx64 "I64x" #define SCNx64 "I64x" @@ -256,79 +334,132 @@ extern PERFTOOLS_DLL_DECL int safe_vsnprintf(char *str, size_t size, # define PRIxPTR "lx" #endif -// ----------------------------------- FILE IO +/* ----------------------------------- FILE IO */ + #ifndef PATH_MAX #define PATH_MAX 1024 #endif #ifndef __MINGW32__ enum { STDIN_FILENO = 0, STDOUT_FILENO = 1, STDERR_FILENO = 2 }; #endif -#define getcwd _getcwd -#define access _access -#define open _open -#define read _read -#define write _write -#define lseek _lseek -#define close _close -#define popen _popen -#define pclose _pclose -#define mkdir(dirname, mode) _mkdir(dirname) #ifndef O_RDONLY #define O_RDONLY _O_RDONLY #endif -// ----------------------------------- SYSTEM/PROCESS -typedef int pid_t; -#define getpid _getpid -#define getppid() (0) +#if __STDC__ && !defined(__MINGW32__) +/* These functions are considered non-standard */ +inline int access(const char *pathname, int mode) { + return _access(pathname, mode); +} +inline int open(const char *pathname, int flags, int mode = 0) { + return _open(pathname, flags, mode); +} +inline int close(int fd) { + return _close(fd); +} +inline ssize_t read(int fd, void *buf, size_t count) { + return _read(fd, buf, count); +} +inline ssize_t write(int fd, const void *buf, size_t count) { + return _write(fd, buf, count); +} +inline off_t lseek(int fd, off_t offset, int whence) { + return _lseek(fd, offset, whence); +} +inline char *getcwd(char *buf, size_t size) 
{ + return _getcwd(buf, size); +} +inline int mkdir(const char *pathname, int) { + return _mkdir(pathname); +} +#endif -// Handle case when poll is used to simulate sleep. -#define poll(r, w, t) \ - do { \ - assert(r == 0); \ - assert(w == 0); \ - Sleep(t); \ - } while(0) +inline FILE *popen(const char *command, const char *type) { + return _popen(command, type); +} +inline int pclose(FILE *stream) { + return _pclose(stream); +} + +EXTERN_C PERFTOOLS_DLL_DECL void WriteToStderr(const char* buf, int len); + +/* ----------------------------------- SYSTEM/PROCESS */ + +typedef int pid_t; +#if __STDC__ +inline pid_t getpid(void) { return _getpid(); } +#endif +inline pid_t getppid(void) { return 0; } + +/* Handle case when poll is used to simulate sleep. */ +inline int poll(struct pollfd* fds, int nfds, int timeout) { + assert(fds == NULL); + assert(nfds == 0); + Sleep(timeout); + return 0; +} -extern PERFTOOLS_DLL_DECL int getpagesize(); // in port.cc +EXTERN_C int getpagesize(); /* in port.cc */ -// ----------------------------------- OTHER +/* ----------------------------------- OTHER */ -#define srandom srand -#define random rand -#define sleep(t) Sleep(t * 1000) +inline void srandom(unsigned int seed) { srand(seed); } +inline long random(void) { return rand(); } +inline unsigned int sleep(unsigned int seconds) { + Sleep(seconds * 1000); + return 0; +} struct timespec { int tv_sec; int tv_nsec; }; -#define nanosleep(tm_ptr, ignored) \ - Sleep((tm_ptr)->tv_sec * 1000 + (tm_ptr)->tv_nsec / 1000000) +inline int nanosleep(const struct timespec *req, struct timespec *rem) { + Sleep(req->tv_sec * 1000 + req->tv_nsec / 1000000); + return 0; +} #ifndef __MINGW32__ -#define strtoq _strtoi64 -#define strtouq _strtoui64 -#define strtoll _strtoi64 -#define strtoull _strtoui64 -#define atoll _atoi64 +inline long long int strtoll(const char *nptr, char **endptr, int base) { + return _strtoi64(nptr, endptr, base); +} +inline unsigned long long int strtoull(const char *nptr, char 
**endptr, + int base) { + return _strtoui64(nptr, endptr, base); +} +inline long long int strtoq(const char *nptr, char **endptr, int base) { + return _strtoi64(nptr, endptr, base); +} +inline unsigned long long int strtouq(const char *nptr, char **endptr, + int base) { + return _strtoui64(nptr, endptr, base); +} +inline long long atoll(const char *nptr) { + return _atoi64(nptr); +} #endif #define __THROW throw() -// ----------------------------------- TCMALLOC-SPECIFIC +/* ----------------------------------- TCMALLOC-SPECIFIC */ -// tcmalloc.cc calls this so we can patch VirtualAlloc() et al. -extern PERFTOOLS_DLL_DECL void PatchWindowsFunctions(); +/* tcmalloc.cc calls this so we can patch VirtualAlloc() et al. */ +extern void PatchWindowsFunctions(); // ----------------------------------- BUILD-SPECIFIC -// windows/port.h defines compatibility APIs for several .h files, which -// we therefore shouldn't be #including directly. This hack keeps us from -// doing so. TODO(csilvers): do something more principled. +/* + * windows/port.h defines compatibility APIs for several .h files, which + * we therefore shouldn't be #including directly. This hack keeps us from + * doing so. TODO(csilvers): do something more principled. + */ #define GOOGLE_MAYBE_THREADS_H_ 1 #endif /* _WIN32 */ +#undef inline +#undef EXTERN_C + #endif /* GOOGLE_BASE_WINDOWS_H_ */ |