Update the tcmalloc vendor branch to r144 (gperftools 2.0), and README.chromium.

It's a retry of r126715 with updating README.chromium. The major reason for us to enable a fix for HEAP_PROFILE_MMAP. Please note that this change doesn't affect Chromium builds. BUG=114302 TEST=none Review URL: https://chromiumcodereview.appspot.com/9702045 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@126770 0039d316-1c4b-4281-b951-d872f2087c98
author: dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-03-14 22:31:22 +0000
committer: dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-03-14 22:31:22 +0000
commit: e6619c2e162540a42b44602d9ca15c15afd1bb63 (patch)
tree: 90a53114df5ab8fb4afdef1ee9324a3e903bc84e /third_party/tcmalloc/vendor/src
parent: 449019cafa2e658d0765038a4d5d4e06d781e0ad (diff)
download: chromium_src-e6619c2e162540a42b44602d9ca15c15afd1bb63.zip
chromium_src-e6619c2e162540a42b44602d9ca15c15afd1bb63.tar.gz
chromium_src-e6619c2e162540a42b44602d9ca15c15afd1bb63.tar.bz2
131 files changed, 5892 insertions, 4130 deletions
diff --git a/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-gcc.h b/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-gcc.h
deleted file mode 100644
index 423e993..0000000
--- a/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-gcc.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* Copyright (c) 2010, Google Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *     * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *     * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Lei Zhang, Sasha Levitskiy
- */
-
-// This file is an internal atomic implementation, use base/atomicops.h instead.
-//
-// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.
-
-#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
-#define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
-
-#include <stdio.h>
-#include "base/basictypes.h"  // For COMPILE_ASSERT
-
-typedef int32_t Atomic32;
-
-namespace base {
-namespace subtle {
-
-typedef int64_t Atomic64;
-
-// 0xffff0fc0 is the hard coded address of a function provided by
-// the kernel which implements an atomic compare-exchange. On older
-// ARM architecture revisions (pre-v6) this may be implemented using
-// a syscall. This address is stable, and in active use (hard coded)
-// by at least glibc-2.7 and the Android C library.
-// pLinuxKernelCmpxchg has both acquire and release barrier sematincs.
-typedef Atomic32 (*LinuxKernelCmpxchgFunc)(Atomic32 old_value,
-                                           Atomic32 new_value,
-                                           volatile Atomic32* ptr);
-LinuxKernelCmpxchgFunc pLinuxKernelCmpxchg __attribute__((weak)) =
-    (LinuxKernelCmpxchgFunc) 0xffff0fc0;
-
-typedef void (*LinuxKernelMemoryBarrierFunc)(void);
-LinuxKernelMemoryBarrierFunc pLinuxKernelMemoryBarrier __attribute__((weak)) =
-    (LinuxKernelMemoryBarrierFunc) 0xffff0fa0;
-
-
-inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
-                                         Atomic32 old_value,
-                                         Atomic32 new_value) {
-  Atomic32 prev_value = *ptr;
-  do {
-    if (!pLinuxKernelCmpxchg(old_value, new_value,
-                             const_cast<Atomic32*>(ptr))) {
-      return old_value;
-    }
-    prev_value = *ptr;
-  } while (prev_value == old_value);
-  return prev_value;
-}
-
-inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
-                                         Atomic32 new_value) {
-  Atomic32 old_value;
-  do {
-    old_value = *ptr;
-  } while (pLinuxKernelCmpxchg(old_value, new_value,
-                               const_cast<Atomic32*>(ptr)));
-  return old_value;
-}
-
-inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
-                                        Atomic32 increment) {
-  for (;;) {
-    // Atomic exchange the old value with an incremented one.
-    Atomic32 old_value = *ptr;
-    Atomic32 new_value = old_value + increment;
-    if (pLinuxKernelCmpxchg(old_value, new_value,
-                            const_cast<Atomic32*>(ptr)) == 0) {
-      // The exchange took place as expected.
-      return new_value;
-    }
-    // Otherwise, *ptr changed mid-loop and we need to retry.
-  }
-}
-
-inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
-                                          Atomic32 increment) {
-  return Barrier_AtomicIncrement(ptr, increment);
-}
-
-inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
-                                       Atomic32 old_value,
-                                       Atomic32 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-}
-
-inline void MemoryBarrier() {
-  pLinuxKernelMemoryBarrier();
-}
-
-inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
-  *ptr = value;
-  MemoryBarrier();
-}
-
-inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
-  MemoryBarrier();
-  *ptr = value;
-}
-
-inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
-  return *ptr;
-}
-
-inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
-  Atomic32 value = *ptr;
-  MemoryBarrier();
-  return value;
-}
-
-inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
-  MemoryBarrier();
-  return *ptr;
-}
-
-
-// 64-bit versions are not implemented yet.
-
-inline void NotImplementedFatalError(const char *function_name) {
-  fprintf(stderr, "64-bit %s() not implemented on this platform\n",
-          function_name);
-  abort();
-}
-
-inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
-                                         Atomic64 old_value,
-                                         Atomic64 new_value) {
-  NotImplementedFatalError("NoBarrier_CompareAndSwap");
-  return 0;
-}
-
-inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
-                                         Atomic64 new_value) {
-  NotImplementedFatalError("NoBarrier_AtomicExchange");
-  return 0;
-}
-
-inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
-                                          Atomic64 increment) {
-  NotImplementedFatalError("NoBarrier_AtomicIncrement");
-  return 0;
-}
-
-inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
-                                        Atomic64 increment) {
-  NotImplementedFatalError("Barrier_AtomicIncrement");
-  return 0;
-}
-
-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NotImplementedFatalError("NoBarrier_Store");
-}
-
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NoBarrier_AtomicExchange(ptr, value);
-              // acts as a barrier in this implementation
-}
-
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NotImplementedFatalError("Release_Store");
-}
-
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  NotImplementedFatalError("NoBarrier_Load");
-  return 0;
-}
-
-inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  Atomic64 value = NoBarrier_Load(ptr);
-  return value;
-}
-
-inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  MemoryBarrier();
-  return NoBarrier_Load(ptr);
-}
-
-inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
-                                       Atomic64 old_value,
-                                       Atomic64 new_value) {
-  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
-}
-
-}  // namespace base::subtle
-}  // namespace base
-
-#endif  // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
diff --git a/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-generic.h b/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-generic.h
index 7882b0d..4acb76a 100644
--- a/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-generic.h
+++ b/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-generic.h
@@ -39,8 +39,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "base/macros.h"  // For COMPILE_ASSERT
-#include "base/port.h"  // ATTRIBUTE_WEAK
+#include "base/basictypes.h"
 
 typedef int32_t Atomic32;
 
diff --git a/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-v6plus.h b/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-v6plus.h
index ee09f32..8d5b9b5 100644
--- a/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-v6plus.h
+++ b/third_party/tcmalloc/vendor/src/base/atomicops-internals-arm-v6plus.h
@@ -42,6 +42,13 @@
 #include <stdlib.h>
 #include "base/basictypes.h"  // For COMPILE_ASSERT
 
+// The LDREXD and STREXD instructions in ARM all v7 variants or above.  In v6,
+// only some variants support it.  For simplicity, we only use exclusive
+// 64-bit load/store in V7 or above.
+#if defined(ARMV7)
+# define BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
+#endif
+
 typedef int32_t Atomic32;
 
 namespace base {
@@ -60,6 +67,10 @@ inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
     "ldrex   %1, [%3]\n"
     "mov     %0, #0\n"
     "teq     %1, %4\n"
+    // The following IT (if-then) instruction is needed for the subsequent
+    // conditional instruction STREXEQ when compiling in THUMB mode.
+    // In ARM mode, the compiler/assembler will not generate any code for it.
+    "it      eq\n"
     "strexeq %0, %5, [%3]\n"
         : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr)
         : "r" (ptr), "Ir" (old_value), "r" (new_value)
@@ -164,7 +175,114 @@ inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
   return *ptr;
 }
 
-// 64-bit versions are not implemented yet.
+// 64-bit versions are only available if LDREXD and STREXD instructions
+// are available.
+#ifdef BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
+
+#define BASE_HAS_ATOMIC64 1
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 oldval, res;
+  do {
+    __asm__ __volatile__(
+    "ldrexd   %1, [%3]\n"
+    "mov      %0, #0\n"
+    "teq      %Q1, %Q4\n"
+    // The following IT (if-then) instructions are needed for the subsequent
+    // conditional instructions when compiling in THUMB mode.
+    // In ARM mode, the compiler/assembler will not generate any code for it.
+    "it       eq\n"
+    "teqeq    %R1, %R4\n"
+    "it       eq\n"
+    "strexdeq %0, %5, [%3]\n"
+        : "=&r" (res), "=&r" (oldval), "+Q" (*ptr)
+        : "r" (ptr), "Ir" (old_value), "r" (new_value)
+        : "cc");
+  } while (res);
+  return oldval;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  int store_failed;
+  Atomic64 old;
+  __asm__ __volatile__(
+      "1:\n"
+      "ldrexd  %1, [%2]\n"
+      "strexd  %0, %3, [%2]\n"
+      "teq     %0, #0\n"
+      "bne     1b"
+      : "=&r" (store_failed), "=&r" (old)
+      : "r" (ptr), "r" (new_value)
+      : "cc", "memory");
+  return old;
+}
+
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+                                          Atomic64 increment) {
+  int store_failed;
+  Atomic64 res;
+  __asm__ __volatile__(
+      "1:\n"
+      "ldrexd  %1, [%2]\n"
+      "adds    %Q1, %Q1, %Q3\n"
+      "adc     %R1, %R1, %R3\n"
+      "strexd  %0, %1, [%2]\n"
+      "teq     %0, #0\n"
+      "bne     1b"
+      : "=&r" (store_failed), "=&r"(res)
+      : "r" (ptr), "r"(increment)
+      : "cc", "memory");
+  return res;
+}
+
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+                                        Atomic64 increment) {
+  int store_failed;
+  Atomic64 res;
+  __asm__ __volatile__(
+      "1:\n"
+      "ldrexd  %1, [%2]\n"
+      "adds    %Q1, %Q1, %Q3\n"
+      "adc     %R1, %R1, %R3\n"
+      "dmb\n"
+      "strexd  %0, %1, [%2]\n"
+      "teq     %0, #0\n"
+      "bne     1b"
+      : "=&r" (store_failed), "=&r"(res)
+      : "r" (ptr), "r"(increment)
+      : "cc", "memory");
+  return res;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  int store_failed;
+  Atomic64 dummy;
+  __asm__ __volatile__(
+      "1:\n"
+      // Dummy load to lock cache line.
+      "ldrexd  %1, [%3]\n"
+      "strexd  %0, %2, [%3]\n"
+      "teq     %0, #0\n"
+      "bne     1b"
+      : "=&r" (store_failed), "=&r"(dummy)
+      : "r"(value), "r" (ptr)
+      : "cc", "memory");
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  Atomic64 res;
+  __asm__ __volatile__(
+  "ldrexd   %0, [%1]\n"
+  "clrex\n"
+      : "=r" (res)
+      : "r"(ptr), "Q"(*ptr));
+  return res;
+}
+
+#else // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
 
 inline void NotImplementedFatalError(const char *function_name) {
   fprintf(stderr, "64-bit %s() not implemented on this platform\n",
@@ -201,41 +319,47 @@ inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
   NotImplementedFatalError("NoBarrier_Store");
 }
 
-inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NotImplementedFatalError("Acquire_Store64");
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  NotImplementedFatalError("NoBarrier_Load");
+  return 0;
 }
 
-inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
-  NotImplementedFatalError("Release_Store");
+#endif // BASE_ATOMICOPS_HAS_LDREXD_AND_STREXD
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  NoBarrier_Store(ptr, value);
+  MemoryBarrier();
 }
 
-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
-  NotImplementedFatalError("NoBarrier_Load");
-  return 0;
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  MemoryBarrier();
+  NoBarrier_Store(ptr, value);
 }
 
 inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
-  NotImplementedFatalError("Atomic64 Acquire_Load");
-  return 0;
+  Atomic64 value = NoBarrier_Load(ptr);
+  MemoryBarrier();
+  return value;
 }
 
 inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
-  NotImplementedFatalError("Atomic64 Release_Load");
-  return 0;
+  MemoryBarrier();
+  return NoBarrier_Load(ptr);
 }
 
 inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
                                        Atomic64 old_value,
                                        Atomic64 new_value) {
-  NotImplementedFatalError("Atomic64 Acquire_CompareAndSwap");
-  return 0;
+  Atomic64 value = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  MemoryBarrier();
+  return value;
 }
 
 inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
                                        Atomic64 old_value,
                                        Atomic64 new_value) {
-  NotImplementedFatalError("Atomic64 Release_CompareAndSwap");
-  return 0;
+  MemoryBarrier();
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
 }
 
 }  // namespace subtle ends
diff --git a/third_party/tcmalloc/vendor/src/base/basictypes.h b/third_party/tcmalloc/vendor/src/base/basictypes.h
index 0f21fca..75b7b5a 100644
--- a/third_party/tcmalloc/vendor/src/base/basictypes.h
+++ b/third_party/tcmalloc/vendor/src/base/basictypes.h
@@ -31,6 +31,7 @@
 #define _BASICTYPES_H_
 
 #include <config.h>
+#include <string.h>       // for memcpy()
 #ifdef HAVE_INTTYPES_H
 #include <inttypes.h>     // gets us PRId64, etc
 #endif
@@ -193,6 +194,28 @@ struct CompileAssert {
    (reinterpret_cast<char*>(&reinterpret_cast<strct*>(16)->field) -     \
     reinterpret_cast<char*>(16))
 
+// bit_cast<Dest,Source> implements the equivalent of
+// "*reinterpret_cast<Dest*>(&source)".
+//
+// The reinterpret_cast method would produce undefined behavior
+// according to ISO C++ specification section 3.10 -15 -.
+// bit_cast<> calls memcpy() which is blessed by the standard,
+// especially by the example in section 3.9.
+//
+// Fortunately memcpy() is very fast.  In optimized mode, with a
+// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
+// code with the minimal amount of data movement.  On a 32-bit system,
+// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
+// compiles to two loads and two stores.
+
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+  COMPILE_ASSERT(sizeof(Dest) == sizeof(Source), bitcasting_unequal_sizes);
+  Dest dest;
+  memcpy(&dest, &source, sizeof(dest));
+  return dest;
+}
+
 #ifdef HAVE___ATTRIBUTE__
 # define ATTRIBUTE_WEAK      __attribute__((weak))
 # define ATTRIBUTE_NOINLINE  __attribute__((noinline))
@@ -309,8 +332,7 @@ class AssignAttributeStartEnd {
 #endif  // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
 
 #if defined(HAVE___ATTRIBUTE__) && (defined(__i386__) || defined(__x86_64__))
-# define CACHELINE_SIZE 64
-# define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
+# define CACHELINE_ALIGNED __attribute__((aligned(64)))
 #else
 # define CACHELINE_ALIGNED
 #endif  // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__)
diff --git a/third_party/tcmalloc/vendor/src/base/cycleclock.h b/third_party/tcmalloc/vendor/src/base/cycleclock.h
index db5f49a..c704e1d 100644
--- a/third_party/tcmalloc/vendor/src/base/cycleclock.h
+++ b/third_party/tcmalloc/vendor/src/base/cycleclock.h
@@ -47,6 +47,11 @@
 
 #include "base/basictypes.h"   // make sure we get the def for int64
 #include "base/arm_instruction_set_select.h"
+// base/sysinfo.h is really big and we don't want to include it unless
+// it is necessary.
+#if defined(__arm__) || defined(__mips__)
+# include "base/sysinfo.h"
+#endif
 #if defined(__MACH__) && defined(__APPLE__)
 # include <mach/mach_time.h>
 #endif
@@ -61,7 +66,7 @@
 extern "C" uint64 __rdtsc();
 #pragma intrinsic(__rdtsc)
 #endif
-#ifdef ARMV3
+#if defined(ARMV3) || defined(__mips__)
 #include <sys/time.h>
 #endif
 
@@ -124,11 +129,11 @@ struct CycleClock {
     uint32 pmuseren;
     uint32 pmcntenset;
     // Read the user mode perf monitor counter access permissions.
-    asm("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
+    asm volatile ("mrc p15, 0, %0, c9, c14, 0" : "=r" (pmuseren));
     if (pmuseren & 1) {  // Allows reading perfmon counters for user mode code.
-      asm("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
+      asm volatile ("mrc p15, 0, %0, c9, c12, 1" : "=r" (pmcntenset));
       if (pmcntenset & 0x80000000ul) {  // Is it counting?
-        asm("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
+        asm volatile ("mrc p15, 0, %0, c9, c13, 0" : "=r" (pmccntr));
         // The counter is set up to count every 64th cycle
         return static_cast<int64>(pmccntr) * 64;  // Should optimize to << 6
       }
@@ -136,7 +141,15 @@ struct CycleClock {
 #endif
     struct timeval tv;
     gettimeofday(&tv, NULL);
-    return static_cast<int64>(tv.tv_sec) * 1000000 + tv.tv_usec;
+    return static_cast<int64>((tv.tv_sec + tv.tv_usec * 0.000001)
+                              * CyclesPerSecond());
+#elif defined(__mips__)
+    // mips apparently only allows rdtsc for superusers, so we fall
+    // back to gettimeofday.  It's possible clock_gettime would be better.
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return static_cast<int64>((tv.tv_sec + tv.tv_usec * 0.000001)
+                              * CyclesPerSecond());
 #else
 // The soft failover to a generic implementation is automatic only for ARM.
 // For other platforms the developer is expected to make an attempt to create
diff --git a/third_party/tcmalloc/vendor/src/base/dynamic_annotations.c b/third_party/tcmalloc/vendor/src/base/dynamic_annotations.c
index 1005f90..c8b61be 100644
--- a/third_party/tcmalloc/vendor/src/base/dynamic_annotations.c
+++ b/third_party/tcmalloc/vendor/src/base/dynamic_annotations.c
@@ -50,10 +50,19 @@
 # endif
 #endif
 
+/* Compiler-based ThreadSanitizer defines
+   DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL = 1
+   and provides its own definitions of the functions. */
+
+#ifndef DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL
+# define DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL 0
+#endif
+
 /* Each function is empty and called (via a macro) only in debug mode.
    The arguments are captured by dynamic tools at runtime. */
 
-#if DYNAMIC_ANNOTATIONS_ENABLED == 1
+#if DYNAMIC_ANNOTATIONS_ENABLED == 1 \
+    && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0
 
 void AnnotateRWLockCreate(const char *file, int line,
                           const volatile void *lock){}
@@ -122,7 +131,10 @@ void AnnotateNoOp(const char *file, int line,
                   const volatile void *arg){}
 void AnnotateFlushState(const char *file, int line){}
 
-#endif  /* DYNAMIC_ANNOTATIONS_ENABLED == 1 */
+#endif  /* DYNAMIC_ANNOTATIONS_ENABLED == 1
+    && DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */
+
+#if DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0
 
 static int GetRunningOnValgrind(void) {
 #ifdef RUNNING_ON_VALGRIND
@@ -159,6 +171,8 @@ int RunningOnValgrind(void) {
   return local_running_on_valgrind;
 }
 
+#endif  /* DYNAMIC_ANNOTATIONS_EXTERNAL_IMPL == 0 */
+
 /* See the comments in dynamic_annotations.h */
 double ValgrindSlowdown(void) {
   /* Same initialization hack as in RunningOnValgrind(). */
diff --git a/third_party/tcmalloc/vendor/src/base/dynamic_annotations.h b/third_party/tcmalloc/vendor/src/base/dynamic_annotations.h
index 811bb5e..4669315 100644
--- a/third_party/tcmalloc/vendor/src/base/dynamic_annotations.h
+++ b/third_party/tcmalloc/vendor/src/base/dynamic_annotations.h
@@ -378,9 +378,14 @@
 
 #define ANNOTALYSIS_STATIC_INLINE
 #define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY ;
+#define ANNOTALYSIS_IGNORE_READS_BEGIN
+#define ANNOTALYSIS_IGNORE_READS_END
+#define ANNOTALYSIS_IGNORE_WRITES_BEGIN
+#define ANNOTALYSIS_IGNORE_WRITES_END
+#define ANNOTALYSIS_UNPROTECTED_READ
 
-#if defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) \
-  && (!defined(SWIG)) && defined(__SUPPORT_DYN_ANNOTATION__)
+#if defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__)) && \
+    defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__)
 
 #if DYNAMIC_ANNOTATIONS_ENABLED == 0
 #define ANNOTALYSIS_ONLY 1
@@ -389,22 +394,23 @@
 #undef ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY
 #define ANNOTALYSIS_SEMICOLON_OR_EMPTY_BODY { (void)file; (void)line; }
 #endif
-#define ANNOTALYSIS_IGNORE_READS_BEGIN   __attribute__ ((ignore_reads_begin))
-#define ANNOTALYSIS_IGNORE_READS_END     __attribute__ ((ignore_reads_end))
-#define ANNOTALYSIS_IGNORE_WRITES_BEGIN  __attribute__ ((ignore_writes_begin))
-#define ANNOTALYSIS_IGNORE_WRITES_END    __attribute__ ((ignore_writes_end))
-#define ANNOTALYSIS_UNPROTECTED_READ     __attribute__ ((unprotected_read))
-
-#else
-
-#define ANNOTALYSIS_IGNORE_READS_BEGIN
-#define ANNOTALYSIS_IGNORE_READS_END
-#define ANNOTALYSIS_IGNORE_WRITES_BEGIN
-#define ANNOTALYSIS_IGNORE_WRITES_END
-#define ANNOTALYSIS_UNPROTECTED_READ
 
+/* Only emit attributes when annotalysis is enabled. */
+#if defined(__SUPPORT_TS_ANNOTATION__) && defined(__SUPPORT_DYN_ANNOTATION__)
+#undef  ANNOTALYSIS_IGNORE_READS_BEGIN
+#define ANNOTALYSIS_IGNORE_READS_BEGIN  __attribute__ ((ignore_reads_begin))
+#undef  ANNOTALYSIS_IGNORE_READS_END
+#define ANNOTALYSIS_IGNORE_READS_END    __attribute__ ((ignore_reads_end))
+#undef  ANNOTALYSIS_IGNORE_WRITES_BEGIN
+#define ANNOTALYSIS_IGNORE_WRITES_BEGIN __attribute__ ((ignore_writes_begin))
+#undef  ANNOTALYSIS_IGNORE_WRITES_END
+#define ANNOTALYSIS_IGNORE_WRITES_END   __attribute__ ((ignore_writes_end))
+#undef  ANNOTALYSIS_UNPROTECTED_READ
+#define ANNOTALYSIS_UNPROTECTED_READ    __attribute__ ((unprotected_read))
 #endif
 
+#endif // defined(__GNUC__) && (!defined(SWIG)) && (!defined(__clang__))
+
 /* Use the macros above rather than using these functions directly. */
 #ifdef __cplusplus
 extern "C" {
@@ -604,7 +610,7 @@ double ValgrindSlowdown(void);
     #undef ANNOTATE_UNPROTECTED_READ
     template <class T>
     inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x)
-        __attribute__ ((unprotected_read)) {
+         ANNOTALYSIS_UNPROTECTED_READ {
       ANNOTATE_IGNORE_READS_BEGIN();
       T res = x;
       ANNOTATE_IGNORE_READS_END();
diff --git a/third_party/tcmalloc/vendor/src/base/elf_mem_image.cc b/third_party/tcmalloc/vendor/src/base/elf_mem_image.cc
index d8b8090..2949343 100644
--- a/third_party/tcmalloc/vendor/src/base/elf_mem_image.cc
+++ b/third_party/tcmalloc/vendor/src/base/elf_mem_image.cc
@@ -37,6 +37,7 @@
 
 #ifdef HAVE_ELF_MEM_IMAGE  // defined in elf_mem_image.h
 
+#include <stddef.h>   // for size_t, ptrdiff_t
 #include "base/logging.h"
 
 // From binutils/include/elf/common.h (this doesn't appear to be documented
diff --git a/third_party/tcmalloc/vendor/src/base/googleinit.h b/third_party/tcmalloc/vendor/src/base/googleinit.h
index 62ad84c..dbf610c 100644
--- a/third_party/tcmalloc/vendor/src/base/googleinit.h
+++ b/third_party/tcmalloc/vendor/src/base/googleinit.h
@@ -33,19 +33,41 @@
 #ifndef _GOOGLEINIT_H
 #define _GOOGLEINIT_H
 
+#include "base/logging.h"
+
 class GoogleInitializer {
  public:
-  typedef void (*void_function)(void);
-  GoogleInitializer(const char* name, void_function f) {
-    f();
+  typedef void (*VoidFunction)(void);
+  GoogleInitializer(const char* name, VoidFunction ctor, VoidFunction dtor)
+      : name_(name), destructor_(dtor) {
+    RAW_VLOG(10, "<GoogleModuleObject> constructing: %s\n", name_);
+    if (ctor)
+      ctor();
+  }
+  ~GoogleInitializer() {
+    RAW_VLOG(10, "<GoogleModuleObject> destroying: %s\n", name_);
+    if (destructor_)
+      destructor_();
   }
+
+ private:
+  const char* const name_;
+  const VoidFunction destructor_;
 };
 
 #define REGISTER_MODULE_INITIALIZER(name, body)                 \
   namespace {                                                   \
     static void google_init_module_##name () { body; }          \
     GoogleInitializer google_initializer_module_##name(#name,   \
-            google_init_module_##name);                         \
+            google_init_module_##name, NULL);                   \
   }
 
+#define REGISTER_MODULE_DESTRUCTOR(name, body)                  \
+  namespace {                                                   \
+    static void google_destruct_module_##name () { body; }      \
+    GoogleInitializer google_destructor_module_##name(#name,    \
+            NULL, google_destruct_module_##name);               \
+  }
+
+
 #endif /* _GOOGLEINIT_H */
diff --git a/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h b/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h
index 79beafa..99dac9e 100644
--- a/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h
+++ b/third_party/tcmalloc/vendor/src/base/linux_syscall_support.h
@@ -69,6 +69,63 @@
  * This file defines a few internal symbols that all start with "LSS_".
  * Do not access these symbols from outside this file. They are not part
  * of the supported API.
+ *
+ * NOTE: This is a stripped down version of the official opensource
+ * version of linux_syscall_support.h, which lives at
+ *    http://code.google.com/p/linux-syscall-support/
+ * It includes only the syscalls that are used in perftools, plus a
+ * few extra.  Here's the breakdown:
+ * 1) Perftools uses these: grep -rho 'sys_[a-z0-9_A-Z]* *(' src | sort -u
+ *      sys__exit(
+ *      sys_clone(
+ *      sys_close(
+ *      sys_fcntl(
+ *      sys_fstat(
+ *      sys_futex(
+ *      sys_futex1(
+ *      sys_getcpu(
+ *      sys_getdents(
+ *      sys_getppid(
+ *      sys_gettid(
+ *      sys_lseek(
+ *      sys_mmap(
+ *      sys_mremap(
+ *      sys_munmap(
+ *      sys_open(
+ *      sys_pipe(
+ *      sys_prctl(
+ *      sys_ptrace(
+ *      sys_ptrace_detach(
+ *      sys_read(
+ *      sys_sched_yield(
+ *      sys_sigaction(
+ *      sys_sigaltstack(
+ *      sys_sigdelset(
+ *      sys_sigfillset(
+ *      sys_sigprocmask(
+ *      sys_socket(
+ *      sys_stat(
+ *      sys_waitpid(
+ * 2) These are used as subroutines of the above:
+ *      sys_getpid       -- gettid
+ *      sys_kill         -- ptrace_detach
+ *      sys_restore      -- sigaction
+ *      sys_restore_rt   -- sigaction
+ *      sys_socketcall   -- socket
+ *      sys_wait4        -- waitpid
+ * 3) I left these in even though they're not used.  They either
+ * complement the above (write vs read) or are variants (rt_sigaction):
+ *      sys_fstat64
+ *      sys_getdents64
+ *      sys_llseek
+ *      sys_mmap2
+ *      sys_openat
+ *      sys_rt_sigaction
+ *      sys_rt_sigprocmask
+ *      sys_sigaddset
+ *      sys_sigemptyset
+ *      sys_stat64
+ *      sys_write
  */
 #ifndef SYS_LINUX_SYSCALL_SUPPORT_H
 #define SYS_LINUX_SYSCALL_SUPPORT_H
@@ -76,7 +133,7 @@
 /* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux.
  * Porting to other related platforms should not be difficult.
  */
-#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) ||   \
+#if (defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
      defined(__mips__) || defined(__PPC__)) && defined(__linux)
 
 #ifndef SYS_CPLUSPLUS
@@ -154,36 +211,6 @@ struct kernel_dirent {
   char               d_name[256];
 };
 
-/* include/linux/uio.h                                                       */
-struct kernel_iovec {
-  void               *iov_base;
-  unsigned long      iov_len;
-};
-
-/* include/linux/socket.h                                                    */
-struct kernel_msghdr {
-  void               *msg_name;
-  int                msg_namelen;
-  struct kernel_iovec*msg_iov;
-  unsigned long      msg_iovlen;
-  void               *msg_control;
-  unsigned long      msg_controllen;
-  unsigned           msg_flags;
-};
-
-/* include/asm-generic/poll.h                                                */
-struct kernel_pollfd {
-  int                fd;
-  short              events;
-  short              revents;
-};
-
-/* include/linux/resource.h                                                  */
-struct kernel_rlimit {
-  unsigned long      rlim_cur;
-  unsigned long      rlim_max;
-};
-
 /* include/linux/time.h                                                      */
 struct kernel_timespec {
   long               tv_sec;
@@ -217,7 +244,7 @@ struct kernel_rusage {
 };
 
 struct siginfo;
-#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__PPC__)
+#if defined(__i386__) || defined(__arm__) || defined(__PPC__)
 
 /* include/asm-{arm,i386,mips,ppc}/signal.h                                  */
 struct kernel_old_sigaction {
@@ -274,12 +301,6 @@ struct kernel_sigaction {
 #endif
 };
 
-/* include/linux/socket.h                                                    */
-struct kernel_sockaddr {
-  unsigned short     sa_family;
-  char               sa_data[14];
-};
-
 /* include/asm-{arm,i386,mips,ppc}/stat.h                                    */
 #ifdef __mips__
 #if _MIPS_SIM == _MIPS_SIM_ABI64
@@ -354,7 +375,7 @@ struct kernel_stat64 {
 #endif
 
 /* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h                             */
-#if defined(__i386__) || defined(__ARM_ARCH_3__)
+#if defined(__i386__) || defined(__arm__)
 struct kernel_stat {
   /* The kernel headers suggest that st_dev and st_rdev should be 32bit
    * quantities encoding 12bit major and 20bit minor numbers in an interleaved
@@ -449,89 +470,15 @@ struct kernel_stat {
 };
 #endif
 
-/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h                           */
-#ifdef __mips__
-#if _MIPS_SIM != _MIPS_SIM_ABI64
-struct kernel_statfs64 {
-  unsigned long      f_type;
-  unsigned long      f_bsize;
-  unsigned long      f_frsize;
-  unsigned long      __pad;
-  unsigned long long f_blocks;
-  unsigned long long f_bfree;
-  unsigned long long f_files;
-  unsigned long long f_ffree;
-  unsigned long long f_bavail;
-  struct { int val[2]; } f_fsid;
-  unsigned long      f_namelen;
-  unsigned long      f_spare[6];
-};
-#endif
-#elif !defined(__x86_64__)
-struct kernel_statfs64 {
-  unsigned long      f_type;
-  unsigned long      f_bsize;
-  unsigned long long f_blocks;
-  unsigned long long f_bfree;
-  unsigned long long f_bavail;
-  unsigned long long f_files;
-  unsigned long long f_ffree;
-  struct { int val[2]; } f_fsid;
-  unsigned long      f_namelen;
-  unsigned long      f_frsize;
-  unsigned long      f_spare[5];
-};
-#endif
-
-/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h                   */
-#ifdef __mips__
-struct kernel_statfs {
-  long               f_type;
-  long               f_bsize;
-  long               f_frsize;
-  long               f_blocks;
-  long               f_bfree;
-  long               f_files;
-  long               f_ffree;
-  long               f_bavail;
-  struct { int val[2]; } f_fsid;
-  long               f_namelen;
-  long               f_spare[6];
-};
-#else
-struct kernel_statfs {
-  /* x86_64 actually defines all these fields as signed, whereas all other  */
-  /* platforms define them as unsigned. Leaving them at unsigned should not */
-  /* cause any problems.                                                    */
-  unsigned long      f_type;
-  unsigned long      f_bsize;
-  unsigned long      f_blocks;
-  unsigned long      f_bfree;
-  unsigned long      f_bavail;
-  unsigned long      f_files;
-  unsigned long      f_ffree;
-  struct { int val[2]; } f_fsid;
-  unsigned long      f_namelen;
-  unsigned long      f_frsize;
-  unsigned long      f_spare[5];
-};
-#endif
-
 
 /* Definitions missing from the standard header files                        */
 #ifndef O_DIRECTORY
-#if defined(__ARM_ARCH_3__)
+#if defined(__arm__)
 #define O_DIRECTORY             0040000
 #else
 #define O_DIRECTORY             0200000
 #endif
 #endif
-#ifndef NT_PRXFPREG
-#define NT_PRXFPREG             0x46e62b7f
-#endif
-#ifndef PTRACE_GETFPXREGS
-#define PTRACE_GETFPXREGS       ((enum __ptrace_request)18)
-#endif
 #ifndef PR_GET_DUMPABLE
 #define PR_GET_DUMPABLE         3
 #endif
@@ -553,46 +500,11 @@ struct kernel_statfs {
 #ifndef SA_RESTORER
 #define SA_RESTORER             0x04000000
 #endif
-#ifndef CPUCLOCK_PROF
-#define CPUCLOCK_PROF           0
-#endif
-#ifndef CPUCLOCK_VIRT
-#define CPUCLOCK_VIRT           1
-#endif
-#ifndef CPUCLOCK_SCHED
-#define CPUCLOCK_SCHED          2
-#endif
-#ifndef CPUCLOCK_PERTHREAD_MASK
-#define CPUCLOCK_PERTHREAD_MASK 4
-#endif
-#ifndef MAKE_PROCESS_CPUCLOCK
-#define MAKE_PROCESS_CPUCLOCK(pid, clock)                                     \
-        ((~(int)(pid) << 3) | (int)(clock))
-#endif
-#ifndef MAKE_THREAD_CPUCLOCK
-#define MAKE_THREAD_CPUCLOCK(tid, clock)                                      \
-        ((~(int)(tid) << 3) | (int)((clock) | CPUCLOCK_PERTHREAD_MASK))
-#endif
 
 #if defined(__i386__)
-#ifndef __NR_setresuid
-#define __NR_setresuid          164
-#define __NR_setresgid          170
-#endif
 #ifndef __NR_rt_sigaction
 #define __NR_rt_sigaction       174
 #define __NR_rt_sigprocmask     175
-#define __NR_rt_sigpending      176
-#define __NR_rt_sigsuspend      179
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64            180
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64           181
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit         191
 #endif
 #ifndef __NR_stat64
 #define __NR_stat64             195
@@ -600,110 +512,43 @@ struct kernel_statfs {
 #ifndef __NR_fstat64
 #define __NR_fstat64            197
 #endif
-#ifndef __NR_setresuid32
-#define __NR_setresuid32        208
-#define __NR_setresgid32        210
-#endif
-#ifndef __NR_setfsuid32
-#define __NR_setfsuid32         215
-#define __NR_setfsgid32         216
-#endif
 #ifndef __NR_getdents64
 #define __NR_getdents64         220
 #endif
 #ifndef __NR_gettid
 #define __NR_gettid             224
 #endif
-#ifndef __NR_readahead
-#define __NR_readahead          225
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr           226
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr          227
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr           229
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr          230
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr          232
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr         233
-#endif
 #ifndef __NR_futex
 #define __NR_futex              240
 #endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity  241
-#define __NR_sched_getaffinity  242
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address    258
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime      265
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres       266
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64           268
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64          269
-#endif
-#ifndef __NR_fadvise64_64
-#define __NR_fadvise64_64       272
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set         289
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get         290
-#endif
 #ifndef __NR_openat
 #define __NR_openat             295
 #endif
-#ifndef __NR_fstatat64
-#define __NR_fstatat64          300
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat           301
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages         317
-#endif
 #ifndef __NR_getcpu
 #define __NR_getcpu             318
 #endif
-#ifndef __NR_fallocate
-#define __NR_fallocate          324
-#endif
 /* End of i386 definitions                                                   */
-#elif defined(__ARM_ARCH_3__)
-#ifndef __NR_setresuid
-#define __NR_setresuid          (__NR_SYSCALL_BASE + 164)
-#define __NR_setresgid          (__NR_SYSCALL_BASE + 170)
+#elif defined(__arm__)
+#ifndef __syscall
+#if defined(__thumb__) || defined(__ARM_EABI__)
+#define __SYS_REG(name) register long __sysreg __asm__("r6") = __NR_##name;
+#define __SYS_REG_LIST(regs...) [sysreg] "r" (__sysreg) , ##regs
+#define __syscall(name) "swi\t0"
+#define __syscall_safe(name)                     \
+  "push  {r7}\n"                                 \
+  "mov   r7,%[sysreg]\n"                         \
+  __syscall(name)"\n"                            \
+  "pop   {r7}"
+#else
+#define __SYS_REG(name)
+#define __SYS_REG_LIST(regs...) regs
+#define __syscall(name) "swi\t" __sys1(__NR_##name) ""
+#define __syscall_safe(name) __syscall(name)
+#endif
 #endif
 #ifndef __NR_rt_sigaction
 #define __NR_rt_sigaction       (__NR_SYSCALL_BASE + 174)
 #define __NR_rt_sigprocmask     (__NR_SYSCALL_BASE + 175)
-#define __NR_rt_sigpending      (__NR_SYSCALL_BASE + 176)
-#define __NR_rt_sigsuspend      (__NR_SYSCALL_BASE + 179)
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64            (__NR_SYSCALL_BASE + 180)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64           (__NR_SYSCALL_BASE + 181)
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit         (__NR_SYSCALL_BASE + 191)
 #endif
 #ifndef __NR_stat64
 #define __NR_stat64             (__NR_SYSCALL_BASE + 195)
@@ -711,172 +556,35 @@ struct kernel_statfs {
 #ifndef __NR_fstat64
 #define __NR_fstat64            (__NR_SYSCALL_BASE + 197)
 #endif
-#ifndef __NR_setresuid32
-#define __NR_setresuid32        (__NR_SYSCALL_BASE + 208)
-#define __NR_setresgid32        (__NR_SYSCALL_BASE + 210)
-#endif
-#ifndef __NR_setfsuid32
-#define __NR_setfsuid32         (__NR_SYSCALL_BASE + 215)
-#define __NR_setfsgid32         (__NR_SYSCALL_BASE + 216)
-#endif
 #ifndef __NR_getdents64
 #define __NR_getdents64         (__NR_SYSCALL_BASE + 217)
 #endif
 #ifndef __NR_gettid
 #define __NR_gettid             (__NR_SYSCALL_BASE + 224)
 #endif
-#ifndef __NR_readahead
-#define __NR_readahead          (__NR_SYSCALL_BASE + 225)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr           (__NR_SYSCALL_BASE + 226)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr          (__NR_SYSCALL_BASE + 227)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr           (__NR_SYSCALL_BASE + 229)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr          (__NR_SYSCALL_BASE + 230)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr          (__NR_SYSCALL_BASE + 232)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr         (__NR_SYSCALL_BASE + 233)
-#endif
 #ifndef __NR_futex
 #define __NR_futex              (__NR_SYSCALL_BASE + 240)
 #endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity  (__NR_SYSCALL_BASE + 241)
-#define __NR_sched_getaffinity  (__NR_SYSCALL_BASE + 242)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address    (__NR_SYSCALL_BASE + 256)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime      (__NR_SYSCALL_BASE + 263)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres       (__NR_SYSCALL_BASE + 264)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64           (__NR_SYSCALL_BASE + 266)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64          (__NR_SYSCALL_BASE + 267)
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set         (__NR_SYSCALL_BASE + 314)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get         (__NR_SYSCALL_BASE + 315)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages         (__NR_SYSCALL_BASE + 344)
-#endif
-#ifndef __NR_getcpu
-#define __NR_getcpu             (__NR_SYSCALL_BASE + 345)
-#endif
-/* End of ARM 3 definitions                                                  */
+/* End of ARM definitions                                                  */
 #elif defined(__x86_64__)
-#ifndef __NR_pread64
-#define __NR_pread64             17
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64            18
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid          117
-#define __NR_setresgid          119
-#endif
 #ifndef __NR_gettid
 #define __NR_gettid             186
 #endif
-#ifndef __NR_readahead
-#define __NR_readahead          187
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr           188
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr          189
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr           191
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr          192
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr          194
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr         195
-#endif
 #ifndef __NR_futex
 #define __NR_futex              202
 #endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity  203
-#define __NR_sched_getaffinity  204
-#endif
 #ifndef __NR_getdents64
 #define __NR_getdents64         217
 #endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address    218
-#endif
-#ifndef __NR_fadvise64
-#define __NR_fadvise64          221
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime      228
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres       229
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set         251
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get         252
-#endif
 #ifndef __NR_openat
 #define __NR_openat             257
 #endif
-#ifndef __NR_newfstatat
-#define __NR_newfstatat         262
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat           263
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages         279
-#endif
-#ifndef __NR_fallocate
-#define __NR_fallocate          285
-#endif
 /* End of x86-64 definitions                                                 */
 #elif defined(__mips__)
 #if _MIPS_SIM == _MIPS_SIM_ABI32
-#ifndef __NR_setresuid
-#define __NR_setresuid          (__NR_Linux + 185)
-#define __NR_setresgid          (__NR_Linux + 190)
-#endif
 #ifndef __NR_rt_sigaction
 #define __NR_rt_sigaction       (__NR_Linux + 194)
 #define __NR_rt_sigprocmask     (__NR_Linux + 195)
-#define __NR_rt_sigpending      (__NR_Linux + 196)
-#define __NR_rt_sigsuspend      (__NR_Linux + 199)
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64            (__NR_Linux + 200)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64           (__NR_Linux + 201)
 #endif
 #ifndef __NR_stat64
 #define __NR_stat64             (__NR_Linux + 213)
@@ -890,245 +598,59 @@ struct kernel_statfs {
 #ifndef __NR_gettid
 #define __NR_gettid             (__NR_Linux + 222)
 #endif
-#ifndef __NR_readahead
-#define __NR_readahead          (__NR_Linux + 223)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr           (__NR_Linux + 224)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr          (__NR_Linux + 225)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr           (__NR_Linux + 227)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr          (__NR_Linux + 228)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr          (__NR_Linux + 230)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr         (__NR_Linux + 231)
-#endif
 #ifndef __NR_futex
 #define __NR_futex              (__NR_Linux + 238)
 #endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity  (__NR_Linux + 239)
-#define __NR_sched_getaffinity  (__NR_Linux + 240)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address    (__NR_Linux + 252)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64           (__NR_Linux + 255)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64          (__NR_Linux + 256)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime      (__NR_Linux + 263)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres       (__NR_Linux + 264)
-#endif
 #ifndef __NR_openat
 #define __NR_openat             (__NR_Linux + 288)
 #endif
 #ifndef __NR_fstatat
 #define __NR_fstatat            (__NR_Linux + 293)
 #endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat           (__NR_Linux + 294)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages         (__NR_Linux + 308)
-#endif
 #ifndef __NR_getcpu
 #define __NR_getcpu             (__NR_Linux + 312)
 #endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set         (__NR_Linux + 314)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get         (__NR_Linux + 315)
-#endif
 /* End of MIPS (old 32bit API) definitions */
 #elif  _MIPS_SIM == _MIPS_SIM_ABI64
-#ifndef __NR_pread64
-#define __NR_pread64            (__NR_Linux +  16)
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64           (__NR_Linux +  17)
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid          (__NR_Linux + 115)
-#define __NR_setresgid          (__NR_Linux + 117)
-#endif
 #ifndef __NR_gettid
 #define __NR_gettid             (__NR_Linux + 178)
 #endif
-#ifndef __NR_readahead
-#define __NR_readahead          (__NR_Linux + 179)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr           (__NR_Linux + 180)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr          (__NR_Linux + 181)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr           (__NR_Linux + 183)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr          (__NR_Linux + 184)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr          (__NR_Linux + 186)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr         (__NR_Linux + 187)
-#endif
 #ifndef __NR_futex
 #define __NR_futex              (__NR_Linux + 194)
 #endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity  (__NR_Linux + 195)
-#define __NR_sched_getaffinity  (__NR_Linux + 196)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address    (__NR_Linux + 212)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime      (__NR_Linux + 222)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres       (__NR_Linux + 223)
-#endif
 #ifndef __NR_openat
 #define __NR_openat             (__NR_Linux + 247)
 #endif
 #ifndef __NR_fstatat
 #define __NR_fstatat            (__NR_Linux + 252)
 #endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat           (__NR_Linux + 253)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages         (__NR_Linux + 267)
-#endif
 #ifndef __NR_getcpu
 #define __NR_getcpu             (__NR_Linux + 271)
 #endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set         (__NR_Linux + 273)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get         (__NR_Linux + 274)
-#endif
 /* End of MIPS (64bit API) definitions */
 #else
-#ifndef __NR_setresuid
-#define __NR_setresuid          (__NR_Linux + 115)
-#define __NR_setresgid          (__NR_Linux + 117)
-#endif
 #ifndef __NR_gettid
 #define __NR_gettid             (__NR_Linux + 178)
 #endif
-#ifndef __NR_readahead
-#define __NR_readahead          (__NR_Linux + 179)
-#endif
-#ifndef __NR_setxattr
-#define __NR_setxattr           (__NR_Linux + 180)
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr          (__NR_Linux + 181)
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr           (__NR_Linux + 183)
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr          (__NR_Linux + 184)
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr          (__NR_Linux + 186)
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr         (__NR_Linux + 187)
-#endif
 #ifndef __NR_futex
 #define __NR_futex              (__NR_Linux + 194)
 #endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity  (__NR_Linux + 195)
-#define __NR_sched_getaffinity  (__NR_Linux + 196)
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address    (__NR_Linux + 213)
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64           (__NR_Linux + 217)
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64          (__NR_Linux + 218)
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime      (__NR_Linux + 226)
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres       (__NR_Linux + 227)
-#endif
 #ifndef __NR_openat
 #define __NR_openat             (__NR_Linux + 251)
 #endif
 #ifndef __NR_fstatat
 #define __NR_fstatat            (__NR_Linux + 256)
 #endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat           (__NR_Linux + 257)
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages         (__NR_Linux + 271)
-#endif
 #ifndef __NR_getcpu
 #define __NR_getcpu             (__NR_Linux + 275)
 #endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set         (__NR_Linux + 277)
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get         (__NR_Linux + 278)
-#endif
 /* End of MIPS (new 32bit API) definitions                                   */
 #endif
 /* End of MIPS definitions                                                   */
 #elif defined(__PPC__)
-#ifndef __NR_setfsuid
-#define __NR_setfsuid           138
-#define __NR_setfsgid           139
-#endif
-#ifndef __NR_setresuid
-#define __NR_setresuid          164
-#define __NR_setresgid          169
-#endif
 #ifndef __NR_rt_sigaction
 #define __NR_rt_sigaction       173
 #define __NR_rt_sigprocmask     174
-#define __NR_rt_sigpending      175
-#define __NR_rt_sigsuspend      178
-#endif
-#ifndef __NR_pread64
-#define __NR_pread64            179
-#endif
-#ifndef __NR_pwrite64
-#define __NR_pwrite64           180
-#endif
-#ifndef __NR_ugetrlimit
-#define __NR_ugetrlimit         190
-#endif
-#ifndef __NR_readahead
-#define __NR_readahead          191
 #endif
 #ifndef __NR_stat64
 #define __NR_stat64             195
@@ -1142,67 +664,12 @@ struct kernel_statfs {
 #ifndef __NR_gettid
 #define __NR_gettid             207
 #endif
-#ifndef __NR_setxattr
-#define __NR_setxattr           209
-#endif
-#ifndef __NR_lsetxattr
-#define __NR_lsetxattr          210
-#endif
-#ifndef __NR_getxattr
-#define __NR_getxattr           212
-#endif
-#ifndef __NR_lgetxattr
-#define __NR_lgetxattr          213
-#endif
-#ifndef __NR_listxattr
-#define __NR_listxattr          215
-#endif
-#ifndef __NR_llistxattr
-#define __NR_llistxattr         216
-#endif
 #ifndef __NR_futex
 #define __NR_futex              221
 #endif
-#ifndef __NR_sched_setaffinity
-#define __NR_sched_setaffinity  222
-#define __NR_sched_getaffinity  223
-#endif
-#ifndef __NR_set_tid_address
-#define __NR_set_tid_address    232
-#endif
-#ifndef __NR_clock_gettime
-#define __NR_clock_gettime      246
-#endif
-#ifndef __NR_clock_getres
-#define __NR_clock_getres       247
-#endif
-#ifndef __NR_statfs64
-#define __NR_statfs64           252
-#endif
-#ifndef __NR_fstatfs64
-#define __NR_fstatfs64          253
-#endif
-#ifndef __NR_fadvise64_64
-#define __NR_fadvise64_64       254
-#endif
-#ifndef __NR_ioprio_set
-#define __NR_ioprio_set         273
-#endif
-#ifndef __NR_ioprio_get
-#define __NR_ioprio_get         274
-#endif
 #ifndef __NR_openat
 #define __NR_openat             286
 #endif
-#ifndef __NR_fstatat64
-#define __NR_fstatat64          291
-#endif
-#ifndef __NR_unlinkat
-#define __NR_unlinkat           292
-#endif
-#ifndef __NR_move_pages
-#define __NR_move_pages         301
-#endif
 #ifndef __NR_getcpu
 #define __NR_getcpu             302
 #endif
@@ -1269,7 +736,7 @@ struct kernel_statfs {
   #endif
 
   #undef  LSS_RETURN
-  #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__))
+  #if (defined(__i386__) || defined(__x86_64__) || defined(__arm__))
   /* Failing system calls return a negative result in the range of
    * -1..-4095. These are "errno" values with the sign inverted.
    */
@@ -1307,6 +774,15 @@ struct kernel_statfs {
    } while (0)
   #endif
   #if defined(__i386__)
+    #if defined(NO_FRAME_POINTER) && (100 * __GNUC__ + __GNUC_MINOR__ >= 404)
+      /* This only works for GCC-4.4 and above -- the first version to use
+         .cfi directives for dwarf unwind info.  */
+      #define CFI_ADJUST_CFA_OFFSET(adjust)                                   \
+                  ".cfi_adjust_cfa_offset " #adjust "\n"
+    #else
+      #define CFI_ADJUST_CFA_OFFSET(adjust) /**/
+    #endif
+
     /* In PIC mode (e.g. when building shared libraries), gcc for i386
      * reserves ebx. Unfortunately, most distribution ship with implementations
      * of _syscallX() which clobber ebx.
@@ -1319,11 +795,13 @@ struct kernel_statfs {
     #define LSS_BODY(type,args...)                                            \
       long __res;                                                             \
       __asm__ __volatile__("push %%ebx\n"                                     \
+                           CFI_ADJUST_CFA_OFFSET(4)                           \
                            "movl %2,%%ebx\n"                                  \
                            "int $0x80\n"                                      \
-                           "pop %%ebx"                                        \
+                           "pop %%ebx\n"                                      \
+                           CFI_ADJUST_CFA_OFFSET(-4)                          \
                            args                                               \
-                           : "memory");                                       \
+                           : "esp", "memory");                                \
       LSS_RETURN(type,__res)
     #undef  _syscall0
     #define _syscall0(type,name)                                              \
@@ -1380,7 +858,7 @@ struct kernel_statfs {
                              : "i" (__NR_##name), "ri" ((long)(arg1)),        \
                                "c" ((long)(arg2)), "d" ((long)(arg3)),        \
                                "S" ((long)(arg4)), "D" ((long)(arg5))         \
-                             : "memory");                                     \
+                             : "esp", "memory");                              \
         LSS_RETURN(type,__res);                                               \
       }
     #undef  _syscall6
@@ -1402,7 +880,7 @@ struct kernel_statfs {
                              : "i" (__NR_##name),  "0" ((long)(&__s)),        \
                                "c" ((long)(arg2)), "d" ((long)(arg3)),        \
                                "S" ((long)(arg4)), "D" ((long)(arg5))         \
-                             : "memory");                                     \
+                             : "esp", "memory");                              \
         LSS_RETURN(type,__res);                                               \
       }
     LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
@@ -1488,36 +966,10 @@ struct kernel_statfs {
                            : "0"(-EINVAL), "i"(__NR_clone),
                              "m"(fn), "m"(child_stack), "m"(flags), "m"(arg),
                              "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr)
-                           : "memory", "ecx", "edx", "esi", "edi");
+                           : "esp", "memory", "ecx", "edx", "esi", "edi");
       LSS_RETURN(int, __res);
     }
 
-    #define __NR__fadvise64_64 __NR_fadvise64_64
-    LSS_INLINE _syscall6(int, _fadvise64_64, int, fd,
-                         unsigned, offset_lo, unsigned, offset_hi,
-                         unsigned, len_lo, unsigned, len_hi,
-                         int, advice)
-
-    LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset,
-                                       loff_t len, int advice) {
-      return LSS_NAME(_fadvise64_64)(fd,
-                                     (unsigned)offset, (unsigned)(offset >>32),
-                                     (unsigned)len, (unsigned)(len >> 32),
-                                     advice);
-    }
-
-    #define __NR__fallocate __NR_fallocate
-    LSS_INLINE _syscall6(int, _fallocate, int, fd,
-                         int, mode,
-                         unsigned, offset_lo, unsigned, offset_hi,
-                         unsigned, len_lo, unsigned, len_hi)
-
-    LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode,
-                                       loff_t offset, loff_t len) {
-      union { loff_t off; unsigned w[2]; } o = { offset }, l = { len };
-      return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]);
-    }
-
     LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
       /* On i386, the kernel does not know how to return from a signal
        * handler. Instead, it relies on user space to provide a
@@ -1596,7 +1048,7 @@ struct kernel_statfs {
           __asm__ __volatile__("movq %5,%%r10; syscall" :                     \
             "=a" (__res) : "0" (__NR_##name),                                 \
             "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
-            "g" ((long)(arg4)) : "r10", "r11", "rcx", "memory");              \
+            "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory");              \
           LSS_RETURN(type, __res);                                            \
       }
     #undef _syscall5
@@ -1608,7 +1060,7 @@ struct kernel_statfs {
           __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" :       \
             "=a" (__res) : "0" (__NR_##name),                                 \
             "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
-            "g" ((long)(arg4)), "g" ((long)(arg5)) :                          \
+            "r" ((long)(arg4)), "r" ((long)(arg5)) :                          \
             "r8", "r10", "r11", "rcx", "memory");                             \
           LSS_RETURN(type, __res);                                            \
       }
@@ -1622,7 +1074,7 @@ struct kernel_statfs {
                                "syscall" :                                    \
             "=a" (__res) : "0" (__NR_##name),                                 \
             "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)),       \
-            "g" ((long)(arg4)), "g" ((long)(arg5)), "g" ((long)(arg6)) :      \
+            "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) :      \
             "r8", "r9", "r10", "r11", "rcx", "memory");                       \
           LSS_RETURN(type, __res);                                            \
       }
@@ -1631,8 +1083,6 @@ struct kernel_statfs {
                                    void *newtls, int *child_tidptr) {
       long __res;
       {
-        register void *__tls  __asm__("r8")  = newtls;
-        register int  *__ctid __asm__("r10") = child_tidptr;
         __asm__ __volatile__(/* if (fn == NULL)
                               *   return -EINVAL;
                               */
@@ -1645,8 +1095,10 @@ struct kernel_statfs {
                              "testq  %5,%5\n"
                              "jz     1f\n"
 
-                             /* childstack -= 2*sizeof(void *);
+                             /* Set up alignment of the child stack:
+                              * child_stack = (child_stack & ~0xF) - 16;
                               */
+                             "andq   $-16,%5\n"
                              "subq   $16,%5\n"
 
                              /* Push "arg" and "fn" onto the stack that will be
@@ -1663,6 +1115,8 @@ struct kernel_statfs {
                               *                %r10 = child_tidptr)
                               */
                              "movq   %2,%%rax\n"
+                             "movq   %9,%%r8\n"
+                             "movq   %10,%%r10\n"
                              "syscall\n"
 
                              /* if (%rax != 0)
@@ -1693,13 +1147,11 @@ struct kernel_statfs {
                              : "=a" (__res)
                              : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
                                "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
-                               "d"(parent_tidptr), "r"(__tls), "r"(__ctid)
-                             : "memory", "r11", "rcx");
+                               "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
+                             : "rsp", "memory", "r8", "r10", "r11", "rcx");
       }
       LSS_RETURN(int, __res);
     }
-    LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len,
-                         int,  advice)
 
     LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) {
       /* On x86-64, the kernel does not know how to return from
@@ -1719,7 +1171,7 @@ struct kernel_statfs {
                            : "i"  (__NR_rt_sigreturn));
       return res;
     }
-  #elif defined(__ARM_ARCH_3__)
+  #elif defined(__arm__)
     /* Most definitions of _syscallX() neglect to mark "memory" as being
      * clobbered. This causes problems with compilers, that do a better job
      * at optimizing across __asm__ calls.
@@ -1727,12 +1179,26 @@ struct kernel_statfs {
      */
     #undef LSS_REG
     #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a
+
+    /* r0..r3 are scratch registers and not preserved across function
+     * calls.  We need to first evaluate the first 4 syscall arguments
+     * and store them on stack.  They must be loaded into r0..r3 after
+     * all function calls to avoid r0..r3 being clobbered.
+     */
+    #undef LSS_SAVE_ARG
+    #define LSS_SAVE_ARG(r,a) long __tmp##r = (long)a
+    #undef LSS_LOAD_ARG
+    #define LSS_LOAD_ARG(r) register long __r##r __asm__("r"#r) = __tmp##r
+
     #undef  LSS_BODY
-    #define LSS_BODY(type,name,args...)                                       \
+    #define LSS_BODY(type, name, args...)                                     \
           register long __res_r0 __asm__("r0");                               \
           long __res;                                                         \
-          __asm__ __volatile__ (__syscall(name)                               \
-                                : "=r"(__res_r0) : args : "lr", "memory");    \
+          __SYS_REG(name)                                                     \
+          __asm__ __volatile__ (__syscall_safe(name)                          \
+                                : "=r"(__res_r0)                              \
+                                : __SYS_REG_LIST(args)                        \
+                                : "lr", "memory");                            \
           __res = __res_r0;                                                   \
           LSS_RETURN(type, __res)
     #undef _syscall0
@@ -1743,77 +1209,126 @@ struct kernel_statfs {
     #undef _syscall1
     #define _syscall1(type, name, type1, arg1)                                \
       type LSS_NAME(name)(type1 arg1) {                                       \
-        LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0));                    \
+        /* There is no need for using a volatile temp.  */                    \
+        LSS_REG(0, arg1);                                                     \
+        LSS_BODY(type, name, "r"(__r0));                                      \
       }
     #undef _syscall2
     #define _syscall2(type, name, type1, arg1, type2, arg2)                   \
       type LSS_NAME(name)(type1 arg1, type2 arg2) {                           \
-        LSS_REG(0, arg1); LSS_REG(1, arg2);                                   \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1));                           \
       }
     #undef _syscall3
     #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3)      \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) {               \
-        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_SAVE_ARG(2, arg3);                                                \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_LOAD_ARG(2);                                                      \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2));                \
       }
     #undef _syscall4
-    #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4)  \
+    #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                      type4, arg4)                                            \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) {   \
-        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
-        LSS_REG(3, arg4);                                                     \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_SAVE_ARG(2, arg3);                                                \
+        LSS_SAVE_ARG(3, arg4);                                                \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_LOAD_ARG(2);                                                      \
+        LSS_LOAD_ARG(3);                                                      \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3));     \
       }
     #undef _syscall5
-    #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5)                                             \
+    #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                      type4, arg4, type5, arg5)                               \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
                           type5 arg5) {                                       \
-        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
-        LSS_REG(3, arg4); LSS_REG(4, arg5);                                   \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_SAVE_ARG(2, arg3);                                                \
+        LSS_SAVE_ARG(3, arg4);                                                \
+        LSS_REG(4, arg5);                                                     \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_LOAD_ARG(2);                                                      \
+        LSS_LOAD_ARG(3);                                                      \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
                              "r"(__r4));                                      \
       }
     #undef _syscall6
-    #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,  \
-                      type5,arg5,type6,arg6)                                  \
+    #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3,      \
+                      type4, arg4, type5, arg5, type6, arg6)                  \
       type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4,     \
                           type5 arg5, type6 arg6) {                           \
-        LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3);                 \
-        LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6);                 \
+        LSS_SAVE_ARG(0, arg1);                                                \
+        LSS_SAVE_ARG(1, arg2);                                                \
+        LSS_SAVE_ARG(2, arg3);                                                \
+        LSS_SAVE_ARG(3, arg4);                                                \
+        LSS_REG(4, arg5);                                                     \
+        LSS_REG(5, arg6);                                                     \
+        LSS_LOAD_ARG(0);                                                      \
+        LSS_LOAD_ARG(1);                                                      \
+        LSS_LOAD_ARG(2);                                                      \
+        LSS_LOAD_ARG(3);                                                      \
         LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3),      \
                              "r"(__r4), "r"(__r5));                           \
       }
     LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
                                    int flags, void *arg, int *parent_tidptr,
                                    void *newtls, int *child_tidptr) {
-      long __res;
+      register long __res __asm__("r5");
       {
-        register int   __flags __asm__("r0") = flags;
-        register void *__stack __asm__("r1") = child_stack;
-        register void *__ptid  __asm__("r2") = parent_tidptr;
-        register void *__tls   __asm__("r3") = newtls;
-        register int  *__ctid  __asm__("r4") = child_tidptr;
-        __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL)
-                              *   return -EINVAL;
-                              */
-                             "cmp   %2,#0\n"
-                             "cmpne %3,#0\n"
-                             "moveq %0,%1\n"
-                             "beq   1f\n"
+        if (fn == NULL || child_stack == NULL) {
+            __res = -EINVAL;
+            goto clone_exit;
+        }
 
-                             /* Push "arg" and "fn" onto the stack that will be
-                              * used by the child.
-                              */
-                             "str   %5,[%3,#-4]!\n"
-                             "str   %2,[%3,#-4]!\n"
+        /* stash first 4 arguments on stack first because we can only load
+         * them after all function calls.
+         */
+        int    tmp_flags = flags;
+        int  * tmp_stack = (int*) child_stack;
+        void * tmp_ptid  = parent_tidptr;
+        void * tmp_tls   = newtls;
+
+        register int  *__ctid  __asm__("r4") = child_tidptr;
 
-                             /* %r0 = syscall(%r0 = flags,
+        /* Push "arg" and "fn" onto the stack that will be
+         * used by the child.
+         */
+        *(--tmp_stack) = (int) arg;
+        *(--tmp_stack) = (int) fn;
+
+        /* We must load r0..r3 last after all possible function calls.  */
+        register int   __flags __asm__("r0") = tmp_flags;
+        register void *__stack __asm__("r1") = tmp_stack;
+        register void *__ptid  __asm__("r2") = tmp_ptid;
+        register void *__tls   __asm__("r3") = tmp_tls;
+
+        /* %r0 = syscall(%r0 = flags,
+         *               %r1 = child_stack,
+         *               %r2 = parent_tidptr,
+         *               %r3 = newtls,
+         *               %r4 = child_tidptr)
+         */
+        __SYS_REG(clone)
+        __asm__ __volatile__(/* %r0 = syscall(%r0 = flags,
                               *               %r1 = child_stack,
                               *               %r2 = parent_tidptr,
                               *               %r3 = newtls,
                               *               %r4 = child_tidptr)
                               */
+                             "push  {r7}\n"
+                             "mov   r7,%1\n"
                              __syscall(clone)"\n"
 
                              /* if (%r0 != 0)
@@ -1828,30 +1343,48 @@ struct kernel_statfs {
                              "mov   lr,pc\n"
                              "ldr   pc,[sp]\n"
 
-                             /* Call _exit(%r0).
+                             /* Call _exit(%r0), which never returns.  We only
+                              * need to set r7 for EABI syscall ABI but we do
+                              * this always to simplify code sharing between
+                              * old and new syscall ABIs.
                               */
+                             "mov   r7,%2\n"
                              __syscall(exit)"\n"
-                           "1:\n"
+
+                             /* Pop r7 from the stack only in the parent.
+                              */
+                           "1: pop {r7}\n"
                              : "=r" (__res)
-                             : "i"(-EINVAL),
-                               "r"(fn), "r"(__stack), "r"(__flags), "r"(arg),
+                             : "r"(__sysreg),
+                               "i"(__NR_exit), "r"(__stack), "r"(__flags),
                                "r"(__ptid), "r"(__tls), "r"(__ctid)
-                             : "lr", "memory");
+                             : "cc", "lr", "memory");
       }
+      clone_exit:
       LSS_RETURN(int, __res);
     }
   #elif defined(__mips__)
     #undef LSS_REG
     #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) =       \
                                  (unsigned long)(a)
+
+    #if _MIPS_SIM == _MIPS_SIM_ABI32
+    // See http://sources.redhat.com/ml/libc-alpha/2004-10/msg00050.html
+    // or http://www.linux-mips.org/archives/linux-mips/2004-10/msg00142.html
+    #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$8", "$9", "$10", "$11", "$12",\
+                                "$13", "$14", "$15", "$24", "$25", "memory"
+    #else
+    #define MIPS_SYSCALL_CLOBBERS "$1", "$3", "$10", "$11", "$12", "$13",     \
+                                "$14", "$15", "$24", "$25", "memory"
+    #endif
+
     #undef  LSS_BODY
     #define LSS_BODY(type,name,r7,...)                                        \
           register unsigned long __v0 __asm__("$2") = __NR_##name;            \
           __asm__ __volatile__ ("syscall\n"                                   \
                                 : "=&r"(__v0), r7 (__r7)                      \
                                 : "0"(__v0), ##__VA_ARGS__                    \
-                                : "$8", "$9", "$10", "$11", "$12",            \
-                                  "$13", "$14", "$15", "$24", "memory");      \
+                                : MIPS_SYSCALL_CLOBBERS);                     \
           LSS_RETURN(type, __v0, __r7)
     #undef _syscall0
     #define _syscall0(type, name)                                             \
@@ -1909,8 +1442,7 @@ struct kernel_statfs {
                               : "=&r"(__v0), "+r" (__r7)                      \
                               : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
                                 "r"(__r6), "m" ((unsigned long)arg5)          \
-                              : "$8", "$9", "$10", "$11", "$12",              \
-                                "$13", "$14", "$15", "$24", "memory");        \
+                              : MIPS_SYSCALL_CLOBBERS);                       \
         LSS_RETURN(type, __v0, __r7);                                         \
       }
     #else
@@ -1950,8 +1482,7 @@ struct kernel_statfs {
                               : "i" (__NR_##name), "r"(__r4), "r"(__r5),      \
                                 "r"(__r6), "r" ((unsigned long)arg5),         \
                                 "r" ((unsigned long)arg6)                     \
-                              : "$8", "$9", "$10", "$11", "$12",              \
-                                "$13", "$14", "$15", "$24", "memory");        \
+                              : MIPS_SYSCALL_CLOBBERS);                       \
         LSS_RETURN(type, __v0, __r7);                                         \
       }
     #else
@@ -2247,173 +1778,68 @@ struct kernel_statfs {
   #define __NR__exit   __NR_exit
   #define __NR__gettid __NR_gettid
   #define __NR__mremap __NR_mremap
-  LSS_INLINE _syscall1(int,     chdir,           const char *,p)
   LSS_INLINE _syscall1(int,     close,           int,         f)
-  LSS_INLINE _syscall2(int,     clock_getres,    int,         c,
-                       struct kernel_timespec*, t)
-  LSS_INLINE _syscall2(int,     clock_gettime,   int,         c,
-                       struct kernel_timespec*, t)
-  LSS_INLINE _syscall1(int,     dup,             int,         f)
-  LSS_INLINE _syscall2(int,     dup2,            int,         s,
-                       int,            d)
-  LSS_INLINE _syscall3(int,     execve,          const char*, f,
-                       const char*const*,a,const char*const*, e)
   LSS_INLINE _syscall1(int,     _exit,           int,         e)
   LSS_INLINE _syscall3(int,     fcntl,           int,         f,
                        int,            c, long,   a)
-  LSS_INLINE _syscall0(pid_t,   fork)
   LSS_INLINE _syscall2(int,     fstat,           int,         f,
                       struct kernel_stat*,   b)
-  LSS_INLINE _syscall2(int,     fstatfs,         int,         f,
-                      struct kernel_statfs*, b)
   LSS_INLINE _syscall4(int,     futex,           int*,        a,
                        int,            o, int,    v,
                       struct kernel_timespec*, t)
   LSS_INLINE _syscall3(int,     getdents,        int,         f,
                       struct kernel_dirent*, d, int,    c)
+#ifdef __NR_getdents64
   LSS_INLINE _syscall3(int,     getdents64,      int,         f,
                       struct kernel_dirent64*, d, int,    c)
-  LSS_INLINE _syscall0(gid_t,   getegid)
-  LSS_INLINE _syscall0(uid_t,   geteuid)
-  LSS_INLINE _syscall0(pid_t,   getpgrp)
+#endif
   LSS_INLINE _syscall0(pid_t,   getpid)
   LSS_INLINE _syscall0(pid_t,   getppid)
-  LSS_INLINE _syscall2(int,     getpriority,     int,         a,
-                       int,            b)
-  LSS_INLINE _syscall2(int,     getrlimit,       int,         r,
-                      struct kernel_rlimit*, l)
-  LSS_INLINE _syscall1(pid_t,   getsid,          pid_t,       p)
   LSS_INLINE _syscall0(pid_t,   _gettid)
-  LSS_INLINE _syscall5(int,     setxattr,        const char *,p,
-                       const char *,   n,        const void *,v,
-                       size_t,         s,        int,         f)
-  LSS_INLINE _syscall5(int,     lsetxattr,       const char *,p,
-                       const char *,   n,        const void *,v,
-                       size_t,         s,        int,         f)
-  LSS_INLINE _syscall4(ssize_t, getxattr,        const char *,p,
-                       const char *,   n,        void *,      v, size_t, s)
-  LSS_INLINE _syscall4(ssize_t, lgetxattr,       const char *,p,
-                       const char *,   n,        void *,      v, size_t, s)
-  LSS_INLINE _syscall3(ssize_t, listxattr,       const char *,p,
-                       char *,   l,              size_t,      s)
-  LSS_INLINE _syscall3(ssize_t, llistxattr,      const char *,p,
-                       char *,   l,              size_t,      s)
-  LSS_INLINE _syscall2(int,     ioprio_get,      int,         which,
-                       int,     who)
-  LSS_INLINE _syscall3(int,     ioprio_set,      int,         which,
-                       int,     who,             int,         ioprio)
   LSS_INLINE _syscall2(int,     kill,            pid_t,       p,
                        int,            s)
   LSS_INLINE _syscall3(off_t,   lseek,           int,         f,
                        off_t,          o, int,    w)
   LSS_INLINE _syscall2(int,     munmap,          void*,       s,
                        size_t,         l)
-  LSS_INLINE _syscall6(long,    move_pages,      pid_t,       p,
-                       unsigned long,  n, void **,g, int *,   d,
-                       int *,          s, int,    f)
   LSS_INLINE _syscall5(void*,   _mremap,         void*,       o,
                        size_t,         os,       size_t,      ns,
                        unsigned long,  f, void *, a)
   LSS_INLINE _syscall3(int,     open,            const char*, p,
                        int,            f, int,    m)
-  LSS_INLINE _syscall3(int,     poll,           struct kernel_pollfd*, u,
-                       unsigned int,   n, int,    t)
   LSS_INLINE _syscall2(int,     prctl,           int,         o,
                        long,           a)
   LSS_INLINE _syscall4(long,    ptrace,          int,         r,
                        pid_t,          p, void *, a, void *, d)
   LSS_INLINE _syscall3(ssize_t, read,            int,         f,
                        void *,         b, size_t, c)
-  LSS_INLINE _syscall3(int,     readlink,        const char*, p,
-                       char*,          b, size_t, s)
   LSS_INLINE _syscall4(int,     rt_sigaction,    int,         s,
                        const struct kernel_sigaction*, a,
                        struct kernel_sigaction*, o, size_t,   c)
-  LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s,
-                       size_t,         c)
   LSS_INLINE _syscall4(int, rt_sigprocmask,      int,         h,
                        const struct kernel_sigset_t*,  s,
                        struct kernel_sigset_t*,        o, size_t, c);
-  LSS_INLINE _syscall2(int, rt_sigsuspend,
-                       const struct kernel_sigset_t*, s,  size_t, c);
-  LSS_INLINE _syscall3(int,     sched_getaffinity,pid_t,      p,
-                       unsigned int,   l, unsigned long *, m)
-  LSS_INLINE _syscall3(int,     sched_setaffinity,pid_t,      p,
-                       unsigned int,   l, unsigned long *, m)
   LSS_INLINE _syscall0(int,     sched_yield)
-  LSS_INLINE _syscall1(long,    set_tid_address, int *,       t)
-  LSS_INLINE _syscall1(int,     setfsgid,        gid_t,       g)
-  LSS_INLINE _syscall1(int,     setfsuid,        uid_t,       u)
-  LSS_INLINE _syscall1(int,     setuid,          uid_t,       u)
-  LSS_INLINE _syscall1(int,     setgid,          gid_t,       g)
-  LSS_INLINE _syscall2(int,     setpgid,         pid_t,       p,
-                       pid_t,          g)
-  LSS_INLINE _syscall3(int,     setpriority,     int,         a,
-                       int,            b, int,    p)
-  LSS_INLINE _syscall3(int,     setresgid,       gid_t,       r,
-                       gid_t,          e, gid_t,  s)
-  LSS_INLINE _syscall3(int,     setresuid,       uid_t,       r,
-                       uid_t,          e, uid_t,  s)
-  LSS_INLINE _syscall2(int,     setrlimit,       int,         r,
-                       const struct kernel_rlimit*, l)
-  LSS_INLINE _syscall0(pid_t,    setsid)
   LSS_INLINE _syscall2(int,     sigaltstack,     const stack_t*, s,
                        const stack_t*, o)
   LSS_INLINE _syscall2(int,     stat,            const char*, f,
                       struct kernel_stat*,   b)
-  LSS_INLINE _syscall2(int,     statfs,          const char*, f,
-                      struct kernel_statfs*, b)
   LSS_INLINE _syscall3(ssize_t, write,            int,        f,
                        const void *,   b, size_t, c)
-  LSS_INLINE _syscall3(ssize_t, writev,           int,        f,
-                       const struct kernel_iovec*, v, size_t, c)
   #if defined(__NR_getcpu)
     LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu,
                          unsigned *, node, void *, unused);
   #endif
   #if defined(__x86_64__) ||                                                  \
      (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
-    LSS_INLINE _syscall3(int, recvmsg,            int,   s,
-                        struct kernel_msghdr*,     m, int, f)
-    LSS_INLINE _syscall3(int, sendmsg,            int,   s,
-                         const struct kernel_msghdr*, m, int, f)
-    LSS_INLINE _syscall6(int, sendto,             int,   s,
-                         const void*,             m, size_t, l,
-                         int,                     f,
-                         const struct kernel_sockaddr*, a, int, t)
-    LSS_INLINE _syscall2(int, shutdown,           int,   s,
-                         int,                     h)
     LSS_INLINE _syscall3(int, socket,             int,   d,
                          int,                     t, int,       p)
-    LSS_INLINE _syscall4(int, socketpair,         int,   d,
-                         int,                     t, int,       p, int*, s)
   #endif
   #if defined(__x86_64__)
-    LSS_INLINE _syscall4(int, fallocate, int, fd, int, mode,
-                         loff_t, offset, loff_t, len)
     LSS_INLINE _syscall6(void*, mmap,              void*, s,
                          size_t,                   l, int,               p,
                          int,                      f, int,               d,
                          __off64_t,                o)
-    LSS_INLINE _syscall4(int, newfstatat,         int,   d,
-                         const char *,            p,
-                        struct kernel_stat*,       b, int, f)
-
-    LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
-      return LSS_NAME(setfsgid)(gid);
-    }
-
-    LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) {
-      return LSS_NAME(setfsuid)(uid);
-    }
-
-    LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
-      return LSS_NAME(setresgid)(rgid, egid, sgid);
-    }
-
-    LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
-      return LSS_NAME(setresuid)(ruid, euid, suid);
-    }
 
     LSS_INLINE int LSS_NAME(sigaction)(int signum,
                                        const struct kernel_sigaction *act,
@@ -2435,114 +1861,35 @@ struct kernel_statfs {
       }
     }
 
-    LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
-      return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
-    }
-
     LSS_INLINE int LSS_NAME(sigprocmask)(int how,
                                          const struct kernel_sigset_t *set,
                                          struct kernel_sigset_t *oldset) {
       return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8);
     }
-
-    LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
-      return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
-    }
   #endif
-  #if defined(__x86_64__) || defined(__ARM_ARCH_3__) ||                       \
+  #if defined(__x86_64__) || \
+      defined(__arm__) || \
      (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32)
     LSS_INLINE _syscall4(pid_t, wait4,            pid_t, p,
                          int*,                    s, int,       o,
-                        struct kernel_rusage*,     r)
-
+                         struct kernel_rusage*,   r)
     LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){
       return LSS_NAME(wait4)(pid, status, options, 0);
     }
-  #endif
-  #if defined(__i386__) || defined(__x86_64__)
+   #endif
+  #if defined(__i386__) || defined(__x86_64__) || defined(__arm__)
     LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m)
-    LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f)
-  #endif
-  #if defined(__i386__) || defined(__ARM_ARCH_3__)
-    #define __NR__setfsgid32  __NR_setfsgid32
-    #define __NR__setfsuid32  __NR_setfsuid32
-    #define __NR__setresgid32 __NR_setresgid32
-    #define __NR__setresuid32 __NR_setresuid32
-    LSS_INLINE _syscall2(int,   ugetrlimit,        int,          r,
-                        struct kernel_rlimit*, l)
-    LSS_INLINE _syscall1(int,     _setfsgid32,      gid_t,       f)
-    LSS_INLINE _syscall1(int,     _setfsuid32,      uid_t,       f)
-    LSS_INLINE _syscall3(int,     _setresgid32,    gid_t,       r,
-                         gid_t,          e, gid_t,  s)
-    LSS_INLINE _syscall3(int,     _setresuid32,    uid_t,       r,
-                         uid_t,          e, uid_t,  s)
-
-    LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) {
-      int rc;
-      if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 &&
-          LSS_ERRNO == ENOSYS) {
-        if ((unsigned int)gid & ~0xFFFFu) {
-          rc = EINVAL;
-        } else {
-          rc = LSS_NAME(setfsgid)(gid);
-        }
-      }
-      return rc;
-    }
-
-    LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) {
-      int rc;
-      if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 &&
-          LSS_ERRNO == ENOSYS) {
-        if ((unsigned int)uid & ~0xFFFFu) {
-          rc = EINVAL;
-        } else {
-          rc = LSS_NAME(setfsuid)(uid);
-        }
-      }
-      return rc;
-    }
-
-    LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) {
-      int rc;
-      if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 &&
-          LSS_ERRNO == ENOSYS) {
-        if ((unsigned int)rgid & ~0xFFFFu ||
-            (unsigned int)egid & ~0xFFFFu ||
-            (unsigned int)sgid & ~0xFFFFu) {
-          rc = EINVAL;
-        } else {
-          rc = LSS_NAME(setresgid)(rgid, egid, sgid);
-        }
-      }
-      return rc;
-    }
-
-    LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) {
-      int rc;
-      if ((rc = LSS_NAME(_setresuid32)(ruid, euid, suid)) < 0 &&
-          LSS_ERRNO == ENOSYS) {
-        if ((unsigned int)ruid & ~0xFFFFu ||
-            (unsigned int)euid & ~0xFFFFu ||
-            (unsigned int)suid & ~0xFFFFu) {
-          rc = EINVAL;
-        } else {
-          rc = LSS_NAME(setresuid)(ruid, euid, suid);
-        }
-      }
-      return rc;
-    }
   #endif
   LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) {
     memset(&set->sig, 0, sizeof(set->sig));
     return 0;
   }
-  
+
   LSS_INLINE int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) {
     memset(&set->sig, -1, sizeof(set->sig));
     return 0;
   }
-  
+
   LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set,
                                      int signum) {
     if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
@@ -2554,7 +1901,7 @@ struct kernel_statfs {
       return 0;
     }
   }
-  
+
   LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set,
                                         int signum) {
     if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
@@ -2566,30 +1913,26 @@ struct kernel_statfs {
       return 0;
     }
   }
-  
-  LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set,
-                                          int signum) {
-    if (signum < 1 || signum > (int)(8*sizeof(set->sig))) {
-      LSS_ERRNO = EINVAL;
-      return -1;
-    } else {
-      return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] &
-                (1UL << ((signum - 1) % (8*sizeof(set->sig[0])))));
-    }
-  }
-  #if defined(__i386__) || defined(__ARM_ARCH_3__) ||                         \
+
+  #if defined(__i386__) || \
+      defined(__arm__) || \
      (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__)
     #define __NR__sigaction   __NR_sigaction
-    #define __NR__sigpending  __NR_sigpending
     #define __NR__sigprocmask __NR_sigprocmask
-    #define __NR__sigsuspend  __NR_sigsuspend
-    #define __NR__socketcall  __NR_socketcall
     LSS_INLINE _syscall2(int, fstat64,             int, f,
                          struct kernel_stat64 *, b)
     LSS_INLINE _syscall5(int, _llseek,     uint, fd, ulong, hi, ulong, lo,
                          loff_t *, res, uint, wh)
+#ifdef __PPC64__
+    LSS_INLINE _syscall6(void*, mmap,              void*, s,
+                         size_t,                   l, int,               p,
+                         int,                      f, int,               d,
+                         off_t,                    o)
+#else
+    #ifndef __ARM_EABI__
+    /* Not available on ARM EABI Linux.  */
     LSS_INLINE _syscall1(void*, mmap,              void*, a)
-#ifndef __PPC64__
+    #endif
     LSS_INLINE _syscall6(void*, mmap2,             void*, s,
                          size_t,                   l, int,               p,
                          int,                      f, int,               d,
@@ -2598,17 +1941,9 @@ struct kernel_statfs {
     LSS_INLINE _syscall3(int,   _sigaction,        int,   s,
                          const struct kernel_old_sigaction*,  a,
                          struct kernel_old_sigaction*,        o)
-    LSS_INLINE _syscall1(int,   _sigpending, unsigned long*, s)
     LSS_INLINE _syscall3(int,   _sigprocmask,      int,   h,
                          const unsigned long*,     s,
                          unsigned long*,           o)
-    #ifdef __PPC__
-    LSS_INLINE _syscall1(int, _sigsuspend,         unsigned long, s)
-    #else
-    LSS_INLINE _syscall3(int, _sigsuspend,         const void*, a,
-                         int,                      b,
-                         unsigned long,            s)
-    #endif
     LSS_INLINE _syscall2(int, stat64,              const char *, p,
                          struct kernel_stat64 *, b)
 
@@ -2674,17 +2009,6 @@ struct kernel_statfs {
       return rc;
     }
 
-    LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) {
-      int old_errno = LSS_ERRNO;
-      int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8);
-      if (rc < 0 && LSS_ERRNO == ENOSYS) {
-        LSS_ERRNO = old_errno;
-        LSS_NAME(sigemptyset)(set);
-        rc = LSS_NAME(_sigpending)(&set->sig[0]);
-      }
-      return rc;
-    }
-
     LSS_INLINE int LSS_NAME(sigprocmask)(int how,
                                          const struct kernel_sigset_t *set,
                                          struct kernel_sigset_t *oldset) {
@@ -2701,20 +2025,6 @@ struct kernel_statfs {
       }
       return rc;
     }
-
-    LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) {
-      int olderrno = LSS_ERRNO;
-      int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8);
-      if (rc < 0 && LSS_ERRNO == ENOSYS) {
-        LSS_ERRNO = olderrno;
-        rc = LSS_NAME(_sigsuspend)(
-        #ifndef __PPC__
-                                   set, 0,
-        #endif
-                                   set->sig[0]);
-      }
-      return rc;
-    }
   #endif
   #if defined(__PPC__)
     #undef LSS_SC_LOADARGS_0
@@ -2771,90 +2081,31 @@ struct kernel_statfs {
         }                                                                     \
         LSS_RETURN(type, __sc_ret, __sc_err)
 
-    LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
-                                         int flags){
-      LSS_SC_BODY(3, ssize_t, 17, s, msg, flags);
-    }
-
-    LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
-                                         const struct kernel_msghdr *msg,
-                                         int flags) {
-      LSS_SC_BODY(3, ssize_t, 16, s, msg, flags);
-    }
-
-    // TODO(csilvers): why is this ifdef'ed out?
-#if 0
-    LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
-                                        int flags,
-                                        const struct kernel_sockaddr *to,
-                                        unsigned int tolen) {
-      LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen);
-    }
-#endif
-
-    LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
-      LSS_SC_BODY(2, int, 13, s, how);
-    }
-
     LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
       LSS_SC_BODY(3, int, 1, domain, type, protocol);
     }
-
-    LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
-                                        int sv[2]) {
-      LSS_SC_BODY(4, int, 8, d, type, protocol, sv);
-    }
   #endif
-  #if defined(__i386__) || defined(__ARM_ARCH_3__) ||                         \
+  #if defined(__i386__) || \
+      (defined(__arm__) && !defined(__ARM_EABI__)) || \
       (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
-    #define __NR__socketcall  __NR_socketcall
-    LSS_INLINE _syscall2(int,      _socketcall,    int,   c,
-                         va_list,                  a)
-
-    LSS_INLINE int LSS_NAME(socketcall)(int op, ...) {
-      int rc;
-      va_list ap;
-      va_start(ap, op);
-      rc = LSS_NAME(_socketcall)(op, ap);
-      va_end(ap);
-      return rc;
-    }
-
-    LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg,
-                                         int flags){
-      return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags);
-    }
 
-    LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s,
-                                         const struct kernel_msghdr *msg,
-                                         int flags) {
-      return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags);
-    }
-
-    LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len,
-                                        int flags,
-                                        const struct kernel_sockaddr *to,
-                                        unsigned int tolen) {
-      return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen);
-    }
-
-    LSS_INLINE int LSS_NAME(shutdown)(int s, int how) {
-      return LSS_NAME(socketcall)(13, s, how);
-    }
+    /* See sys_socketcall in net/socket.c in kernel source.
+     * It de-multiplexes on its first arg and unpacks the arglist
+     * array in its second arg.
+     */
+    LSS_INLINE _syscall2(long, socketcall, int, c, unsigned long*, a)
 
     LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) {
-      return LSS_NAME(socketcall)(1, domain, type, protocol);
+      unsigned long args[3] = {
+        (unsigned long) domain,
+        (unsigned long) type,
+        (unsigned long) protocol
+      };
+      return LSS_NAME(socketcall)(1, args);
     }
-
-    LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol,
-                                        int sv[2]) {
-      return LSS_NAME(socketcall)(8, d, type, protocol, sv);
-    }
-  #endif
-  #if defined(__i386__) || defined(__PPC__)
-    LSS_INLINE _syscall4(int,   fstatat64,        int,   d,
-                         const char *,      p,
-                         struct kernel_stat64 *,   b,    int,   f)
+  #elif defined(__ARM_EABI__)
+    LSS_INLINE _syscall3(int, socket,             int,   d,
+                         int,                     t, int,       p)
   #endif
   #if defined(__i386__) || defined(__PPC__) ||                                \
      (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32)
@@ -2886,28 +2137,6 @@ struct kernel_statfs {
   #else
     LSS_INLINE _syscall1(int,     pipe,           int *, p)
   #endif
-  /* TODO(csilvers): see if ppc can/should support this as well              */
-  #if defined(__i386__) || defined(__ARM_ARCH_3__) ||                         \
-     (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64)
-    #define __NR__statfs64  __NR_statfs64
-    #define __NR__fstatfs64 __NR_fstatfs64
-    LSS_INLINE _syscall3(int, _statfs64,     const char*, p,
-                         size_t, s,struct kernel_statfs64*, b)
-    LSS_INLINE _syscall3(int, _fstatfs64,          int,   f,
-                         size_t, s,struct kernel_statfs64*, b)
-    LSS_INLINE int LSS_NAME(statfs64)(const char *p,
-                                     struct kernel_statfs64 *b) {
-      return LSS_NAME(_statfs64)(p, sizeof(*b), b);
-    }
-    LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) {
-      return LSS_NAME(_fstatfs64)(f, sizeof(*b), b);
-    }
-  #endif
-
-  LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) {
-    extern char **environ;
-    return LSS_NAME(execve)(path, argv, (const char *const *)environ);
-  }
 
   LSS_INLINE pid_t LSS_NAME(gettid)() {
     pid_t tid = LSS_NAME(_gettid)();
@@ -2944,72 +2173,6 @@ struct kernel_statfs {
     LSS_ERRNO = err;
     return rc;
   }
-
-  LSS_INLINE int LSS_NAME(raise)(int sig) {
-    return LSS_NAME(kill)(LSS_NAME(getpid)(), sig);
-  }
-
-  LSS_INLINE int LSS_NAME(setpgrp)() {
-    return LSS_NAME(setpgid)(0, 0);
-  }
-
-  LSS_INLINE int LSS_NAME(sysconf)(int name) {
-    extern int __getpagesize(void);
-    switch (name) {
-      case _SC_OPEN_MAX: {
-        struct kernel_rlimit limit;
-        return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0
-               ? 8192 : limit.rlim_cur;
-      }
-      case _SC_PAGESIZE:
-        return __getpagesize();
-      default:
-        errno = ENOSYS;
-        return -1;
-    }
-  }
-  #if defined(__x86_64__) ||                                                  \
-     (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64)
-    LSS_INLINE _syscall4(ssize_t, pread64,        int,         f,
-                         void *,         b, size_t,   c,
-                         loff_t,         o)
-    LSS_INLINE _syscall4(ssize_t, pwrite64,       int,         f,
-                         const void *,   b, size_t,   c,
-                         loff_t,         o)
-    LSS_INLINE _syscall3(int,     readahead,      int,         f,
-                         loff_t,         o, unsigned, c)
-  #else
-    #define __NR__pread64   __NR_pread64
-    #define __NR__pwrite64  __NR_pwrite64
-    #define __NR__readahead __NR_readahead
-    LSS_INLINE _syscall5(ssize_t, _pread64,        int,         f,
-                         void *,         b, size_t, c, unsigned, o1,
-                         unsigned, o2)
-    LSS_INLINE _syscall5(ssize_t, _pwrite64,       int,         f,
-                         const void *,   b, size_t, c, unsigned, o1,
-                         long, o2)
-    LSS_INLINE _syscall4(int, _readahead,          int,         f,
-                         unsigned,       o1, unsigned, o2, size_t, c);
-    /* We force 64bit-wide parameters onto the stack, then access each
-     * 32-bit component individually. This guarantees that we build the
-     * correct parameters independent of the native byte-order of the
-     * underlying architecture.
-     */
-    LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count,
-                                         loff_t off) {
-      union { loff_t off; unsigned arg[2]; } o = { off };
-      return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]);
-    }
-    LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf,
-                                          size_t count, loff_t off) {
-      union { loff_t off; unsigned arg[2]; } o = { off };
-      return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]);
-    }
-    LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) {
-      union { loff_t off; unsigned arg[2]; } o = { off };
-      return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len);
-    }
-  #endif
 #endif
 
 #if defined(__cplusplus) && !defined(SYS_CPLUSPLUS)
diff --git a/third_party/tcmalloc/vendor/src/base/logging.h b/third_party/tcmalloc/vendor/src/base/logging.h
index 70491ba..d6a6ab5 100644
--- a/third_party/tcmalloc/vendor/src/base/logging.h
+++ b/third_party/tcmalloc/vendor/src/base/logging.h
@@ -85,7 +85,7 @@ DECLARE_int32(verbose);
     if (!(condition)) {                                                 \
       WRITE_TO_STDERR("Check failed: " #condition "\n",                 \
                       sizeof("Check failed: " #condition "\n")-1);      \
-      exit(1);                                                          \
+      abort();                                                          \
     }                                                                   \
   } while (0)
 
@@ -95,7 +95,7 @@ DECLARE_int32(verbose);
     if (!(condition)) {                                                        \
       WRITE_TO_STDERR("Check failed: " #condition ": " message "\n",           \
                       sizeof("Check failed: " #condition ": " message "\n")-1);\
-      exit(1);                                                                 \
+      abort();                                                                 \
     }                                                                          \
   } while (0)
 
@@ -118,7 +118,7 @@ enum { DEBUG_MODE = 1 };
                       sizeof("Check failed: " #condition ": ")-1);      \
       WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no)));      \
       WRITE_TO_STDERR("\n", sizeof("\n")-1);                            \
-      exit(1);                                                          \
+      abort();                                                          \
     }                                                                   \
   } while (0)
 
@@ -135,7 +135,7 @@ enum { DEBUG_MODE = 1 };
   do {                                                                  \
     if (!((val1) op (val2))) {                                          \
       fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2);   \
-      exit(1);                                                          \
+      abort();                                                          \
     }                                                                   \
   } while (0)
 
diff --git a/third_party/tcmalloc/vendor/src/base/low_level_alloc.cc b/third_party/tcmalloc/vendor/src/base/low_level_alloc.cc
index 70f510d6..c043cb6 100644
--- a/third_party/tcmalloc/vendor/src/base/low_level_alloc.cc
+++ b/third_party/tcmalloc/vendor/src/base/low_level_alloc.cc
@@ -38,7 +38,7 @@
 #include "base/spinlock.h"
 #include "base/logging.h"
 #include "malloc_hook-inl.h"
-#include <google/malloc_hook.h>
+#include <gperftools/malloc_hook.h>
 #include <errno.h>
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
diff --git a/third_party/tcmalloc/vendor/src/base/spinlock_internal.cc b/third_party/tcmalloc/vendor/src/base/spinlock_internal.cc
index 7bd0e21..b9fadde 100644
--- a/third_party/tcmalloc/vendor/src/base/spinlock_internal.cc
+++ b/third_party/tcmalloc/vendor/src/base/spinlock_internal.cc
@@ -42,9 +42,12 @@
 
 #include "base/spinlock_internal.h"
 
+// forward declaration for use by spinlock_*-inl.h
+namespace base { namespace internal { static int SuggestedDelayNS(int loop); }}
+
 #if defined(_WIN32)
 #include "base/spinlock_win32-inl.h"
-#elif defined(__linux__) && !defined(__native_client__)
+#elif defined(__linux__)
 #include "base/spinlock_linux-inl.h"
 #else
 #include "base/spinlock_posix-inl.h"
@@ -73,5 +76,27 @@ int32 SpinLockWait(volatile Atomic32 *w, int n,
   return v;
 }
 
+// Return a suggested delay in nanoseconds for iteration number "loop"
+static int SuggestedDelayNS(int loop) {
+  // Weak pseudo-random number generator to get some spread between threads
+  // when many are spinning.
+  static base::subtle::Atomic64 rand;
+  uint64 r = base::subtle::NoBarrier_Load(&rand);
+  r = 0x5deece66dLL * r + 0xb;   // numbers from nrand48()
+  base::subtle::NoBarrier_Store(&rand, r);
+
+  r <<= 16;   // 48-bit random number now in top 48-bits.
+  if (loop < 0 || loop > 32) {   // limit loop to 0..32
+    loop = 32;
+  }
+  // loop>>3 cannot exceed 4 because loop cannot exceed 32.
+  // Select top 20..24 bits of lower 48 bits,
+  // giving approximately 0ms to 16ms.
+  // Mean is exponential in loop for first 32 iterations, then 8ms.
+  // The futex path multiplies this by 16, since we expect explicit wakeups
+  // almost always on that path.
+  return r >> (44 - (loop >> 3));
+}
+
 } // namespace internal
 } // namespace base
diff --git a/third_party/tcmalloc/vendor/src/base/spinlock_linux-inl.h b/third_party/tcmalloc/vendor/src/base/spinlock_linux-inl.h
index a178dd5..bee2d4c 100644
--- a/third_party/tcmalloc/vendor/src/base/spinlock_linux-inl.h
+++ b/third_party/tcmalloc/vendor/src/base/spinlock_linux-inl.h
@@ -31,6 +31,7 @@
  * This file is a Linux-specific part of spinlock_internal.cc
  */
 
+#include <errno.h>
 #include <sched.h>
 #include <time.h>
 #include <limits.h>
@@ -75,12 +76,12 @@ void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
     struct timespec tm;
     tm.tv_sec = 0;
     if (have_futex) {
-      tm.tv_nsec = 1000000;   // 1ms; really we're trying to sleep for one
-                              // kernel clock tick
+      tm.tv_nsec = base::internal::SuggestedDelayNS(loop);
     } else {
       tm.tv_nsec = 2000001;   // above 2ms so linux 2.4 doesn't spin
     }
     if (have_futex) {
+      tm.tv_nsec *= 16;  // increase the delay; we expect explicit wakeups
       sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)),
                 FUTEX_WAIT | futex_private_flag,
                 value, reinterpret_cast<struct kernel_timespec *>(&tm));
diff --git a/third_party/tcmalloc/vendor/src/base/spinlock_posix-inl.h b/third_party/tcmalloc/vendor/src/base/spinlock_posix-inl.h
index d188ebd..e1d43b7 100644
--- a/third_party/tcmalloc/vendor/src/base/spinlock_posix-inl.h
+++ b/third_party/tcmalloc/vendor/src/base/spinlock_posix-inl.h
@@ -49,7 +49,7 @@ void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
   } else {
     struct timespec tm;
     tm.tv_sec = 0;
-    tm.tv_nsec = 1000000;
+    tm.tv_nsec = base::internal::SuggestedDelayNS(loop);
     nanosleep(&tm, NULL);
   }
   errno = save_errno;
diff --git a/third_party/tcmalloc/vendor/src/base/spinlock_win32-inl.h b/third_party/tcmalloc/vendor/src/base/spinlock_win32-inl.h
index ee23541..64decd2 100644
--- a/third_party/tcmalloc/vendor/src/base/spinlock_win32-inl.h
+++ b/third_party/tcmalloc/vendor/src/base/spinlock_win32-inl.h
@@ -42,7 +42,7 @@ void SpinLockDelay(volatile Atomic32 *w, int32 value, int loop) {
   } else if (loop == 1) {
     Sleep(0);
   } else {
-    Sleep(1);
+    Sleep(base::internal::SuggestedDelayNS(loop) / 1000000);
   }
 }
 
diff --git a/third_party/tcmalloc/vendor/src/base/stl_allocator.h b/third_party/tcmalloc/vendor/src/base/stl_allocator.h
index 3152cf9..8276a83 100644
--- a/third_party/tcmalloc/vendor/src/base/stl_allocator.h
+++ b/third_party/tcmalloc/vendor/src/base/stl_allocator.h
@@ -87,6 +87,7 @@ class STL_Allocator {
   size_type max_size() const { return size_t(-1) / sizeof(T); }
 
   void construct(pointer p, const T& val) { ::new(p) T(val); }
+  void construct(pointer p) { ::new(p) T(); }
   void destroy(pointer p) { p->~T(); }
 
   // There's no state, so these allocators are always equal
diff --git a/third_party/tcmalloc/vendor/src/base/sysinfo.cc b/third_party/tcmalloc/vendor/src/base/sysinfo.cc
index 5396743..2559158 100644
--- a/third_party/tcmalloc/vendor/src/base/sysinfo.cc
+++ b/third_party/tcmalloc/vendor/src/base/sysinfo.cc
@@ -86,12 +86,20 @@
 // time, so prefer making the syscalls directly if we can.
 #ifdef HAVE_SYS_SYSCALL_H
 # include <sys/syscall.h>
+#endif
+#ifdef SYS_open   // solaris 11, at least sometimes, only defines SYS_openat
 # define safeopen(filename, mode)  syscall(SYS_open, filename, mode)
-# define saferead(fd, buffer, size)  syscall(SYS_read, fd, buffer, size)
-# define safeclose(fd)  syscall(SYS_close, fd)
 #else
 # define safeopen(filename, mode)  open(filename, mode)
+#endif
+#ifdef SYS_read
+# define saferead(fd, buffer, size)  syscall(SYS_read, fd, buffer, size)
+#else
 # define saferead(fd, buffer, size)  read(fd, buffer, size)
+#endif
+#ifdef SYS_close
+# define safeclose(fd)  syscall(SYS_close, fd)
+#else
 # define safeclose(fd)  close(fd)
 #endif
 
diff --git a/third_party/tcmalloc/vendor/src/central_freelist.cc b/third_party/tcmalloc/vendor/src/central_freelist.cc
index 9b20cf8..87a1825 100644
--- a/third_party/tcmalloc/vendor/src/central_freelist.cc
+++ b/third_party/tcmalloc/vendor/src/central_freelist.cc
@@ -319,7 +319,8 @@ void CentralFreeList::Populate() {
     if (span) Static::pageheap()->RegisterSizeClass(span, size_class_);
   }
   if (span == NULL) {
-    MESSAGE("tcmalloc: allocation failed", npages << kPageShift);
+    Log(kLog, __FILE__, __LINE__,
+        "tcmalloc: allocation failed", npages << kPageShift);
     lock_.Lock();
     return;
   }
diff --git a/third_party/tcmalloc/vendor/src/central_freelist.h b/third_party/tcmalloc/vendor/src/central_freelist.h
index 27e9d35..4fd5799 100644
--- a/third_party/tcmalloc/vendor/src/central_freelist.h
+++ b/third_party/tcmalloc/vendor/src/central_freelist.h
@@ -48,6 +48,11 @@ namespace tcmalloc {
 // Data kept per size-class in central cache.
 class CentralFreeList {
  public:
+  // A CentralFreeList may be used before its constructor runs.
+  // So we prevent lock_'s constructor from doing anything to the
+  // lock_ state.
+  CentralFreeList() : lock_(base::LINKER_INITIALIZED) { }
+
   void Init(size_t cl);
 
   // These methods all do internal locking.
diff --git a/third_party/tcmalloc/vendor/src/common.cc b/third_party/tcmalloc/vendor/src/common.cc
index 90e6626..dad7372 100644
--- a/third_party/tcmalloc/vendor/src/common.cc
+++ b/third_party/tcmalloc/vendor/src/common.cc
@@ -99,10 +99,12 @@ int SizeMap::NumMoveSize(size_t size) {
 void SizeMap::Init() {
   // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
   if (ClassIndex(0) < 0) {
-    CRASH("Invalid class index %d for size 0\n", ClassIndex(0));
+    Log(kCrash, __FILE__, __LINE__,
+        "Invalid class index for size 0", ClassIndex(0));
   }
   if (ClassIndex(kMaxSize) >= sizeof(class_array_)) {
-    CRASH("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
+    Log(kCrash, __FILE__, __LINE__,
+        "Invalid class index for kMaxSize", ClassIndex(kMaxSize));
   }
 
   // Compute the size classes we want to use
@@ -147,8 +149,8 @@ void SizeMap::Init() {
     sc++;
   }
   if (sc != kNumClasses) {
-    CRASH("wrong number of size classes: found %d instead of %d\n",
-          sc, int(kNumClasses));
+    Log(kCrash, __FILE__, __LINE__,
+        "wrong number of size classes: (found vs. expected )", sc, kNumClasses);
   }
 
   // Initialize the mapping arrays
@@ -165,18 +167,17 @@ void SizeMap::Init() {
   for (size_t size = 0; size <= kMaxSize; size++) {
     const int sc = SizeClass(size);
     if (sc <= 0 || sc >= kNumClasses) {
-      CRASH("Bad size class %d for %" PRIuS "\n", sc, size);
+      Log(kCrash, __FILE__, __LINE__,
+          "Bad size class (class, size)", sc, size);
     }
     if (sc > 1 && size <= class_to_size_[sc-1]) {
-      CRASH("Allocating unnecessarily large class %d for %" PRIuS
-            "\n", sc, size);
+      Log(kCrash, __FILE__, __LINE__,
+          "Allocating unnecessarily large class (class, size)", sc, size);
     }
     const size_t s = class_to_size_[sc];
-    if (size > s) {
-      CRASH("Bad size %" PRIuS " for %" PRIuS " (sc = %d)\n", s, size, sc);
-    }
-    if (s == 0) {
-      CRASH("Bad size %" PRIuS " for %" PRIuS " (sc = %d)\n", s, size, sc);
+    if (size > s || s == 0) {
+      Log(kCrash, __FILE__, __LINE__,
+          "Bad (class, size, requested)", sc, s, size);
     }
   }
 
@@ -186,23 +187,6 @@ void SizeMap::Init() {
   }
 }
 
-void SizeMap::Dump(TCMalloc_Printer* out) {
-  // Dump class sizes and maximum external wastage per size class
-  for (size_t cl = 1; cl  < kNumClasses; ++cl) {
-    const int alloc_size = class_to_pages_[cl] << kPageShift;
-    const int alloc_objs = alloc_size / class_to_size_[cl];
-    const int min_used = (class_to_size_[cl-1] + 1) * alloc_objs;
-    const int max_waste = alloc_size - min_used;
-    out->printf("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
-                int(cl),
-                int(class_to_size_[cl-1] + 1),
-                int(class_to_size_[cl]),
-                int(class_to_pages_[cl] << kPageShift),
-                max_waste * 100.0 / alloc_size
-                );
-  }
-}
-
 // Metadata allocator -- keeps stats about how many bytes allocated.
 static uint64_t metadata_system_bytes_ = 0;
 void* MetaDataAlloc(size_t bytes) {
diff --git a/third_party/tcmalloc/vendor/src/common.h b/third_party/tcmalloc/vendor/src/common.h
index 72725ca..49458f2 100644
--- a/third_party/tcmalloc/vendor/src/common.h
+++ b/third_party/tcmalloc/vendor/src/common.h
@@ -221,9 +221,6 @@ class SizeMap {
   inline int num_objects_to_move(size_t cl) {
     return num_objects_to_move_[cl];
   }
-
-  // Dump contents of the computed size map
-  void Dump(TCMalloc_Printer* out);
 };
 
 // Allocates "bytes" worth of memory and returns it.  Increments
diff --git a/third_party/tcmalloc/vendor/src/config.h.in b/third_party/tcmalloc/vendor/src/config.h.in
index 64766ff..4eed17a 100644
--- a/third_party/tcmalloc/vendor/src/config.h.in
+++ b/third_party/tcmalloc/vendor/src/config.h.in
@@ -1,8 +1,8 @@
 /* src/config.h.in.  Generated from configure.ac by autoheader.  */
 
 
-#ifndef GOOGLE_PERFTOOLS_CONFIG_H_
-#define GOOGLE_PERFTOOLS_CONFIG_H_
+#ifndef GPERFTOOLS_CONFIG_H_
+#define GPERFTOOLS_CONFIG_H_
 
 
 /* Define to 1 if compiler supports __builtin_stack_pointer */
@@ -270,5 +270,5 @@
 #include "windows/mingw.h"
 #endif
 
-#endif  /* #ifndef GOOGLE_PERFTOOLS_CONFIG_H_ */
+#endif  /* #ifndef GPERFTOOLS_CONFIG_H_ */
 
diff --git a/third_party/tcmalloc/vendor/src/debugallocation.cc b/third_party/tcmalloc/vendor/src/debugallocation.cc
index 7cac751..70ec162 100644
--- a/third_party/tcmalloc/vendor/src/debugallocation.cc
+++ b/third_party/tcmalloc/vendor/src/debugallocation.cc
@@ -31,6 +31,13 @@
 // Author: Urs Holzle <opensource@google.com>
 
 #include "config.h"
+#include <errno.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
 // We only need malloc.h for struct mallinfo.
 #ifdef HAVE_STRUCT_MALLINFO
 // Malloc can be in several places on older versions of OS X.
@@ -42,34 +49,29 @@
 # include <sys/malloc.h>
 # endif
 #endif
+#ifdef HAVE_PTHREAD
 #include <pthread.h>
-#include <stdio.h>
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
 #endif
 #include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
 #ifdef HAVE_MMAP
 #include <sys/mman.h>
 #endif
-#include <sys/types.h>
 #include <sys/stat.h>
-#ifdef HAVE_FCNTL_H
-#include <fcntl.h>
-#endif
+#include <sys/types.h>
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
-#include <errno.h>
-#include <string.h>
 
-#include <google/malloc_extension.h>
-#include <google/malloc_hook.h>
-#include <google/stacktrace.h>
+#include <gperftools/malloc_extension.h>
+#include <gperftools/malloc_hook.h>
+#include <gperftools/stacktrace.h>
+#include "addressmap-inl.h"
 #include "base/commandlineflags.h"
 #include "base/googleinit.h"
 #include "base/logging.h"
 #include "base/spinlock.h"
-#include "addressmap-inl.h"
 #include "malloc_hook-inl.h"
 #include "symbolize.h"
 
@@ -124,6 +126,13 @@ DEFINE_bool(symbolize_stacktrace,
             EnvToBool("TCMALLOC_SYMBOLIZE_STACKTRACE", true),
             "Symbolize the stack trace when provided (on some error exits)");
 
+// If we are LD_PRELOAD-ed against a non-pthreads app, then
+// pthread_once won't be defined.  We declare it here, for that
+// case (with weak linkage) which will cause the non-definition to
+// resolve to NULL.  We can then check for NULL or not in Instance.
+extern "C" int pthread_once(pthread_once_t *, void (*)(void))
+    ATTRIBUTE_WEAK;
+
 // ========================================================================= //
 
 // A safe version of printf() that does not do any allocation and
@@ -169,6 +178,7 @@ class FreeQueue {
   }
 
   QueueEntry Pop() {
+    RAW_CHECK(q_back_ != q_front_, "Queue is empty");
     const QueueEntry& ret = q_[q_back_];
     q_back_ = (q_back_ + 1) % kFreeQueueSize;
     return ret;
@@ -191,7 +201,7 @@ struct MallocBlockQueueEntry {
   MallocBlockQueueEntry() : block(NULL), size(0),
                             num_deleter_pcs(0), deleter_threadid(0) {}
   MallocBlockQueueEntry(MallocBlock* b, size_t s) : block(b), size(s) {
-    if (FLAGS_max_free_queue_size != 0) {
+    if (FLAGS_max_free_queue_size != 0 && b != NULL) {
       // Adjust the number of frames to skip (4) if you change the
       // location of this call.
       num_deleter_pcs =
@@ -266,7 +276,8 @@ class MallocBlock {
 
   // This array will be filled with 0xCD, for use with memcmp.
   static unsigned char kMagicDeletedBuffer[1024];
-  static bool deleted_buffer_initialized_;
+  static pthread_once_t deleted_buffer_initialized_;
+  static bool deleted_buffer_initialized_no_pthreads_;
 
  private:  // data layout
 
@@ -561,14 +572,18 @@ class MallocBlock {
 
   static void ProcessFreeQueue(MallocBlock* b, size_t size,
                                int max_free_queue_size) {
-    SpinLockHolder l(&free_queue_lock_);
+    // MallocBlockQueueEntry are about 144 in size, so we can only
+    // use a small array of them on the stack.
+    MallocBlockQueueEntry entries[4];
+    int num_entries = 0;
+    MallocBlockQueueEntry new_entry(b, size);
+    free_queue_lock_.Lock();
     if (free_queue_ == NULL)
       free_queue_ = new FreeQueue<MallocBlockQueueEntry>;
     RAW_CHECK(!free_queue_->Full(), "Free queue mustn't be full!");
 
     if (b != NULL) {
       free_queue_size_ += size + sizeof(MallocBlockQueueEntry);
-      MallocBlockQueueEntry new_entry(b, size);
       free_queue_->Push(new_entry);
     }
 
@@ -576,20 +591,46 @@ class MallocBlock {
     // max_free_queue_size, and the free queue has at least one free
     // space in it.
     while (free_queue_size_ > max_free_queue_size || free_queue_->Full()) {
-      MallocBlockQueueEntry cur = free_queue_->Pop();
-      CheckForDanglingWrites(cur);
-      free_queue_size_ -= cur.size + sizeof(MallocBlockQueueEntry);
-      BASE_FREE(cur.block);
+      RAW_CHECK(num_entries < arraysize(entries), "entries array overflow");
+      entries[num_entries] = free_queue_->Pop();
+      free_queue_size_ -=
+          entries[num_entries].size + sizeof(MallocBlockQueueEntry);
+      num_entries++;
+      if (num_entries == arraysize(entries)) {
+        // The queue will not be full at this point, so it is ok to
+        // release the lock.  The queue may still contain more than
+        // max_free_queue_size, but this is not a strict invariant.
+        free_queue_lock_.Unlock();
+        for (int i = 0; i < num_entries; i++) {
+          CheckForDanglingWrites(entries[i]);
+          BASE_FREE(entries[i].block);
+        }
+        num_entries = 0;
+        free_queue_lock_.Lock();
+      }
     }
     RAW_CHECK(free_queue_size_ >= 0, "Free queue size went negative!");
+    free_queue_lock_.Unlock();
+    for (int i = 0; i < num_entries; i++) {
+      CheckForDanglingWrites(entries[i]);
+      BASE_FREE(entries[i].block);
+    }
+  }
+
+  static void InitDeletedBuffer() {
+    memset(kMagicDeletedBuffer, kMagicDeletedByte, sizeof(kMagicDeletedBuffer));
+    deleted_buffer_initialized_no_pthreads_ = true;
   }
 
   static void CheckForDanglingWrites(const MallocBlockQueueEntry& queue_entry) {
     // Initialize the buffer if necessary.
-    if (!deleted_buffer_initialized_) {
-      // This is threadsafe.  We hold free_queue_lock_.
-      memset(kMagicDeletedBuffer, 0xcd, sizeof(kMagicDeletedBuffer));
-      deleted_buffer_initialized_ = true;
+    if (pthread_once)
+      pthread_once(&deleted_buffer_initialized_, &InitDeletedBuffer);
+    if (!deleted_buffer_initialized_no_pthreads_) {
+      // This will be the case on systems that don't link in pthreads,
+      // including on FreeBSD where pthread_once has a non-zero address
+      // (but doesn't do anything) even when pthreads isn't linked in.
+      InitDeletedBuffer();
     }
 
     const unsigned char* p =
@@ -625,9 +666,9 @@ class MallocBlock {
     // lines we'll output:
     if (size_of_buffer <= 1024) {
       for (int i = 0; i < size_of_buffer; ++i) {
-        if (buffer[i] != 0xcd) {
-          RAW_LOG(ERROR, "Buffer byte %d is 0x%02x (should be 0xcd).",
-                  i, buffer[i]);
+        if (buffer[i] != kMagicDeletedByte) {
+          RAW_LOG(ERROR, "Buffer byte %d is 0x%02x (should be 0x%02x).",
+                  i, buffer[i], kMagicDeletedByte);
         }
       }
     } else {
@@ -671,8 +712,10 @@ class MallocBlock {
     RAW_LOG(FATAL,
             "Memory was written to after being freed.  MallocBlock: %p, user "
             "ptr: %p, size: %zd.  If you can't find the source of the error, "
-            "try using valgrind or purify, or study the output of the "
-            "deleter's stack printed above.", b, b->data_addr(), size);
+            "try using ASan (http://code.google.com/p/address-sanitizer/), "
+            "Valgrind, or Purify, or study the "
+            "output of the deleter's stack printed above.",
+            b, b->data_addr(), size);
   }
 
   static MallocBlock* FromRawPointer(void* p) {
@@ -699,7 +742,7 @@ class MallocBlock {
     return FromRawPointer(const_cast<void*>(p));
   }
 
-  void Check(int type) {
+  void Check(int type) const {
     alloc_map_lock_.Lock();
     CheckLocked(type);
     alloc_map_lock_.Unlock();
@@ -777,7 +820,8 @@ size_t MallocBlock::free_queue_size_ = 0;
 SpinLock MallocBlock::free_queue_lock_(SpinLock::LINKER_INITIALIZED);
 
 unsigned char MallocBlock::kMagicDeletedBuffer[1024];
-bool MallocBlock::deleted_buffer_initialized_ = false;
+pthread_once_t MallocBlock::deleted_buffer_initialized_ = PTHREAD_ONCE_INIT;
+bool MallocBlock::deleted_buffer_initialized_no_pthreads_ = false;
 
 const char* const MallocBlock::kAllocName[] = {
   "malloc",
@@ -971,17 +1015,17 @@ class DebugMallocImplementation : public TCMallocImplementation {
     return result;
   }
 
-  virtual bool VerifyNewMemory(void* p) {
+  virtual bool VerifyNewMemory(const void* p) {
     if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType);
     return true;
   }
 
-  virtual bool VerifyArrayNewMemory(void* p) {
+  virtual bool VerifyArrayNewMemory(const void* p) {
     if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType);
     return true;
   }
 
-  virtual bool VerifyMallocMemory(void* p) {
+  virtual bool VerifyMallocMemory(const void* p) {
     if (p)  MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType);
     return true;
   }
@@ -999,7 +1043,7 @@ class DebugMallocImplementation : public TCMallocImplementation {
     return size;
   }
 
-  virtual size_t GetAllocatedSize(void* p) {
+  virtual size_t GetAllocatedSize(const void* p) {
     if (p) {
       RAW_CHECK(GetOwnership(p) != MallocExtension::kNotOwned,
                 "ptr not allocated by tcmalloc");
@@ -1035,14 +1079,18 @@ static DebugMallocImplementation debug_malloc_implementation;
 
 REGISTER_MODULE_INITIALIZER(debugallocation, {
   // Either we or valgrind will control memory management.  We
-  // register our extension if we're the winner.
-  if (RunningOnValgrind()) {
-    // Let Valgrind uses its own malloc (so don't register our extension).
-  } else {
+  // register our extension if we're the winner. Otherwise let
+  // Valgrind use its own malloc (so don't register our extension).
+  if (!RunningOnValgrind()) {
     MallocExtension::Register(&debug_malloc_implementation);
+  }
+});
+
+REGISTER_MODULE_DESTRUCTOR(debugallocation, {
+  if (!RunningOnValgrind()) {
     // When the program exits, check all blocks still in the free
     // queue for corruption.
-    atexit(DanglingWriteChecker);
+    DanglingWriteChecker();
   }
 });
 
diff --git a/third_party/tcmalloc/vendor/src/google/heap-checker.h b/third_party/tcmalloc/vendor/src/google/heap-checker.h
index a431fe9..8aa5ea4 100644
--- a/third_party/tcmalloc/vendor/src/google/heap-checker.h
+++ b/third_party/tcmalloc/vendor/src/google/heap-checker.h
@@ -27,391 +27,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// ---
-// Author: Maxim Lifantsev (with design ideas by Sanjay Ghemawat)
-//
-//
-// Module for detecing heap (memory) leaks.
-//
-// For full(er) information, see doc/heap_checker.html
-//
-// This module can be linked into programs with
-// no slowdown caused by this unless you activate the leak-checker:
-//
-//    1. Set the environment variable HEAPCHEK to _type_ before
-//       running the program.
-//
-// _type_ is usually "normal" but can also be "minimal", "strict", or
-// "draconian".  (See the html file for other options, like 'local'.)
-//
-// After that, just run your binary.  If the heap-checker detects
-// a memory leak at program-exit, it will print instructions on how
-// to track down the leak.
-
-#ifndef BASE_HEAP_CHECKER_H_
-#define BASE_HEAP_CHECKER_H_
-
-#include <sys/types.h>  // for size_t
-// I can't #include config.h in this public API file, but I should
-// really use configure (and make malloc_extension.h a .in file) to
-// figure out if the system has stdint.h or not.  But I'm lazy, so
-// for now I'm assuming it's a problem only with MSVC.
-#ifndef _MSC_VER
-#include <stdint.h>     // for uintptr_t
-#endif
-#include <stdarg.h>     // for va_list
-#include <vector>
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-
-// The class is thread-safe with respect to all the provided static methods,
-// as well as HeapLeakChecker objects: they can be accessed by multiple threads.
-class PERFTOOLS_DLL_DECL HeapLeakChecker {
- public:
-
-  // ----------------------------------------------------------------------- //
-  // Static functions for working with (whole-program) leak checking.
-
-  // If heap leak checking is currently active in some mode
-  // e.g. if leak checking was started (and is still active now)
-  // due to HEAPCHECK=... defined in the environment.
-  // The return value reflects iff HeapLeakChecker objects manually
-  // constructed right now will be doing leak checking or nothing.
-  // Note that we can go from active to inactive state during InitGoogle()
-  // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle().
-  static bool IsActive();
-
-  // Return pointer to the whole-program checker if it has been created
-  // and NULL otherwise.
-  // Once GlobalChecker() returns non-NULL that object will not disappear and
-  // will be returned by all later GlobalChecker calls.
-  // This is mainly to access BytesLeaked() and ObjectsLeaked() (see below)
-  // for the whole-program checker after one calls NoGlobalLeaks()
-  // or similar and gets false.
-  static HeapLeakChecker* GlobalChecker();
-
-  // Do whole-program leak check now (if it was activated for this binary);
-  // return false only if it was activated and has failed.
-  // The mode of the check is controlled by the command-line flags.
-  // This method can be called repeatedly.
-  // Things like GlobalChecker()->SameHeap() can also be called explicitly
-  // to do the desired flavor of the check.
-  static bool NoGlobalLeaks();
-
-  // If whole-program checker if active,
-  // cancel its automatic execution after main() exits.
-  // This requires that some leak check (e.g. NoGlobalLeaks())
-  // has been called at least once on the whole-program checker.
-  static void CancelGlobalCheck();
-
-  // ----------------------------------------------------------------------- //
-  // Non-static functions for starting and doing leak checking.
-
-  // Start checking and name the leak check performed.
-  // The name is used in naming dumped profiles
-  // and needs to be unique only within your binary.
-  // It must also be a string that can be a part of a file name,
-  // in particular not contain path expressions.
-  explicit HeapLeakChecker(const char *name);
-
-  // Destructor (verifies that some *NoLeaks or *SameHeap method
-  // has been called at least once).
-  ~HeapLeakChecker();
-
-  // These used to be different but are all the same now: they return
-  // true iff all memory allocated since this HeapLeakChecker object
-  // was constructor is still reachable from global state.
-  //
-  // Because we fork to convert addresses to symbol-names, and forking
-  // is not thread-safe, and we may be called in a threaded context,
-  // we do not try to symbolize addresses when called manually.
-  bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); }
-
-  // These forms are obsolete; use NoLeaks() instead.
-  // TODO(csilvers): mark as DEPRECATED.
-  bool QuickNoLeaks()  { return NoLeaks(); }
-  bool BriefNoLeaks()  { return NoLeaks(); }
-  bool SameHeap()      { return NoLeaks(); }
-  bool QuickSameHeap() { return NoLeaks(); }
-  bool BriefSameHeap() { return NoLeaks(); }
-
-  // Detailed information about the number of leaked bytes and objects
-  // (both of these can be negative as well).
-  // These are available only after a *SameHeap or *NoLeaks
-  // method has been called.
-  // Note that it's possible for both of these to be zero
-  // while SameHeap() or NoLeaks() returned false in case
-  // of a heap state change that is significant
-  // but preserves the byte and object counts.
-  ssize_t BytesLeaked() const;
-  ssize_t ObjectsLeaked() const;
-
-  // ----------------------------------------------------------------------- //
-  // Static helpers to make us ignore certain leaks.
-
-  // Scoped helper class.  Should be allocated on the stack inside a
-  // block of code.  Any heap allocations done in the code block
-  // covered by the scoped object (including in nested function calls
-  // done by the code block) will not be reported as leaks.  This is
-  // the recommended replacement for the GetDisableChecksStart() and
-  // DisableChecksToHereFrom() routines below.
-  //
-  // Example:
-  //   void Foo() {
-  //     HeapLeakChecker::Disabler disabler;
-  //     ... code that allocates objects whose leaks should be ignored ...
-  //   }
-  //
-  // REQUIRES: Destructor runs in same thread as constructor
-  class Disabler {
-   public:
-    Disabler();
-    ~Disabler();
-   private:
-    Disabler(const Disabler&);        // disallow copy
-    void operator=(const Disabler&);  // and assign
-  };
-
-  // Ignore an object located at 'ptr' (can go at the start or into the object)
-  // as well as all heap objects (transitively) referenced from it
-  // for the purposes of heap leak checking.
-  // If 'ptr' does not point to an active allocated object
-  // at the time of this call, it is ignored;
-  // but if it does, the object must not get deleted from the heap later on.
-  //
-  // See also HiddenPointer, below, if you need to prevent a pointer from
-  // being traversed by the heap checker but do not wish to transitively
-  // whitelist objects referenced through it.
-  static void IgnoreObject(const void* ptr);
-
-  // Undo what an earlier IgnoreObject() call promised and asked to do.
-  // At the time of this call 'ptr' must point at or inside of an active
-  // allocated object which was previously registered with IgnoreObject().
-  static void UnIgnoreObject(const void* ptr);
-
-  // ----------------------------------------------------------------------- //
-  // Initialization; to be called from main() only.
-
-  // Full starting of recommended whole-program checking.
-  static void InternalInitStart();
-
-  // ----------------------------------------------------------------------- //
-  // Internal types defined in .cc
-
-  class Allocator;
-  struct RangeValue;
-
- private:
-
-  // ----------------------------------------------------------------------- //
-  // Various helpers
-
-  // Create the name of the heap profile file.
-  // Should be deleted via Allocator::Free().
-  char* MakeProfileNameLocked();
-
-  // Helper for constructors
-  void Create(const char *name, bool make_start_snapshot);
-
-  enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE };
-
-  // Helper for *NoLeaks and *SameHeap
-  bool DoNoLeaks(ShouldSymbolize should_symbolize);
-
-  // These used to be public, but they are now deprecated.
-  // Will remove entirely when all internal uses are fixed.
-  // In the meantime, use friendship so the unittest can still test them.
-  static void* GetDisableChecksStart();
-  static void DisableChecksToHereFrom(const void* start_address);
-  static void DisableChecksIn(const char* pattern);
-  friend void RangeDisabledLeaks();
-  friend void NamedTwoDisabledLeaks();
-  friend void* RunNamedDisabledLeaks(void*);
-  friend void TestHeapLeakCheckerNamedDisabling();
-  friend int main(int, char**);
-
-
-  // Helper for DisableChecksIn
-  static void DisableChecksInLocked(const char* pattern);
-
-  // Disable checks based on stack trace entry at a depth <=
-  // max_depth.  Used to hide allocations done inside some special
-  // libraries.
-  static void DisableChecksFromToLocked(const void* start_address,
-                                        const void* end_address,
-                                        int max_depth);
-
-  // Helper for DoNoLeaks to ignore all objects reachable from all live data
-  static void IgnoreAllLiveObjectsLocked(const void* self_stack_top);
-
-  // Callback we pass to ListAllProcessThreads (see thread_lister.h)
-  // that is invoked when all threads of our process are found and stopped.
-  // The call back does the things needed to ignore live data reachable from
-  // thread stacks and registers for all our threads
-  // as well as do other global-live-data ignoring
-  // (via IgnoreNonThreadLiveObjectsLocked)
-  // during the quiet state of all threads being stopped.
-  // For the argument meaning see the comment by ListAllProcessThreads.
-  // Here we only use num_threads and thread_pids, that ListAllProcessThreads
-  // fills for us with the number and pids of all the threads of our process
-  // it found and attached to.
-  static int IgnoreLiveThreadsLocked(void* parameter,
-                                     int num_threads,
-                                     pid_t* thread_pids,
-                                     va_list ap);
-
-  // Helper for IgnoreAllLiveObjectsLocked and IgnoreLiveThreadsLocked
-  // that we prefer to execute from IgnoreLiveThreadsLocked
-  // while all threads are stopped.
-  // This helper does live object discovery and ignoring
-  // for all objects that are reachable from everything
-  // not related to thread stacks and registers.
-  static void IgnoreNonThreadLiveObjectsLocked();
-
-  // Helper for IgnoreNonThreadLiveObjectsLocked and IgnoreLiveThreadsLocked
-  // to discover and ignore all heap objects
-  // reachable from currently considered live objects
-  // (live_objects static global variable in out .cc file).
-  // "name", "name2" are two strings that we print one after another
-  // in a debug message to describe what kind of live object sources
-  // are being used.
-  static void IgnoreLiveObjectsLocked(const char* name, const char* name2);
-
-  // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially
-  // calls DoMainHeapCheck
-  static void RunHeapCleanups();
-
-  // Do the overall whole-program heap leak check if needed;
-  // returns true when did the leak check.
-  static bool DoMainHeapCheck();
-
-  // Type of task for UseProcMapsLocked
-  enum ProcMapsTask {
-    RECORD_GLOBAL_DATA,
-    DISABLE_LIBRARY_ALLOCS
-  };
-
-  // Success/Error Return codes for UseProcMapsLocked.
-  enum ProcMapsResult {
-    PROC_MAPS_USED,
-    CANT_OPEN_PROC_MAPS,
-    NO_SHARED_LIBS_IN_PROC_MAPS
-  };
-
-  // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line.
-  static ProcMapsResult UseProcMapsLocked(ProcMapsTask proc_maps_task);
-
-  // A ProcMapsTask to disable allocations from 'library'
-  // that is mapped to [start_address..end_address)
-  // (only if library is a certain system library).
-  static void DisableLibraryAllocsLocked(const char* library,
-                                         uintptr_t start_address,
-                                         uintptr_t end_address);
-
-  // Return true iff "*ptr" points to a heap object
-  // ("*ptr" can point at the start or inside of a heap object
-  //  so that this works e.g. for pointers to C++ arrays, C++ strings,
-  //  multiple-inherited objects, or pointers to members).
-  // We also fill *object_size for this object then
-  // and we move "*ptr" to point to the very start of the heap object.
-  static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size);
-
-  // Helper to shutdown heap leak checker when it's not needed
-  // or can't function properly.
-  static void TurnItselfOffLocked();
-
-  // Internally-used c-tor to start whole-executable checking.
-  HeapLeakChecker();
-
-  // ----------------------------------------------------------------------- //
-  // Friends and externally accessed helpers.
-
-  // Helper for VerifyHeapProfileTableStackGet in the unittest
-  // to get the recorded allocation caller for ptr,
-  // which must be a heap object.
-  static const void* GetAllocCaller(void* ptr);
-  friend void VerifyHeapProfileTableStackGet();
-
-  // This gets to execute before constructors for all global objects
-  static void BeforeConstructorsLocked();
-  friend void HeapLeakChecker_BeforeConstructors();
-
-  // This gets to execute after destructors for all global objects
-  friend void HeapLeakChecker_AfterDestructors();
-
-  // ----------------------------------------------------------------------- //
-  // Member data.
-
-  class SpinLock* lock_;  // to make HeapLeakChecker objects thread-safe
-  const char* name_;  // our remembered name (we own it)
-                      // NULL means this leak checker is a noop
-
-  // Snapshot taken when the checker was created.  May be NULL
-  // for the global heap checker object.  We use void* instead of
-  // HeapProfileTable::Snapshot* to avoid including heap-profile-table.h.
-  void* start_snapshot_;
-
-  bool has_checked_;  // if we have done the leak check, so these are ready:
-  ssize_t inuse_bytes_increase_;  // bytes-in-use increase for this checker
-  ssize_t inuse_allocs_increase_;  // allocations-in-use increase
-                                   // for this checker
-  bool keep_profiles_;  // iff we should keep the heap profiles we've made
-
-  // ----------------------------------------------------------------------- //
-
-  // Disallow "evil" constructors.
-  HeapLeakChecker(const HeapLeakChecker&);
-  void operator=(const HeapLeakChecker&);
-};
-
-
-// Holds a pointer that will not be traversed by the heap checker.
-// Contrast with HeapLeakChecker::IgnoreObject(o), in which o and
-// all objects reachable from o are ignored by the heap checker.
-template <class T>
-class HiddenPointer {
- public:
-  explicit HiddenPointer(T* t)
-      : masked_t_(reinterpret_cast<uintptr_t>(t) ^ kHideMask) {
-  }
-  // Returns unhidden pointer.  Be careful where you save the result.
-  T* get() const { return reinterpret_cast<T*>(masked_t_ ^ kHideMask); }
-
- private:
-  // Arbitrary value, but not such that xor'ing with it is likely
-  // to map one valid pointer to another valid pointer:
-  static const uintptr_t kHideMask =
-      static_cast<uintptr_t>(0xF03A5F7BF03A5F7Bll);
-  uintptr_t masked_t_;
-};
-
-// A class that exists solely to run its destructor.  This class should not be
-// used directly, but instead by the REGISTER_HEAPCHECK_CLEANUP macro below.
-class PERFTOOLS_DLL_DECL HeapCleaner {
- public:
-  typedef void (*void_function)(void);
-  HeapCleaner(void_function f);
-  static void RunHeapCleanups();
- private:
-  static std::vector<void_function>* heap_cleanups_;
-};
-
-// A macro to declare module heap check cleanup tasks
-// (they run only if we are doing heap leak checking.)
-// 'body' should be the cleanup code to run.  'name' doesn't matter,
-// but must be unique amongst all REGISTER_HEAPCHECK_CLEANUP calls.
-#define REGISTER_HEAPCHECK_CLEANUP(name, body)  \
-  namespace { \
-  void heapcheck_cleanup_##name() { body; } \
-  static HeapCleaner heapcheck_cleaner_##name(&heapcheck_cleanup_##name); \
-  }
-
-#endif  // BASE_HEAP_CHECKER_H_
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
+ */
+#include <gperftools/heap-checker.h>
diff --git a/third_party/tcmalloc/vendor/src/google/heap-profiler.h b/third_party/tcmalloc/vendor/src/google/heap-profiler.h
index 57cb97a..be43959 100644
--- a/third_party/tcmalloc/vendor/src/google/heap-profiler.h
+++ b/third_party/tcmalloc/vendor/src/google/heap-profiler.h
@@ -26,79 +26,9 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- *
- * Module for heap-profiling.
- *
- * For full(er) information, see doc/heapprofile.html
- *
- * This module can be linked into your program with
- * no slowdown caused by this unless you activate the profiler
- * using one of the following methods:
- *
- *    1. Before starting the program, set the environment variable
- *       "HEAPPROFILE" to be the name of the file to which the profile
- *       data should be written.
- *
- *    2. Programmatically, start and stop the profiler using the
- *       routines "HeapProfilerStart(filename)" and "HeapProfilerStop()".
- *
  */
 
-#ifndef BASE_HEAP_PROFILER_H_
-#define BASE_HEAP_PROFILER_H_
-
-#include <stddef.h>
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-/* All this code should be usable from within C apps. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Start profiling and arrange to write profile data to file names
- * of the form: "prefix.0000", "prefix.0001", ...
- */
-PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix);
-
-/* Returns non-zero if we are currently profiling the heap.  (Returns
- * an int rather than a bool so it's usable from C.)  This is true
- * between calls to HeapProfilerStart() and HeapProfilerStop(), and
- * also if the program has been run with HEAPPROFILER, or some other
- * way to turn on whole-program profiling.
- */
-int IsHeapProfilerRunning();
-
-/* Stop heap profiling.  Can be restarted again with HeapProfilerStart(),
- * but the currently accumulated profiling information will be cleared.
- */
-PERFTOOLS_DLL_DECL void HeapProfilerStop();
-
-/* Dump a profile now - can be used for dumping at a hopefully
- * quiescent state in your program, in order to more easily track down
- * memory leaks. Will include the reason in the logged message
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
  */
-PERFTOOLS_DLL_DECL void HeapProfilerDump(const char *reason);
-
-/* Generate current heap profiling information.
- * Returns an empty string when heap profiling is not active.
- * The returned pointer is a '\0'-terminated string allocated using malloc()
- * and should be free()-ed as soon as the caller does not need it anymore.
- */
-PERFTOOLS_DLL_DECL char* GetHeapProfile();
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  /* BASE_HEAP_PROFILER_H_ */
+#include <gperftools/heap-profiler.h>
diff --git a/third_party/tcmalloc/vendor/src/google/malloc_extension.h b/third_party/tcmalloc/vendor/src/google/malloc_extension.h
index 4b06b2d..55150e5 100644
--- a/third_party/tcmalloc/vendor/src/google/malloc_extension.h
+++ b/third_party/tcmalloc/vendor/src/google/malloc_extension.h
@@ -27,376 +27,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// ---
-// Author: Sanjay Ghemawat <opensource@google.com>
-//
-// Extra extensions exported by some malloc implementations.  These
-// extensions are accessed through a virtual base class so an
-// application can link against a malloc that does not implement these
-// extensions, and it will get default versions that do nothing.
-//
-// NOTE FOR C USERS: If you wish to use this functionality from within
-// a C program, see malloc_extension_c.h.
-
-#ifndef BASE_MALLOC_EXTENSION_H_
-#define BASE_MALLOC_EXTENSION_H_
-
-#include <stddef.h>
-// I can't #include config.h in this public API file, but I should
-// really use configure (and make malloc_extension.h a .in file) to
-// figure out if the system has stdint.h or not.  But I'm lazy, so
-// for now I'm assuming it's a problem only with MSVC.
-#ifndef _MSC_VER
-#include <stdint.h>
-#endif
-#include <string>
-#include <vector>
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-static const int kMallocHistogramSize = 64;
-
-// One day, we could support other types of writers (perhaps for C?)
-typedef std::string MallocExtensionWriter;
-
-namespace base {
-struct MallocRange;
-}
-
-// Interface to a pluggable system allocator.
-class SysAllocator {
- public:
-  SysAllocator() {
-  }
-  virtual ~SysAllocator();
-
-  // Allocates "size"-byte of memory from system aligned with "alignment".
-  // Returns NULL if failed. Otherwise, the returned pointer p up to and
-  // including (p + actual_size -1) have been allocated.
-  virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0;
-};
-
-// The default implementations of the following routines do nothing.
-// All implementations should be thread-safe; the current one
-// (TCMallocImplementation) is.
-class PERFTOOLS_DLL_DECL MallocExtension {
- public:
-  virtual ~MallocExtension();
-
-  // Call this very early in the program execution -- say, in a global
-  // constructor -- to set up parameters and state needed by all
-  // instrumented malloc implemenatations.  One example: this routine
-  // sets environemnt variables to tell STL to use libc's malloc()
-  // instead of doing its own memory management.  This is safe to call
-  // multiple times, as long as each time is before threads start up.
-  static void Initialize();
-
-  // See "verify_memory.h" to see what these routines do
-  virtual bool VerifyAllMemory();
-  // TODO(csilvers): change these to const void*.
-  virtual bool VerifyNewMemory(void* p);
-  virtual bool VerifyArrayNewMemory(void* p);
-  virtual bool VerifyMallocMemory(void* p);
-  virtual bool MallocMemoryStats(int* blocks, size_t* total,
-                                 int histogram[kMallocHistogramSize]);
-
-  // Get a human readable description of the current state of the malloc
-  // data structures.  The state is stored as a null-terminated string
-  // in a prefix of "buffer[0,buffer_length-1]".
-  // REQUIRES: buffer_length > 0.
-  virtual void GetStats(char* buffer, int buffer_length);
-
-  // Outputs to "writer" a sample of live objects and the stack traces
-  // that allocated these objects.  The format of the returned output
-  // is equivalent to the output of the heap profiler and can
-  // therefore be passed to "pprof". This function is equivalent to
-  // ReadStackTraces. The main difference is that this function returns
-  // serialized data appropriately formatted for use by the pprof tool.
-  // NOTE: by default, tcmalloc does not do any heap sampling, and this
-  //       function will always return an empty sample.  To get useful
-  //       data from GetHeapSample, you must also set the environment
-  //       variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288.
-  virtual void GetHeapSample(MallocExtensionWriter* writer);
-
-  // Outputs to "writer" the stack traces that caused growth in the
-  // address space size.  The format of the returned output is
-  // equivalent to the output of the heap profiler and can therefore
-  // be passed to "pprof". This function is equivalent to
-  // ReadHeapGrowthStackTraces. The main difference is that this function
-  // returns serialized data appropriately formatted for use by the
-  // pprof tool.  (This does not depend on, or require,
-  // TCMALLOC_SAMPLE_PARAMETER.)
-  virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer);
-
-  // Invokes func(arg, range) for every controlled memory
-  // range.  *range is filled in with information about the range.
-  //
-  // This is a best-effort interface useful only for performance
-  // analysis.  The implementation may not call func at all.
-  typedef void (RangeFunction)(void*, const base::MallocRange*);
-  virtual void Ranges(void* arg, RangeFunction func);
-
-  // -------------------------------------------------------------------
-  // Control operations for getting and setting malloc implementation
-  // specific parameters.  Some currently useful properties:
-  //
-  // generic
-  // -------
-  // "generic.current_allocated_bytes"
-  //      Number of bytes currently allocated by application
-  //      This property is not writable.
-  //
-  // "generic.heap_size"
-  //      Number of bytes in the heap ==
-  //            current_allocated_bytes +
-  //            fragmentation +
-  //            freed memory regions
-  //      This property is not writable.
-  //
-  // tcmalloc
-  // --------
-  // "tcmalloc.max_total_thread_cache_bytes"
-  //      Upper limit on total number of bytes stored across all
-  //      per-thread caches.  Default: 16MB.
-  //
-  // "tcmalloc.current_total_thread_cache_bytes"
-  //      Number of bytes used across all thread caches.
-  //      This property is not writable.
-  //
-  // "tcmalloc.pageheap_free_bytes"
-  //      Number of bytes in free, mapped pages in page heap.  These
-  //      bytes can be used to fulfill allocation requests.  They
-  //      always count towards virtual memory usage, and unless the
-  //      underlying memory is swapped out by the OS, they also count
-  //      towards physical memory usage.  This property is not writable.
-  //
-  // "tcmalloc.pageheap_unmapped_bytes"
-  //        Number of bytes in free, unmapped pages in page heap.
-  //        These are bytes that have been released back to the OS,
-  //        possibly by one of the MallocExtension "Release" calls.
-  //        They can be used to fulfill allocation requests, but
-  //        typically incur a page fault.  They always count towards
-  //        virtual memory usage, and depending on the OS, typically
-  //        do not count towards physical memory usage.  This property
-  //        is not writable.
-  // -------------------------------------------------------------------
-
-  // Get the named "property"'s value.  Returns true if the property
-  // is known.  Returns false if the property is not a valid property
-  // name for the current malloc implementation.
-  // REQUIRES: property != NULL; value != NULL
-  virtual bool GetNumericProperty(const char* property, size_t* value);
-
-  // Set the named "property"'s value.  Returns true if the property
-  // is known and writable.  Returns false if the property is not a
-  // valid property name for the current malloc implementation, or
-  // is not writable.
-  // REQUIRES: property != NULL
-  virtual bool SetNumericProperty(const char* property, size_t value);
-
-  // Mark the current thread as "idle".  This routine may optionally
-  // be called by threads as a hint to the malloc implementation that
-  // any thread-specific resources should be released.  Note: this may
-  // be an expensive routine, so it should not be called too often.
-  //
-  // Also, if the code that calls this routine will go to sleep for
-  // a while, it should take care to not allocate anything between
-  // the call to this routine and the beginning of the sleep.
-  //
-  // Most malloc implementations ignore this routine.
-  virtual void MarkThreadIdle();
-
-  // Mark the current thread as "busy".  This routine should be
-  // called after MarkThreadIdle() if the thread will now do more
-  // work.  If this method is not called, performance may suffer.
-  //
-  // Most malloc implementations ignore this routine.
-  virtual void MarkThreadBusy();
-
-  // Gets the system allocator used by the malloc extension instance. Returns
-  // NULL for malloc implementations that do not support pluggable system
-  // allocators.
-  virtual SysAllocator* GetSystemAllocator();
-
-  // Sets the system allocator to the specified.
-  //
-  // Users could register their own system allocators for malloc implementation
-  // that supports pluggable system allocators, such as TCMalloc, by doing:
-  //   alloc = new MyOwnSysAllocator();
-  //   MallocExtension::instance()->SetSystemAllocator(alloc);
-  // It's up to users whether to fall back (recommended) to the default
-  // system allocator (use GetSystemAllocator() above) or not. The caller is
-  // responsible to any necessary locking.
-  // See tcmalloc/system-alloc.h for the interface and
-  //     tcmalloc/memfs_malloc.cc for the examples.
-  //
-  // It's a no-op for malloc implementations that do not support pluggable
-  // system allocators.
-  virtual void SetSystemAllocator(SysAllocator *a);
-
-  // Try to release num_bytes of free memory back to the operating
-  // system for reuse.  Use this extension with caution -- to get this
-  // memory back may require faulting pages back in by the OS, and
-  // that may be slow.  (Currently only implemented in tcmalloc.)
-  virtual void ReleaseToSystem(size_t num_bytes);
-
-  // Same as ReleaseToSystem() but release as much memory as possible.
-  virtual void ReleaseFreeMemory();
-
-  // Sets the rate at which we release unused memory to the system.
-  // Zero means we never release memory back to the system.  Increase
-  // this flag to return memory faster; decrease it to return memory
-  // slower.  Reasonable rates are in the range [0,10].  (Currently
-  // only implemented in tcmalloc).
-  virtual void SetMemoryReleaseRate(double rate);
-
-  // Gets the release rate.  Returns a value < 0 if unknown.
-  virtual double GetMemoryReleaseRate();
-
-  // Returns the estimated number of bytes that will be allocated for
-  // a request of "size" bytes.  This is an estimate: an allocation of
-  // SIZE bytes may reserve more bytes, but will never reserve less.
-  // (Currently only implemented in tcmalloc, other implementations
-  // always return SIZE.)
-  // This is equivalent to malloc_good_size() in OS X.
-  virtual size_t GetEstimatedAllocatedSize(size_t size);
-
-  // Returns the actual number N of bytes reserved by tcmalloc for the
-  // pointer p.  The client is allowed to use the range of bytes
-  // [p, p+N) in any way it wishes (i.e. N is the "usable size" of this
-  // allocation).  This number may be equal to or greater than the number
-  // of bytes requested when p was allocated.
-  // p must have been allocated by this malloc implementation,
-  // must not be an interior pointer -- that is, must be exactly
-  // the pointer returned to by malloc() et al., not some offset
-  // from that -- and should not have been freed yet.  p may be NULL.
-  // (Currently only implemented in tcmalloc; other implementations
-  // will return 0.)
-  // This is equivalent to malloc_size() in OS X, malloc_usable_size()
-  // in glibc, and _msize() for windows.
-  // TODO(csilvers): change to const void*.
-  virtual size_t GetAllocatedSize(void* p);
-
-  // Returns kOwned if this malloc implementation allocated the memory
-  // pointed to by p, or kNotOwned if some other malloc implementation
-  // allocated it or p is NULL.  May also return kUnknownOwnership if
-  // the malloc implementation does not keep track of ownership.
-  // REQUIRES: p must be a value returned from a previous call to
-  // malloc(), calloc(), realloc(), memalign(), posix_memalign(),
-  // valloc(), pvalloc(), new, or new[], and must refer to memory that
-  // is currently allocated (so, for instance, you should not pass in
-  // a pointer after having called free() on it).
-  enum Ownership {
-    // NOTE: Enum values MUST be kept in sync with the version in
-    // malloc_extension_c.h
-    kUnknownOwnership = 0,
-    kOwned,
-    kNotOwned
-  };
-  virtual Ownership GetOwnership(const void* p);
-
-  // The current malloc implementation.  Always non-NULL.
-  static MallocExtension* instance();
-
-  // Change the malloc implementation.  Typically called by the
-  // malloc implementation during initialization.
-  static void Register(MallocExtension* implementation);
-
-  // Returns detailed information about malloc's freelists. For each list,
-  // return a FreeListInfo:
-  struct FreeListInfo {
-    size_t min_object_size;
-    size_t max_object_size;
-    size_t total_bytes_free;
-    const char* type;
-  };
-  // Each item in the vector refers to a different freelist. The lists
-  // are identified by the range of allocations that objects in the
-  // list can satisfy ([min_object_size, max_object_size]) and the
-  // type of freelist (see below). The current size of the list is
-  // returned in total_bytes_free (which count against a processes
-  // resident and virtual size).
-  //
-  // Currently supported types are:
-  //
-  // "tcmalloc.page{_unmapped}" - tcmalloc's page heap. An entry for each size
-  //          class in the page heap is returned. Bytes in "page_unmapped"
-  //          are no longer backed by physical memory and do not count against
-  //          the resident size of a process.
-  //
-  // "tcmalloc.large{_unmapped}" - tcmalloc's list of objects larger
-  //          than the largest page heap size class. Only one "large"
-  //          entry is returned. There is no upper-bound on the size
-  //          of objects in the large free list; this call returns
-  //          kint64max for max_object_size.  Bytes in
-  //          "large_unmapped" are no longer backed by physical memory
-  //          and do not count against the resident size of a process.
-  //
-  // "tcmalloc.central" - tcmalloc's central free-list. One entry per
-  //          size-class is returned. Never unmapped.
-  //
-  // "debug.free_queue" - free objects queued by the debug allocator
-  //                      and not returned to tcmalloc.
-  //
-  // "tcmalloc.thread" - tcmalloc's per-thread caches. Never unmapped.
-  virtual void GetFreeListSizes(std::vector<FreeListInfo>* v);
-
-  // Get a list of stack traces of sampled allocation points.  Returns
-  // a pointer to a "new[]-ed" result array, and stores the sample
-  // period in "sample_period".
-  //
-  // The state is stored as a sequence of adjacent entries
-  // in the returned array.  Each entry has the following form:
-  //    uintptr_t count;        // Number of objects with following trace
-  //    uintptr_t size;         // Total size of objects with following trace
-  //    uintptr_t depth;        // Number of PC values in stack trace
-  //    void*     stack[depth]; // PC values that form the stack trace
-  //
-  // The list of entries is terminated by a "count" of 0.
-  //
-  // It is the responsibility of the caller to "delete[]" the returned array.
-  //
-  // May return NULL to indicate no results.
-  //
-  // This is an internal extension.  Callers should use the more
-  // convenient "GetHeapSample(string*)" method defined above.
-  virtual void** ReadStackTraces(int* sample_period);
-
-  // Like ReadStackTraces(), but returns stack traces that caused growth
-  // in the address space size.
-  virtual void** ReadHeapGrowthStackTraces();
-};
-
-namespace base {
-
-// Information passed per range.  More fields may be added later.
-struct MallocRange {
-  enum Type {
-    INUSE,                // Application is using this range
-    FREE,                 // Range is currently free
-    UNMAPPED,             // Backing physical memory has been returned to the OS
-    UNKNOWN,
-    // More enum values may be added in the future
-  };
-
-  uintptr_t address;    // Address of range
-  size_t length;        // Byte length of range
-  Type type;            // Type of this range
-  double fraction;      // Fraction of range that is being used (0 if !INUSE)
-
-  // Perhaps add the following:
-  // - stack trace if this range was sampled
-  // - heap growth stack trace if applicable to this range
-  // - age when allocated (for inuse) or freed (if not in use)
-};
-
-} // namespace base
-
-#endif  // BASE_MALLOC_EXTENSION_H_
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
+ */
+#include <gperftools/malloc_extension.h>
diff --git a/third_party/tcmalloc/vendor/src/google/malloc_extension_c.h b/third_party/tcmalloc/vendor/src/google/malloc_extension_c.h
index e3f7f79..87d727b 100644
--- a/third_party/tcmalloc/vendor/src/google/malloc_extension_c.h
+++ b/third_party/tcmalloc/vendor/src/google/malloc_extension_c.h
@@ -26,74 +26,9 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * --
- * Author: Craig Silverstein
- *
- * C shims for the C++ malloc_extension.h.  See malloc_extension.h for
- * details.  Note these C shims always work on
- * MallocExtension::instance(); it is not possible to have more than
- * one MallocExtension object in C applications.
  */
 
-#ifndef _MALLOC_EXTENSION_C_H_
-#define _MALLOC_EXTENSION_C_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-
-/* Annoying stuff for windows -- makes sure clients can import these fns */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define kMallocExtensionHistogramSize 64
-
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(void* p);
-PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total,
-                                      int histogram[kMallocExtensionHistogramSize]);
-PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length);
-
-/* TODO(csilvers): write a C version of these routines, that perhaps
- * takes a function ptr and a void *.
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
  */
-/* void MallocExtension_GetHeapSample(string* result); */
-/* void MallocExtension_GetHeapGrowthStacks(string* result); */
-
-PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, size_t* value);
-PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value);
-PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void);
-PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void);
-PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes);
-PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size);
-PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(void* p);
-
-/*
- * NOTE: These enum values MUST be kept in sync with the version in
- *       malloc_extension.h
- */
-typedef enum {
-  MallocExtension_kUnknownOwnership = 0,
-  MallocExtension_kOwned,
-  MallocExtension_kNotOwned
-} MallocExtension_Ownership;
-
-PERFTOOLS_DLL_DECL MallocExtension_Ownership MallocExtension_GetOwnership(const void* p);
-
-#ifdef __cplusplus
-}   // extern "C"
-#endif
-
-#endif /* _MALLOC_EXTENSION_C_H_ */
+#include <gperftools/malloc_extension_c.h>
diff --git a/third_party/tcmalloc/vendor/src/google/malloc_hook.h b/third_party/tcmalloc/vendor/src/google/malloc_hook.h
index 245d0e1..e5b8e7c 100644
--- a/third_party/tcmalloc/vendor/src/google/malloc_hook.h
+++ b/third_party/tcmalloc/vendor/src/google/malloc_hook.h
@@ -27,332 +27,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// ---
-// Author: Sanjay Ghemawat
-//
-// Some of our malloc implementations can invoke the following hooks whenever
-// memory is allocated or deallocated.  MallocHook is thread-safe, and things
-// you do before calling AddFooHook(MyHook) are visible to any resulting calls
-// to MyHook.  Hooks must be thread-safe.  If you write:
-//
-//   CHECK(MallocHook::AddNewHook(&MyNewHook));
-//
-// MyNewHook will be invoked in subsequent calls in the current thread, but
-// there are no guarantees on when it might be invoked in other threads.
-//
-// There are a limited number of slots available for each hook type.  Add*Hook
-// will return false if there are no slots available.  Remove*Hook will return
-// false if the given hook was not already installed.
-//
-// The order in which individual hooks are called in Invoke*Hook is undefined.
-//
-// It is safe for a hook to remove itself within Invoke*Hook and add other
-// hooks.  Any hooks added inside a hook invocation (for the same hook type)
-// will not be invoked for the current invocation.
-//
-// One important user of these hooks is the heap profiler.
-//
-// CAVEAT: If you add new MallocHook::Invoke* calls then those calls must be
-// directly in the code of the (de)allocation function that is provided to the
-// user and that function must have an ATTRIBUTE_SECTION(malloc_hook) attribute.
-//
-// Note: the Invoke*Hook() functions are defined in malloc_hook-inl.h.  If you
-// need to invoke a hook (which you shouldn't unless you're part of tcmalloc),
-// be sure to #include malloc_hook-inl.h in addition to malloc_hook.h.
-//
-// NOTE FOR C USERS: If you want to use malloc_hook functionality from
-// a C program, #include malloc_hook_c.h instead of this file.
-
-#ifndef _MALLOC_HOOK_H_
-#define _MALLOC_HOOK_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-extern "C" {
-#include <google/malloc_hook_c.h>  // a C version of the malloc_hook interface
-}
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-// The C++ methods below call the C version (MallocHook_*), and thus
-// convert between an int and a bool.  Windows complains about this
-// (a "performance warning") which we don't care about, so we suppress.
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable:4800)
-#endif
-
-// Note: malloc_hook_c.h defines MallocHook_*Hook and
-// MallocHook_{Add,Remove}*Hook.  The version of these inside the MallocHook
-// class are defined in terms of the malloc_hook_c version.  See malloc_hook_c.h
-// for details of these types/functions.
-
-class PERFTOOLS_DLL_DECL MallocHook {
- public:
-  // The NewHook is invoked whenever an object is allocated.
-  // It may be passed NULL if the allocator returned NULL.
-  typedef MallocHook_NewHook NewHook;
-  inline static bool AddNewHook(NewHook hook) {
-    return MallocHook_AddNewHook(hook);
-  }
-  inline static bool RemoveNewHook(NewHook hook) {
-    return MallocHook_RemoveNewHook(hook);
-  }
-  inline static void InvokeNewHook(const void* p, size_t s);
-
-  // The DeleteHook is invoked whenever an object is deallocated.
-  // It may be passed NULL if the caller is trying to delete NULL.
-  typedef MallocHook_DeleteHook DeleteHook;
-  inline static bool AddDeleteHook(DeleteHook hook) {
-    return MallocHook_AddDeleteHook(hook);
-  }
-  inline static bool RemoveDeleteHook(DeleteHook hook) {
-    return MallocHook_RemoveDeleteHook(hook);
-  }
-  inline static void InvokeDeleteHook(const void* p);
-
-  // The PreMmapHook is invoked with mmap or mmap64 arguments just
-  // before the call is actually made.  Such a hook may be useful
-  // in memory limited contexts, to catch allocations that will exceed
-  // a memory limit, and take outside actions to increase that limit.
-  typedef MallocHook_PreMmapHook PreMmapHook;
-  inline static bool AddPreMmapHook(PreMmapHook hook) {
-    return MallocHook_AddPreMmapHook(hook);
-  }
-  inline static bool RemovePreMmapHook(PreMmapHook hook) {
-    return MallocHook_RemovePreMmapHook(hook);
-  }
-  inline static void InvokePreMmapHook(const void* start,
-                                       size_t size,
-                                       int protection,
-                                       int flags,
-                                       int fd,
-                                       off_t offset);
-
-  // The MmapReplacement is invoked after the PreMmapHook but before
-  // the call is actually made. The MmapReplacement should return true
-  // if it handled the call, or false if it is still necessary to
-  // call mmap/mmap64.
-  // This should be used only by experts, and users must be be
-  // extremely careful to avoid recursive calls to mmap. The replacement
-  // should be async signal safe.
-  // Only one MmapReplacement is supported. After setting an MmapReplacement
-  // you must call RemoveMmapReplacement before calling SetMmapReplacement
-  // again.
-  typedef MallocHook_MmapReplacement MmapReplacement;
-  inline static bool SetMmapReplacement(MmapReplacement hook) {
-    return MallocHook_SetMmapReplacement(hook);
-  }
-  inline static bool RemoveMmapReplacement(MmapReplacement hook) {
-    return MallocHook_RemoveMmapReplacement(hook);
-  }
-  inline static bool InvokeMmapReplacement(const void* start,
-                                           size_t size,
-                                           int protection,
-                                           int flags,
-                                           int fd,
-                                           off_t offset,
-                                           void** result);
-
-
-  // The MmapHook is invoked whenever a region of memory is mapped.
-  // It may be passed MAP_FAILED if the mmap failed.
-  typedef MallocHook_MmapHook MmapHook;
-  inline static bool AddMmapHook(MmapHook hook) {
-    return MallocHook_AddMmapHook(hook);
-  }
-  inline static bool RemoveMmapHook(MmapHook hook) {
-    return MallocHook_RemoveMmapHook(hook);
-  }
-  inline static void InvokeMmapHook(const void* result,
-                                    const void* start,
-                                    size_t size,
-                                    int protection,
-                                    int flags,
-                                    int fd,
-                                    off_t offset);
-
-  // The MunmapReplacement is invoked with munmap arguments just before
-  // the call is actually made. The MunmapReplacement should return true
-  // if it handled the call, or false if it is still necessary to
-  // call munmap.
-  // This should be used only by experts. The replacement should be
-  // async signal safe.
-  // Only one MunmapReplacement is supported. After setting an
-  // MunmapReplacement you must call RemoveMunmapReplacement before
-  // calling SetMunmapReplacement again.
-  typedef MallocHook_MunmapReplacement MunmapReplacement;
-  inline static bool SetMunmapReplacement(MunmapReplacement hook) {
-    return MallocHook_SetMunmapReplacement(hook);
-  }
-  inline static bool RemoveMunmapReplacement(MunmapReplacement hook) {
-    return MallocHook_RemoveMunmapReplacement(hook);
-  }
-  inline static bool InvokeMunmapReplacement(const void* p,
-                                             size_t size,
-                                             int* result);
-
-  // The MunmapHook is invoked whenever a region of memory is unmapped.
-  typedef MallocHook_MunmapHook MunmapHook;
-  inline static bool AddMunmapHook(MunmapHook hook) {
-    return MallocHook_AddMunmapHook(hook);
-  }
-  inline static bool RemoveMunmapHook(MunmapHook hook) {
-    return MallocHook_RemoveMunmapHook(hook);
-  }
-  inline static void InvokeMunmapHook(const void* p, size_t size);
-
-  // The MremapHook is invoked whenever a region of memory is remapped.
-  typedef MallocHook_MremapHook MremapHook;
-  inline static bool AddMremapHook(MremapHook hook) {
-    return MallocHook_AddMremapHook(hook);
-  }
-  inline static bool RemoveMremapHook(MremapHook hook) {
-    return MallocHook_RemoveMremapHook(hook);
-  }
-  inline static void InvokeMremapHook(const void* result,
-                                      const void* old_addr,
-                                      size_t old_size,
-                                      size_t new_size,
-                                      int flags,
-                                      const void* new_addr);
-
-  // The PreSbrkHook is invoked just before sbrk is called -- except when
-  // the increment is 0.  This is because sbrk(0) is often called
-  // to get the top of the memory stack, and is not actually a
-  // memory-allocation call.  It may be useful in memory-limited contexts,
-  // to catch allocations that will exceed the limit and take outside
-  // actions to increase such a limit.
-  typedef MallocHook_PreSbrkHook PreSbrkHook;
-  inline static bool AddPreSbrkHook(PreSbrkHook hook) {
-    return MallocHook_AddPreSbrkHook(hook);
-  }
-  inline static bool RemovePreSbrkHook(PreSbrkHook hook) {
-    return MallocHook_RemovePreSbrkHook(hook);
-  }
-  inline static void InvokePreSbrkHook(ptrdiff_t increment);
-
-  // The SbrkHook is invoked whenever sbrk is called -- except when
-  // the increment is 0.  This is because sbrk(0) is often called
-  // to get the top of the memory stack, and is not actually a
-  // memory-allocation call.
-  typedef MallocHook_SbrkHook SbrkHook;
-  inline static bool AddSbrkHook(SbrkHook hook) {
-    return MallocHook_AddSbrkHook(hook);
-  }
-  inline static bool RemoveSbrkHook(SbrkHook hook) {
-    return MallocHook_RemoveSbrkHook(hook);
-  }
-  inline static void InvokeSbrkHook(const void* result, ptrdiff_t increment);
-
-  // Get the current stack trace.  Try to skip all routines up to and
-  // and including the caller of MallocHook::Invoke*.
-  // Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
-  // as a hint about how many routines to skip if better information
-  // is not available.
-  inline static int GetCallerStackTrace(void** result, int max_depth,
-                                        int skip_count) {
-    return MallocHook_GetCallerStackTrace(result, max_depth, skip_count);
-  }
-
-  // Unhooked versions of mmap() and munmap().   These should be used
-  // only by experts, since they bypass heapchecking, etc.
-  // Note: These do not run hooks, but they still use the MmapReplacement
-  // and MunmapReplacement.
-  static void* UnhookedMMap(void *start, size_t length, int prot, int flags,
-                            int fd, off_t offset);
-  static int UnhookedMUnmap(void *start, size_t length);
-
-  // The following are DEPRECATED.
-  inline static NewHook GetNewHook();
-  inline static NewHook SetNewHook(NewHook hook) {
-    return MallocHook_SetNewHook(hook);
-  }
-
-  inline static DeleteHook GetDeleteHook();
-  inline static DeleteHook SetDeleteHook(DeleteHook hook) {
-    return MallocHook_SetDeleteHook(hook);
-  }
-
-  inline static PreMmapHook GetPreMmapHook();
-  inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) {
-    return MallocHook_SetPreMmapHook(hook);
-  }
-
-  inline static MmapHook GetMmapHook();
-  inline static MmapHook SetMmapHook(MmapHook hook) {
-    return MallocHook_SetMmapHook(hook);
-  }
-
-  inline static MunmapHook GetMunmapHook();
-  inline static MunmapHook SetMunmapHook(MunmapHook hook) {
-    return MallocHook_SetMunmapHook(hook);
-  }
-
-  inline static MremapHook GetMremapHook();
-  inline static MremapHook SetMremapHook(MremapHook hook) {
-    return MallocHook_SetMremapHook(hook);
-  }
-
-  inline static PreSbrkHook GetPreSbrkHook();
-  inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) {
-    return MallocHook_SetPreSbrkHook(hook);
-  }
-
-  inline static SbrkHook GetSbrkHook();
-  inline static SbrkHook SetSbrkHook(SbrkHook hook) {
-    return MallocHook_SetSbrkHook(hook);
-  }
-  // End of DEPRECATED methods.
-
- private:
-  // Slow path versions of Invoke*Hook.
-  static void InvokeNewHookSlow(const void* p, size_t s);
-  static void InvokeDeleteHookSlow(const void* p);
-  static void InvokePreMmapHookSlow(const void* start,
-                                    size_t size,
-                                    int protection,
-                                    int flags,
-                                    int fd,
-                                    off_t offset);
-  static void InvokeMmapHookSlow(const void* result,
-                                 const void* start,
-                                 size_t size,
-                                 int protection,
-                                 int flags,
-                                 int fd,
-                                 off_t offset);
-  static bool InvokeMmapReplacementSlow(const void* start,
-                                        size_t size,
-                                        int protection,
-                                        int flags,
-                                        int fd,
-                                        off_t offset,
-                                        void** result);
-  static void InvokeMunmapHookSlow(const void* p, size_t size);
-  static bool InvokeMunmapReplacementSlow(const void* p,
-                                          size_t size,
-                                          int* result);
-  static void InvokeMremapHookSlow(const void* result,
-                                   const void* old_addr,
-                                   size_t old_size,
-                                   size_t new_size,
-                                   int flags,
-                                   const void* new_addr);
-  static void InvokePreSbrkHookSlow(ptrdiff_t increment);
-  static void InvokeSbrkHookSlow(const void* result, ptrdiff_t increment);
-};
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-
-#endif /* _MALLOC_HOOK_H_ */
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
+ */
+#include <gperftools/malloc_hook.h>
diff --git a/third_party/tcmalloc/vendor/src/google/malloc_hook_c.h b/third_party/tcmalloc/vendor/src/google/malloc_hook_c.h
index 56337e1..e3ac0a4 100644
--- a/third_party/tcmalloc/vendor/src/google/malloc_hook_c.h
+++ b/third_party/tcmalloc/vendor/src/google/malloc_hook_c.h
@@ -26,148 +26,9 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * --
- * Author: Craig Silverstein
- *
- * C shims for the C++ malloc_hook.h.  See malloc_hook.h for details
- * on how to use these.
  */
 
-#ifndef _MALLOC_HOOK_C_H_
-#define _MALLOC_HOOK_C_H_
-
-#include <stddef.h>
-#include <sys/types.h>
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Get the current stack trace.  Try to skip all routines up to and
- * and including the caller of MallocHook::Invoke*.
- * Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
- * as a hint about how many routines to skip if better information
- * is not available.
- */
-PERFTOOLS_DLL_DECL
-int MallocHook_GetCallerStackTrace(void** result, int max_depth,
-                                   int skip_count);
-
-/* The MallocHook_{Add,Remove}*Hook functions return 1 on success and 0 on
- * failure.
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
  */
-
-typedef void (*MallocHook_NewHook)(const void* ptr, size_t size);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddNewHook(MallocHook_NewHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveNewHook(MallocHook_NewHook hook);
-
-typedef void (*MallocHook_DeleteHook)(const void* ptr);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook);
-
-typedef void (*MallocHook_PreMmapHook)(const void *start,
-                                       size_t size,
-                                       int protection,
-                                       int flags,
-                                       int fd,
-                                       off_t offset);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook);
-
-typedef void (*MallocHook_MmapHook)(const void* result,
-                                    const void* start,
-                                    size_t size,
-                                    int protection,
-                                    int flags,
-                                    int fd,
-                                    off_t offset);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddMmapHook(MallocHook_MmapHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook);
-
-typedef int (*MallocHook_MmapReplacement)(const void* start,
-                                          size_t size,
-                                          int protection,
-                                          int flags,
-                                          int fd,
-                                          off_t offset,
-                                          void** result);
-int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook);
-int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook);
-
-typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook);
-
-typedef int (*MallocHook_MunmapReplacement)(const void* ptr,
-                                            size_t size,
-                                            int* result);
-int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook);
-int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook);
-
-typedef void (*MallocHook_MremapHook)(const void* result,
-                                      const void* old_addr,
-                                      size_t old_size,
-                                      size_t new_size,
-                                      int flags,
-                                      const void* new_addr);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddMremapHook(MallocHook_MremapHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook);
-
-typedef void (*MallocHook_PreSbrkHook)(ptrdiff_t increment);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook);
-
-typedef void (*MallocHook_SbrkHook)(const void* result, ptrdiff_t increment);
-PERFTOOLS_DLL_DECL
-int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook);
-PERFTOOLS_DLL_DECL
-int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook);
-
-/* The following are DEPRECATED. */
-PERFTOOLS_DLL_DECL
-MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook);
-PERFTOOLS_DLL_DECL
-MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook);
-/* End of DEPRECATED functions. */
-
-#ifdef __cplusplus
-}   // extern "C"
-#endif
-
-#endif /* _MALLOC_HOOK_C_H_ */
+#include <gperftools/malloc_hook_c.h>
diff --git a/third_party/tcmalloc/vendor/src/google/profiler.h b/third_party/tcmalloc/vendor/src/google/profiler.h
index 7971e04..67a89c1 100644
--- a/third_party/tcmalloc/vendor/src/google/profiler.h
+++ b/third_party/tcmalloc/vendor/src/google/profiler.h
@@ -26,143 +26,9 @@
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ---
- * Author: Sanjay Ghemawat
- *
- * Module for CPU profiling based on periodic pc-sampling.
- *
- * For full(er) information, see doc/cpuprofile.html
- *
- * This module is linked into your program with
- * no slowdown caused by this unless you activate the profiler
- * using one of the following methods:
- *
- *    1. Before starting the program, set the environment variable
- *       "PROFILE" to be the name of the file to which the profile
- *       data should be written.
- *
- *    2. Programmatically, start and stop the profiler using the
- *       routines "ProfilerStart(filename)" and "ProfilerStop()".
- *
- *
- * (Note: if using linux 2.4 or earlier, only the main thread may be
- * profiled.)
- *
- * Use pprof to view the resulting profile output.
- *    % pprof <path_to_executable> <profile_file_name>
- *    % pprof --gv  <path_to_executable> <profile_file_name>
- *
- * These functions are thread-safe.
  */
 
-#ifndef BASE_PROFILER_H_
-#define BASE_PROFILER_H_
-
-#include <time.h>       /* For time_t */
-
-/* Annoying stuff for windows; makes sure clients can import these functions */
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-/* All this code should be usable from within C apps. */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Profiler options, for use with ProfilerStartWithOptions.  To use:
- *
- *   struct ProfilerOptions options;
- *   memset(&options, 0, sizeof options);
- *
- * then fill in fields as needed.
- *
- * This structure is intended to be usable from C code, so no constructor
- * is provided to initialize it.  (Use memset as described above).
- */
-struct ProfilerOptions {
-  /* Filter function and argument.
-   *
-   * If filter_in_thread is not NULL, when a profiling tick is delivered
-   * the profiler will call:
-   *
-   *   (*filter_in_thread)(filter_in_thread_arg)
-   *
-   * If it returns nonzero, the sample will be included in the profile.
-   * Note that filter_in_thread runs in a signal handler, so must be
-   * async-signal-safe.
-   *
-   * A typical use would be to set up filter results for each thread
-   * in the system before starting the profiler, then to make
-   * filter_in_thread be a very simple function which retrieves those
-   * results in an async-signal-safe way.  Retrieval could be done
-   * using thread-specific data, or using a shared data structure that
-   * supports async-signal-safe lookups.
-   */
-  int (*filter_in_thread)(void *arg);
-  void *filter_in_thread_arg;
-};
-
-/* Start profiling and write profile info into fname, discarding any
- * existing profiling data in that file.
- *
- * This is equivalent to calling ProfilerStartWithOptions(fname, NULL).
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
  */
-PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname);
-
-/* Start profiling and write profile into fname, discarding any
- * existing profiling data in that file.
- *
- * The profiler is configured using the options given by 'options'.
- * Options which are not specified are given default values.
- *
- * 'options' may be NULL, in which case all are given default values.
- *
- * Returns nonzero if profiling was started sucessfully, or zero else.
- */
-PERFTOOLS_DLL_DECL int ProfilerStartWithOptions(
-    const char *fname, const struct ProfilerOptions *options);
-
-/* Stop profiling. Can be started again with ProfilerStart(), but
- * the currently accumulated profiling data will be cleared.
- */
-PERFTOOLS_DLL_DECL void ProfilerStop();
-
-/* Flush any currently buffered profiling state to the profile file.
- * Has no effect if the profiler has not been started.
- */
-PERFTOOLS_DLL_DECL void ProfilerFlush();
-
-
-/* DEPRECATED: these functions were used to enable/disable profiling
- * in the current thread, but no longer do anything.
- */
-PERFTOOLS_DLL_DECL void ProfilerEnable();
-PERFTOOLS_DLL_DECL void ProfilerDisable();
-
-/* Returns nonzero if profile is currently enabled, zero if it's not. */
-PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads();
-
-/* Routine for registering new threads with the profiler.
- */
-PERFTOOLS_DLL_DECL void ProfilerRegisterThread();
-
-/* Stores state about profiler's current status into "*state". */
-struct ProfilerState {
-  int    enabled;             /* Is profiling currently enabled? */
-  time_t start_time;          /* If enabled, when was profiling started? */
-  char   profile_name[1024];  /* Name of profile file being written, or '\0' */
-  int    samples_gathered;    /* Number of samples gathered so far (or 0) */
-};
-PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state);
-
-#ifdef __cplusplus
-}  // extern "C"
-#endif
-
-#endif  /* BASE_PROFILER_H_ */
+#include <gperftools/profiler.h>
diff --git a/third_party/tcmalloc/vendor/src/google/stacktrace.h b/third_party/tcmalloc/vendor/src/google/stacktrace.h
index fd186d6..eb761ca 100644
--- a/third_party/tcmalloc/vendor/src/google/stacktrace.h
+++ b/third_party/tcmalloc/vendor/src/google/stacktrace.h
@@ -27,90 +27,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// ---
-// Author: Sanjay Ghemawat
-//
-// Routines to extract the current stack trace.  These functions are
-// thread-safe.
-
-#ifndef GOOGLE_STACKTRACE_H_
-#define GOOGLE_STACKTRACE_H_
-
-// Annoying stuff for windows -- makes sure clients can import these functions
-#ifndef PERFTOOLS_DLL_DECL
-# ifdef _WIN32
-#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
-# else
-#   define PERFTOOLS_DLL_DECL
-# endif
-#endif
-
-
-// Skips the most recent "skip_count" stack frames (also skips the
-// frame generated for the "GetStackFrames" routine itself), and then
-// records the pc values for up to the next "max_depth" frames in
-// "result", and the corresponding stack frame sizes in "sizes".
-// Returns the number of values recorded in "result"/"sizes".
-//
-// Example:
-//      main() { foo(); }
-//      foo() { bar(); }
-//      bar() {
-//        void* result[10];
-//        int sizes[10];
-//        int depth = GetStackFrames(result, sizes, 10, 1);
-//      }
-//
-// The GetStackFrames call will skip the frame for "bar".  It will
-// return 2 and will produce pc values that map to the following
-// procedures:
-//      result[0]       foo
-//      result[1]       main
-// (Actually, there may be a few more entries after "main" to account for
-// startup procedures.)
-// And corresponding stack frame sizes will also be recorded:
-//    sizes[0]       16
-//    sizes[1]       16
-// (Stack frame sizes of 16 above are just for illustration purposes.)
-// Stack frame sizes of 0 or less indicate that those frame sizes couldn't
-// be identified.
-//
-// This routine may return fewer stack frame entries than are
-// available. Also note that "result" and "sizes" must both be non-NULL.
-extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth,
-                          int skip_count);
-
-// Same as above, but to be used from a signal handler. The "uc" parameter
-// should be the pointer to ucontext_t which was passed as the 3rd parameter
-// to sa_sigaction signal handler. It may help the unwinder to get a
-// better stack trace under certain conditions. The "uc" may safely be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth,
-                                     int skip_count, const void *uc);
-
-// This is similar to the GetStackFrames routine, except that it returns
-// the stack trace only, and not the stack frame sizes as well.
-// Example:
-//      main() { foo(); }
-//      foo() { bar(); }
-//      bar() {
-//        void* result[10];
-//        int depth = GetStackTrace(result, 10, 1);
-//      }
-//
-// This produces:
-//      result[0]       foo
-//      result[1]       main
-//           ....       ...
-//
-// "result" must not be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth,
-                                            int skip_count);
-
-// Same as above, but to be used from a signal handler. The "uc" parameter
-// should be the pointer to ucontext_t which was passed as the 3rd parameter
-// to sa_sigaction signal handler. It may help the unwinder to get a
-// better stack trace under certain conditions. The "uc" may safely be NULL.
-extern PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth,
-                                    int skip_count, const void *uc);
-
-#endif /* GOOGLE_STACKTRACE_H_ */
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
+ */
+#include <gperftools/stacktrace.h>
diff --git a/third_party/tcmalloc/vendor/src/google/tcmalloc.h b/third_party/tcmalloc/vendor/src/google/tcmalloc.h
new file mode 100644
index 0000000..c7db631
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/google/tcmalloc.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2003, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
+ */
+#include <gperftools/tcmalloc.h>
diff --git a/third_party/tcmalloc/vendor/src/gperftools/heap-checker.h b/third_party/tcmalloc/vendor/src/gperftools/heap-checker.h
new file mode 100644
index 0000000..32ed10a
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/gperftools/heap-checker.h
@@ -0,0 +1,424 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Maxim Lifantsev (with design ideas by Sanjay Ghemawat)
+//
+//
+// Module for detecing heap (memory) leaks.
+//
+// For full(er) information, see doc/heap_checker.html
+//
+// This module can be linked into programs with
+// no slowdown caused by this unless you activate the leak-checker:
+//
+//    1. Set the environment variable HEAPCHEK to _type_ before
+//       running the program.
+//
+// _type_ is usually "normal" but can also be "minimal", "strict", or
+// "draconian".  (See the html file for other options, like 'local'.)
+//
+// After that, just run your binary.  If the heap-checker detects
+// a memory leak at program-exit, it will print instructions on how
+// to track down the leak.
+
+#ifndef BASE_HEAP_CHECKER_H_
+#define BASE_HEAP_CHECKER_H_
+
+#include <sys/types.h>  // for size_t
+// I can't #include config.h in this public API file, but I should
+// really use configure (and make malloc_extension.h a .in file) to
+// figure out if the system has stdint.h or not.  But I'm lazy, so
+// for now I'm assuming it's a problem only with MSVC.
+#ifndef _MSC_VER
+#include <stdint.h>     // for uintptr_t
+#endif
+#include <stdarg.h>     // for va_list
+#include <vector>
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+
+// The class is thread-safe with respect to all the provided static methods,
+// as well as HeapLeakChecker objects: they can be accessed by multiple threads.
+class PERFTOOLS_DLL_DECL HeapLeakChecker {
+ public:
+
+  // ----------------------------------------------------------------------- //
+  // Static functions for working with (whole-program) leak checking.
+
+  // If heap leak checking is currently active in some mode
+  // e.g. if leak checking was started (and is still active now)
+  // due to HEAPCHECK=... defined in the environment.
+  // The return value reflects iff HeapLeakChecker objects manually
+  // constructed right now will be doing leak checking or nothing.
+  // Note that we can go from active to inactive state during InitGoogle()
+  // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle().
+  static bool IsActive();
+
+  // Return pointer to the whole-program checker if it has been created
+  // and NULL otherwise.
+  // Once GlobalChecker() returns non-NULL that object will not disappear and
+  // will be returned by all later GlobalChecker calls.
+  // This is mainly to access BytesLeaked() and ObjectsLeaked() (see below)
+  // for the whole-program checker after one calls NoGlobalLeaks()
+  // or similar and gets false.
+  static HeapLeakChecker* GlobalChecker();
+
+  // Do whole-program leak check now (if it was activated for this binary);
+  // return false only if it was activated and has failed.
+  // The mode of the check is controlled by the command-line flags.
+  // This method can be called repeatedly.
+  // Things like GlobalChecker()->SameHeap() can also be called explicitly
+  // to do the desired flavor of the check.
+  static bool NoGlobalLeaks();
+
+  // If whole-program checker if active,
+  // cancel its automatic execution after main() exits.
+  // This requires that some leak check (e.g. NoGlobalLeaks())
+  // has been called at least once on the whole-program checker.
+  static void CancelGlobalCheck();
+
+  // ----------------------------------------------------------------------- //
+  // Non-static functions for starting and doing leak checking.
+
+  // Start checking and name the leak check performed.
+  // The name is used in naming dumped profiles
+  // and needs to be unique only within your binary.
+  // It must also be a string that can be a part of a file name,
+  // in particular not contain path expressions.
+  explicit HeapLeakChecker(const char *name);
+
+  // Destructor (verifies that some *NoLeaks or *SameHeap method
+  // has been called at least once).
+  ~HeapLeakChecker();
+
+  // These used to be different but are all the same now: they return
+  // true iff all memory allocated since this HeapLeakChecker object
+  // was constructor is still reachable from global state.
+  //
+  // Because we fork to convert addresses to symbol-names, and forking
+  // is not thread-safe, and we may be called in a threaded context,
+  // we do not try to symbolize addresses when called manually.
+  bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); }
+
+  // These forms are obsolete; use NoLeaks() instead.
+  // TODO(csilvers): mark as DEPRECATED.
+  bool QuickNoLeaks()  { return NoLeaks(); }
+  bool BriefNoLeaks()  { return NoLeaks(); }
+  bool SameHeap()      { return NoLeaks(); }
+  bool QuickSameHeap() { return NoLeaks(); }
+  bool BriefSameHeap() { return NoLeaks(); }
+
+  // Detailed information about the number of leaked bytes and objects
+  // (both of these can be negative as well).
+  // These are available only after a *SameHeap or *NoLeaks
+  // method has been called.
+  // Note that it's possible for both of these to be zero
+  // while SameHeap() or NoLeaks() returned false in case
+  // of a heap state change that is significant
+  // but preserves the byte and object counts.
+  ssize_t BytesLeaked() const;
+  ssize_t ObjectsLeaked() const;
+
+  // ----------------------------------------------------------------------- //
+  // Static helpers to make us ignore certain leaks.
+
+  // Scoped helper class.  Should be allocated on the stack inside a
+  // block of code.  Any heap allocations done in the code block
+  // covered by the scoped object (including in nested function calls
+  // done by the code block) will not be reported as leaks.  This is
+  // the recommended replacement for the GetDisableChecksStart() and
+  // DisableChecksToHereFrom() routines below.
+  //
+  // Example:
+  //   void Foo() {
+  //     HeapLeakChecker::Disabler disabler;
+  //     ... code that allocates objects whose leaks should be ignored ...
+  //   }
+  //
+  // REQUIRES: Destructor runs in same thread as constructor
+  class Disabler {
+   public:
+    Disabler();
+    ~Disabler();
+   private:
+    Disabler(const Disabler&);        // disallow copy
+    void operator=(const Disabler&);  // and assign
+  };
+
+  // Ignore an object located at 'ptr' (can go at the start or into the object)
+  // as well as all heap objects (transitively) referenced from it for the
+  // purposes of heap leak checking. Returns 'ptr' so that one can write
+  //   static T* obj = IgnoreObject(new T(...));
+  //
+  // If 'ptr' does not point to an active allocated object at the time of this
+  // call, it is ignored; but if it does, the object must not get deleted from
+  // the heap later on.
+  //
+  // See also HiddenPointer, below, if you need to prevent a pointer from
+  // being traversed by the heap checker but do not wish to transitively
+  // whitelist objects referenced through it.
+  template <typename T>
+  static T* IgnoreObject(T* ptr) {
+    DoIgnoreObject(static_cast<const void*>(const_cast<const T*>(ptr)));
+    return ptr;
+  }
+
+  // Undo what an earlier IgnoreObject() call promised and asked to do.
+  // At the time of this call 'ptr' must point at or inside of an active
+  // allocated object which was previously registered with IgnoreObject().
+  static void UnIgnoreObject(const void* ptr);
+
+  // ----------------------------------------------------------------------- //
+  // Internal types defined in .cc
+
+  class Allocator;
+  struct RangeValue;
+
+ private:
+
+  // ----------------------------------------------------------------------- //
+  // Various helpers
+
+  // Create the name of the heap profile file.
+  // Should be deleted via Allocator::Free().
+  char* MakeProfileNameLocked();
+
+  // Helper for constructors
+  void Create(const char *name, bool make_start_snapshot);
+
+  enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE };
+
+  // Helper for *NoLeaks and *SameHeap
+  bool DoNoLeaks(ShouldSymbolize should_symbolize);
+
+  // Helper for NoGlobalLeaks, also called by the global destructor.
+  static bool NoGlobalLeaksMaybeSymbolize(ShouldSymbolize should_symbolize);
+
+  // These used to be public, but they are now deprecated.
+  // Will remove entirely when all internal uses are fixed.
+  // In the meantime, use friendship so the unittest can still test them.
+  static void* GetDisableChecksStart();
+  static void DisableChecksToHereFrom(const void* start_address);
+  static void DisableChecksIn(const char* pattern);
+  friend void RangeDisabledLeaks();
+  friend void NamedTwoDisabledLeaks();
+  friend void* RunNamedDisabledLeaks(void*);
+  friend void TestHeapLeakCheckerNamedDisabling();
+  // TODO(csilvers): remove this one, at least
+  friend int main(int, char**);
+
+
+  // Actually implements IgnoreObject().
+  static void DoIgnoreObject(const void* ptr);
+
+  // Disable checks based on stack trace entry at a depth <=
+  // max_depth.  Used to hide allocations done inside some special
+  // libraries.
+  static void DisableChecksFromToLocked(const void* start_address,
+                                        const void* end_address,
+                                        int max_depth);
+
+  // Helper for DoNoLeaks to ignore all objects reachable from all live data
+  static void IgnoreAllLiveObjectsLocked(const void* self_stack_top);
+
+  // Callback we pass to ListAllProcessThreads (see thread_lister.h)
+  // that is invoked when all threads of our process are found and stopped.
+  // The call back does the things needed to ignore live data reachable from
+  // thread stacks and registers for all our threads
+  // as well as do other global-live-data ignoring
+  // (via IgnoreNonThreadLiveObjectsLocked)
+  // during the quiet state of all threads being stopped.
+  // For the argument meaning see the comment by ListAllProcessThreads.
+  // Here we only use num_threads and thread_pids, that ListAllProcessThreads
+  // fills for us with the number and pids of all the threads of our process
+  // it found and attached to.
+  static int IgnoreLiveThreadsLocked(void* parameter,
+                                     int num_threads,
+                                     pid_t* thread_pids,
+                                     va_list ap);
+
+  // Helper for IgnoreAllLiveObjectsLocked and IgnoreLiveThreadsLocked
+  // that we prefer to execute from IgnoreLiveThreadsLocked
+  // while all threads are stopped.
+  // This helper does live object discovery and ignoring
+  // for all objects that are reachable from everything
+  // not related to thread stacks and registers.
+  static void IgnoreNonThreadLiveObjectsLocked();
+
+  // Helper for IgnoreNonThreadLiveObjectsLocked and IgnoreLiveThreadsLocked
+  // to discover and ignore all heap objects
+  // reachable from currently considered live objects
+  // (live_objects static global variable in out .cc file).
+  // "name", "name2" are two strings that we print one after another
+  // in a debug message to describe what kind of live object sources
+  // are being used.
+  static void IgnoreLiveObjectsLocked(const char* name, const char* name2);
+
+  // Do the overall whole-program heap leak check if needed;
+  // returns true when did the leak check.
+  static bool DoMainHeapCheck();
+
+  // Type of task for UseProcMapsLocked
+  enum ProcMapsTask {
+    RECORD_GLOBAL_DATA,
+    DISABLE_LIBRARY_ALLOCS
+  };
+
+  // Success/Error Return codes for UseProcMapsLocked.
+  enum ProcMapsResult {
+    PROC_MAPS_USED,
+    CANT_OPEN_PROC_MAPS,
+    NO_SHARED_LIBS_IN_PROC_MAPS
+  };
+
+  // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line.
+  static ProcMapsResult UseProcMapsLocked(ProcMapsTask proc_maps_task);
+
+  // A ProcMapsTask to disable allocations from 'library'
+  // that is mapped to [start_address..end_address)
+  // (only if library is a certain system library).
+  static void DisableLibraryAllocsLocked(const char* library,
+                                         uintptr_t start_address,
+                                         uintptr_t end_address);
+
+  // Return true iff "*ptr" points to a heap object
+  // ("*ptr" can point at the start or inside of a heap object
+  //  so that this works e.g. for pointers to C++ arrays, C++ strings,
+  //  multiple-inherited objects, or pointers to members).
+  // We also fill *object_size for this object then
+  // and we move "*ptr" to point to the very start of the heap object.
+  static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size);
+
+  // Helper to shutdown heap leak checker when it's not needed
+  // or can't function properly.
+  static void TurnItselfOffLocked();
+
+  // Internally-used c-tor to start whole-executable checking.
+  HeapLeakChecker();
+
+  // ----------------------------------------------------------------------- //
+  // Friends and externally accessed helpers.
+
+  // Helper for VerifyHeapProfileTableStackGet in the unittest
+  // to get the recorded allocation caller for ptr,
+  // which must be a heap object.
+  static const void* GetAllocCaller(void* ptr);
+  friend void VerifyHeapProfileTableStackGet();
+
+  // This gets to execute before constructors for all global objects
+  static void BeforeConstructorsLocked();
+  friend void HeapLeakChecker_BeforeConstructors();
+
+  // This gets to execute after destructors for all global objects
+  friend void HeapLeakChecker_AfterDestructors();
+
+  // Full starting of recommended whole-program checking.
+  friend void HeapLeakChecker_InternalInitStart();
+
+  // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially
+  // calls DoMainHeapCheck
+  friend void HeapLeakChecker_RunHeapCleanups();
+
+  // ----------------------------------------------------------------------- //
+  // Member data.
+
+  class SpinLock* lock_;  // to make HeapLeakChecker objects thread-safe
+  const char* name_;  // our remembered name (we own it)
+                      // NULL means this leak checker is a noop
+
+  // Snapshot taken when the checker was created.  May be NULL
+  // for the global heap checker object.  We use void* instead of
+  // HeapProfileTable::Snapshot* to avoid including heap-profile-table.h.
+  void* start_snapshot_;
+
+  bool has_checked_;  // if we have done the leak check, so these are ready:
+  ssize_t inuse_bytes_increase_;  // bytes-in-use increase for this checker
+  ssize_t inuse_allocs_increase_;  // allocations-in-use increase
+                                   // for this checker
+  bool keep_profiles_;  // iff we should keep the heap profiles we've made
+
+  // ----------------------------------------------------------------------- //
+
+  // Disallow "evil" constructors.
+  HeapLeakChecker(const HeapLeakChecker&);
+  void operator=(const HeapLeakChecker&);
+};
+
+
+// Holds a pointer that will not be traversed by the heap checker.
+// Contrast with HeapLeakChecker::IgnoreObject(o), in which o and
+// all objects reachable from o are ignored by the heap checker.
+template <class T>
+class HiddenPointer {
+ public:
+  explicit HiddenPointer(T* t)
+      : masked_t_(reinterpret_cast<uintptr_t>(t) ^ kHideMask) {
+  }
+  // Returns unhidden pointer.  Be careful where you save the result.
+  T* get() const { return reinterpret_cast<T*>(masked_t_ ^ kHideMask); }
+
+ private:
+  // Arbitrary value, but not such that xor'ing with it is likely
+  // to map one valid pointer to another valid pointer:
+  static const uintptr_t kHideMask =
+      static_cast<uintptr_t>(0xF03A5F7BF03A5F7Bll);
+  uintptr_t masked_t_;
+};
+
+// A class that exists solely to run its destructor.  This class should not be
+// used directly, but instead by the REGISTER_HEAPCHECK_CLEANUP macro below.
+class PERFTOOLS_DLL_DECL HeapCleaner {
+ public:
+  typedef void (*void_function)(void);
+  HeapCleaner(void_function f);
+  static void RunHeapCleanups();
+ private:
+  static std::vector<void_function>* heap_cleanups_;
+};
+
+// A macro to declare module heap check cleanup tasks
+// (they run only if we are doing heap leak checking.)
+// 'body' should be the cleanup code to run.  'name' doesn't matter,
+// but must be unique amongst all REGISTER_HEAPCHECK_CLEANUP calls.
+#define REGISTER_HEAPCHECK_CLEANUP(name, body)  \
+  namespace { \
+  void heapcheck_cleanup_##name() { body; } \
+  static HeapCleaner heapcheck_cleaner_##name(&heapcheck_cleanup_##name); \
+  }
+
+#endif  // BASE_HEAP_CHECKER_H_
diff --git a/third_party/tcmalloc/vendor/src/gperftools/heap-profiler.h b/third_party/tcmalloc/vendor/src/gperftools/heap-profiler.h
new file mode 100644
index 0000000..57cb97a
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/gperftools/heap-profiler.h
@@ -0,0 +1,104 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ *
+ * Module for heap-profiling.
+ *
+ * For full(er) information, see doc/heapprofile.html
+ *
+ * This module can be linked into your program with
+ * no slowdown caused by this unless you activate the profiler
+ * using one of the following methods:
+ *
+ *    1. Before starting the program, set the environment variable
+ *       "HEAPPROFILE" to be the name of the file to which the profile
+ *       data should be written.
+ *
+ *    2. Programmatically, start and stop the profiler using the
+ *       routines "HeapProfilerStart(filename)" and "HeapProfilerStop()".
+ *
+ */
+
+#ifndef BASE_HEAP_PROFILER_H_
+#define BASE_HEAP_PROFILER_H_
+
+#include <stddef.h>
+
+/* Annoying stuff for windows; makes sure clients can import these functions */
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+/* All this code should be usable from within C apps. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Start profiling and arrange to write profile data to file names
+ * of the form: "prefix.0000", "prefix.0001", ...
+ */
+PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix);
+
+/* Returns non-zero if we are currently profiling the heap.  (Returns
+ * an int rather than a bool so it's usable from C.)  This is true
+ * between calls to HeapProfilerStart() and HeapProfilerStop(), and
+ * also if the program has been run with HEAPPROFILER, or some other
+ * way to turn on whole-program profiling.
+ */
+int IsHeapProfilerRunning();
+
+/* Stop heap profiling.  Can be restarted again with HeapProfilerStart(),
+ * but the currently accumulated profiling information will be cleared.
+ */
+PERFTOOLS_DLL_DECL void HeapProfilerStop();
+
+/* Dump a profile now - can be used for dumping at a hopefully
+ * quiescent state in your program, in order to more easily track down
+ * memory leaks. Will include the reason in the logged message
+ */
+PERFTOOLS_DLL_DECL void HeapProfilerDump(const char *reason);
+
+/* Generate current heap profiling information.
+ * Returns an empty string when heap profiling is not active.
+ * The returned pointer is a '\0'-terminated string allocated using malloc()
+ * and should be free()-ed as soon as the caller does not need it anymore.
+ */
+PERFTOOLS_DLL_DECL char* GetHeapProfile();
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  /* BASE_HEAP_PROFILER_H_ */
diff --git a/third_party/tcmalloc/vendor/src/gperftools/malloc_extension.h b/third_party/tcmalloc/vendor/src/gperftools/malloc_extension.h
new file mode 100644
index 0000000..b180115
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/gperftools/malloc_extension.h
@@ -0,0 +1,420 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat <opensource@google.com>
+//
+// Extra extensions exported by some malloc implementations.  These
+// extensions are accessed through a virtual base class so an
+// application can link against a malloc that does not implement these
+// extensions, and it will get default versions that do nothing.
+//
+// NOTE FOR C USERS: If you wish to use this functionality from within
+// a C program, see malloc_extension_c.h.
+
+#ifndef BASE_MALLOC_EXTENSION_H_
+#define BASE_MALLOC_EXTENSION_H_
+
+#include <stddef.h>
+// I can't #include config.h in this public API file, but I should
+// really use configure (and make malloc_extension.h a .in file) to
+// figure out if the system has stdint.h or not.  But I'm lazy, so
+// for now I'm assuming it's a problem only with MSVC.
+#ifndef _MSC_VER
+#include <stdint.h>
+#endif
+#include <string>
+#include <vector>
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+static const int kMallocHistogramSize = 64;
+
+// One day, we could support other types of writers (perhaps for C?)
+typedef std::string MallocExtensionWriter;
+
+namespace base {
+struct MallocRange;
+}
+
+// Interface to a pluggable system allocator.
+class SysAllocator {
+ public:
+  SysAllocator() {
+  }
+  virtual ~SysAllocator();
+
+  // Allocates "size"-byte of memory from system aligned with "alignment".
+  // Returns NULL if failed. Otherwise, the returned pointer p up to and
+  // including (p + actual_size -1) have been allocated.
+  virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0;
+};
+
+// The default implementations of the following routines do nothing.
+// All implementations should be thread-safe; the current one
+// (TCMallocImplementation) is.
+class PERFTOOLS_DLL_DECL MallocExtension {
+ public:
+  virtual ~MallocExtension();
+
+  // Call this very early in the program execution -- say, in a global
+  // constructor -- to set up parameters and state needed by all
+  // instrumented malloc implemenatations.  One example: this routine
+  // sets environemnt variables to tell STL to use libc's malloc()
+  // instead of doing its own memory management.  This is safe to call
+  // multiple times, as long as each time is before threads start up.
+  static void Initialize();
+
+  // See "verify_memory.h" to see what these routines do
+  virtual bool VerifyAllMemory();
+  virtual bool VerifyNewMemory(const void* p);
+  virtual bool VerifyArrayNewMemory(const void* p);
+  virtual bool VerifyMallocMemory(const void* p);
+  virtual bool MallocMemoryStats(int* blocks, size_t* total,
+                                 int histogram[kMallocHistogramSize]);
+
+  // Get a human readable description of the current state of the malloc
+  // data structures.  The state is stored as a null-terminated string
+  // in a prefix of "buffer[0,buffer_length-1]".
+  // REQUIRES: buffer_length > 0.
+  virtual void GetStats(char* buffer, int buffer_length);
+
+  // Outputs to "writer" a sample of live objects and the stack traces
+  // that allocated these objects.  The format of the returned output
+  // is equivalent to the output of the heap profiler and can
+  // therefore be passed to "pprof". This function is equivalent to
+  // ReadStackTraces. The main difference is that this function returns
+  // serialized data appropriately formatted for use by the pprof tool.
+  // NOTE: by default, tcmalloc does not do any heap sampling, and this
+  //       function will always return an empty sample.  To get useful
+  //       data from GetHeapSample, you must also set the environment
+  //       variable TCMALLOC_SAMPLE_PARAMETER to a value such as 524288.
+  virtual void GetHeapSample(MallocExtensionWriter* writer);
+
+  // Outputs to "writer" the stack traces that caused growth in the
+  // address space size.  The format of the returned output is
+  // equivalent to the output of the heap profiler and can therefore
+  // be passed to "pprof". This function is equivalent to
+  // ReadHeapGrowthStackTraces. The main difference is that this function
+  // returns serialized data appropriately formatted for use by the
+  // pprof tool.  (This does not depend on, or require,
+  // TCMALLOC_SAMPLE_PARAMETER.)
+  virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer);
+
+  // Invokes func(arg, range) for every controlled memory
+  // range.  *range is filled in with information about the range.
+  //
+  // This is a best-effort interface useful only for performance
+  // analysis.  The implementation may not call func at all.
+  typedef void (RangeFunction)(void*, const base::MallocRange*);
+  virtual void Ranges(void* arg, RangeFunction func);
+
+  // -------------------------------------------------------------------
+  // Control operations for getting and setting malloc implementation
+  // specific parameters.  Some currently useful properties:
+  //
+  // generic
+  // -------
+  // "generic.current_allocated_bytes"
+  //      Number of bytes currently allocated by application
+  //      This property is not writable.
+  //
+  // "generic.heap_size"
+  //      Number of bytes in the heap ==
+  //            current_allocated_bytes +
+  //            fragmentation +
+  //            freed memory regions
+  //      This property is not writable.
+  //
+  // tcmalloc
+  // --------
+  // "tcmalloc.max_total_thread_cache_bytes"
+  //      Upper limit on total number of bytes stored across all
+  //      per-thread caches.  Default: 16MB.
+  //
+  // "tcmalloc.current_total_thread_cache_bytes"
+  //      Number of bytes used across all thread caches.
+  //      This property is not writable.
+  //
+  // "tcmalloc.central_cache_free_bytes"
+  //      Number of free bytes in the central cache that have been
+  //      assigned to size classes. They always count towards virtual
+  //      memory usage, and unless the underlying memory is swapped out
+  //      by the OS, they also count towards physical memory usage.
+  //      This property is not writable.
+  //
+  // "tcmalloc.transfer_cache_free_bytes"
+  //      Number of free bytes that are waiting to be transfered between
+  //      the central cache and a thread cache. They always count
+  //      towards virtual memory usage, and unless the underlying memory
+  //      is swapped out by the OS, they also count towards physical
+  //      memory usage. This property is not writable.
+  //
+  // "tcmalloc.thread_cache_free_bytes"
+  //      Number of free bytes in thread caches. They always count
+  //      towards virtual memory usage, and unless the underlying memory
+  //      is swapped out by the OS, they also count towards physical
+  //      memory usage. This property is not writable.
+  //
+  // "tcmalloc.pageheap_free_bytes"
+  //      Number of bytes in free, mapped pages in page heap.  These
+  //      bytes can be used to fulfill allocation requests.  They
+  //      always count towards virtual memory usage, and unless the
+  //      underlying memory is swapped out by the OS, they also count
+  //      towards physical memory usage.  This property is not writable.
+  //
+  // "tcmalloc.pageheap_unmapped_bytes"
+  //        Number of bytes in free, unmapped pages in page heap.
+  //        These are bytes that have been released back to the OS,
+  //        possibly by one of the MallocExtension "Release" calls.
+  //        They can be used to fulfill allocation requests, but
+  //        typically incur a page fault.  They always count towards
+  //        virtual memory usage, and depending on the OS, typically
+  //        do not count towards physical memory usage.  This property
+  //        is not writable.
+  // -------------------------------------------------------------------
+
+  // Get the named "property"'s value.  Returns true if the property
+  // is known.  Returns false if the property is not a valid property
+  // name for the current malloc implementation.
+  // REQUIRES: property != NULL; value != NULL
+  virtual bool GetNumericProperty(const char* property, size_t* value);
+
+  // Set the named "property"'s value.  Returns true if the property
+  // is known and writable.  Returns false if the property is not a
+  // valid property name for the current malloc implementation, or
+  // is not writable.
+  // REQUIRES: property != NULL
+  virtual bool SetNumericProperty(const char* property, size_t value);
+
+  // Mark the current thread as "idle".  This routine may optionally
+  // be called by threads as a hint to the malloc implementation that
+  // any thread-specific resources should be released.  Note: this may
+  // be an expensive routine, so it should not be called too often.
+  //
+  // Also, if the code that calls this routine will go to sleep for
+  // a while, it should take care to not allocate anything between
+  // the call to this routine and the beginning of the sleep.
+  //
+  // Most malloc implementations ignore this routine.
+  virtual void MarkThreadIdle();
+
+  // Mark the current thread as "busy".  This routine should be
+  // called after MarkThreadIdle() if the thread will now do more
+  // work.  If this method is not called, performance may suffer.
+  //
+  // Most malloc implementations ignore this routine.
+  virtual void MarkThreadBusy();
+
+  // Gets the system allocator used by the malloc extension instance. Returns
+  // NULL for malloc implementations that do not support pluggable system
+  // allocators.
+  virtual SysAllocator* GetSystemAllocator();
+
+  // Sets the system allocator to the specified.
+  //
+  // Users could register their own system allocators for malloc implementation
+  // that supports pluggable system allocators, such as TCMalloc, by doing:
+  //   alloc = new MyOwnSysAllocator();
+  //   MallocExtension::instance()->SetSystemAllocator(alloc);
+  // It's up to users whether to fall back (recommended) to the default
+  // system allocator (use GetSystemAllocator() above) or not. The caller is
+  // responsible to any necessary locking.
+  // See tcmalloc/system-alloc.h for the interface and
+  //     tcmalloc/memfs_malloc.cc for the examples.
+  //
+  // It's a no-op for malloc implementations that do not support pluggable
+  // system allocators.
+  virtual void SetSystemAllocator(SysAllocator *a);
+
+  // Try to release num_bytes of free memory back to the operating
+  // system for reuse.  Use this extension with caution -- to get this
+  // memory back may require faulting pages back in by the OS, and
+  // that may be slow.  (Currently only implemented in tcmalloc.)
+  virtual void ReleaseToSystem(size_t num_bytes);
+
+  // Same as ReleaseToSystem() but release as much memory as possible.
+  virtual void ReleaseFreeMemory();
+
+  // Sets the rate at which we release unused memory to the system.
+  // Zero means we never release memory back to the system.  Increase
+  // this flag to return memory faster; decrease it to return memory
+  // slower.  Reasonable rates are in the range [0,10].  (Currently
+  // only implemented in tcmalloc).
+  virtual void SetMemoryReleaseRate(double rate);
+
+  // Gets the release rate.  Returns a value < 0 if unknown.
+  virtual double GetMemoryReleaseRate();
+
+  // Returns the estimated number of bytes that will be allocated for
+  // a request of "size" bytes.  This is an estimate: an allocation of
+  // SIZE bytes may reserve more bytes, but will never reserve less.
+  // (Currently only implemented in tcmalloc, other implementations
+  // always return SIZE.)
+  // This is equivalent to malloc_good_size() in OS X.
+  virtual size_t GetEstimatedAllocatedSize(size_t size);
+
+  // Returns the actual number N of bytes reserved by tcmalloc for the
+  // pointer p.  The client is allowed to use the range of bytes
+  // [p, p+N) in any way it wishes (i.e. N is the "usable size" of this
+  // allocation).  This number may be equal to or greater than the number
+  // of bytes requested when p was allocated.
+  // p must have been allocated by this malloc implementation,
+  // must not be an interior pointer -- that is, must be exactly
+  // the pointer returned to by malloc() et al., not some offset
+  // from that -- and should not have been freed yet.  p may be NULL.
+  // (Currently only implemented in tcmalloc; other implementations
+  // will return 0.)
+  // This is equivalent to malloc_size() in OS X, malloc_usable_size()
+  // in glibc, and _msize() for windows.
+  virtual size_t GetAllocatedSize(const void* p);
+
+  // Returns kOwned if this malloc implementation allocated the memory
+  // pointed to by p, or kNotOwned if some other malloc implementation
+  // allocated it or p is NULL.  May also return kUnknownOwnership if
+  // the malloc implementation does not keep track of ownership.
+  // REQUIRES: p must be a value returned from a previous call to
+  // malloc(), calloc(), realloc(), memalign(), posix_memalign(),
+  // valloc(), pvalloc(), new, or new[], and must refer to memory that
+  // is currently allocated (so, for instance, you should not pass in
+  // a pointer after having called free() on it).
+  enum Ownership {
+    // NOTE: Enum values MUST be kept in sync with the version in
+    // malloc_extension_c.h
+    kUnknownOwnership = 0,
+    kOwned,
+    kNotOwned
+  };
+  virtual Ownership GetOwnership(const void* p);
+
+  // The current malloc implementation.  Always non-NULL.
+  static MallocExtension* instance();
+
+  // Change the malloc implementation.  Typically called by the
+  // malloc implementation during initialization.
+  static void Register(MallocExtension* implementation);
+
+  // Returns detailed information about malloc's freelists. For each list,
+  // return a FreeListInfo:
+  struct FreeListInfo {
+    size_t min_object_size;
+    size_t max_object_size;
+    size_t total_bytes_free;
+    const char* type;
+  };
+  // Each item in the vector refers to a different freelist. The lists
+  // are identified by the range of allocations that objects in the
+  // list can satisfy ([min_object_size, max_object_size]) and the
+  // type of freelist (see below). The current size of the list is
+  // returned in total_bytes_free (which count against a processes
+  // resident and virtual size).
+  //
+  // Currently supported types are:
+  //
+  // "tcmalloc.page{_unmapped}" - tcmalloc's page heap. An entry for each size
+  //          class in the page heap is returned. Bytes in "page_unmapped"
+  //          are no longer backed by physical memory and do not count against
+  //          the resident size of a process.
+  //
+  // "tcmalloc.large{_unmapped}" - tcmalloc's list of objects larger
+  //          than the largest page heap size class. Only one "large"
+  //          entry is returned. There is no upper-bound on the size
+  //          of objects in the large free list; this call returns
+  //          kint64max for max_object_size.  Bytes in
+  //          "large_unmapped" are no longer backed by physical memory
+  //          and do not count against the resident size of a process.
+  //
+  // "tcmalloc.central" - tcmalloc's central free-list. One entry per
+  //          size-class is returned. Never unmapped.
+  //
+  // "debug.free_queue" - free objects queued by the debug allocator
+  //                      and not returned to tcmalloc.
+  //
+  // "tcmalloc.thread" - tcmalloc's per-thread caches. Never unmapped.
+  virtual void GetFreeListSizes(std::vector<FreeListInfo>* v);
+
+  // Get a list of stack traces of sampled allocation points.  Returns
+  // a pointer to a "new[]-ed" result array, and stores the sample
+  // period in "sample_period".
+  //
+  // The state is stored as a sequence of adjacent entries
+  // in the returned array.  Each entry has the following form:
+  //    uintptr_t count;        // Number of objects with following trace
+  //    uintptr_t size;         // Total size of objects with following trace
+  //    uintptr_t depth;        // Number of PC values in stack trace
+  //    void*     stack[depth]; // PC values that form the stack trace
+  //
+  // The list of entries is terminated by a "count" of 0.
+  //
+  // It is the responsibility of the caller to "delete[]" the returned array.
+  //
+  // May return NULL to indicate no results.
+  //
+  // This is an internal extension.  Callers should use the more
+  // convenient "GetHeapSample(string*)" method defined above.
+  virtual void** ReadStackTraces(int* sample_period);
+
+  // Like ReadStackTraces(), but returns stack traces that caused growth
+  // in the address space size.
+  virtual void** ReadHeapGrowthStackTraces();
+};
+
+namespace base {
+
+// Information passed per range.  More fields may be added later.
+struct MallocRange {
+  enum Type {
+    INUSE,                // Application is using this range
+    FREE,                 // Range is currently free
+    UNMAPPED,             // Backing physical memory has been returned to the OS
+    UNKNOWN,
+    // More enum values may be added in the future
+  };
+
+  uintptr_t address;    // Address of range
+  size_t length;        // Byte length of range
+  Type type;            // Type of this range
+  double fraction;      // Fraction of range that is being used (0 if !INUSE)
+
+  // Perhaps add the following:
+  // - stack trace if this range was sampled
+  // - heap growth stack trace if applicable to this range
+  // - age when allocated (for inuse) or freed (if not in use)
+};
+
+} // namespace base
+
+#endif  // BASE_MALLOC_EXTENSION_H_
diff --git a/third_party/tcmalloc/vendor/src/gperftools/malloc_extension_c.h b/third_party/tcmalloc/vendor/src/gperftools/malloc_extension_c.h
new file mode 100644
index 0000000..72a0a7c
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/gperftools/malloc_extension_c.h
@@ -0,0 +1,99 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * --
+ * Author: Craig Silverstein
+ *
+ * C shims for the C++ malloc_extension.h.  See malloc_extension.h for
+ * details.  Note these C shims always work on
+ * MallocExtension::instance(); it is not possible to have more than
+ * one MallocExtension object in C applications.
+ */
+
+#ifndef _MALLOC_EXTENSION_C_H_
+#define _MALLOC_EXTENSION_C_H_
+
+#include <stddef.h>
+#include <sys/types.h>
+
+/* Annoying stuff for windows -- makes sure clients can import these fns */
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define kMallocExtensionHistogramSize 64
+
+PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void);
+PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(const void* p);
+PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(const void* p);
+PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(const void* p);
+PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total,
+                                      int histogram[kMallocExtensionHistogramSize]);
+PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length);
+
+/* TODO(csilvers): write a C version of these routines, that perhaps
+ * takes a function ptr and a void *.
+ */
+/* void MallocExtension_GetHeapSample(string* result); */
+/* void MallocExtension_GetHeapGrowthStacks(string* result); */
+
+PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, size_t* value);
+PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value);
+PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void);
+PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void);
+PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes);
+PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void);
+PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size);
+PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(const void* p);
+
+/*
+ * NOTE: These enum values MUST be kept in sync with the version in
+ *       malloc_extension.h
+ */
+typedef enum {
+  MallocExtension_kUnknownOwnership = 0,
+  MallocExtension_kOwned,
+  MallocExtension_kNotOwned
+} MallocExtension_Ownership;
+
+PERFTOOLS_DLL_DECL MallocExtension_Ownership MallocExtension_GetOwnership(const void* p);
+
+#ifdef __cplusplus
+}   // extern "C"
+#endif
+
+#endif /* _MALLOC_EXTENSION_C_H_ */
diff --git a/third_party/tcmalloc/vendor/src/gperftools/malloc_hook.h b/third_party/tcmalloc/vendor/src/gperftools/malloc_hook.h
new file mode 100644
index 0000000..b99c047
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/gperftools/malloc_hook.h
@@ -0,0 +1,358 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//
+// Some of our malloc implementations can invoke the following hooks whenever
+// memory is allocated or deallocated.  MallocHook is thread-safe, and things
+// you do before calling AddFooHook(MyHook) are visible to any resulting calls
+// to MyHook.  Hooks must be thread-safe.  If you write:
+//
+//   CHECK(MallocHook::AddNewHook(&MyNewHook));
+//
+// MyNewHook will be invoked in subsequent calls in the current thread, but
+// there are no guarantees on when it might be invoked in other threads.
+//
+// There are a limited number of slots available for each hook type.  Add*Hook
+// will return false if there are no slots available.  Remove*Hook will return
+// false if the given hook was not already installed.
+//
+// The order in which individual hooks are called in Invoke*Hook is undefined.
+//
+// It is safe for a hook to remove itself within Invoke*Hook and add other
+// hooks.  Any hooks added inside a hook invocation (for the same hook type)
+// will not be invoked for the current invocation.
+//
+// One important user of these hooks is the heap profiler.
+//
+// CAVEAT: If you add new MallocHook::Invoke* calls then those calls must be
+// directly in the code of the (de)allocation function that is provided to the
+// user and that function must have an ATTRIBUTE_SECTION(malloc_hook) attribute.
+//
+// Note: the Invoke*Hook() functions are defined in malloc_hook-inl.h.  If you
+// need to invoke a hook (which you shouldn't unless you're part of tcmalloc),
+// be sure to #include malloc_hook-inl.h in addition to malloc_hook.h.
+//
+// NOTE FOR C USERS: If you want to use malloc_hook functionality from
+// a C program, #include malloc_hook_c.h instead of this file.
+
+#ifndef _MALLOC_HOOK_H_
+#define _MALLOC_HOOK_H_
+
+#include <stddef.h>
+#include <sys/types.h>
+extern "C" {
+#include <gperftools/malloc_hook_c.h>  // a C version of the malloc_hook interface
+}
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+// The C++ methods below call the C version (MallocHook_*), and thus
+// convert between an int and a bool.  Windows complains about this
+// (a "performance warning") which we don't care about, so we suppress.
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable:4800)
+#endif
+
+// Note: malloc_hook_c.h defines MallocHook_*Hook and
+// MallocHook_{Add,Remove}*Hook.  The version of these inside the MallocHook
+// class are defined in terms of the malloc_hook_c version.  See malloc_hook_c.h
+// for details of these types/functions.
+
+class PERFTOOLS_DLL_DECL MallocHook {
+ public:
+  // The NewHook is invoked whenever an object is allocated.
+  // It may be passed NULL if the allocator returned NULL.
+  typedef MallocHook_NewHook NewHook;
+  inline static bool AddNewHook(NewHook hook) {
+    return MallocHook_AddNewHook(hook);
+  }
+  inline static bool RemoveNewHook(NewHook hook) {
+    return MallocHook_RemoveNewHook(hook);
+  }
+  inline static void InvokeNewHook(const void* p, size_t s);
+
+  // The DeleteHook is invoked whenever an object is deallocated.
+  // It may be passed NULL if the caller is trying to delete NULL.
+  typedef MallocHook_DeleteHook DeleteHook;
+  inline static bool AddDeleteHook(DeleteHook hook) {
+    return MallocHook_AddDeleteHook(hook);
+  }
+  inline static bool RemoveDeleteHook(DeleteHook hook) {
+    return MallocHook_RemoveDeleteHook(hook);
+  }
+  inline static void InvokeDeleteHook(const void* p);
+
+  // The PreMmapHook is invoked with mmap or mmap64 arguments just
+  // before the call is actually made.  Such a hook may be useful
+  // in memory limited contexts, to catch allocations that will exceed
+  // a memory limit, and take outside actions to increase that limit.
+  typedef MallocHook_PreMmapHook PreMmapHook;
+  inline static bool AddPreMmapHook(PreMmapHook hook) {
+    return MallocHook_AddPreMmapHook(hook);
+  }
+  inline static bool RemovePreMmapHook(PreMmapHook hook) {
+    return MallocHook_RemovePreMmapHook(hook);
+  }
+  inline static void InvokePreMmapHook(const void* start,
+                                       size_t size,
+                                       int protection,
+                                       int flags,
+                                       int fd,
+                                       off_t offset);
+
+  // The MmapReplacement is invoked after the PreMmapHook but before
+  // the call is actually made. The MmapReplacement should return true
+  // if it handled the call, or false if it is still necessary to
+  // call mmap/mmap64.
+  // This should be used only by experts, and users must be be
+  // extremely careful to avoid recursive calls to mmap. The replacement
+  // should be async signal safe.
+  // Only one MmapReplacement is supported. After setting an MmapReplacement
+  // you must call RemoveMmapReplacement before calling SetMmapReplacement
+  // again.
+  typedef MallocHook_MmapReplacement MmapReplacement;
+  inline static bool SetMmapReplacement(MmapReplacement hook) {
+    return MallocHook_SetMmapReplacement(hook);
+  }
+  inline static bool RemoveMmapReplacement(MmapReplacement hook) {
+    return MallocHook_RemoveMmapReplacement(hook);
+  }
+  inline static bool InvokeMmapReplacement(const void* start,
+                                           size_t size,
+                                           int protection,
+                                           int flags,
+                                           int fd,
+                                           off_t offset,
+                                           void** result);
+
+
+  // The MmapHook is invoked whenever a region of memory is mapped.
+  // It may be passed MAP_FAILED if the mmap failed.
+  typedef MallocHook_MmapHook MmapHook;
+  inline static bool AddMmapHook(MmapHook hook) {
+    return MallocHook_AddMmapHook(hook);
+  }
+  inline static bool RemoveMmapHook(MmapHook hook) {
+    return MallocHook_RemoveMmapHook(hook);
+  }
+  inline static void InvokeMmapHook(const void* result,
+                                    const void* start,
+                                    size_t size,
+                                    int protection,
+                                    int flags,
+                                    int fd,
+                                    off_t offset);
+
+  // The MunmapReplacement is invoked with munmap arguments just before
+  // the call is actually made. The MunmapReplacement should return true
+  // if it handled the call, or false if it is still necessary to
+  // call munmap.
+  // This should be used only by experts. The replacement should be
+  // async signal safe.
+  // Only one MunmapReplacement is supported. After setting an
+  // MunmapReplacement you must call RemoveMunmapReplacement before
+  // calling SetMunmapReplacement again.
+  typedef MallocHook_MunmapReplacement MunmapReplacement;
+  inline static bool SetMunmapReplacement(MunmapReplacement hook) {
+    return MallocHook_SetMunmapReplacement(hook);
+  }
+  inline static bool RemoveMunmapReplacement(MunmapReplacement hook) {
+    return MallocHook_RemoveMunmapReplacement(hook);
+  }
+  inline static bool InvokeMunmapReplacement(const void* p,
+                                             size_t size,
+                                             int* result);
+
+  // The MunmapHook is invoked whenever a region of memory is unmapped.
+  typedef MallocHook_MunmapHook MunmapHook;
+  inline static bool AddMunmapHook(MunmapHook hook) {
+    return MallocHook_AddMunmapHook(hook);
+  }
+  inline static bool RemoveMunmapHook(MunmapHook hook) {
+    return MallocHook_RemoveMunmapHook(hook);
+  }
+  inline static void InvokeMunmapHook(const void* p, size_t size);
+
+  // The MremapHook is invoked whenever a region of memory is remapped.
+  typedef MallocHook_MremapHook MremapHook;
+  inline static bool AddMremapHook(MremapHook hook) {
+    return MallocHook_AddMremapHook(hook);
+  }
+  inline static bool RemoveMremapHook(MremapHook hook) {
+    return MallocHook_RemoveMremapHook(hook);
+  }
+  inline static void InvokeMremapHook(const void* result,
+                                      const void* old_addr,
+                                      size_t old_size,
+                                      size_t new_size,
+                                      int flags,
+                                      const void* new_addr);
+
+  // The PreSbrkHook is invoked just before sbrk is called -- except when
+  // the increment is 0.  This is because sbrk(0) is often called
+  // to get the top of the memory stack, and is not actually a
+  // memory-allocation call.  It may be useful in memory-limited contexts,
+  // to catch allocations that will exceed the limit and take outside
+  // actions to increase such a limit.
+  typedef MallocHook_PreSbrkHook PreSbrkHook;
+  inline static bool AddPreSbrkHook(PreSbrkHook hook) {
+    return MallocHook_AddPreSbrkHook(hook);
+  }
+  inline static bool RemovePreSbrkHook(PreSbrkHook hook) {
+    return MallocHook_RemovePreSbrkHook(hook);
+  }
+  inline static void InvokePreSbrkHook(ptrdiff_t increment);
+
+  // The SbrkHook is invoked whenever sbrk is called -- except when
+  // the increment is 0.  This is because sbrk(0) is often called
+  // to get the top of the memory stack, and is not actually a
+  // memory-allocation call.
+  typedef MallocHook_SbrkHook SbrkHook;
+  inline static bool AddSbrkHook(SbrkHook hook) {
+    return MallocHook_AddSbrkHook(hook);
+  }
+  inline static bool RemoveSbrkHook(SbrkHook hook) {
+    return MallocHook_RemoveSbrkHook(hook);
+  }
+  inline static void InvokeSbrkHook(const void* result, ptrdiff_t increment);
+
+  // Get the current stack trace.  Try to skip all routines up to and
+  // and including the caller of MallocHook::Invoke*.
+  // Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
+  // as a hint about how many routines to skip if better information
+  // is not available.
+  inline static int GetCallerStackTrace(void** result, int max_depth,
+                                        int skip_count) {
+    return MallocHook_GetCallerStackTrace(result, max_depth, skip_count);
+  }
+
+  // Unhooked versions of mmap() and munmap().   These should be used
+  // only by experts, since they bypass heapchecking, etc.
+  // Note: These do not run hooks, but they still use the MmapReplacement
+  // and MunmapReplacement.
+  static void* UnhookedMMap(void *start, size_t length, int prot, int flags,
+                            int fd, off_t offset);
+  static int UnhookedMUnmap(void *start, size_t length);
+
+  // The following are DEPRECATED.
+  inline static NewHook GetNewHook();
+  inline static NewHook SetNewHook(NewHook hook) {
+    return MallocHook_SetNewHook(hook);
+  }
+
+  inline static DeleteHook GetDeleteHook();
+  inline static DeleteHook SetDeleteHook(DeleteHook hook) {
+    return MallocHook_SetDeleteHook(hook);
+  }
+
+  inline static PreMmapHook GetPreMmapHook();
+  inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) {
+    return MallocHook_SetPreMmapHook(hook);
+  }
+
+  inline static MmapHook GetMmapHook();
+  inline static MmapHook SetMmapHook(MmapHook hook) {
+    return MallocHook_SetMmapHook(hook);
+  }
+
+  inline static MunmapHook GetMunmapHook();
+  inline static MunmapHook SetMunmapHook(MunmapHook hook) {
+    return MallocHook_SetMunmapHook(hook);
+  }
+
+  inline static MremapHook GetMremapHook();
+  inline static MremapHook SetMremapHook(MremapHook hook) {
+    return MallocHook_SetMremapHook(hook);
+  }
+
+  inline static PreSbrkHook GetPreSbrkHook();
+  inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) {
+    return MallocHook_SetPreSbrkHook(hook);
+  }
+
+  inline static SbrkHook GetSbrkHook();
+  inline static SbrkHook SetSbrkHook(SbrkHook hook) {
+    return MallocHook_SetSbrkHook(hook);
+  }
+  // End of DEPRECATED methods.
+
+ private:
+  // Slow path versions of Invoke*Hook.
+  static void InvokeNewHookSlow(const void* p, size_t s);
+  static void InvokeDeleteHookSlow(const void* p);
+  static void InvokePreMmapHookSlow(const void* start,
+                                    size_t size,
+                                    int protection,
+                                    int flags,
+                                    int fd,
+                                    off_t offset);
+  static void InvokeMmapHookSlow(const void* result,
+                                 const void* start,
+                                 size_t size,
+                                 int protection,
+                                 int flags,
+                                 int fd,
+                                 off_t offset);
+  static bool InvokeMmapReplacementSlow(const void* start,
+                                        size_t size,
+                                        int protection,
+                                        int flags,
+                                        int fd,
+                                        off_t offset,
+                                        void** result);
+  static void InvokeMunmapHookSlow(const void* p, size_t size);
+  static bool InvokeMunmapReplacementSlow(const void* p,
+                                          size_t size,
+                                          int* result);
+  static void InvokeMremapHookSlow(const void* result,
+                                   const void* old_addr,
+                                   size_t old_size,
+                                   size_t new_size,
+                                   int flags,
+                                   const void* new_addr);
+  static void InvokePreSbrkHookSlow(ptrdiff_t increment);
+  static void InvokeSbrkHookSlow(const void* result, ptrdiff_t increment);
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+
+#endif /* _MALLOC_HOOK_H_ */
diff --git a/third_party/tcmalloc/vendor/src/gperftools/malloc_hook_c.h b/third_party/tcmalloc/vendor/src/gperftools/malloc_hook_c.h
new file mode 100644
index 0000000..56337e1
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/gperftools/malloc_hook_c.h
@@ -0,0 +1,173 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * --
+ * Author: Craig Silverstein
+ *
+ * C shims for the C++ malloc_hook.h.  See malloc_hook.h for details
+ * on how to use these.
+ */
+
+#ifndef _MALLOC_HOOK_C_H_
+#define _MALLOC_HOOK_C_H_
+
+#include <stddef.h>
+#include <sys/types.h>
+
+/* Annoying stuff for windows; makes sure clients can import these functions */
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Get the current stack trace.  Try to skip all routines up to and
+ * and including the caller of MallocHook::Invoke*.
+ * Use "skip_count" (similarly to GetStackTrace from stacktrace.h)
+ * as a hint about how many routines to skip if better information
+ * is not available.
+ */
+PERFTOOLS_DLL_DECL
+int MallocHook_GetCallerStackTrace(void** result, int max_depth,
+                                   int skip_count);
+
+/* The MallocHook_{Add,Remove}*Hook functions return 1 on success and 0 on
+ * failure.
+ */
+
+typedef void (*MallocHook_NewHook)(const void* ptr, size_t size);
+PERFTOOLS_DLL_DECL
+int MallocHook_AddNewHook(MallocHook_NewHook hook);
+PERFTOOLS_DLL_DECL
+int MallocHook_RemoveNewHook(MallocHook_NewHook hook);
+
+typedef void (*MallocHook_DeleteHook)(const void* ptr);
+PERFTOOLS_DLL_DECL
+int MallocHook_AddDeleteHook(MallocHook_DeleteHook hook);
+PERFTOOLS_DLL_DECL
+int MallocHook_RemoveDeleteHook(MallocHook_DeleteHook hook);
+
+typedef void (*MallocHook_PreMmapHook)(const void *start,
+                                       size_t size,
+                                       int protection,
+                                       int flags,
+                                       int fd,
+                                       off_t offset);
+PERFTOOLS_DLL_DECL
+int MallocHook_AddPreMmapHook(MallocHook_PreMmapHook hook);
+PERFTOOLS_DLL_DECL
+int MallocHook_RemovePreMmapHook(MallocHook_PreMmapHook hook);
+
+typedef void (*MallocHook_MmapHook)(const void* result,
+                                    const void* start,
+                                    size_t size,
+                                    int protection,
+                                    int flags,
+                                    int fd,
+                                    off_t offset);
+PERFTOOLS_DLL_DECL
+int MallocHook_AddMmapHook(MallocHook_MmapHook hook);
+PERFTOOLS_DLL_DECL
+int MallocHook_RemoveMmapHook(MallocHook_MmapHook hook);
+
+typedef int (*MallocHook_MmapReplacement)(const void* start,
+                                          size_t size,
+                                          int protection,
+                                          int flags,
+                                          int fd,
+                                          off_t offset,
+                                          void** result);
+int MallocHook_SetMmapReplacement(MallocHook_MmapReplacement hook);
+int MallocHook_RemoveMmapReplacement(MallocHook_MmapReplacement hook);
+
+typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size);
+PERFTOOLS_DLL_DECL
+int MallocHook_AddMunmapHook(MallocHook_MunmapHook hook);
+PERFTOOLS_DLL_DECL
+int MallocHook_RemoveMunmapHook(MallocHook_MunmapHook hook);
+
+typedef int (*MallocHook_MunmapReplacement)(const void* ptr,
+                                            size_t size,
+                                            int* result);
+int MallocHook_SetMunmapReplacement(MallocHook_MunmapReplacement hook);
+int MallocHook_RemoveMunmapReplacement(MallocHook_MunmapReplacement hook);
+
+typedef void (*MallocHook_MremapHook)(const void* result,
+                                      const void* old_addr,
+                                      size_t old_size,
+                                      size_t new_size,
+                                      int flags,
+                                      const void* new_addr);
+PERFTOOLS_DLL_DECL
+int MallocHook_AddMremapHook(MallocHook_MremapHook hook);
+PERFTOOLS_DLL_DECL
+int MallocHook_RemoveMremapHook(MallocHook_MremapHook hook);
+
+typedef void (*MallocHook_PreSbrkHook)(ptrdiff_t increment);
+PERFTOOLS_DLL_DECL
+int MallocHook_AddPreSbrkHook(MallocHook_PreSbrkHook hook);
+PERFTOOLS_DLL_DECL
+int MallocHook_RemovePreSbrkHook(MallocHook_PreSbrkHook hook);
+
+typedef void (*MallocHook_SbrkHook)(const void* result, ptrdiff_t increment);
+PERFTOOLS_DLL_DECL
+int MallocHook_AddSbrkHook(MallocHook_SbrkHook hook);
+PERFTOOLS_DLL_DECL
+int MallocHook_RemoveSbrkHook(MallocHook_SbrkHook hook);
+
+/* The following are DEPRECATED. */
+PERFTOOLS_DLL_DECL
+MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook);
+PERFTOOLS_DLL_DECL
+MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook);
+PERFTOOLS_DLL_DECL
+MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook);
+PERFTOOLS_DLL_DECL
+MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook);
+PERFTOOLS_DLL_DECL
+MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook);
+PERFTOOLS_DLL_DECL
+MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook);
+PERFTOOLS_DLL_DECL
+MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook);
+PERFTOOLS_DLL_DECL
+MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook);
+/* End of DEPRECATED functions. */
+
+#ifdef __cplusplus
+}   // extern "C"
+#endif
+
+#endif /* _MALLOC_HOOK_C_H_ */
diff --git a/third_party/tcmalloc/vendor/src/gperftools/profiler.h b/third_party/tcmalloc/vendor/src/gperftools/profiler.h
new file mode 100644
index 0000000..7971e04
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/gperftools/profiler.h
@@ -0,0 +1,168 @@
+/* Copyright (c) 2005, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ *
+ * Module for CPU profiling based on periodic pc-sampling.
+ *
+ * For full(er) information, see doc/cpuprofile.html
+ *
+ * This module is linked into your program with
+ * no slowdown caused by this unless you activate the profiler
+ * using one of the following methods:
+ *
+ *    1. Before starting the program, set the environment variable
+ *       "PROFILE" to be the name of the file to which the profile
+ *       data should be written.
+ *
+ *    2. Programmatically, start and stop the profiler using the
+ *       routines "ProfilerStart(filename)" and "ProfilerStop()".
+ *
+ *
+ * (Note: if using linux 2.4 or earlier, only the main thread may be
+ * profiled.)
+ *
+ * Use pprof to view the resulting profile output.
+ *    % pprof <path_to_executable> <profile_file_name>
+ *    % pprof --gv  <path_to_executable> <profile_file_name>
+ *
+ * These functions are thread-safe.
+ */
+
+#ifndef BASE_PROFILER_H_
+#define BASE_PROFILER_H_
+
+#include <time.h>       /* For time_t */
+
+/* Annoying stuff for windows; makes sure clients can import these functions */
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+/* All this code should be usable from within C apps. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Profiler options, for use with ProfilerStartWithOptions.  To use:
+ *
+ *   struct ProfilerOptions options;
+ *   memset(&options, 0, sizeof options);
+ *
+ * then fill in fields as needed.
+ *
+ * This structure is intended to be usable from C code, so no constructor
+ * is provided to initialize it.  (Use memset as described above).
+ */
+struct ProfilerOptions {
+  /* Filter function and argument.
+   *
+   * If filter_in_thread is not NULL, when a profiling tick is delivered
+   * the profiler will call:
+   *
+   *   (*filter_in_thread)(filter_in_thread_arg)
+   *
+   * If it returns nonzero, the sample will be included in the profile.
+   * Note that filter_in_thread runs in a signal handler, so must be
+   * async-signal-safe.
+   *
+   * A typical use would be to set up filter results for each thread
+   * in the system before starting the profiler, then to make
+   * filter_in_thread be a very simple function which retrieves those
+   * results in an async-signal-safe way.  Retrieval could be done
+   * using thread-specific data, or using a shared data structure that
+   * supports async-signal-safe lookups.
+   */
+  int (*filter_in_thread)(void *arg);
+  void *filter_in_thread_arg;
+};
+
+/* Start profiling and write profile info into fname, discarding any
+ * existing profiling data in that file.
+ *
+ * This is equivalent to calling ProfilerStartWithOptions(fname, NULL).
+ */
+PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname);
+
+/* Start profiling and write profile into fname, discarding any
+ * existing profiling data in that file.
+ *
+ * The profiler is configured using the options given by 'options'.
+ * Options which are not specified are given default values.
+ *
+ * 'options' may be NULL, in which case all are given default values.
+ *
+ * Returns nonzero if profiling was started sucessfully, or zero else.
+ */
+PERFTOOLS_DLL_DECL int ProfilerStartWithOptions(
+    const char *fname, const struct ProfilerOptions *options);
+
+/* Stop profiling. Can be started again with ProfilerStart(), but
+ * the currently accumulated profiling data will be cleared.
+ */
+PERFTOOLS_DLL_DECL void ProfilerStop();
+
+/* Flush any currently buffered profiling state to the profile file.
+ * Has no effect if the profiler has not been started.
+ */
+PERFTOOLS_DLL_DECL void ProfilerFlush();
+
+
+/* DEPRECATED: these functions were used to enable/disable profiling
+ * in the current thread, but no longer do anything.
+ */
+PERFTOOLS_DLL_DECL void ProfilerEnable();
+PERFTOOLS_DLL_DECL void ProfilerDisable();
+
+/* Returns nonzero if profile is currently enabled, zero if it's not. */
+PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads();
+
+/* Routine for registering new threads with the profiler.
+ */
+PERFTOOLS_DLL_DECL void ProfilerRegisterThread();
+
+/* Stores state about profiler's current status into "*state". */
+struct ProfilerState {
+  int    enabled;             /* Is profiling currently enabled? */
+  time_t start_time;          /* If enabled, when was profiling started? */
+  char   profile_name[1024];  /* Name of profile file being written, or '\0' */
+  int    samples_gathered;    /* Number of samples gathered so far (or 0) */
+};
+PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  /* BASE_PROFILER_H_ */
diff --git a/third_party/tcmalloc/vendor/src/gperftools/stacktrace.h b/third_party/tcmalloc/vendor/src/gperftools/stacktrace.h
new file mode 100644
index 0000000..fd186d6
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/gperftools/stacktrace.h
@@ -0,0 +1,116 @@
+// Copyright (c) 2005, Google Inc.
+// All rights reserved.
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+// 
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+//
+// Routines to extract the current stack trace.  These functions are
+// thread-safe.
+
+#ifndef GOOGLE_STACKTRACE_H_
+#define GOOGLE_STACKTRACE_H_
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+
+// Skips the most recent "skip_count" stack frames (also skips the
+// frame generated for the "GetStackFrames" routine itself), and then
+// records the pc values for up to the next "max_depth" frames in
+// "result", and the corresponding stack frame sizes in "sizes".
+// Returns the number of values recorded in "result"/"sizes".
+//
+// Example:
+//      main() { foo(); }
+//      foo() { bar(); }
+//      bar() {
+//        void* result[10];
+//        int sizes[10];
+//        int depth = GetStackFrames(result, sizes, 10, 1);
+//      }
+//
+// The GetStackFrames call will skip the frame for "bar".  It will
+// return 2 and will produce pc values that map to the following
+// procedures:
+//      result[0]       foo
+//      result[1]       main
+// (Actually, there may be a few more entries after "main" to account for
+// startup procedures.)
+// And corresponding stack frame sizes will also be recorded:
+//    sizes[0]       16
+//    sizes[1]       16
+// (Stack frame sizes of 16 above are just for illustration purposes.)
+// Stack frame sizes of 0 or less indicate that those frame sizes couldn't
+// be identified.
+//
+// This routine may return fewer stack frame entries than are
+// available. Also note that "result" and "sizes" must both be non-NULL.
+extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth,
+                          int skip_count);
+
+// Same as above, but to be used from a signal handler. The "uc" parameter
+// should be the pointer to ucontext_t which was passed as the 3rd parameter
+// to sa_sigaction signal handler. It may help the unwinder to get a
+// better stack trace under certain conditions. The "uc" may safely be NULL.
+extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth,
+                                     int skip_count, const void *uc);
+
+// This is similar to the GetStackFrames routine, except that it returns
+// the stack trace only, and not the stack frame sizes as well.
+// Example:
+//      main() { foo(); }
+//      foo() { bar(); }
+//      bar() {
+//        void* result[10];
+//        int depth = GetStackTrace(result, 10, 1);
+//      }
+//
+// This produces:
+//      result[0]       foo
+//      result[1]       main
+//           ....       ...
+//
+// "result" must not be NULL.
+extern PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth,
+                                            int skip_count);
+
+// Same as above, but to be used from a signal handler. The "uc" parameter
+// should be the pointer to ucontext_t which was passed as the 3rd parameter
+// to sa_sigaction signal handler. It may help the unwinder to get a
+// better stack trace under certain conditions. The "uc" may safely be NULL.
+extern PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth,
+                                    int skip_count, const void *uc);
+
+#endif /* GOOGLE_STACKTRACE_H_ */
diff --git a/third_party/tcmalloc/vendor/src/google/tcmalloc.h.in b/third_party/tcmalloc/vendor/src/gperftools/tcmalloc.h.in
index c887559..dbca6ec 100644
--- a/third_party/tcmalloc/vendor/src/google/tcmalloc.h.in
+++ b/third_party/tcmalloc/vendor/src/gperftools/tcmalloc.h.in
@@ -51,7 +51,7 @@
 #define TC_VERSION_MAJOR  @TC_VERSION_MAJOR@
 #define TC_VERSION_MINOR  @TC_VERSION_MINOR@
 #define TC_VERSION_PATCH  "@TC_VERSION_PATCH@"
-#define TC_VERSION_STRING "google-perftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@"
+#define TC_VERSION_STRING "gperftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@"
 
 #include <stdlib.h>   // for struct mallinfo, if it's defined
 
diff --git a/third_party/tcmalloc/vendor/src/heap-checker-bcad.cc b/third_party/tcmalloc/vendor/src/heap-checker-bcad.cc
index 82a3109..7ed6942 100644
--- a/third_party/tcmalloc/vendor/src/heap-checker-bcad.cc
+++ b/third_party/tcmalloc/vendor/src/heap-checker-bcad.cc
@@ -45,7 +45,7 @@
 // the allocated object is reachable from global data and hence "live").
 
 #include <stdlib.h>      // for abort()
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 
 // A dummy variable to refer from heap-checker.cc.  This is to make
 // sure this file is not optimized out by the linker.
diff --git a/third_party/tcmalloc/vendor/src/heap-checker.cc b/third_party/tcmalloc/vendor/src/heap-checker.cc
index 8473739..5967b02 100644
--- a/third_party/tcmalloc/vendor/src/heap-checker.cc
+++ b/third_party/tcmalloc/vendor/src/heap-checker.cc
@@ -52,7 +52,7 @@
 #include <time.h>
 #include <assert.h>
 
-#if defined(HAVE_LINUX_PTRACE_H) && !defined(__native_client__)
+#if defined(HAVE_LINUX_PTRACE_H)
 #include <linux/ptrace.h>
 #endif
 #ifdef HAVE_SYS_SYSCALL_H
@@ -73,20 +73,20 @@
 #include <algorithm>
 #include <functional>
 
-#include <google/heap-checker.h>
+#include <gperftools/heap-checker.h>
 
 #include "base/basictypes.h"
 #include "base/googleinit.h"
 #include "base/logging.h"
-#include <google/stacktrace.h>
+#include <gperftools/stacktrace.h>
 #include "base/commandlineflags.h"
 #include "base/elfcore.h"              // for i386_regs
 #include "base/thread_lister.h"
 #include "heap-profile-table.h"
 #include "base/low_level_alloc.h"
 #include "malloc_hook-inl.h"
-#include <google/malloc_hook.h>
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_hook.h>
+#include <gperftools/malloc_extension.h>
 #include "maybe_threads.h"
 #include "memory_region_map.h"
 #include "base/spinlock.h"
@@ -144,7 +144,7 @@ DEFINE_string(heap_check,
               "\"minimal\", \"normal\", \"strict\", "
               "\"draconian\", \"as-is\", and \"local\" "
               " or the empty string are the supported choices. "
-              "(See HeapLeakChecker::InternalInitStart for details.)");
+              "(See HeapLeakChecker_InternalInitStart for details.)");
 
 DEFINE_bool(heap_check_report, true, "Obsolete");
 
@@ -223,6 +223,10 @@ DEFINE_int32(heap_check_delay_seconds, 0,
              " its checks. Report any such issues to the heap-checker"
              " maintainer(s).");
 
+DEFINE_int32(heap_check_error_exit_code,
+             EnvToInt("HEAP_CHECK_ERROR_EXIT_CODE", 1),
+             "Exit code to return if any leaks were detected.");
+
 //----------------------------------------------------------------------
 
 DEFINE_string(heap_profile_pprof,
@@ -545,6 +549,23 @@ inline static uintptr_t AsInt(const void* ptr) {
 
 //----------------------------------------------------------------------
 
+// We've seen reports that strstr causes heap-checker crashes in some
+// libc's (?):
+//    http://code.google.com/p/gperftools/issues/detail?id=263
+// It's simple enough to use our own.  This is not in time-critical code.
+static const char* hc_strstr(const char* s1, const char* s2) {
+  const size_t len = strlen(s2);
+  RAW_CHECK(len > 0, "Unexpected empty string passed to strstr()");
+  for (const char* p = strchr(s1, *s2); p != NULL; p = strchr(p+1, *s2)) {
+    if (strncmp(p, s2, len) == 0) {
+      return p;
+    }
+  }
+  return NULL;
+}
+
+//----------------------------------------------------------------------
+
 // Our hooks for MallocHook
 static void NewHook(const void* ptr, size_t size) {
   if (ptr != NULL) {
@@ -552,6 +573,11 @@ static void NewHook(const void* ptr, size_t size) {
     const bool ignore = (counter > 0);
     RAW_VLOG(16, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size,
              int(counter));
+
+    // Fetch the caller's stack trace before acquiring heap_checker_lock.
+    void* stack[HeapProfileTable::kMaxStackDepth];
+    int depth = HeapProfileTable::GetCallerStackTrace(0, stack);
+
     { SpinLockHolder l(&heap_checker_lock);
       if (size > max_heap_object_size) max_heap_object_size = size;
       uintptr_t addr = AsInt(ptr);
@@ -559,7 +585,7 @@ static void NewHook(const void* ptr, size_t size) {
       addr += size;
       if (addr > max_heap_address) max_heap_address = addr;
       if (heap_checker_on) {
-        heap_profile->RecordAlloc(ptr, size, 0);
+        heap_profile->RecordAlloc(ptr, size, depth, stack);
         if (ignore) {
           heap_profile->MarkAsIgnored(ptr);
         }
@@ -762,6 +788,8 @@ static void MakeDisabledLiveCallbackLocked(
   }
 }
 
+static const char kUnnamedProcSelfMapEntry[] = "UNNAMED";
+
 // This function takes some fields from a /proc/self/maps line:
 //
 //   start_address  start address of a memory region.
@@ -779,7 +807,9 @@ static void RecordGlobalDataLocked(uintptr_t start_address,
   RAW_DCHECK(heap_checker_lock.IsHeld(), "");
   // Ignore non-writeable regions.
   if (strchr(permissions, 'w') == NULL) return;
-  if (filename == NULL  ||  *filename == '\0')  filename = "UNNAMED";
+  if (filename == NULL  ||  *filename == '\0') {
+    filename = kUnnamedProcSelfMapEntry;
+  }
   RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR,
               filename, start_address, end_address);
   (*library_live_objects)[filename].
@@ -791,7 +821,7 @@ static void RecordGlobalDataLocked(uintptr_t start_address,
 // See if 'library' from /proc/self/maps has base name 'library_base'
 // i.e. contains it and has '.' or '-' after it.
 static bool IsLibraryNamed(const char* library, const char* library_base) {
-  const char* p = strstr(library, library_base);
+  const char* p = hc_strstr(library, library_base);
   size_t sz = strlen(library_base);
   return p != NULL  &&  (p[sz] == '.'  ||  p[sz] == '-');
 }
@@ -903,11 +933,11 @@ HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked(
     if (inode != 0) {
       saw_nonzero_inode = true;
     }
-    if ((strstr(filename, "lib") && strstr(filename, ".so")) ||
-        strstr(filename, ".dll") ||
+    if ((hc_strstr(filename, "lib") && hc_strstr(filename, ".so")) ||
+        hc_strstr(filename, ".dll") ||
         // not all .dylib filenames start with lib. .dylib is big enough
         // that we are unlikely to get false matches just checking that.
-        strstr(filename, ".dylib") || strstr(filename, ".bundle")) {
+        hc_strstr(filename, ".dylib") || hc_strstr(filename, ".bundle")) {
       saw_shared_lib = true;
       if (inode != 0) {
         saw_shared_lib_with_nonzero_inode = true;
@@ -1383,6 +1413,31 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED);
       }
     }
     if (size < sizeof(void*)) continue;
+
+#ifdef NO_FRAME_POINTER
+    // Frame pointer omission requires us to use libunwind, which uses direct
+    // mmap and munmap system calls, and that needs special handling.
+    if (name2 == kUnnamedProcSelfMapEntry) {
+      static const uintptr_t page_mask = ~(getpagesize() - 1);
+      const uintptr_t addr = reinterpret_cast<uintptr_t>(object);
+      if ((addr & page_mask) == 0 && (size & page_mask) == 0) {
+        // This is an object we slurped from /proc/self/maps.
+        // It may or may not be readable at this point.
+        //
+        // In case all the above conditions made a mistake, and the object is
+        // not related to libunwind, we also verify that it's not readable
+        // before ignoring it.
+        if (msync(const_cast<char*>(object), size, MS_ASYNC) != 0) {
+          // Skip unreadable object, so we don't crash trying to sweep it.
+          RAW_VLOG(0, "Ignoring inaccessible object [%p, %p) "
+                   "(msync error %d (%s))",
+                   object, object + size, errno, strerror(errno));
+          continue;
+        }
+      }
+    }
+#endif
+
     const char* const max_object = object + size - sizeof(void*);
     while (object <= max_object) {
       // potentially unaligned load:
@@ -1455,7 +1510,7 @@ void HeapLeakChecker::DisableChecksIn(const char* pattern) {
 }
 
 // static
-void HeapLeakChecker::IgnoreObject(const void* ptr) {
+void HeapLeakChecker::DoIgnoreObject(const void* ptr) {
   SpinLockHolder l(&heap_checker_lock);
   if (!heap_checker_on) return;
   size_t object_size;
@@ -1859,16 +1914,18 @@ void HeapCleaner::RunHeapCleanups() {
   heap_cleanups_ = NULL;
 }
 
-// Program exit heap cleanup registered with atexit().
+// Program exit heap cleanup registered as a module object destructor.
 // Will not get executed when we crash on a signal.
 //
-/*static*/ void HeapLeakChecker::RunHeapCleanups() {
+void HeapLeakChecker_RunHeapCleanups() {
+  if (FLAGS_heap_check == "local")   // don't check heap in this mode
+    return;
   { SpinLockHolder l(&heap_checker_lock);
     // can get here (via forks?) with other pids
     if (heap_checker_pid != getpid()) return;
   }
   HeapCleaner::RunHeapCleanups();
-  if (!FLAGS_heap_check_after_destructors) DoMainHeapCheck();
+  if (!FLAGS_heap_check_after_destructors) HeapLeakChecker::DoMainHeapCheck();
 }
 
 static bool internal_init_start_has_run = false;
@@ -1883,21 +1940,26 @@ static bool internal_init_start_has_run = false;
 // We can't hold heap_checker_lock throughout because it would deadlock
 // on a memory allocation since our new/delete hooks can be on.
 //
-/*static*/ void HeapLeakChecker::InternalInitStart() {
+void HeapLeakChecker_InternalInitStart() {
   { SpinLockHolder l(&heap_checker_lock);
     RAW_CHECK(!internal_init_start_has_run,
               "Heap-check constructor called twice.  Perhaps you both linked"
               " in the heap checker, and also used LD_PRELOAD to load it?");
     internal_init_start_has_run = true;
 
+#ifdef ADDRESS_SANITIZER
+    // AddressSanitizer's custom malloc conflicts with HeapChecker.
+    FLAGS_heap_check = "";
+#endif
+
     if (FLAGS_heap_check.empty()) {
       // turns out we do not need checking in the end; can stop profiling
-      TurnItselfOffLocked();
+      HeapLeakChecker::TurnItselfOffLocked();
       return;
     } else if (RunningOnValgrind()) {
       // There is no point in trying -- we'll just fail.
       RAW_LOG(WARNING, "Can't run under Valgrind; will turn itself off");
-      TurnItselfOffLocked();
+      HeapLeakChecker::TurnItselfOffLocked();
       return;
     }
   }
@@ -1906,7 +1968,7 @@ static bool internal_init_start_has_run = false;
   if (!FLAGS_heap_check_run_under_gdb && IsDebuggerAttached()) {
     RAW_LOG(WARNING, "Someone is ptrace()ing us; will turn itself off");
     SpinLockHolder l(&heap_checker_lock);
-    TurnItselfOffLocked();
+    HeapLeakChecker::TurnItselfOffLocked();
     return;
   }
 
@@ -1957,15 +2019,25 @@ static bool internal_init_start_has_run = false;
     RAW_LOG(FATAL, "Unsupported heap_check flag: %s",
                    FLAGS_heap_check.c_str());
   }
+  // FreeBSD doesn't seem to honor atexit execution order:
+  //    http://code.google.com/p/gperftools/issues/detail?id=375
+  // Since heap-checking before destructors depends on atexit running
+  // at the right time, on FreeBSD we always check after, even in the
+  // less strict modes.  This just means FreeBSD is always a bit
+  // stricter in its checking than other OSes.
+#ifdef __FreeBSD__
+  FLAGS_heap_check_after_destructors = true;
+#endif
+
   { SpinLockHolder l(&heap_checker_lock);
     RAW_DCHECK(heap_checker_pid == getpid(), "");
     heap_checker_on = true;
     RAW_DCHECK(heap_profile, "");
-    ProcMapsResult pm_result = UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS);
+    HeapLeakChecker::ProcMapsResult pm_result = HeapLeakChecker::UseProcMapsLocked(HeapLeakChecker::DISABLE_LIBRARY_ALLOCS);
       // might neeed to do this more than once
       // if one later dynamically loads libraries that we want disabled
-    if (pm_result != PROC_MAPS_USED) {  // can't function
-      TurnItselfOffLocked();
+    if (pm_result != HeapLeakChecker::PROC_MAPS_USED) {  // can't function
+      HeapLeakChecker::TurnItselfOffLocked();
       return;
     }
   }
@@ -2019,8 +2091,6 @@ static bool internal_init_start_has_run = false;
            "-- Performance may suffer");
 
   if (FLAGS_heap_check != "local") {
-    // Schedule registered heap cleanup
-    atexit(RunHeapCleanups);
     HeapLeakChecker* main_hc = new HeapLeakChecker();
     SpinLockHolder l(&heap_checker_lock);
     RAW_DCHECK(main_heap_checker == NULL,
@@ -2053,7 +2123,20 @@ static bool internal_init_start_has_run = false;
 // We want this to run early as well, but not so early as
 // ::BeforeConstructors (we want flag assignments to have already
 // happened, for instance).  Initializer-registration does the trick.
-REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker::InternalInitStart());
+REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker_InternalInitStart());
+REGISTER_MODULE_DESTRUCTOR(init_start, HeapLeakChecker_RunHeapCleanups());
+
+// static
+bool HeapLeakChecker::NoGlobalLeaksMaybeSymbolize(
+    ShouldSymbolize should_symbolize) {
+  // we never delete or change main_heap_checker once it's set:
+  HeapLeakChecker* main_hc = GlobalChecker();
+  if (main_hc) {
+    RAW_VLOG(10, "Checking for whole-program memory leaks");
+    return main_hc->DoNoLeaks(should_symbolize);
+  }
+  return true;
+}
 
 // static
 bool HeapLeakChecker::DoMainHeapCheck() {
@@ -2066,13 +2149,20 @@ bool HeapLeakChecker::DoMainHeapCheck() {
     do_main_heap_check = false;  // will do it now; no need to do it more
   }
 
-  if (!NoGlobalLeaks()) {
+  // The program is over, so it's safe to symbolize addresses (which
+  // requires a fork) because no serious work is expected to be done
+  // after this.  Symbolizing is really useful -- knowing what
+  // function has a leak is better than knowing just an address --
+  // and while we can only safely symbolize once in a program run,
+  // now is the time (after all, there's no "later" that would be better).
+  if (!NoGlobalLeaksMaybeSymbolize(SYMBOLIZE)) {
     if (FLAGS_heap_check_identify_leaks) {
       RAW_LOG(FATAL, "Whole-program memory leaks found.");
     }
     RAW_LOG(ERROR, "Exiting with error code (instead of crashing) "
                    "because of whole-program memory leaks");
-    _exit(1);    // we don't want to call atexit() routines!
+    // We don't want to call atexit() routines!
+    _exit(FLAGS_heap_check_error_exit_code);
   }
   return true;
 }
@@ -2085,19 +2175,8 @@ HeapLeakChecker* HeapLeakChecker::GlobalChecker() {
 
 // static
 bool HeapLeakChecker::NoGlobalLeaks() {
-  // we never delete or change main_heap_checker once it's set:
-  HeapLeakChecker* main_hc = GlobalChecker();
-  if (main_hc) {
-    RAW_VLOG(10, "Checking for whole-program memory leaks");
-    // The program is over, so it's safe to symbolize addresses (which
-    // requires a fork) because no serious work is expected to be done
-    // after this.  Symbolizing is really useful -- knowing what
-    // function has a leak is better than knowing just an address --
-    // and while we can only safely symbolize once in a program run,
-    // now is the time (after all, there's no "later" that would be better).
-    return main_hc->DoNoLeaks(SYMBOLIZE);
-  }
-  return true;
+  // symbolizing requires a fork, which isn't safe to do in general.
+  return NoGlobalLeaksMaybeSymbolize(DO_NOT_SYMBOLIZE);
 }
 
 // static
@@ -2115,6 +2194,10 @@ void HeapLeakChecker::BeforeConstructorsLocked() {
   RAW_DCHECK(heap_checker_lock.IsHeld(), "");
   RAW_CHECK(!constructor_heap_profiling,
             "BeforeConstructorsLocked called multiple times");
+#ifdef ADDRESS_SANITIZER
+  // AddressSanitizer's custom malloc conflicts with HeapChecker.
+  return;
+#endif
   // Set hooks early to crash if 'new' gets called before we make heap_profile,
   // and make sure no other hooks existed:
   RAW_CHECK(MallocHook::AddNewHook(&NewHook), "");
diff --git a/third_party/tcmalloc/vendor/src/heap-profile-table.cc b/third_party/tcmalloc/vendor/src/heap-profile-table.cc
index 6d75c4a..8b2ed33 100644
--- a/third_party/tcmalloc/vendor/src/heap-profile-table.cc
+++ b/third_party/tcmalloc/vendor/src/heap-profile-table.cc
@@ -61,8 +61,9 @@
 #include "base/logging.h"
 #include "raw_printer.h"
 #include "symbolize.h"
-#include <google/stacktrace.h>
-#include <google/malloc_hook.h>
+#include <gperftools/stacktrace.h>
+#include <gperftools/malloc_hook.h>
+#include "memory_region_map.h"
 #include "base/commandlineflags.h"
 #include "base/logging.h"    // for the RawFD I/O commands
 #include "base/sysinfo.h"
@@ -98,7 +99,8 @@ const char HeapProfileTable::kFileExt[] = ".heap";
 
 //----------------------------------------------------------------------
 
-static const int kHashTableSize = 179999;   // Size for table_.
+// Size for alloc_table_ and mmap_table_.
+static const int kHashTableSize = 179999;
 /*static*/ const int HeapProfileTable::kMaxStackDepth;
 
 //----------------------------------------------------------------------
@@ -122,38 +124,60 @@ static bool ByAllocatedSpace(HeapProfileTable::Stats* a,
 
 HeapProfileTable::HeapProfileTable(Allocator alloc, DeAllocator dealloc)
     : alloc_(alloc), dealloc_(dealloc) {
-  // Make the table
-  const int table_bytes = kHashTableSize * sizeof(*table_);
-  table_ = reinterpret_cast<Bucket**>(alloc_(table_bytes));
-  memset(table_, 0, table_bytes);
-  // Make allocation map
-  allocation_ =
-    new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
-  // init the rest:
+  // Initialize the overall profile stats.
   memset(&total_, 0, sizeof(total_));
-  num_buckets_ = 0;
+
+  // Make the malloc table.
+  const int alloc_table_bytes = kHashTableSize * sizeof(*alloc_table_);
+  alloc_table_ = reinterpret_cast<Bucket**>(alloc_(alloc_table_bytes));
+  memset(alloc_table_, 0, alloc_table_bytes);
+  num_alloc_buckets_ = 0;
+
+  // Initialize the mmap table.
+  mmap_table_ = NULL;
+  num_available_mmap_buckets_ = 0;
+
+  // Make malloc and mmap allocation maps.
+  alloc_address_map_ =
+      new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
+  mmap_address_map_ = NULL;
 }
 
 HeapProfileTable::~HeapProfileTable() {
-  // free allocation map
-  allocation_->~AllocationMap();
-  dealloc_(allocation_);
-  allocation_ = NULL;
-  // free hash table
-  for (int b = 0; b < kHashTableSize; b++) {
-    for (Bucket* x = table_[b]; x != 0; /**/) {
-      Bucket* b = x;
-      x = x->next;
-      dealloc_(b->stack);
-      dealloc_(b);
+  DeallocateBucketTable(alloc_table_);
+  alloc_table_ = NULL;
+  DeallocateBucketTable(mmap_table_);
+  mmap_table_ = NULL;
+  DeallocateAllocationMap(alloc_address_map_);
+  alloc_address_map_ = NULL;
+  DeallocateAllocationMap(mmap_address_map_);
+  mmap_address_map_ = NULL;
+}
+
+void HeapProfileTable::DeallocateAllocationMap(AllocationMap* allocation) {
+  if (allocation != NULL) {
+    alloc_address_map_->~AllocationMap();
+    dealloc_(allocation);
+  }
+}
+
+void HeapProfileTable::DeallocateBucketTable(Bucket** table) {
+  if (table != NULL) {
+    for (int b = 0; b < kHashTableSize; b++) {
+      for (Bucket* x = table[b]; x != 0; /**/) {
+        Bucket* b = x;
+        x = x->next;
+        dealloc_(b->stack);
+        dealloc_(b);
+      }
     }
+    dealloc_(table);
   }
-  dealloc_(table_);
-  table_ = NULL;
 }
 
-HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth,
-                                                      const void* const key[]) {
+HeapProfileTable::Bucket* HeapProfileTable::GetBucket(
+    int depth, const void* const key[], Bucket** table,
+    int* bucket_count) {
   // Make hash-value
   uintptr_t h = 0;
   for (int i = 0; i < depth; i++) {
@@ -166,7 +190,7 @@ HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth,
 
   // Lookup stack trace in table
   unsigned int buck = ((unsigned int) h) % kHashTableSize;
-  for (Bucket* b = table_[buck]; b != 0; b = b->next) {
+  for (Bucket* b = table[buck]; b != 0; b = b->next) {
     if ((b->hash == h) &&
         (b->depth == depth) &&
         equal(key, key + depth, b->stack)) {
@@ -183,24 +207,25 @@ HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth,
   b->hash  = h;
   b->depth = depth;
   b->stack = kcopy;
-  b->next  = table_[buck];
-  table_[buck] = b;
-  num_buckets_++;
+  b->next  = table[buck];
+  table[buck] = b;
+  if (bucket_count != NULL) {
+    ++(*bucket_count);
+  }
   return b;
 }
 
-void HeapProfileTable::RecordAlloc(const void* ptr, size_t bytes,
-                                   int skip_count) {
-  void* key[kMaxStackDepth];
-  int depth = MallocHook::GetCallerStackTrace(
-    key, kMaxStackDepth, kStripFrames + skip_count + 1);
-  RecordAllocWithStack(ptr, bytes, depth, key);
+int HeapProfileTable::GetCallerStackTrace(
+    int skip_count, void* stack[kMaxStackDepth]) {
+  return MallocHook::GetCallerStackTrace(
+      stack, kMaxStackDepth, kStripFrames + skip_count + 1);
 }
 
-void HeapProfileTable::RecordAllocWithStack(
+void HeapProfileTable::RecordAlloc(
     const void* ptr, size_t bytes, int stack_depth,
     const void* const call_stack[]) {
-  Bucket* b = GetBucket(stack_depth, call_stack);
+  Bucket* b = GetBucket(stack_depth, call_stack, alloc_table_,
+                        &num_alloc_buckets_);
   b->allocs++;
   b->alloc_size += bytes;
   total_.allocs++;
@@ -209,12 +234,12 @@ void HeapProfileTable::RecordAllocWithStack(
   AllocValue v;
   v.set_bucket(b);  // also did set_live(false); set_ignore(false)
   v.bytes = bytes;
-  allocation_->Insert(ptr, v);
+  alloc_address_map_->Insert(ptr, v);
 }
 
 void HeapProfileTable::RecordFree(const void* ptr) {
   AllocValue v;
-  if (allocation_->FindAndRemove(ptr, &v)) {
+  if (alloc_address_map_->FindAndRemove(ptr, &v)) {
     Bucket* b = v.bucket();
     b->frees++;
     b->free_size += v.bytes;
@@ -224,14 +249,14 @@ void HeapProfileTable::RecordFree(const void* ptr) {
 }
 
 bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const {
-  const AllocValue* alloc_value = allocation_->Find(ptr);
+  const AllocValue* alloc_value = alloc_address_map_->Find(ptr);
   if (alloc_value != NULL) *object_size = alloc_value->bytes;
   return alloc_value != NULL;
 }
 
 bool HeapProfileTable::FindAllocDetails(const void* ptr,
                                         AllocInfo* info) const {
-  const AllocValue* alloc_value = allocation_->Find(ptr);
+  const AllocValue* alloc_value = alloc_address_map_->Find(ptr);
   if (alloc_value != NULL) {
     info->object_size = alloc_value->bytes;
     info->call_stack = alloc_value->bucket()->stack;
@@ -245,13 +270,13 @@ bool HeapProfileTable::FindInsideAlloc(const void* ptr,
                                        const void** object_ptr,
                                        size_t* object_size) const {
   const AllocValue* alloc_value =
-    allocation_->FindInside(&AllocValueSize, max_size, ptr, object_ptr);
+    alloc_address_map_->FindInside(&AllocValueSize, max_size, ptr, object_ptr);
   if (alloc_value != NULL) *object_size = alloc_value->bytes;
   return alloc_value != NULL;
 }
 
 bool HeapProfileTable::MarkAsLive(const void* ptr) {
-  AllocValue* alloc = allocation_->FindMutable(ptr);
+  AllocValue* alloc = alloc_address_map_->FindMutable(ptr);
   if (alloc && !alloc->live()) {
     alloc->set_live(true);
     return true;
@@ -260,7 +285,7 @@ bool HeapProfileTable::MarkAsLive(const void* ptr) {
 }
 
 void HeapProfileTable::MarkAsIgnored(const void* ptr) {
-  AllocValue* alloc = allocation_->FindMutable(ptr);
+  AllocValue* alloc = alloc_address_map_->FindMutable(ptr);
   if (alloc) {
     alloc->set_ignore(true);
   }
@@ -301,27 +326,81 @@ int HeapProfileTable::UnparseBucket(const Bucket& b,
 
 HeapProfileTable::Bucket**
 HeapProfileTable::MakeSortedBucketList() const {
-  Bucket** list =
-    reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_));
+  Bucket** list = reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) *
+      (num_alloc_buckets_ + num_available_mmap_buckets_)));
+
+  RAW_DCHECK(mmap_table_ != NULL || num_available_mmap_buckets_ == 0, "");
 
   int n = 0;
+
   for (int b = 0; b < kHashTableSize; b++) {
-    for (Bucket* x = table_[b]; x != 0; x = x->next) {
+    for (Bucket* x = alloc_table_[b]; x != 0; x = x->next) {
       list[n++] = x;
     }
   }
-  RAW_DCHECK(n == num_buckets_, "");
+  RAW_DCHECK(n == num_alloc_buckets_, "");
+
+  if (mmap_table_ != NULL) {
+    for (int b = 0; b < kHashTableSize; b++) {
+      for (Bucket* x = mmap_table_[b]; x != 0; x = x->next) {
+        list[n++] = x;
+      }
+    }
+  }
+  RAW_DCHECK(n == num_alloc_buckets_ + num_available_mmap_buckets_, "");
 
-  sort(list, list + num_buckets_, ByAllocatedSpace);
+  sort(list, list + num_alloc_buckets_ + num_available_mmap_buckets_,
+       ByAllocatedSpace);
 
   return list;
 }
 
+void HeapProfileTable::RefreshMMapData() {
+  // Make the table
+  static const int mmap_table_bytes = kHashTableSize * sizeof(*mmap_table_);
+  if (mmap_table_ == NULL) {
+    mmap_table_ = reinterpret_cast<Bucket**>(alloc_(mmap_table_bytes));
+    memset(mmap_table_, 0, mmap_table_bytes);
+  }
+  num_available_mmap_buckets_ = 0;
+
+  ClearMMapData();
+  mmap_address_map_ =
+      new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_);
+
+  MemoryRegionMap::LockHolder l;
+  for (MemoryRegionMap::RegionIterator r =
+           MemoryRegionMap::BeginRegionLocked();
+       r != MemoryRegionMap::EndRegionLocked(); ++r) {
+    Bucket* b =
+        GetBucket(r->call_stack_depth, r->call_stack, mmap_table_, NULL);
+    if (b->alloc_size == 0) {
+      num_available_mmap_buckets_ += 1;
+    }
+    b->allocs += 1;
+    b->alloc_size += r->end_addr - r->start_addr;
+
+    AllocValue v;
+    v.set_bucket(b);
+    v.bytes = r->end_addr - r->start_addr;
+    mmap_address_map_->Insert(reinterpret_cast<const void*>(r->start_addr), v);
+  }
+}
+
+void HeapProfileTable::ClearMMapData() {
+  if (mmap_address_map_ != NULL) {
+    mmap_address_map_->Iterate(ZeroBucketCountsIterator, this);
+    mmap_address_map_->~AllocationMap();
+    dealloc_(mmap_address_map_);
+    mmap_address_map_ = NULL;
+  }
+}
+
 void HeapProfileTable::IterateOrderedAllocContexts(
     AllocContextIterator callback) const {
   Bucket** list = MakeSortedBucketList();
   AllocContextInfo info;
-  for (int i = 0; i < num_buckets_; ++i) {
+  for (int i = 0; i < num_alloc_buckets_; ++i) {
     *static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]);
     info.stack_depth = list[i]->depth;
     info.call_stack = list[i]->stack;
@@ -353,9 +432,14 @@ int HeapProfileTable::FillOrderedProfile(char buf[], int size) const {
   memset(&stats, 0, sizeof(stats));
   int bucket_length = snprintf(buf, size, "%s", kProfileHeader);
   if (bucket_length < 0 || bucket_length >= size) return 0;
-  bucket_length = UnparseBucket(total_, buf, bucket_length, size,
+  Bucket total_with_mmap(total_);
+  if (mmap_table_ != NULL) {
+    total_with_mmap.alloc_size += MemoryRegionMap::MapSize();
+    total_with_mmap.free_size += MemoryRegionMap::UnmapSize();
+  }
+  bucket_length = UnparseBucket(total_with_mmap, buf, bucket_length, size,
                                 " heapprofile", &stats);
-  for (int i = 0; i < num_buckets_; i++) {
+  for (int i = 0; i < num_alloc_buckets_; i++) {
     bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, "",
                                   &stats);
   }
@@ -390,6 +474,17 @@ void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v,
   RawWrite(args.fd, buf, len);
 }
 
+inline void HeapProfileTable::ZeroBucketCountsIterator(
+    const void* ptr, AllocValue* v, HeapProfileTable* heap_profile) {
+  Bucket* b = v->bucket();
+  if (b != NULL) {
+    b->allocs = 0;
+    b->alloc_size = 0;
+    b->free_size = 0;
+    b->frees = 0;
+  }
+}
+
 // Callback from NonLiveSnapshot; adds entry to arg->dest
 // if not the entry is not live and is not present in arg->base.
 void HeapProfileTable::AddIfNonLive(const void* ptr, AllocValue* v,
@@ -454,7 +549,7 @@ void HeapProfileTable::CleanupOldProfiles(const char* prefix) {
 
 HeapProfileTable::Snapshot* HeapProfileTable::TakeSnapshot() {
   Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_);
-  allocation_->Iterate(AddToSnapshot, s);
+  alloc_address_map_->Iterate(AddToSnapshot, s);
   return s;
 }
 
@@ -479,7 +574,7 @@ HeapProfileTable::Snapshot* HeapProfileTable::NonLiveSnapshot(
   AddNonLiveArgs args;
   args.dest = s;
   args.base = base;
-  allocation_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args);
+  alloc_address_map_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args);
   RAW_VLOG(2, "NonLiveSnapshot output: %d %d\n",
            int(s->total_.allocs - s->total_.frees),
            int(s->total_.alloc_size - s->total_.free_size));
diff --git a/third_party/tcmalloc/vendor/src/heap-profile-table.h b/third_party/tcmalloc/vendor/src/heap-profile-table.h
index c9bee15..abd3184 100644
--- a/third_party/tcmalloc/vendor/src/heap-profile-table.h
+++ b/third_party/tcmalloc/vendor/src/heap-profile-table.h
@@ -97,15 +97,20 @@ class HeapProfileTable {
   HeapProfileTable(Allocator alloc, DeAllocator dealloc);
   ~HeapProfileTable();
 
-  // Record an allocation at 'ptr' of 'bytes' bytes.
-  // skip_count gives the number of stack frames between this call
-  // and the memory allocation function that was asked to do the allocation.
-  void RecordAlloc(const void* ptr, size_t bytes, int skip_count);
+  // Collect the stack trace for the function that asked to do the
+  // allocation for passing to RecordAlloc() below.
+  //
+  // The stack trace is stored in 'stack'. The stack depth is returned.
+  //
+  // 'skip_count' gives the number of stack frames between this call
+  // and the memory allocation function.
+  static int GetCallerStackTrace(int skip_count, void* stack[kMaxStackDepth]);
 
-  // Direct version of RecordAlloc when the caller stack to use
-  // is already known: call_stack of depth stack_depth.
-  void RecordAllocWithStack(const void* ptr, size_t bytes,
-                            int stack_depth, const void* const call_stack[]);
+  // Record an allocation at 'ptr' of 'bytes' bytes.  'stack_depth'
+  // and 'call_stack' identifying the function that requested the
+  // allocation. They can be generated using GetCallerStackTrace() above.
+  void RecordAlloc(const void* ptr, size_t bytes,
+                   int stack_depth, const void* const call_stack[]);
 
   // Record the deallocation of memory at 'ptr'.
   void RecordFree(const void* ptr);
@@ -133,7 +138,8 @@ class HeapProfileTable {
   // are skipped in heap checking reports.
   void MarkAsIgnored(const void* ptr);
 
-  // Return current total (de)allocation statistics.
+  // Return current total (de)allocation statistics.  It doesn't contain
+  // mmap'ed regions.
   const Stats& total() const { return total_; }
 
   // Allocation data iteration callback: gets passed object pointer and
@@ -143,7 +149,7 @@ class HeapProfileTable {
   // Iterate over the allocation profile data calling "callback"
   // for every allocation.
   void IterateAllocs(AllocIterator callback) const {
-    allocation_->Iterate(MapArgsAllocIterator, callback);
+    alloc_address_map_->Iterate(MapArgsAllocIterator, callback);
   }
 
   // Allocation context profile data iteration callback
@@ -181,6 +187,16 @@ class HeapProfileTable {
   // Caller must call ReleaseSnapshot() on result when no longer needed.
   Snapshot* NonLiveSnapshot(Snapshot* base);
 
+  // Refresh the internal mmap information from MemoryRegionMap.  Results of
+  // FillOrderedProfile and IterateOrderedAllocContexts will contain mmap'ed
+  // memory regions as at calling RefreshMMapData.
+  void RefreshMMapData();
+
+  // Clear the internal mmap information.  Results of FillOrderedProfile and
+  // IterateOrderedAllocContexts won't contain mmap'ed memory regions after
+  // calling ClearMMapData.
+  void ClearMMapData();
+
  private:
 
   // data types ----------------------------
@@ -258,9 +274,18 @@ class HeapProfileTable {
                            const char* extra,
                            Stats* profile_stats);
 
-  // Get the bucket for the caller stack trace 'key' of depth 'depth'
-  // creating the bucket if needed.
-  Bucket* GetBucket(int depth, const void* const key[]);
+  // Deallocate a given allocation map.
+  void DeallocateAllocationMap(AllocationMap* allocation);
+
+  // Deallocate a given bucket table.
+  void DeallocateBucketTable(Bucket** table);
+
+  // Get the bucket for the caller stack trace 'key' of depth 'depth' from a
+  // bucket hash map 'table' creating the bucket if needed.  '*bucket_count'
+  // is incremented both when 'bucket_count' is not NULL and when a new
+  // bucket object is created.
+  Bucket* GetBucket(int depth, const void* const key[], Bucket** table,
+                    int* bucket_count);
 
   // Helper for IterateAllocs to do callback signature conversion
   // from AllocationMap::Iterate to AllocIterator.
@@ -280,9 +305,14 @@ class HeapProfileTable {
   inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v,
                                          const DumpArgs& args);
 
+  // Helper for filling size variables in buckets by zero.
+  inline static void ZeroBucketCountsIterator(
+      const void* ptr, AllocValue* v, HeapProfileTable* heap_profile);
+
   // Helper for IterateOrderedAllocContexts and FillOrderedProfile.
-  // Creates a sorted list of Buckets whose length is num_buckets_.
-  // The caller is responsible for dellocating the returned list.
+  // Creates a sorted list of Buckets whose length is num_alloc_buckets_ +
+  // num_avaliable_mmap_buckets_.
+  // The caller is responsible for deallocating the returned list.
   Bucket** MakeSortedBucketList() const;
 
   // Helper for TakeSnapshot.  Saves object to snapshot.
@@ -314,17 +344,25 @@ class HeapProfileTable {
 
   // Overall profile stats; we use only the Stats part,
   // but make it a Bucket to pass to UnparseBucket.
+  // It doesn't contain mmap'ed regions.
   Bucket total_;
 
-  // Bucket hash table.
+  // Bucket hash table for malloc.
   // We hand-craft one instead of using one of the pre-written
   // ones because we do not want to use malloc when operating on the table.
   // It is only few lines of code, so no big deal.
-  Bucket** table_;
-  int num_buckets_;
-
-  // Map of all currently allocated objects we know about.
-  AllocationMap* allocation_;
+  Bucket** alloc_table_;
+  int num_alloc_buckets_;
+
+  // Bucket hash table for mmap.
+  // This table is filled with the information from MemoryRegionMap by calling
+  // RefreshMMapData.
+  Bucket** mmap_table_;
+  int num_available_mmap_buckets_;
+
+  // Map of all currently allocated objects and mapped regions we know about.
+  AllocationMap* alloc_address_map_;
+  AllocationMap* mmap_address_map_;
 
   DISALLOW_COPY_AND_ASSIGN(HeapProfileTable);
 };
diff --git a/third_party/tcmalloc/vendor/src/heap-profiler.cc b/third_party/tcmalloc/vendor/src/heap-profiler.cc
index 5e30c22..09a3911 100644
--- a/third_party/tcmalloc/vendor/src/heap-profiler.cc
+++ b/third_party/tcmalloc/vendor/src/heap-profiler.cc
@@ -55,7 +55,7 @@
 #include <algorithm>
 #include <string>
 
-#include <google/heap-profiler.h>
+#include <gperftools/heap-profiler.h>
 
 #include "base/logging.h"
 #include "base/basictypes.h"   // for PRId64, among other things
@@ -63,8 +63,8 @@
 #include "base/commandlineflags.h"
 #include "malloc_hook-inl.h"
 #include "tcmalloc_guard.h"
-#include <google/malloc_hook.h>
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_hook.h>
+#include <gperftools/malloc_extension.h>
 #include "base/spinlock.h"
 #include "base/low_level_alloc.h"
 #include "base/sysinfo.h"      // for GetUniquePathFromEnv()
@@ -175,29 +175,6 @@ static HeapProfileTable* heap_profile = NULL;  // the heap profile table
 // Profile generation
 //----------------------------------------------------------------------
 
-enum AddOrRemove { ADD, REMOVE };
-
-// Add or remove all MMap-allocated regions to/from *heap_profile.
-// Assumes heap_lock is held.
-static void AddRemoveMMapDataLocked(AddOrRemove mode) {
-  RAW_DCHECK(heap_lock.IsHeld(), "");
-  if (!FLAGS_mmap_profile || !is_on) return;
-  // MemoryRegionMap maintained all the data we need for all
-  // mmap-like allocations, so we just use it here:
-  MemoryRegionMap::LockHolder l;
-  for (MemoryRegionMap::RegionIterator r = MemoryRegionMap::BeginRegionLocked();
-       r != MemoryRegionMap::EndRegionLocked(); ++r) {
-    if (mode == ADD) {
-      heap_profile->RecordAllocWithStack(
-        reinterpret_cast<const void*>(r->start_addr),
-        r->end_addr - r->start_addr,
-        r->call_stack_depth, r->call_stack);
-    } else {
-      heap_profile->RecordFree(reinterpret_cast<void*>(r->start_addr));
-    }
-  }
-}
-
 // Input must be a buffer of size at least 1MB.
 static char* DoGetHeapProfileLocked(char* buf, int buflen) {
   // We used to be smarter about estimating the required memory and
@@ -208,16 +185,13 @@ static char* DoGetHeapProfileLocked(char* buf, int buflen) {
   RAW_DCHECK(heap_lock.IsHeld(), "");
   int bytes_written = 0;
   if (is_on) {
-    HeapProfileTable::Stats const stats = heap_profile->total();
-    (void)stats;   // avoid an unused-variable warning in non-debug mode.
-    AddRemoveMMapDataLocked(ADD);
+    if (FLAGS_mmap_profile) {
+      heap_profile->RefreshMMapData();
+    }
     bytes_written = heap_profile->FillOrderedProfile(buf, buflen - 1);
-    // FillOrderedProfile should not reduce the set of active mmap-ed regions,
-    // hence MemoryRegionMap will let us remove everything we've added above:
-    AddRemoveMMapDataLocked(REMOVE);
-    RAW_DCHECK(stats.Equivalent(heap_profile->total()), "");
-    // if this fails, we somehow removed by AddRemoveMMapDataLocked
-    // more than we have added.
+    if (FLAGS_mmap_profile) {
+      heap_profile->ClearMMapData();
+    }
   }
   buf[bytes_written] = '\0';
   RAW_DCHECK(bytes_written == strlen(buf), "");
@@ -324,9 +298,12 @@ static void MaybeDumpProfileLocked() {
 
 // Record an allocation in the profile.
 static void RecordAlloc(const void* ptr, size_t bytes, int skip_count) {
+  // Take the stack trace outside the critical section.
+  void* stack[HeapProfileTable::kMaxStackDepth];
+  int depth = HeapProfileTable::GetCallerStackTrace(skip_count + 1, stack);
   SpinLockHolder l(&heap_lock);
   if (is_on) {
-    heap_profile->RecordAlloc(ptr, bytes, skip_count + 1);
+    heap_profile->RecordAlloc(ptr, bytes, depth, stack);
     MaybeDumpProfileLocked();
   }
 }
diff --git a/third_party/tcmalloc/vendor/src/internal_logging.cc b/third_party/tcmalloc/vendor/src/internal_logging.cc
index 4c90190..2189d84 100644
--- a/third_party/tcmalloc/vendor/src/internal_logging.cc
+++ b/third_party/tcmalloc/vendor/src/internal_logging.cc
@@ -40,65 +40,139 @@
 #include <unistd.h>    // for write()
 #endif
 
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 #include "base/logging.h"   // for perftools_vsnprintf
 #include "base/spinlock.h"              // for SpinLockHolder, SpinLock
 
 static const int kLogBufSize = 800;
 
-void TCMalloc_MESSAGE(const char* filename,
-                      int line_number,
-                      const char* format, ...) {
-  char buf[kLogBufSize];
-  const int n = snprintf(buf, sizeof(buf), "%s:%d] ", filename, line_number);
-  if (n < kLogBufSize) {
-    va_list ap;
-    va_start(ap, format);
-    perftools_vsnprintf(buf + n, kLogBufSize - n, format, ap);
-    va_end(ap);
-  }
-  write(STDERR_FILENO, buf, strlen(buf));
-}
-
+// Variables for storing crash output.  Allocated statically since we
+// may not be able to heap-allocate while crashing.
+static SpinLock crash_lock(base::LINKER_INITIALIZED);
+static bool crashed = false;
 static const int kStatsBufferSize = 16 << 10;
 static char stats_buffer[kStatsBufferSize] = { 0 };
 
-static void TCMalloc_CRASH_internal(bool dump_stats,
-                                    const char* filename,
-                                    int line_number,
-                                    const char* format, va_list ap) {
-  char buf[kLogBufSize];
-  const int n = snprintf(buf, sizeof(buf), "%s:%d] ", filename, line_number);
-  if (n < kLogBufSize) {
-    perftools_vsnprintf(buf + n, kLogBufSize - n, format, ap);
+namespace tcmalloc {
+
+static void WriteMessage(const char* msg, int length) {
+  write(STDERR_FILENO, msg, length);
+}
+
+void (*log_message_writer)(const char* msg, int length) = WriteMessage;
+
+
+class Logger {
+ public:
+  bool Add(const LogItem& item);
+  bool AddStr(const char* str, int n);
+  bool AddNum(uint64_t num, int base);  // base must be 10 or 16.
+
+  static const int kBufSize = 200;
+  char* p_;
+  char* end_;
+  char buf_[kBufSize];
+};
+
+void Log(LogMode mode, const char* filename, int line,
+         LogItem a, LogItem b, LogItem c, LogItem d) {
+  Logger state;
+  state.p_ = state.buf_;
+  state.end_ = state.buf_ + sizeof(state.buf_);
+  state.AddStr(filename, strlen(filename))
+      && state.AddStr(":", 1)
+      && state.AddNum(line, 10)
+      && state.AddStr("]", 1)
+      && state.Add(a)
+      && state.Add(b)
+      && state.Add(c)
+      && state.Add(d);
+
+  // Teminate with newline
+  if (state.p_ >= state.end_) {
+    state.p_ = state.end_ - 1;
+  }
+  *state.p_ = '\n';
+  state.p_++;
+
+  int msglen = state.p_ - state.buf_;
+  if (mode == kLog) {
+    (*log_message_writer)(state.buf_, msglen);
+    return;
   }
-  write(STDERR_FILENO, buf, strlen(buf));
-  if (dump_stats) {
+
+  bool first_crash = false;
+  {
+    SpinLockHolder l(&crash_lock);
+    if (!crashed) {
+      crashed = true;
+      first_crash = true;
+    }
+  }
+
+  (*log_message_writer)(state.buf_, msglen);
+  if (first_crash && mode == kCrashWithStats) {
     MallocExtension::instance()->GetStats(stats_buffer, kStatsBufferSize);
-    write(STDERR_FILENO, stats_buffer, strlen(stats_buffer));
+    (*log_message_writer)(stats_buffer, strlen(stats_buffer));
   }
 
   abort();
 }
 
-void TCMalloc_CRASH(bool dump_stats,
-                    const char* filename,
-                    int line_number,
-                    const char* format, ...) {
-  va_list ap;
-  va_start(ap, format);
-  TCMalloc_CRASH_internal(dump_stats, filename, line_number, format, ap);
-  va_end(ap);
+bool Logger::Add(const LogItem& item) {
+  // Separate items with spaces
+  if (p_ < end_) {
+    *p_ = ' ';
+    p_++;
+  }
+
+  switch (item.tag_) {
+    case LogItem::kStr:
+      return AddStr(item.u_.str, strlen(item.u_.str));
+    case LogItem::kUnsigned:
+      return AddNum(item.u_.unum, 10);
+    case LogItem::kSigned:
+      if (item.u_.snum < 0) {
+        // The cast to uint64_t is intentionally before the negation
+        // so that we do not attempt to negate -2^63.
+        return AddStr("-", 1)
+            && AddNum(- static_cast<uint64_t>(item.u_.snum), 10);
+      } else {
+        return AddNum(static_cast<uint64_t>(item.u_.snum), 10);
+      }
+    case LogItem::kPtr:
+      return AddStr("0x", 2)
+          && AddNum(reinterpret_cast<uintptr_t>(item.u_.ptr), 16);
+    default:
+      return false;
+  }
 }
 
+bool Logger::AddStr(const char* str, int n) {
+  if (end_ - p_ < n) {
+    return false;
+  } else {
+    memcpy(p_, str, n);
+    p_ += n;
+    return true;
+  }
+}
 
-void TCMalloc_CrashReporter::PrintfAndDie(const char* format, ...) {
-  va_list ap;
-  va_start(ap, format);
-  TCMalloc_CRASH_internal(dump_stats_, file_, line_, format, ap);
-  va_end(ap);
+bool Logger::AddNum(uint64_t num, int base) {
+  static const char kDigits[] = "0123456789abcdef";
+  char space[22];  // more than enough for 2^64 in smallest supported base (10)
+  char* end = space + sizeof(space);
+  char* pos = end;
+  do {
+    pos--;
+    *pos = kDigits[num % base];
+    num /= base;
+  } while (num > 0 && pos > space);
+  return AddStr(pos, end - pos);
 }
 
+}  // end tcmalloc namespace
+
 void TCMalloc_Printer::printf(const char* format, ...) {
   if (left_ > 0) {
     va_list ap;
diff --git a/third_party/tcmalloc/vendor/src/internal_logging.h b/third_party/tcmalloc/vendor/src/internal_logging.h
index ce4a516..0267034 100644
--- a/third_party/tcmalloc/vendor/src/internal_logging.h
+++ b/third_party/tcmalloc/vendor/src/internal_logging.h
@@ -37,77 +37,79 @@
 
 #include <config.h>
 #include <stddef.h>                     // for size_t
+#if defined HAVE_STDINT_H
+#include <stdint.h>
+#elif defined HAVE_INTTYPES_H
+#include <inttypes.h>
+#else
+#include <sys/types.h>
+#endif
 
 //-------------------------------------------------------------------
 // Utility routines
 //-------------------------------------------------------------------
 
-// Safe debugging routine: we write directly to the stderr file
+// Safe logging helper: we write directly to the stderr file
 // descriptor and avoid FILE buffering because that may invoke
-// malloc()
-extern void TCMalloc_MESSAGE(const char* filename,
-                             int line_number,
-                             const char* format, ...)
-#ifdef HAVE___ATTRIBUTE__
-  __attribute__ ((__format__ (__printf__, 3, 4)))
-#endif
-;
+// malloc().
+//
+// Example:
+//   Log(kLog, __FILE__, __LINE__, "error", bytes);
+
+namespace tcmalloc {
+enum LogMode {
+  kLog,                       // Just print the message
+  kCrash,                     // Print the message and crash
+  kCrashWithStats             // Print the message, some stats, and crash
+};
 
-// Right now, the only non-fatal messages we want to report are when
-// an allocation fails (we'll return NULL eventually, but sometimes
-// want more prominent notice to help debug).  message should be
-// a literal string with no %<whatever> format directives.
-#ifdef TCMALLOC_WARNINGS
-#define MESSAGE(message, num_bytes)                                     \
-   TCMalloc_MESSAGE(__FILE__, __LINE__, message " (%"PRIuS" bytes)\n",  \
-                    static_cast<size_t>(num_bytes))
-#else
-#define MESSAGE(message, num_bytes)
-#endif
+class Logger;
 
-// Dumps the specified message and then calls abort().  If
-// "dump_stats" is specified, the first call will also dump the
-// tcmalloc stats.
-extern void TCMalloc_CRASH(bool dump_stats,
-                           const char* filename,
-                           int line_number,
-                           const char* format, ...)
-#ifdef HAVE___ATTRIBUTE__
-  __attribute__ ((__format__ (__printf__, 4, 5)))
-#endif
-;
-
-// This is a class that makes using the macro easier.  With this class,
-// CRASH("%d", i) expands to TCMalloc_CrashReporter.PrintfAndDie("%d", i).
-class PERFTOOLS_DLL_DECL TCMalloc_CrashReporter {
+// A LogItem holds any of the argument types that can be passed to Log()
+class LogItem {
  public:
-  TCMalloc_CrashReporter(bool dump_stats, const char* file, int line)
-      : dump_stats_(dump_stats), file_(file), line_(line) {
-  }
-  void PrintfAndDie(const char* format, ...)
-#ifdef HAVE___ATTRIBUTE__
-      __attribute__ ((__format__ (__printf__, 2, 3)))  // 2,3 due to "this"
-#endif
-;
-
+  LogItem()                     : tag_(kEnd)      { }
+  LogItem(const char* v)        : tag_(kStr)      { u_.str = v; }
+  LogItem(int v)                : tag_(kSigned)   { u_.snum = v; }
+  LogItem(long v)               : tag_(kSigned)   { u_.snum = v; }
+  LogItem(long long v)          : tag_(kSigned)   { u_.snum = v; }
+  LogItem(unsigned int v)       : tag_(kUnsigned) { u_.unum = v; }
+  LogItem(unsigned long v)      : tag_(kUnsigned) { u_.unum = v; }
+  LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; }
+  LogItem(const void* v)        : tag_(kPtr)      { u_.ptr = v; }
  private:
-  bool dump_stats_;
-  const char* file_;
-  int line_;
+  friend class Logger;
+  enum Tag {
+    kStr,
+    kSigned,
+    kUnsigned,
+    kPtr,
+    kEnd
+  };
+  Tag tag_;
+  union {
+    const char* str;
+    const void* ptr;
+    int64_t snum;
+    uint64_t unum;
+  } u_;
 };
 
-#define CRASH \
-  TCMalloc_CrashReporter(false, __FILE__, __LINE__).PrintfAndDie
+extern PERFTOOLS_DLL_DECL void Log(LogMode mode, const char* filename, int line,
+                LogItem a, LogItem b = LogItem(),
+                LogItem c = LogItem(), LogItem d = LogItem());
+
+// Tests can override this function to collect logging messages.
+extern PERFTOOLS_DLL_DECL void (*log_message_writer)(const char* msg, int length);
 
-#define CRASH_WITH_STATS \
-  TCMalloc_CrashReporter(true, __FILE__, __LINE__).PrintfAndDie
+}  // end tcmalloc namespace
 
 // Like assert(), but executed even in NDEBUG mode
 #undef CHECK_CONDITION
 #define CHECK_CONDITION(cond)                                            \
 do {                                                                     \
   if (!(cond)) {                                                         \
-    CRASH("assertion failed: %s\n", #cond);   \
+    ::tcmalloc::Log(::tcmalloc::kCrash, __FILE__, __LINE__, #cond);      \
   }                                                                      \
 } while (0)
 
diff --git a/third_party/tcmalloc/vendor/src/libc_override.h b/third_party/tcmalloc/vendor/src/libc_override.h
index c198e6a..666d14d 100644
--- a/third_party/tcmalloc/vendor/src/libc_override.h
+++ b/third_party/tcmalloc/vendor/src/libc_override.h
@@ -55,7 +55,7 @@
 #ifdef HAVE_FEATURES_H
 #include <features.h>   // for __GLIBC__
 #endif
-#include <google/tcmalloc.h>
+#include <gperftools/tcmalloc.h>
 
 static void ReplaceSystemAlloc();  // defined in the .h files below
 
diff --git a/third_party/tcmalloc/vendor/src/libc_override_gcc_and_weak.h b/third_party/tcmalloc/vendor/src/libc_override_gcc_and_weak.h
index 3d801d1..070ebf7 100644
--- a/third_party/tcmalloc/vendor/src/libc_override_gcc_and_weak.h
+++ b/third_party/tcmalloc/vendor/src/libc_override_gcc_and_weak.h
@@ -41,7 +41,7 @@
 #ifdef HAVE_SYS_CDEFS_H
 #include <sys/cdefs.h>    // for __THROW
 #endif
-#include <google/tcmalloc.h>
+#include <gperftools/tcmalloc.h>
 
 #ifndef __THROW    // I guess we're not on a glibc-like system
 # define __THROW   // __THROW is just an optimization, so ok to make it ""
diff --git a/third_party/tcmalloc/vendor/src/libc_override_glibc.h b/third_party/tcmalloc/vendor/src/libc_override_glibc.h
index 4490a7a..16badcc 100644
--- a/third_party/tcmalloc/vendor/src/libc_override_glibc.h
+++ b/third_party/tcmalloc/vendor/src/libc_override_glibc.h
@@ -40,7 +40,7 @@
 #ifdef HAVE_SYS_CDEFS_H
 #include <sys/cdefs.h>    // for __THROW
 #endif
-#include <google/tcmalloc.h>
+#include <gperftools/tcmalloc.h>
 
 #ifndef __GLIBC__
 # error libc_override_glibc.h is for glibc distributions only.
diff --git a/third_party/tcmalloc/vendor/src/libc_override_osx.h b/third_party/tcmalloc/vendor/src/libc_override_osx.h
index 0ccf9a3..78a0ef2 100644
--- a/third_party/tcmalloc/vendor/src/libc_override_osx.h
+++ b/third_party/tcmalloc/vendor/src/libc_override_osx.h
@@ -75,7 +75,7 @@
 #ifdef HAVE_FEATURES_H
 #include <features.h>
 #endif
-#include <google/tcmalloc.h>
+#include <gperftools/tcmalloc.h>
 
 #if !defined(__APPLE__)
 # error libc_override_glibc-osx.h is for OS X distributions only.
@@ -84,6 +84,19 @@
 #include <AvailabilityMacros.h>
 #include <malloc/malloc.h>
 
+// from AvailabilityMacros.h
+#if defined(MAC_OS_X_VERSION_10_6) && \
+    MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
+extern "C" {
+  // This function is only available on 10.6 (and later) but the
+  // LibSystem headers do not use AvailabilityMacros.h to handle weak
+  // importing automatically.  This prototype is a copy of the one in
+  // <malloc/malloc.h> with the WEAK_IMPORT_ATTRBIUTE added.
+  extern malloc_zone_t *malloc_default_purgeable_zone(void)
+      WEAK_IMPORT_ATTRIBUTE;
+}
+#endif
+
 // We need to provide wrappers around all the libc functions.
 namespace {
 size_t mz_size(malloc_zone_t* zone, const void* ptr) {
@@ -235,8 +248,13 @@ static void ReplaceSystemAlloc() {
   // doing tiny and small allocs.  Sadly, it assumes that the default
   // zone is the szone implementation from OS X and will crash if it
   // isn't.  By creating the zone now, this will be true and changing
-  // the default zone won't cause a problem.  (OS X 10.6 and higher.)
-  malloc_default_purgeable_zone();
+  // the default zone won't cause a problem.  This only needs to
+  // happen when actually running on OS X 10.6 and higher (note the
+  // ifdef above only checks if we were *compiled* with 10.6 or
+  // higher; at runtime we have to check if this symbol is defined.)
+  if (malloc_default_purgeable_zone) {
+    malloc_default_purgeable_zone();
+  }
 #endif
 
   // Register the tcmalloc zone. At this point, it will not be the
diff --git a/third_party/tcmalloc/vendor/src/malloc_extension.cc b/third_party/tcmalloc/vendor/src/malloc_extension.cc
index bf946e6..2d6497f 100644
--- a/third_party/tcmalloc/vendor/src/malloc_extension.cc
+++ b/third_party/tcmalloc/vendor/src/malloc_extension.cc
@@ -45,10 +45,10 @@
 #include "base/dynamic_annotations.h"
 #include "base/sysinfo.h"    // for FillProcSelfMaps
 #ifndef NO_HEAP_CHECK
-#include "google/heap-checker.h"
+#include "gperftools/heap-checker.h"
 #endif
-#include "google/malloc_extension.h"
-#include "google/malloc_extension_c.h"
+#include "gperftools/malloc_extension.h"
+#include "gperftools/malloc_extension_c.h"
 #include "maybe_threads.h"
 
 using STL_NAMESPACE::string;
@@ -108,9 +108,9 @@ SysAllocator::~SysAllocator() {}
 // Default implementation -- does nothing
 MallocExtension::~MallocExtension() { }
 bool MallocExtension::VerifyAllMemory() { return true; }
-bool MallocExtension::VerifyNewMemory(void* p) { return true; }
-bool MallocExtension::VerifyArrayNewMemory(void* p) { return true; }
-bool MallocExtension::VerifyMallocMemory(void* p) { return true; }
+bool MallocExtension::VerifyNewMemory(const void* p) { return true; }
+bool MallocExtension::VerifyArrayNewMemory(const void* p) { return true; }
+bool MallocExtension::VerifyMallocMemory(const void* p) { return true; }
 
 bool MallocExtension::GetNumericProperty(const char* property, size_t* value) {
   return false;
@@ -177,7 +177,7 @@ size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) {
   return size;
 }
 
-size_t MallocExtension::GetAllocatedSize(void* p) {
+size_t MallocExtension::GetAllocatedSize(const void* p) {
   assert(GetOwnership(p) != kNotOwned);
   return 0;
 }
@@ -343,9 +343,9 @@ void MallocExtension::Ranges(void* arg, RangeFunction func) {
   }
 
 C_SHIM(VerifyAllMemory, int, (void), ());
-C_SHIM(VerifyNewMemory, int, (void* p), (p));
-C_SHIM(VerifyArrayNewMemory, int, (void* p), (p));
-C_SHIM(VerifyMallocMemory, int, (void* p), (p));
+C_SHIM(VerifyNewMemory, int, (const void* p), (p));
+C_SHIM(VerifyArrayNewMemory, int, (const void* p), (p));
+C_SHIM(VerifyMallocMemory, int, (const void* p), (p));
 C_SHIM(MallocMemoryStats, int,
        (int* blocks, size_t* total, int histogram[kMallocHistogramSize]),
        (blocks, total, histogram));
@@ -362,7 +362,7 @@ C_SHIM(MarkThreadBusy, void, (void), ());
 C_SHIM(ReleaseFreeMemory, void, (void), ());
 C_SHIM(ReleaseToSystem, void, (size_t num_bytes), (num_bytes));
 C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size));
-C_SHIM(GetAllocatedSize, size_t, (void* p), (p));
+C_SHIM(GetAllocatedSize, size_t, (const void* p), (p));
 
 // Can't use the shim here because of the need to translate the enums.
 extern "C"
diff --git a/third_party/tcmalloc/vendor/src/malloc_hook-inl.h b/third_party/tcmalloc/vendor/src/malloc_hook-inl.h
index 6210784..27e5bdc 100644
--- a/third_party/tcmalloc/vendor/src/malloc_hook-inl.h
+++ b/third_party/tcmalloc/vendor/src/malloc_hook-inl.h
@@ -41,7 +41,7 @@
 #include <sys/types.h>
 #include "base/atomicops.h"
 #include "base/basictypes.h"
-#include <google/malloc_hook.h>
+#include <gperftools/malloc_hook.h>
 
 namespace base { namespace internal {
 
diff --git a/third_party/tcmalloc/vendor/src/malloc_hook.cc b/third_party/tcmalloc/vendor/src/malloc_hook.cc
index dc4539c..2f8608e 100644
--- a/third_party/tcmalloc/vendor/src/malloc_hook.cc
+++ b/third_party/tcmalloc/vendor/src/malloc_hook.cc
@@ -46,12 +46,11 @@
 #include <stdint.h>
 #endif
 #include <algorithm>
-#include "base/basictypes.h"
 #include "base/logging.h"
 #include "base/spinlock.h"
 #include "maybe_threads.h"
 #include "malloc_hook-inl.h"
-#include <google/malloc_hook.h>
+#include <gperftools/malloc_hook.h>
 
 // This #ifdef should almost never be set.  Set NO_TCMALLOC_SAMPLES if
 // you're porting to a system where you really can't get a stacktrace.
@@ -59,7 +58,7 @@
   // We use #define so code compiles even if you #include stacktrace.h somehow.
 # define GetStackTrace(stack, depth, skip)  (0)
 #else
-# include <google/stacktrace.h>
+# include <gperftools/stacktrace.h>
 #endif
 
 // __THROW is defined in glibc systems.  It means, counter-intuitively,
@@ -204,14 +203,8 @@ static SpinLock hooklist_spinlock(base::LINKER_INITIALIZED);
 
 template <typename T>
 bool HookList<T>::Add(T value_as_t) {
-  // Note: we need to check this _before_ reinterpret_cast, since
-  // reinterpret_cast may include random junk from memory.
-  if (value_as_t == 0) {
-    return false;
-  }
-  AtomicWord value = reinterpret_cast<const AtomicWord&>(value_as_t);
+  AtomicWord value = bit_cast<AtomicWord>(value_as_t);
   if (value == 0) {
-    // This should not actually happen, but just to be sure...
     return false;
   }
   SpinLockHolder l(&hooklist_spinlock);
@@ -240,8 +233,7 @@ bool HookList<T>::Remove(T value_as_t) {
   SpinLockHolder l(&hooklist_spinlock);
   AtomicWord hooks_end = base::subtle::Acquire_Load(&priv_end);
   int index = 0;
-  // Note: we need to cast back to T since T may be smaller than AtomicWord.
-  while (index < hooks_end && value_as_t != reinterpret_cast<T>(
+  while (index < hooks_end && value_as_t != bit_cast<T>(
              base::subtle::Acquire_Load(&priv_data[index]))) {
     ++index;
   }
@@ -268,7 +260,7 @@ int HookList<T>::Traverse(T* output_array, int n) const {
   for (int i = 0; i < hooks_end && n > 0; ++i) {
     AtomicWord data = base::subtle::Acquire_Load(&priv_data[i]);
     if (data != 0) {
-      *output_array++ = reinterpret_cast<const T&>(data);
+      *output_array++ = bit_cast<T>(data);
       ++actual_hooks_end;
       --n;
     }
@@ -705,9 +697,7 @@ extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth,
 #if defined(__linux)
 # include "malloc_hook_mmap_linux.h"
 
-// This code doesn't even compile on my freebsd 8.1 (x86_64) system,
-// so comment it out for now.  TODO(csilvers): fix this!
-#elif 0 && defined(__FreeBSD__)
+#elif defined(__FreeBSD__)
 # include "malloc_hook_mmap_freebsd.h"
 
 #else
diff --git a/third_party/tcmalloc/vendor/src/malloc_hook_mmap_freebsd.h b/third_party/tcmalloc/vendor/src/malloc_hook_mmap_freebsd.h
index 4ac2bb3..dae868c 100644
--- a/third_party/tcmalloc/vendor/src/malloc_hook_mmap_freebsd.h
+++ b/third_party/tcmalloc/vendor/src/malloc_hook_mmap_freebsd.h
@@ -40,15 +40,21 @@
 #include <sys/mman.h>
 #include <errno.h>
 
-static inline void* do_mmap(void *start, size_t length,
-                            int prot, int flags,
-                            int fd, off_t offset) __THROW {
-  return (void *)syscall(SYS_mmap, start, length, prot, flags, fd, offset);
-}
-
 // Make sure mmap doesn't get #define'd away by <sys/mman.h>
 #undef mmap
 
+// According to the FreeBSD documentation, use syscall if you do not
+// need 64-bit alignment otherwise use __syscall. Indeed, syscall
+// doesn't work correctly in most situations on 64-bit. It's return
+// type is 'int' so for things like SYS_mmap, it actually truncates
+// the returned address to 32-bits.
+#if defined(__amd64__) || defined(__x86_64__)
+# define MALLOC_HOOK_SYSCALL __syscall
+#else
+# define MALLOC_HOOK_SYSCALL syscall
+#endif
+
+
 extern "C" {
   void* mmap(void *start, size_t length,int prot, int flags,
              int fd, off_t offset) __THROW
@@ -59,20 +65,47 @@ extern "C" {
     ATTRIBUTE_SECTION(malloc_hook);
 }
 
+static inline void* do_mmap(void *start, size_t length,
+                            int prot, int flags,
+                            int fd, off_t offset) __THROW {
+  return (void *)MALLOC_HOOK_SYSCALL(SYS_mmap,
+                                     start, length, prot, flags, fd, offset);
+}
+
 static inline void* do_sbrk(intptr_t increment) {
-  void* curbrk;
+  void* curbrk = 0;
 
+#if defined(__x86_64__) || defined(__amd64__)
+# ifdef PIC
+  __asm__ __volatile__(
+      "movq .curbrk@GOTPCREL(%%rip), %%rdx;"
+      "movq (%%rdx), %%rax;"
+      "movq %%rax, %0;"
+      : "=r" (curbrk)
+      :: "%rdx", "%rax");
+# else
+  __asm__ __volatile__(
+      "movq .curbrk(%%rip), %%rax;"
+      "movq %%rax, %0;"
+      : "=r" (curbrk)
+      :: "%rax");
+# endif
+#else
   __asm__ __volatile__(
       "movl .curbrk, %%eax;"
-      "movl %%eax, %0"
+      "movl %%eax, %0;"
       : "=r" (curbrk)
       :: "%eax");
+#endif
+
+  if (increment == 0) {
+    return curbrk;
+  }
 
   char* prevbrk = static_cast<char*>(curbrk);
   void* newbrk = prevbrk + increment;
 
   if (brk(newbrk) == -1) {
-    assert(0);
     return reinterpret_cast<void*>(static_cast<intptr_t>(-1));
   }
 
@@ -83,15 +116,24 @@ static inline void* do_sbrk(intptr_t increment) {
 extern "C" void* mmap(void *start, size_t length, int prot, int flags,
                       int fd, off_t offset) __THROW {
   MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset);
-  void *result = do_mmap(start, length, prot, flags, fd,
-                         static_cast<size_t>(offset)); // avoid sign extension
+  void *result;
+  if (!MallocHook::InvokeMmapReplacement(
+          start, length, prot, flags, fd, offset, &result)) {
+    result = do_mmap(start, length, prot, flags, fd,
+                       static_cast<size_t>(offset)); // avoid sign extension
+  }
   MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset);
   return result;
 }
 
 extern "C" int munmap(void* start, size_t length) __THROW {
   MallocHook::InvokeMunmapHook(start, length);
-  return syscall(SYS_munmap, start, length);
+  int result;
+  if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) {
+    result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length);
+  }
+
+  return result;
 }
 
 extern "C" void* sbrk(intptr_t increment) __THROW {
@@ -103,9 +145,21 @@ extern "C" void* sbrk(intptr_t increment) __THROW {
 
 /*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot,
                                          int flags, int fd, off_t offset) {
-  return mmap(start, length, prot, flags, fd, offset);
+  void* result;
+  if (!MallocHook::InvokeMmapReplacement(
+	  start, length, prot, flags, fd, offset, &result)) {
+    result = do_mmap(start, length, prot, flags, fd, offset);
+  }
+
+  return result;
 }
 
 /*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) {
-  return munmap(start, length);
+  int result;
+  if (!MallocHook::InvokeMunmapReplacement(start, length, &result)) {
+    result = MALLOC_HOOK_SYSCALL(SYS_munmap, start, length);
+  }
+  return result;
 }
+
+#undef MALLOC_HOOK_SYSCALL
diff --git a/third_party/tcmalloc/vendor/src/malloc_hook_mmap_linux.h b/third_party/tcmalloc/vendor/src/malloc_hook_mmap_linux.h
index 0026589..dc79f8b 100644
--- a/third_party/tcmalloc/vendor/src/malloc_hook_mmap_linux.h
+++ b/third_party/tcmalloc/vendor/src/malloc_hook_mmap_linux.h
@@ -48,7 +48,21 @@
 // The x86-32 case and the x86-64 case differ:
 // 32b has a mmap2() syscall, 64b does not.
 // 64b and 32b have different calling conventions for mmap().
-#if defined(__i386__) || defined(__PPC__)
+
+// I test for 64-bit first so I don't have to do things like
+// '#if (defined(__mips__) && !defined(__MIPS64__))' as a mips32 check.
+#if defined(__x86_64__) || defined(__PPC64__) || (defined(_MIPS_SIM) && _MIPS_SIM == _ABI64)
+
+static inline void* do_mmap64(void *start, size_t length,
+                              int prot, int flags,
+                              int fd, __off64_t offset) __THROW {
+  return sys_mmap(start, length, prot, flags, fd, offset);
+}
+
+#define MALLOC_HOOK_HAVE_DO_MMAP64 1
+
+#elif defined(__i386__) || defined(__PPC__) || defined(__mips__) || \
+      defined(__arm__)
 
 static inline void* do_mmap64(void *start, size_t length,
                               int prot, int flags,
@@ -85,29 +99,26 @@ static inline void* do_mmap64(void *start, size_t length,
     goto out;
   }
 
+#ifdef __NR_mmap
   {
     // Fall back to old 32-bit offset mmap() call
     // Old syscall interface cannot handle six args, so pass in an array
-    int32 args[6] = { (int32) start, length, prot, flags, fd, (off_t) offset };
+    int32 args[6] = { (int32) start, (int32) length, prot, flags, fd,
+                      (off_t) offset };
     result = (void *)syscall(SYS_mmap, args);
   }
+#else
+  // Some Linux ports like ARM EABI Linux has no mmap, just mmap2.
+  result = MAP_FAILED;
+#endif
+
  out:
   return result;
 }
 
 #define MALLOC_HOOK_HAVE_DO_MMAP64 1
 
-#elif defined(__x86_64__) || defined(__PPC64__)  // #if defined(__i386__) || ...
-
-static inline void* do_mmap64(void *start, size_t length,
-                              int prot, int flags,
-                              int fd, __off64_t offset) __THROW {
-  return (void *)syscall(SYS_mmap, start, length, prot, flags, fd, offset);
-}
-
-#define MALLOC_HOOK_HAVE_DO_MMAP64 1
-
-#endif  // #if defined(__i386__) || defined(__PPC__)
+#endif  // #if defined(__x86_64__)
 
 
 #ifdef MALLOC_HOOK_HAVE_DO_MMAP64
diff --git a/third_party/tcmalloc/vendor/src/memfs_malloc.cc b/third_party/tcmalloc/vendor/src/memfs_malloc.cc
index 0bb27d7..b59f6d9 100644
--- a/third_party/tcmalloc/vendor/src/memfs_malloc.cc
+++ b/third_party/tcmalloc/vendor/src/memfs_malloc.cc
@@ -54,12 +54,16 @@
 #include <new>                          // for operator new
 #include <string>
 
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 #include "base/basictypes.h"
 #include "base/googleinit.h"
 #include "base/sysinfo.h"
 #include "internal_logging.h"
 
+// TODO(sanjay): Move the code below into the tcmalloc namespace
+using tcmalloc::kLog;
+using tcmalloc::kCrash;
+using tcmalloc::Log;
 using std::string;
 
 DEFINE_string(memfs_malloc_path, EnvToString("TCMALLOC_MEMFS_MALLOC_PATH", ""),
@@ -135,11 +139,11 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size,
   if (result != NULL) {
     return result;
   }
-  TCMalloc_MESSAGE(__FILE__, __LINE__,
-                   "HugetlbSysAllocator: failed_=%d allocated=%"PRId64"\n",
-                   failed_, static_cast<int64_t>(hugetlb_base_));
+  Log(kLog, __FILE__, __LINE__,
+      "HugetlbSysAllocator: (failed, allocated)", failed_, hugetlb_base_);
   if (FLAGS_memfs_malloc_abort_on_fail) {
-    CRASH("memfs_malloc_abort_on_fail is set\n");
+    Log(kCrash, __FILE__, __LINE__,
+        "memfs_malloc_abort_on_fail is set");
   }
   return fallback_->Alloc(size, actual_size, alignment);
 }
@@ -157,13 +161,12 @@ void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size,
   if (limit > 0 && hugetlb_base_ + size + extra > limit) {
     // Disable the allocator when there's less than one page left.
     if (limit - hugetlb_base_ < big_page_size_) {
-      TCMalloc_MESSAGE(__FILE__, __LINE__, "reached memfs_malloc_limit_mb\n");
+      Log(kLog, __FILE__, __LINE__, "reached memfs_malloc_limit_mb");
       failed_ = true;
     }
     else {
-      TCMalloc_MESSAGE(__FILE__, __LINE__, "alloc size=%"PRIuS
-                       " too large while %"PRId64" bytes remain\n",
-                       size, static_cast<int64_t>(limit - hugetlb_base_));
+      Log(kLog, __FILE__, __LINE__,
+          "alloc too large (size, bytes left)", size, limit-hugetlb_base_);
     }
     return NULL;
   }
@@ -172,8 +175,8 @@ void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size,
   // hugetlbfs returns EINVAL for ftruncate.
   int ret = ftruncate(hugetlb_fd_, hugetlb_base_ + size + extra);
   if (ret != 0 && errno != EINVAL) {
-    TCMalloc_MESSAGE(__FILE__, __LINE__, "ftruncate failed: %s\n",
-                     strerror(errno));
+    Log(kLog, __FILE__, __LINE__,
+        "ftruncate failed", strerror(errno));
     failed_ = true;
     return NULL;
   }
@@ -188,8 +191,8 @@ void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size,
                 hugetlb_fd_, hugetlb_base_);
   if (result == reinterpret_cast<void*>(MAP_FAILED)) {
     if (!FLAGS_memfs_malloc_ignore_mmap_fail) {
-      TCMalloc_MESSAGE(__FILE__, __LINE__, "mmap of size %"PRIuS" failed: %s\n",
-                       size + extra, strerror(errno));
+      Log(kLog, __FILE__, __LINE__,
+          "mmap failed (size, error)", size + extra, strerror(errno));
       failed_ = true;
     }
     return NULL;
@@ -213,33 +216,34 @@ void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size,
 
 bool HugetlbSysAllocator::Initialize() {
   char path[PATH_MAX];
-  int rc = snprintf(path, sizeof(path), "%s.XXXXXX",
-                    FLAGS_memfs_malloc_path.c_str());
-  if (rc < 0 || rc >= sizeof(path)) {
-    CRASH("XX fatal: memfs_malloc_path too long\n");
+  const int pathlen = FLAGS_memfs_malloc_path.size();
+  if (pathlen + 8 > sizeof(path)) {
+    Log(kCrash, __FILE__, __LINE__, "XX fatal: memfs_malloc_path too long");
     return false;
   }
+  memcpy(path, FLAGS_memfs_malloc_path.data(), pathlen);
+  memcpy(path + pathlen, ".XXXXXX", 8);  // Also copies terminating \0
 
   int hugetlb_fd = mkstemp(path);
   if (hugetlb_fd == -1) {
-    TCMalloc_MESSAGE(__FILE__, __LINE__,
-                     "warning: unable to create memfs_malloc_path %s: %s\n",
-                     path, strerror(errno));
+    Log(kLog, __FILE__, __LINE__,
+        "warning: unable to create memfs_malloc_path",
+        path, strerror(errno));
     return false;
   }
 
   // Cleanup memory on process exit
   if (unlink(path) == -1) {
-    CRASH("fatal: error unlinking memfs_malloc_path %s: %s\n",
-          path, strerror(errno));
+    Log(kCrash, __FILE__, __LINE__,
+        "fatal: error unlinking memfs_malloc_path", path, strerror(errno));
     return false;
   }
 
   // Use fstatfs to figure out the default page size for memfs
   struct statfs sfs;
   if (fstatfs(hugetlb_fd, &sfs) == -1) {
-    CRASH("fatal: error fstatfs of memfs_malloc_path: %s\n",
-          strerror(errno));
+    Log(kCrash, __FILE__, __LINE__,
+        "fatal: error fstatfs of memfs_malloc_path", strerror(errno));
     return false;
   }
   int64 page_size = sfs.f_bsize;
diff --git a/third_party/tcmalloc/vendor/src/memory_region_map.cc b/third_party/tcmalloc/vendor/src/memory_region_map.cc
index 3bf8983..1a81172 100644
--- a/third_party/tcmalloc/vendor/src/memory_region_map.cc
+++ b/third_party/tcmalloc/vendor/src/memory_region_map.cc
@@ -122,8 +122,8 @@
 #include "base/low_level_alloc.h"
 #include "malloc_hook-inl.h"
 
-#include <google/stacktrace.h>
-#include <google/malloc_hook.h>
+#include <gperftools/stacktrace.h>
+#include <gperftools/malloc_hook.h>
 
 // MREMAP_FIXED is a linux extension.  How it's used in this file,
 // setting it to 0 is equivalent to saying, "This feature isn't
@@ -145,6 +145,8 @@ SpinLock MemoryRegionMap::owner_lock_(  // ACQUIRED_AFTER(lock_)
     SpinLock::LINKER_INITIALIZED);
 int MemoryRegionMap::recursion_count_ = 0;  // GUARDED_BY(owner_lock_)
 pthread_t MemoryRegionMap::lock_owner_tid_;  // GUARDED_BY(owner_lock_)
+int64 MemoryRegionMap::map_size_ = 0;
+int64 MemoryRegionMap::unmap_size_ = 0;
 
 // ========================================================================= //
 
@@ -462,6 +464,7 @@ void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) {
               reinterpret_cast<void*>(region.caller()));
   // Note: none of the above allocates memory.
   Lock();  // recursively lock
+  map_size_ += size;
   InsertRegionLocked(region);
     // This will (eventually) allocate storage for and copy over the stack data
     // from region.call_stack_data_ that is pointed by region.call_stack().
@@ -573,6 +576,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
               reinterpret_cast<void*>(end_addr),
               regions_->size());
   if (VLOG_IS_ON(12))  LogAllLocked();
+  unmap_size_ += size;
   Unlock();
 }
 
diff --git a/third_party/tcmalloc/vendor/src/memory_region_map.h b/third_party/tcmalloc/vendor/src/memory_region_map.h
index 739514c..988ea70 100644
--- a/third_party/tcmalloc/vendor/src/memory_region_map.h
+++ b/third_party/tcmalloc/vendor/src/memory_region_map.h
@@ -252,6 +252,10 @@ class MemoryRegionMap {
   static RegionIterator BeginRegionLocked();
   static RegionIterator EndRegionLocked();
 
+  // Return the accumulated sizes of mapped and unmapped regions.
+  static int64 MapSize() { return map_size_; }
+  static int64 UnmapSize() { return unmap_size_; }
+
   // Effectively private type from our .cc =================================
   // public to let us declare global objects:
   union RegionSetRep;
@@ -286,6 +290,11 @@ class MemoryRegionMap {
   // The thread id of the thread that's inside the recursive lock.
   static pthread_t lock_owner_tid_;
 
+  // Total size of all mapped pages so far
+  static int64 map_size_;
+  // Total size of all unmapped pages so far
+  static int64 unmap_size_;
+
   // helpers ==================================================================
 
   // Helper for FindRegion and FindAndMarkStackRegion:
diff --git a/third_party/tcmalloc/vendor/src/page_heap.cc b/third_party/tcmalloc/vendor/src/page_heap.cc
index 248e462..18bdb94 100644
--- a/third_party/tcmalloc/vendor/src/page_heap.cc
+++ b/third_party/tcmalloc/vendor/src/page_heap.cc
@@ -34,7 +34,7 @@
 #ifdef HAVE_INTTYPES_H
 #include <inttypes.h>                   // for PRIuPTR
 #endif
-#include <google/malloc_extension.h>      // for MallocRange, etc
+#include <gperftools/malloc_extension.h>      // for MallocRange, etc
 #include "base/basictypes.h"
 #include "base/commandlineflags.h"
 #include "internal_logging.h"  // for ASSERT, TCMalloc_Printer, etc
@@ -342,103 +342,25 @@ void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
   }
 }
 
-static double MiB(uint64_t bytes) {
-  return bytes / 1048576.0;
-}
-
-static double PagesToMiB(uint64_t pages) {
-  return (pages << kPageShift) / 1048576.0;
-}
-
-void PageHeap::GetClassSizes(int64 class_sizes_normal[kMaxPages],
-                             int64 class_sizes_returned[kMaxPages],
-                             int64* normal_pages_in_spans,
-                             int64* returned_pages_in_spans) {
-
+void PageHeap::GetSmallSpanStats(SmallSpanStats* result) {
   for (int s = 0; s < kMaxPages; s++) {
-    if (class_sizes_normal != NULL) {
-      class_sizes_normal[s] = DLL_Length(&free_[s].normal);
-    }
-    if (class_sizes_returned != NULL) {
-      class_sizes_returned[s] = DLL_Length(&free_[s].returned);
-    }
-  }
-
-  if (normal_pages_in_spans != NULL) {
-    *normal_pages_in_spans = 0;
-    for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
-      *normal_pages_in_spans += s->length;;
-    }
-  }
-
-  if (returned_pages_in_spans != NULL) {
-    *returned_pages_in_spans = 0;
-    for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
-      *returned_pages_in_spans += s->length;
-    }
+    result->normal_length[s] = DLL_Length(&free_[s].normal);
+    result->returned_length[s] = DLL_Length(&free_[s].returned);
   }
 }
 
-void PageHeap::Dump(TCMalloc_Printer* out) {
-  int nonempty_sizes = 0;
-  for (int s = 0; s < kMaxPages; s++) {
-    if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
-      nonempty_sizes++;
-    }
-  }
-  out->printf("------------------------------------------------\n");
-  out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n",
-              nonempty_sizes, MiB(stats_.free_bytes),
-              MiB(stats_.unmapped_bytes));
-  out->printf("------------------------------------------------\n");
-  uint64_t total_normal = 0;
-  uint64_t total_returned = 0;
-  for (int s = 0; s < kMaxPages; s++) {
-    const int n_length = DLL_Length(&free_[s].normal);
-    const int r_length = DLL_Length(&free_[s].returned);
-    if (n_length + r_length > 0) {
-      uint64_t n_pages = s * n_length;
-      uint64_t r_pages = s * r_length;
-      total_normal += n_pages;
-      total_returned += r_pages;
-      out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
-                  "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
-                  s,
-                  (n_length + r_length),
-                  PagesToMiB(n_pages + r_pages),
-                  PagesToMiB(total_normal + total_returned),
-                  PagesToMiB(r_pages),
-                  PagesToMiB(total_returned));
-    }
-  }
-
-  uint64_t n_pages = 0;
-  uint64_t r_pages = 0;
-  int n_spans = 0;
-  int r_spans = 0;
-  out->printf("Normal large spans:\n");
+void PageHeap::GetLargeSpanStats(LargeSpanStats* result) {
+  result->spans = 0;
+  result->normal_pages = 0;
+  result->returned_pages = 0;
   for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
-    out->printf("   [ %6" PRIuPTR " pages ] %6.1f MiB\n",
-                s->length, PagesToMiB(s->length));
-    n_pages += s->length;
-    n_spans++;
+    result->normal_pages += s->length;;
+    result->spans++;
   }
-  out->printf("Unmapped large spans:\n");
   for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
-    out->printf("   [ %6" PRIuPTR " pages ] %6.1f MiB\n",
-                s->length, PagesToMiB(s->length));
-    r_pages += s->length;
-    r_spans++;
+    result->returned_pages += s->length;
+    result->spans++;
   }
-  total_normal += n_pages;
-  total_returned += r_pages;
-  out->printf(">255   large * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
-              "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
-              (n_spans + r_spans),
-              PagesToMiB(n_pages + r_pages),
-              PagesToMiB(total_normal + total_returned),
-              PagesToMiB(r_pages),
-              PagesToMiB(total_returned));
 }
 
 bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) {
diff --git a/third_party/tcmalloc/vendor/src/page_heap.h b/third_party/tcmalloc/vendor/src/page_heap.h
index 603e65a..3718801 100644
--- a/third_party/tcmalloc/vendor/src/page_heap.h
+++ b/third_party/tcmalloc/vendor/src/page_heap.h
@@ -38,7 +38,7 @@
 #ifdef HAVE_STDINT_H
 #include <stdint.h>                     // for uint64_t, int64_t, uint16_t
 #endif
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 #include "base/basictypes.h"
 #include "common.h"
 #include "packed-cache-inl.h"
@@ -61,10 +61,9 @@
   // We use #define so code compiles even if you #include stacktrace.h somehow.
 # define GetStackTrace(stack, depth, skip)  (0)
 #else
-# include <google/stacktrace.h>
+# include <gperftools/stacktrace.h>
 #endif
 
-class TCMalloc_Printer;
 namespace base {
 struct MallocRange;
 }
@@ -137,9 +136,6 @@ class PERFTOOLS_DLL_DECL PageHeap {
     return reinterpret_cast<Span*>(pagemap_.get(p));
   }
 
-  // Dump state to stderr
-  void Dump(TCMalloc_Printer* out);
-
   // If this page heap is managing a range with starting page # >= start,
   // store info about the range in *r and return true.  Else return false.
   bool GetNextRange(PageID start, base::MallocRange* r);
@@ -152,10 +148,22 @@ class PERFTOOLS_DLL_DECL PageHeap {
     uint64_t unmapped_bytes;  // Total bytes on returned freelists
   };
   inline Stats stats() const { return stats_; }
-  void GetClassSizes(int64 class_sizes_normal[kMaxPages],
-                     int64 class_sizes_returned[kMaxPages],
-                     int64* normal_pages_in_spans,
-                     int64* returned_pages_in_spans);
+
+  struct SmallSpanStats {
+    // For each free list of small spans, the length (in spans) of the
+    // normal and returned free lists for that size.
+    int64 normal_length[kMaxPages];
+    int64 returned_length[kMaxPages];
+  };
+  void GetSmallSpanStats(SmallSpanStats* result);
+
+  // Stats for free large spans (i.e., spans with more than kMaxPages pages).
+  struct LargeSpanStats {
+    int64 spans;           // Number of such spans
+    int64 normal_pages;    // Combined page length of normal large spans
+    int64 returned_pages;  // Combined page length of unmapped spans
+  };
+  void GetLargeSpanStats(LargeSpanStats* result);
 
   bool Check();
   // Like Check() but does some more comprehensive checking.
diff --git a/third_party/tcmalloc/vendor/src/page_heap_allocator.h b/third_party/tcmalloc/vendor/src/page_heap_allocator.h
index bcff8b3..d835282 100644
--- a/third_party/tcmalloc/vendor/src/page_heap_allocator.h
+++ b/third_party/tcmalloc/vendor/src/page_heap_allocator.h
@@ -36,7 +36,7 @@
 #include <stddef.h>                     // for NULL, size_t
 
 #include "common.h"            // for MetaDataAlloc
-#include "internal_logging.h"  // for ASSERT, CRASH
+#include "internal_logging.h"  // for ASSERT
 
 namespace tcmalloc {
 
@@ -70,9 +70,10 @@ class PageHeapAllocator {
         // suitably aligned memory.
         free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
         if (free_area_ == NULL) {
-          CRASH("FATAL ERROR: Out of memory trying to allocate internal "
-                "tcmalloc data (%d bytes, object-size %d)\n",
-                kAllocIncrement, static_cast<int>(sizeof(T)));
+          Log(kCrash, __FILE__, __LINE__,
+              "FATAL ERROR: Out of memory trying to allocate internal "
+              "tcmalloc data (bytes, object-size)",
+              kAllocIncrement, sizeof(T));
         }
         free_avail_ = kAllocIncrement;
       }
diff --git a/third_party/tcmalloc/vendor/src/pprof b/third_party/tcmalloc/vendor/src/pprof
index 0ce5df36..727eb43 100644
--- a/third_party/tcmalloc/vendor/src/pprof
+++ b/third_party/tcmalloc/vendor/src/pprof
@@ -72,7 +72,7 @@ use strict;
 use warnings;
 use Getopt::Long;
 
-my $PPROF_VERSION = "1.8";
+my $PPROF_VERSION = "2.0";
 
 # These are the object tools we use which can come from a
 # user-specified location using --tools, from the PPROF_TOOLS
@@ -87,13 +87,14 @@ my %obj_tool_map = (
   #"addr2line_pdb" => "addr2line-pdb",                                # ditto
   #"otool" => "otool",         # equivalent of objdump on OS X
 );
-my $DOT = "dot";          # leave non-absolute, since it may be in /usr/local
-my $GV = "gv";
-my $EVINCE = "evince";    # could also be xpdf or perhaps acroread
-my $KCACHEGRIND = "kcachegrind";
-my $PS2PDF = "ps2pdf";
+# NOTE: these are lists, so you can put in commandline flags if you want.
+my @DOT = ("dot");          # leave non-absolute, since it may be in /usr/local
+my @GV = ("gv");
+my @EVINCE = ("evince");    # could also be xpdf or perhaps acroread
+my @KCACHEGRIND = ("kcachegrind");
+my @PS2PDF = ("ps2pdf");
 # These are used for dynamic profiles
-my $URL_FETCHER = "curl -s";
+my @URL_FETCHER = ("curl", "-s");
 
 # These are the web pages that servers need to support for dynamic profiles
 my $HEAP_PAGE = "/pprof/heap";
@@ -104,7 +105,10 @@ my $GROWTH_PAGE = "/pprof/growth";
 my $CONTENTION_PAGE = "/pprof/contention";
 my $WALL_PAGE = "/pprof/wall(?:\\?.*)?";  # accepts options like namefilter
 my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?";
-my $CENSUSPROFILE_PAGE = "/pprof/censusprofile";  # must support "?seconds=#"
+my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param
+                                                       # "?seconds=#",
+                                                       # "?tags_regexp=#" and
+                                                       # "?type=#".
 my $SYMBOL_PAGE = "/pprof/symbol";     # must support symbol lookup via POST
 my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
 
@@ -168,7 +172,7 @@ pprof --symbols <program>
    For more help with querying remote servers, including how to add the
    necessary server-side support code, see this filename (or one like it):
 
-   /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html
+   /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html
 
 Options:
    --cum               Sort by cumulative data
@@ -266,7 +270,7 @@ EOF
 
 sub version_string {
   return <<EOF
-pprof (part of google-perftools $PPROF_VERSION)
+pprof (part of gperftools $PPROF_VERSION)
 
 Copyright 1998-2007 Google Inc.
 
@@ -498,11 +502,13 @@ sub Init() {
   @main::pfile_args = ();
 
   # Remote profiling without a binary (using $SYMBOL_PAGE instead)
-  if (IsProfileURL($ARGV[0])) {
-    $main::use_symbol_page = 1;
-  } elsif (IsSymbolizedProfileFile($ARGV[0])) {
-    $main::use_symbolized_profile = 1;
-    $main::prog = $UNKNOWN_BINARY;  # will be set later from the profile file
+  if (@ARGV > 0) {
+    if (IsProfileURL($ARGV[0])) {
+      $main::use_symbol_page = 1;
+    } elsif (IsSymbolizedProfileFile($ARGV[0])) {
+      $main::use_symbolized_profile = 1;
+      $main::prog = $UNKNOWN_BINARY;  # will be set later from the profile file
+    }
   }
 
   if ($main::use_symbol_page || $main::use_symbolized_profile) {
@@ -662,7 +668,7 @@ sub Main() {
         if ($main::opt_gv) {
           RunGV(TempName($main::next_tmpfile, "ps"), "");
         } elsif ($main::opt_evince) {
-	  RunEvince(TempName($main::next_tmpfile, "pdf"), "");
+          RunEvince(TempName($main::next_tmpfile, "pdf"), "");
         } elsif ($main::opt_web) {
           my $tmp = TempName($main::next_tmpfile, "svg");
           RunWeb($tmp);
@@ -711,24 +717,25 @@ sub ReadlineMightFail {
 sub RunGV {
   my $fname = shift;
   my $bg = shift;       # "" or " &" if we should run in background
-  if (!system("$GV --version >$dev_null 2>&1")) {
+  if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) {
     # Options using double dash are supported by this gv version.
     # Also, turn on noantialias to better handle bug in gv for
     # postscript files with large dimensions.
     # TODO: Maybe we should not pass the --noantialias flag
     # if the gv version is known to work properly without the flag.
-    system("$GV --scale=$main::opt_scale --noantialias " . $fname . $bg);
+    system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname)
+           . $bg);
   } else {
     # Old gv version - only supports options that use single dash.
-    print STDERR "$GV -scale $main::opt_scale\n";
-    system("$GV -scale $main::opt_scale " . $fname . $bg);
+    print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n";
+    system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg);
   }
 }
 
 sub RunEvince {
   my $fname = shift;
   my $bg = shift;       # "" or " &" if we should run in background
-  system("$EVINCE " . $fname . $bg);
+  system(ShellEscape(@EVINCE, $fname) . $bg);
 }
 
 sub RunWeb {
@@ -762,8 +769,8 @@ sub RunWeb {
 sub RunKcachegrind {
   my $fname = shift;
   my $bg = shift;       # "" or " &" if we should run in background
-  print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n";
-  system("$KCACHEGRIND " . $fname . $bg);
+  print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n";
+  system(ShellEscape(@KCACHEGRIND, $fname) . $bg);
 }
 
 
@@ -1032,8 +1039,8 @@ parameters will be ignored.
 
 Further pprof details are available at this location (or one similar):
 
- /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html
- /usr/doc/google-perftools-$PPROF_VERSION/heap_profiler.html
+ /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html
+ /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html
 
 ENDOFHELP
 }
@@ -1187,17 +1194,42 @@ sub PrintText {
   }
 }
 
+# Callgrind format has a compression for repeated function and file
+# names.  You show the name the first time, and just use its number
+# subsequently.  This can cut down the file to about a third or a
+# quarter of its uncompressed size.  $key and $val are the key/value
+# pair that would normally be printed by callgrind; $map is a map from
+# value to number.
+sub CompressedCGName {
+  my($key, $val, $map) = @_;
+  my $idx = $map->{$val};
+  # For very short keys, providing an index hurts rather than helps.
+  if (length($val) <= 3) {
+    return "$key=$val\n";
+  } elsif (defined($idx)) {
+    return "$key=($idx)\n";
+  } else {
+    # scalar(keys $map) gives the number of items in the map.
+    $idx = scalar(keys(%{$map})) + 1;
+    $map->{$val} = $idx;
+    return "$key=($idx) $val\n";
+  }
+}
+
 # Print the call graph in a way that's suiteable for callgrind.
 sub PrintCallgrind {
   my $calls = shift;
   my $filename;
+  my %filename_to_index_map;
+  my %fnname_to_index_map;
+
   if ($main::opt_interactive) {
     $filename = shift;
     print STDERR "Writing callgrind file to '$filename'.\n"
   } else {
     $filename = "&STDOUT";
   }
-  open(CG, ">".$filename );
+  open(CG, ">$filename");
   printf CG ("events: Hits\n\n");
   foreach my $call ( map { $_->[0] }
                      sort { $a->[1] cmp $b ->[1] ||
@@ -1211,11 +1243,14 @@ sub PrintCallgrind {
          $callee_file, $callee_line, $callee_function ) =
        ( $1, $2, $3, $5, $6, $7 );
 
-      
-    printf CG ("fl=$caller_file\nfn=$caller_function\n");
+    # TODO(csilvers): for better compression, collect all the
+    # caller/callee_files and functions first, before printing
+    # anything, and only compress those referenced more than once.
+    printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map);
+    printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map);
     if (defined $6) {
-      printf CG ("cfl=$callee_file\n");
-      printf CG ("cfn=$callee_function\n");
+      printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map);
+      printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map);
       printf CG ("calls=$count $callee_line\n");
     }
     printf CG ("$caller_line $count\n\n");
@@ -1264,10 +1299,10 @@ sub Disassemble {
   my $end_addr = shift;
 
   my $objdump = $obj_tool_map{"objdump"};
-  my $cmd = sprintf("$objdump -C -d -l --no-show-raw-insn " .
-                    "--start-address=0x$start_addr " .
-                    "--stop-address=0x$end_addr $prog");
-  open(OBJDUMP, "$cmd |") || error("$objdump: $!\n");
+  my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn",
+                        "--start-address=0x$start_addr",
+                        "--stop-address=0x$end_addr", $prog);
+  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
   my @result = ();
   my $filename = "";
   my $linenumber = -1;
@@ -1972,19 +2007,24 @@ sub PrintDot {
 
   # Open DOT output file
   my $output;
+  my $escaped_dot = ShellEscape(@DOT);
+  my $escaped_ps2pdf = ShellEscape(@PS2PDF);
   if ($main::opt_gv) {
-    $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps");
+    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps"));
+    $output = "| $escaped_dot -Tps2 >$escaped_outfile";
   } elsif ($main::opt_evince) {
-    $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf");
+    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf"));
+    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile";
   } elsif ($main::opt_ps) {
-    $output = "| $DOT -Tps2";
+    $output = "| $escaped_dot -Tps2";
   } elsif ($main::opt_pdf) {
-    $output = "| $DOT -Tps2 | $PS2PDF - -";
+    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -";
   } elsif ($main::opt_web || $main::opt_svg) {
     # We need to post-process the SVG, so write to a temporary file always.
-    $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg");
+    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg"));
+    $output = "| $escaped_dot -Tsvg >$escaped_outfile";
   } elsif ($main::opt_gif) {
-    $output = "| $DOT -Tgif";
+    $output = "| $escaped_dot -Tgif";
   } else {
     $output = ">&STDOUT";
   }
@@ -2065,10 +2105,12 @@ sub PrintDot {
   # Get edges and counts per edge
   my %edge = ();
   my $n;
+  my $fullname_to_shortname_map = {};
+  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
   foreach my $k (keys(%{$raw})) {
     # TODO: omit low %age edges
     $n = $raw->{$k};
-    my @translated = TranslateStack($symbols, $k);
+    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
     for (my $i = 1; $i <= $#translated; $i++) {
       my $src = $translated[$i];
       my $dst = $translated[$i-1];
@@ -2452,6 +2494,50 @@ function handleMouseUp(evt) {
 EOF
 }
 
+# Provides a map from fullname to shortname for cases where the
+# shortname is ambiguous.  The symlist has both the fullname and
+# shortname for all symbols, which is usually fine, but sometimes --
+# such as overloaded functions -- two different fullnames can map to
+# the same shortname.  In that case, we use the address of the
+# function to disambiguate the two.  This function fills in a map that
+# maps fullnames to modified shortnames in such cases.  If a fullname
+# is not present in the map, the 'normal' shortname provided by the
+# symlist is the appropriate one to use.
+sub FillFullnameToShortnameMap {
+  my $symbols = shift;
+  my $fullname_to_shortname_map = shift;
+  my $shortnames_seen_once = {};
+  my $shortnames_seen_more_than_once = {};
+
+  foreach my $symlist (values(%{$symbols})) {
+    # TODO(csilvers): deal with inlined symbols too.
+    my $shortname = $symlist->[0];
+    my $fullname = $symlist->[2];
+    if ($fullname !~ /<[0-9a-fA-F]+>$/) {  # fullname doesn't end in an address
+      next;       # the only collisions we care about are when addresses differ
+    }
+    if (defined($shortnames_seen_once->{$shortname}) &&
+        $shortnames_seen_once->{$shortname} ne $fullname) {
+      $shortnames_seen_more_than_once->{$shortname} = 1;
+    } else {
+      $shortnames_seen_once->{$shortname} = $fullname;
+    }
+  }
+
+  foreach my $symlist (values(%{$symbols})) {
+    my $shortname = $symlist->[0];
+    my $fullname = $symlist->[2];
+    # TODO(csilvers): take in a list of addresses we care about, and only
+    # store in the map if $symlist->[1] is in that list.  Saves space.
+    next if defined($fullname_to_shortname_map->{$fullname});
+    if (defined($shortnames_seen_more_than_once->{$shortname})) {
+      if ($fullname =~ /<0*([^>]*)>$/) {   # fullname has address at end of it
+        $fullname_to_shortname_map->{$fullname} = "$shortname\@$1";
+      }
+    }
+  }
+}
+
 # Return a small number that identifies the argument.
 # Multiple calls with the same argument will return the same number.
 # Calls with different arguments will return different numbers.
@@ -2468,6 +2554,7 @@ sub ShortIdFor {
 # Translate a stack of addresses into a stack of symbols
 sub TranslateStack {
   my $symbols = shift;
+  my $fullname_to_shortname_map = shift;
   my $k = shift;
 
   my @addrs = split(/\n/, $k);
@@ -2499,6 +2586,9 @@ sub TranslateStack {
       my $func = $symlist->[$j-2];
       my $fileline = $symlist->[$j-1];
       my $fullfunc = $symlist->[$j];
+      if (defined($fullname_to_shortname_map->{$fullfunc})) {
+        $func = $fullname_to_shortname_map->{$fullfunc};
+      }
       if ($j > 2) {
         $func = "$func (inline)";
       }
@@ -2751,13 +2841,13 @@ sub RemoveUninterestingFrames {
                       '__builtin_vec_new',
                       'operator new',
                       'operator new[]',
-		      # The entry to our memory-allocation routines on OS X
-		      'malloc_zone_malloc',
-		      'malloc_zone_calloc',
-		      'malloc_zone_valloc',
-		      'malloc_zone_realloc',
-		      'malloc_zone_memalign',
-		      'malloc_zone_free',
+                      # The entry to our memory-allocation routines on OS X
+                      'malloc_zone_malloc',
+                      'malloc_zone_calloc',
+                      'malloc_zone_valloc',
+                      'malloc_zone_realloc',
+                      'malloc_zone_memalign',
+                      'malloc_zone_free',
                       # These mark the beginning/end of our custom sections
                       '__start_google_malloc',
                       '__stop_google_malloc',
@@ -2849,9 +2939,11 @@ sub ReduceProfile {
   my $symbols = shift;
   my $profile = shift;
   my $result = {};
+  my $fullname_to_shortname_map = {};
+  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
   foreach my $k (keys(%{$profile})) {
     my $count = $profile->{$k};
-    my @translated = TranslateStack($symbols, $k);
+    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
     my @path = ();
     my %seen = ();
     $seen{''} = 1;      # So that empty keys are skipped
@@ -3058,7 +3150,8 @@ sub AddEntries {
 
 sub CheckSymbolPage {
   my $url = SymbolPageURL();
-  open(SYMBOL, "$URL_FETCHER '$url' |");
+  my $command = ShellEscape(@URL_FETCHER, $url);
+  open(SYMBOL, "$command |") or error($command);
   my $line = <SYMBOL>;
   $line =~ s/\r//g;         # turn windows-looking lines into unix-looking lines
   close(SYMBOL);
@@ -3115,7 +3208,7 @@ sub SymbolPageURL {
 sub FetchProgramName() {
   my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
   my $url = "$baseURL$PROGRAM_NAME_PAGE";
-  my $command_line = "$URL_FETCHER '$url'";
+  my $command_line = ShellEscape(@URL_FETCHER, $url);
   open(CMDLINE, "$command_line |") or error($command_line);
   my $cmdline = <CMDLINE>;
   $cmdline =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
@@ -3132,7 +3225,7 @@ sub FetchProgramName() {
 # curl.  Redirection happens on borg hosts.
 sub ResolveRedirectionForCurl {
   my $url = shift;
-  my $command_line = "$URL_FETCHER --head '$url'";
+  my $command_line = ShellEscape(@URL_FETCHER, "--head", $url);
   open(CMDLINE, "$command_line |") or error($command_line);
   while (<CMDLINE>) {
     s/\r//g;         # turn windows-looking lines into unix-looking lines
@@ -3144,18 +3237,18 @@ sub ResolveRedirectionForCurl {
   return $url;
 }
 
-# Add a timeout flat to URL_FETCHER
+# Add a timeout flat to URL_FETCHER.  Returns a new list.
 sub AddFetchTimeout {
-  my $fetcher = shift;
   my $timeout = shift;
+  my @fetcher = shift;
   if (defined($timeout)) {
-    if ($fetcher =~ m/\bcurl -s/) {
-      $fetcher .= sprintf(" --max-time %d", $timeout);
-    } elsif ($fetcher =~ m/\brpcget\b/) {
-      $fetcher .= sprintf(" --deadline=%d", $timeout);
+    if (join(" ", @fetcher) =~ m/\bcurl -s/) {
+      push(@fetcher, "--max-time", sprintf("%d", $timeout));
+    } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) {
+      push(@fetcher, sprintf("--deadline=%d", $timeout));
     }
   }
-  return $fetcher;
+  return @fetcher;
 }
 
 # Reads a symbol map from the file handle name given as $1, returning
@@ -3215,15 +3308,17 @@ sub FetchSymbols {
     my $url = SymbolPageURL();
 
     my $command_line;
-    if ($URL_FETCHER =~ m/\bcurl -s/) {
+    if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) {
       $url = ResolveRedirectionForCurl($url);
-      $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'";
+      $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym",
+                                  $url);
     } else {
-      $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'";
+      $command_line = (ShellEscape(@URL_FETCHER, "--post", $url)
+                       . " < " . ShellEscape($main::tmpfile_sym));
     }
     # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
-    my $cppfilt = $obj_tool_map{"c++filt"};
-    open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
+    my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"});
+    open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line);
     $symbol_map = ReadSymbols(*SYMBOL{IO});
     close(SYMBOL);
   }
@@ -3239,8 +3334,8 @@ sub FetchSymbols {
     my $shortpc = $pc;
     $shortpc =~ s/^0*//;
     # Each line may have a list of names, which includes the function
-    # and also other functions it has inlined.  They are separated
-    # (in PrintSymbolizedFile), by --, which is illegal in function names.
+    # and also other functions it has inlined.  They are separated (in
+    # PrintSymbolizedProfile), by --, which is illegal in function names.
     my $fullnames;
     if (defined($symbol_map->{$shortpc})) {
       $fullnames = $symbol_map->{$shortpc};
@@ -3318,8 +3413,8 @@ sub FetchDynamicProfile {
       return $real_profile;
     }
 
-    my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
-    my $cmd = "$fetcher '$url' > '$tmp_profile'";
+    my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER);
+    my $cmd = ShellEscape(@fetcher, $url) . " > " . ShellEscape($tmp_profile);
     if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){
       print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n  ${real_profile}\n";
       if ($encourage_patience) {
@@ -3330,7 +3425,7 @@ sub FetchDynamicProfile {
     }
 
     (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
-    (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n");
+    (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n");
     print STDERR "Wrote profile to $real_profile\n";
     $main::collected_profile = $real_profile;
     return $main::collected_profile;
@@ -3444,7 +3539,7 @@ BEGIN {
       my $has_q = 0;
       eval { $has_q = pack("Q", "1") ? 1 : 1; };
       if (!$has_q) {
-	$self->{perl_is_64bit} = 0;
+        $self->{perl_is_64bit} = 0;
       }
       read($self->{file}, $str, 8);
       if (substr($str, 4, 4) eq chr(0)x4) {
@@ -3480,17 +3575,17 @@ BEGIN {
         # TODO(csilvers): if this is a 32-bit perl, the math below
         #    could end up in a too-large int, which perl will promote
         #    to a double, losing necessary precision.  Deal with that.
-	#    Right now, we just die.
-	my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
+        #    Right now, we just die.
+        my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
         if ($self->{unpack_code} eq 'N') {    # big-endian
-	  ($lo, $hi) = ($hi, $lo);
-	}
-	my $value = $lo + $hi * (2**32);
-	if (!$self->{perl_is_64bit} &&   # check value is exactly represented
-	    (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
-	  ::error("Need a 64-bit perl to process this 64-bit profile.\n");
-	}
-	push(@b64_values, $value);
+          ($lo, $hi) = ($hi, $lo);
+        }
+        my $value = $lo + $hi * (2**32);
+        if (!$self->{perl_is_64bit} &&   # check value is exactly represented
+            (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
+          ::error("Need a 64-bit perl to process this 64-bit profile.\n");
+        }
+        push(@b64_values, $value);
       }
       @$slots = @b64_values;
     }
@@ -3618,7 +3713,7 @@ sub ReadProfile {
     if (!$main::use_symbolized_profile) {
       # we have both a binary and symbolized profiles, abort
       error("FATAL ERROR: Symbolized profile\n   $fname\ncannot be used with " .
-	    "a binary arg. Try again without passing\n   $prog\n");
+            "a binary arg. Try again without passing\n   $prog\n");
     }
     # Read the symbol section of the symbolized profile file.
     $symbols = ReadSymbols(*PROFILE{IO});
@@ -3919,18 +4014,18 @@ sub ReadHeapProfile {
           # The sampling frequency is the rate of a Poisson process.
           # This means that the probability of sampling an allocation of
           # size X with sampling rate Y is 1 - exp(-X/Y)
-	  if ($n1 != 0) {
-	    my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
-	    my $scale_factor = 1/(1 - exp(-$ratio));
-	    $n1 *= $scale_factor;
-	    $s1 *= $scale_factor;
-	  }
-	  if ($n2 != 0) {
-	    my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
-	    my $scale_factor = 1/(1 - exp(-$ratio));
-	    $n2 *= $scale_factor;
-	    $s2 *= $scale_factor;
-	  }
+          if ($n1 != 0) {
+            my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+            my $scale_factor = 1/(1 - exp(-$ratio));
+            $n1 *= $scale_factor;
+            $s1 *= $scale_factor;
+          }
+          if ($n2 != 0) {
+            my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+            my $scale_factor = 1/(1 - exp(-$ratio));
+            $n2 *= $scale_factor;
+            $s2 *= $scale_factor;
+          }
         } else {
           # Remote-heap version 1
           my $ratio;
@@ -4054,19 +4149,19 @@ sub ReadSynchProfile {
   return $r;
 }
 
-# Given a hex value in the form "0x1abcd" return "0001abcd" or
-# "000000000001abcd", depending on the current address length.
-# There's probably a more idiomatic (or faster) way to do this...
+# Given a hex value in the form "0x1abcd" or "1abcd", return either
+# "0001abcd" or "000000000001abcd", depending on the current (global)
+# address length.
 sub HexExtend {
   my $addr = shift;
 
-  $addr =~ s/^0x//;
-
-  if (length $addr > $address_length) {
-    printf STDERR "Warning:  address $addr is longer than address length $address_length\n";
+  $addr =~ s/^(0x)?0*//;
+  my $zeros_needed = $address_length - length($addr);
+  if ($zeros_needed < 0) {
+    printf STDERR "Warning: address $addr is longer than address length $address_length\n";
+    return $addr;
   }
-
-  return substr("000000000000000".$addr, -$address_length);
+  return ("0" x $zeros_needed) . $addr;
 }
 
 ##### Symbol extraction #####
@@ -4117,9 +4212,8 @@ sub ParseTextSectionHeaderFromObjdump {
   my $file_offset;
   # Get objdump output from the library file to figure out how to
   # map between mapped addresses and addresses in the library.
-  my $objdump = $obj_tool_map{"objdump"};
-  open(OBJDUMP, "$objdump -h $lib |")
-                || error("$objdump $lib: $!\n");
+  my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib);
+  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
   while (<OBJDUMP>) {
     s/\r//g;         # turn windows-looking lines into unix-looking lines
     # Idx Name          Size      VMA       LMA       File off  Algn
@@ -4157,9 +4251,8 @@ sub ParseTextSectionHeaderFromOtool {
   my $file_offset = undef;
   # Get otool output from the library file to figure out how to
   # map between mapped addresses and addresses in the library.
-  my $otool = $obj_tool_map{"otool"};
-  open(OTOOL, "$otool -l $lib |")
-                || error("$otool $lib: $!\n");
+  my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib);
+  open(OTOOL, "$command |") || error("$command: $!\n");
   my $cmd = "";
   my $sectname = "";
   my $segname = "";
@@ -4501,18 +4594,18 @@ sub ExtractSymbols {
     my ($start_pc_index, $finish_pc_index);
     # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index].
     for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0;
-	 $finish_pc_index--) {
+         $finish_pc_index--) {
       last if $pcs[$finish_pc_index - 1] le $finish;
     }
     # Find smallest start_pc_index such that $start <= $pc[$start_pc_index].
     for ($start_pc_index = $finish_pc_index; $start_pc_index > 0;
-	 $start_pc_index--) {
+         $start_pc_index--) {
       last if $pcs[$start_pc_index - 1] lt $start;
     }
     # This keeps PC values higher than $pc[$finish_pc_index] in @pcs,
     # in case there are overlaps in libraries and the main binary.
     @{$contained} = splice(@pcs, $start_pc_index,
-			   $finish_pc_index - $start_pc_index);
+                           $finish_pc_index - $start_pc_index);
     # Map to symbols
     MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols);
   }
@@ -4534,15 +4627,15 @@ sub MapToSymbols {
 
   # Figure out the addr2line command to use
   my $addr2line = $obj_tool_map{"addr2line"};
-  my $cmd = "$addr2line -f -C -e $image";
+  my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image);
   if (exists $obj_tool_map{"addr2line_pdb"}) {
     $addr2line = $obj_tool_map{"addr2line_pdb"};
-    $cmd = "$addr2line --demangle -f -C -e $image";
+    $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image);
   }
 
   # If "addr2line" isn't installed on the system at all, just use
   # nm to get what info we can (function names, but not line numbers).
-  if (system("$addr2line --help >$dev_null 2>&1") != 0) {
+  if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) {
     MapSymbolsWithNM($image, $offset, $pclist, $symbols);
     return;
   }
@@ -4556,7 +4649,6 @@ sub MapToSymbols {
   $sep_address = undef;  # May be filled in by MapSymbolsWithNM()
   my $nm_symbols = {};
   MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols);
-  # TODO(csilvers): only add '-i' if addr2line supports it.
   if (defined($sep_address)) {
     # Only add " -i" to addr2line if the binary supports it.
     # addr2line --help returns 0, but not if it sees an unknown flag first.
@@ -4582,13 +4674,14 @@ sub MapToSymbols {
   close(ADDRESSES);
   if ($debug) {
     print("----\n");
-    system("cat $main::tmpfile_sym");
+    system("cat", $main::tmpfile_sym);
     print("----\n");
-    system("$cmd <$main::tmpfile_sym");
+    system("$cmd < " . ShellEscape($main::tmpfile_sym));
     print("----\n");
   }
 
-  open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n");
+  open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |")
+      || error("$cmd: $!\n");
   my $count = 0;   # Index in pclist
   while (<SYMBOLS>) {
     # Read fullfunction and filelineinfo from next pair of lines
@@ -4608,15 +4701,29 @@ sub MapToSymbols {
 
     my $pcstr = $pclist->[$count];
     my $function = ShortFunctionName($fullfunction);
-    if ($fullfunction eq '??') {
-      # See if nm found a symbol
-      my $nms = $nm_symbols->{$pcstr};
-      if (defined($nms)) {
+    my $nms = $nm_symbols->{$pcstr};
+    if (defined($nms)) {
+      if ($fullfunction eq '??') {
+        # nm found a symbol for us.
         $function = $nms->[0];
         $fullfunction = $nms->[2];
+      } else {
+	# MapSymbolsWithNM tags each routine with its starting address,
+	# useful in case the image has multiple occurrences of this
+	# routine.  (It uses a syntax that resembles template paramters,
+	# that are automatically stripped out by ShortFunctionName().)
+	# addr2line does not provide the same information.  So we check
+	# if nm disambiguated our symbol, and if so take the annotated
+	# (nm) version of the routine-name.  TODO(csilvers): this won't
+	# catch overloaded, inlined symbols, which nm doesn't see.
+	# Better would be to do a check similar to nm's, in this fn.
+	if ($nms->[2] =~ m/^\Q$function\E/) {  # sanity check it's the right fn
+	  $function = $nms->[0];
+	  $fullfunction = $nms->[2];
+	}
       }
     }
-
+    
     # Prepend to accumulated symbols for pcstr
     # (so that caller comes before callee)
     my $sym = $symbols->{$pcstr};
@@ -4627,7 +4734,7 @@ sub MapToSymbols {
     unshift(@{$sym}, $function, $filelinenum, $fullfunction);
     if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
     if (!defined($sep_address)) {
-      # Inlining is off, se this entry ends immediately
+      # Inlining is off, so this entry ends immediately
       $count++;
     }
   }
@@ -4734,7 +4841,9 @@ sub ConfigureObjTools {
   my $file_type = undef;
   if (-e "/usr/bin/file") {
     # Follow symlinks (at least for systems where "file" supports that).
-    $file_type = `/usr/bin/file -L $prog_file 2>$dev_null || /usr/bin/file $prog_file`;
+    my $escaped_prog_file = ShellEscape($prog_file);
+    $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null ||
+                  /usr/bin/file $escaped_prog_file`;
   } elsif ($^O == "MSWin32") {
     $file_type = "MS Windows";
   } else {
@@ -4816,6 +4925,19 @@ sub ConfigureTool {
   return $path;
 }
 
+sub ShellEscape {
+  my @escaped_words = ();
+  foreach my $word (@_) {
+    my $escaped_word = $word;
+    if ($word =~ m![^a-zA-Z0-9/.,_=-]!) {  # check for anything not in whitelist
+      $escaped_word =~ s/'/'\\''/;
+      $escaped_word = "'$escaped_word'";
+    }
+    push(@escaped_words, $escaped_word);
+  }
+  return join(" ", @escaped_words);
+}
+
 sub cleanup {
   unlink($main::tmpfile_sym);
   unlink(keys %main::tempnames);
@@ -4853,11 +4975,11 @@ sub error {
 # names match "$regexp" and returns them in a hashtable mapping from
 # procedure name to a two-element vector of [start address, end address]
 sub GetProcedureBoundariesViaNm {
-  my $nm_command = shift;
+  my $escaped_nm_command = shift;    # shell-escaped
   my $regexp = shift;
 
   my $symbol_table = {};
-  open(NM, "$nm_command |") || error("$nm_command: $!\n");
+  open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n");
   my $last_start = "0";
   my $routine = "";
   while (<NM>) {
@@ -4935,6 +5057,21 @@ sub GetProcedureBoundaries {
   my $image = shift;
   my $regexp = shift;
 
+  # If $image doesn't start with /, then put ./ in front of it.  This works
+  # around an obnoxious bug in our probing of nm -f behavior.
+  # "nm -f $image" is supposed to fail on GNU nm, but if:
+  #
+  # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND
+  # b. you have a.out in your current directory (a not uncommon occurence)
+  #
+  # then "nm -f $image" succeeds because -f only looks at the first letter of
+  # the argument, which looks valid because it's [BbSsPp], and then since
+  # there's no image provided, it looks for a.out and finds it.
+  #
+  # This regex makes sure that $image starts with . or /, forcing the -f
+  # parsing to fail since . and / are not valid formats.
+  $image =~ s#^[^/]#./$&#;
+
   # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
   my $debugging = DebuggingLibrary($image);
   if ($debugging) {
@@ -4952,28 +5089,29 @@ sub GetProcedureBoundaries {
   # --demangle and -f.
   my $demangle_flag = "";
   my $cppfilt_flag = "";
-  if (system("$nm --demangle $image >$dev_null 2>&1") == 0) {
+  my $to_devnull = ">$dev_null 2>&1";
+  if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) {
     # In this mode, we do "nm --demangle <foo>"
     $demangle_flag = "--demangle";
     $cppfilt_flag = "";
-  } elsif (system("$cppfilt $image >$dev_null 2>&1") == 0) {
+  } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) {
     # In this mode, we do "nm <foo> | c++filt"
-    $cppfilt_flag = " | $cppfilt";
+    $cppfilt_flag = " | " . ShellEscape($cppfilt);
   };
   my $flatten_flag = "";
-  if (system("$nm -f $image >$dev_null 2>&1") == 0) {
+  if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) {
     $flatten_flag = "-f";
   }
 
   # Finally, in the case $imagie isn't a debug library, we try again with
   # -D to at least get *exported* symbols.  If we can't use --demangle,
   # we use c++filt instead, if it exists on this system.
-  my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" .
-                     " $image 2>$dev_null $cppfilt_flag",
-                     "$nm -D -n $flatten_flag $demangle_flag" .
-                     " $image 2>$dev_null $cppfilt_flag",
+  my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag,
+                                 $image) . " 2>$dev_null $cppfilt_flag",
+                     ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag,
+                                 $image) . " 2>$dev_null $cppfilt_flag",
                      # 6nm is for Go binaries
-		     "6nm $image 2>$dev_null | sort",
+                     ShellEscape("6nm", "$image") . " 2>$dev_null | sort",
                      );
 
   # If the executable is an MS Windows PDB-format executable, we'll
@@ -4981,8 +5119,9 @@ sub GetProcedureBoundaries {
   # want to use both unix nm and windows-specific nm_pdb, since
   # PDB-format executables can apparently include dwarf .o files.
   if (exists $obj_tool_map{"nm_pdb"}) {
-    my $nm_pdb = $obj_tool_map{"nm_pdb"};
-    push(@nm_commands, "$nm_pdb --demangle $image 2>$dev_null");
+    push(@nm_commands,
+         ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image)
+         . " 2>$dev_null");
   }
 
   foreach my $nm_command (@nm_commands) {
diff --git a/third_party/tcmalloc/vendor/src/profile-handler.cc b/third_party/tcmalloc/vendor/src/profile-handler.cc
index 700b5c0..20e5cca 100644
--- a/third_party/tcmalloc/vendor/src/profile-handler.cc
+++ b/third_party/tcmalloc/vendor/src/profile-handler.cc
@@ -36,7 +36,7 @@
 #include "config.h"
 #include "profile-handler.h"
 
-#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) && !defined(__native_client__)
+#if !(defined(__CYGWIN__) || defined(__CYGWIN32__))
 
 #include <stdio.h>
 #include <errno.h>
@@ -46,6 +46,7 @@
 #include <string>
 
 #include "base/dynamic_annotations.h"
+#include "base/googleinit.h"
 #include "base/logging.h"
 #include "base/spinlock.h"
 #include "maybe_threads.h"
@@ -139,6 +140,9 @@ class ProfileHandler {
   // Counts the number of callbacks registered.
   int32 callback_count_ GUARDED_BY(control_lock_);
 
+  // Is profiling allowed at all?
+  bool allowed_;
+
   // Whether or not the threading system provides interval timers that are
   // shared by all threads in a process.
   enum {
@@ -199,6 +203,10 @@ class ProfileHandler {
   // Disables (ignores) the timer interrupt signal.
   void DisableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_);
 
+  // Returns true if the handler is not being used by something else.
+  // This checks the kernel's signal handler table.
+  bool IsSignalHandlerAvailable();
+
   // SIGPROF/SIGALRM handler. Iterate over and call all the registered callbacks.
   static void SignalHandler(int sig, siginfo_t* sinfo, void* ucontext);
 
@@ -239,6 +247,7 @@ ProfileHandler* ProfileHandler::Instance() {
 ProfileHandler::ProfileHandler()
     : interrupts_(0),
       callback_count_(0),
+      allowed_(true),
       timer_sharing_(TIMERS_UNTOUCHED) {
   SpinLockHolder cl(&control_lock_);
 
@@ -255,6 +264,19 @@ ProfileHandler::ProfileHandler()
     frequency_ = kDefaultFrequency;
   }
 
+  if (!allowed_) {
+    return;
+  }
+
+  // If something else is using the signal handler,
+  // assume it has priority over us and stop.
+  if (!IsSignalHandlerAvailable()) {
+    RAW_LOG(INFO, "Disabling profiler because %s handler is already in use.",
+                    timer_type_ == ITIMER_REAL ? "SIGALRM" : "SIGPROF");
+    allowed_ = false;
+    return;
+  }
+
   // Ignore signals until we decide to turn profiling on.  (Paranoia;
   // should already be ignored.)
   DisableHandler();
@@ -267,6 +289,10 @@ ProfileHandler::~ProfileHandler() {
 void ProfileHandler::RegisterThread() {
   SpinLockHolder cl(&control_lock_);
 
+  if (!allowed_) {
+    return;
+  }
+
   // We try to detect whether timers are being shared by setting a
   // timer in the first call to this function, then checking whether
   // it's set in the second call.
@@ -312,6 +338,7 @@ void ProfileHandler::RegisterThread() {
 
 ProfileHandlerToken* ProfileHandler::RegisterCallback(
     ProfileHandlerCallback callback, void* callback_arg) {
+
   ProfileHandlerToken* token = new ProfileHandlerToken(callback, callback_arg);
 
   SpinLockHolder cl(&control_lock_);
@@ -386,9 +413,13 @@ void ProfileHandler::GetState(ProfileHandlerState* state) {
   }
   state->frequency = frequency_;
   state->callback_count = callback_count_;
+  state->allowed = allowed_;
 }
 
 void ProfileHandler::StartTimer() {
+  if (!allowed_) {
+    return;
+  }
   struct itimerval timer;
   timer.it_interval.tv_sec = 0;
   timer.it_interval.tv_usec = 1000000 / frequency_;
@@ -397,12 +428,18 @@ void ProfileHandler::StartTimer() {
 }
 
 void ProfileHandler::StopTimer() {
+  if (!allowed_) {
+    return;
+  }
   struct itimerval timer;
   memset(&timer, 0, sizeof timer);
   setitimer(timer_type_, &timer, 0);
 }
 
 bool ProfileHandler::IsTimerRunning() {
+  if (!allowed_) {
+    return false;
+  }
   struct itimerval current_timer;
   RAW_CHECK(0 == getitimer(timer_type_, &current_timer), "getitimer");
   return (current_timer.it_value.tv_sec != 0 ||
@@ -410,6 +447,9 @@ bool ProfileHandler::IsTimerRunning() {
 }
 
 void ProfileHandler::EnableHandler() {
+  if (!allowed_) {
+    return;
+  }
   struct sigaction sa;
   sa.sa_sigaction = SignalHandler;
   sa.sa_flags = SA_RESTART | SA_SIGINFO;
@@ -419,6 +459,9 @@ void ProfileHandler::EnableHandler() {
 }
 
 void ProfileHandler::DisableHandler() {
+  if (!allowed_) {
+    return;
+  }
   struct sigaction sa;
   sa.sa_handler = SIG_IGN;
   sa.sa_flags = SA_RESTART;
@@ -427,14 +470,33 @@ void ProfileHandler::DisableHandler() {
   RAW_CHECK(sigaction(signal_number, &sa, NULL) == 0, "sigprof (disable)");
 }
 
+bool ProfileHandler::IsSignalHandlerAvailable() {
+  struct sigaction sa;
+  const int signal_number = (timer_type_ == ITIMER_PROF ? SIGPROF : SIGALRM);
+  RAW_CHECK(sigaction(signal_number, NULL, &sa) == 0, "is-signal-handler avail");
+
+  // We only take over the handler if the current one is unset.
+  // It must be SIG_IGN or SIG_DFL, not some other function.
+  // SIG_IGN must be allowed because when profiling is allowed but
+  // not actively in use, this code keeps the handler set to SIG_IGN.
+  // That setting will be inherited across fork+exec.  In order for
+  // any child to be able to use profiling, SIG_IGN must be treated
+  // as available.
+  return sa.sa_handler == SIG_IGN || sa.sa_handler == SIG_DFL;
+}
+
 void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) {
   int saved_errno = errno;
-  RAW_CHECK(instance_ != NULL, "ProfileHandler is not initialized");
+  // At this moment, instance_ must be initialized because the handler is
+  // enabled in RegisterThread or RegisterCallback only after
+  // ProfileHandler::Instance runs.
+  ProfileHandler* instance = ANNOTATE_UNPROTECTED_READ(instance_);
+  RAW_CHECK(instance != NULL, "ProfileHandler is not initialized");
   {
-    SpinLockHolder sl(&instance_->signal_lock_);
-    ++instance_->interrupts_;
-    for (CallbackIterator it = instance_->callbacks_.begin();
-         it != instance_->callbacks_.end();
+    SpinLockHolder sl(&instance->signal_lock_);
+    ++instance->interrupts_;
+    for (CallbackIterator it = instance->callbacks_.begin();
+         it != instance->callbacks_.end();
          ++it) {
       (*it)->callback(sig, sinfo, ucontext, (*it)->callback_arg);
     }
@@ -442,20 +504,9 @@ void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) {
   errno = saved_errno;
 }
 
-// The sole purpose of this class is to initialize the ProfileHandler singleton
-// when the global static objects are created. Note that the main thread will
-// be registered at this time.
-class ProfileHandlerInitializer {
- public:
-  ProfileHandlerInitializer() {
-    ProfileHandler::Instance()->RegisterThread();
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(ProfileHandlerInitializer);
-};
-// ProfileHandlerInitializer singleton
-static ProfileHandlerInitializer profile_handler_initializer;
+// This module initializer registers the main thread, so it must be
+// executed in the context of the main thread.
+REGISTER_MODULE_INITIALIZER(profile_main, ProfileHandlerRegisterThread());
 
 extern "C" void ProfileHandlerRegisterThread() {
   ProfileHandler::Instance()->RegisterThread();
@@ -478,14 +529,12 @@ extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) {
   ProfileHandler::Instance()->GetState(state);
 }
 
-#else  // !defined(OS_CYGWIN) && !defined(__native_client__)
+#else  // OS_CYGWIN
 
 // ITIMER_PROF doesn't work under cygwin.  ITIMER_REAL is available, but doesn't
 // work as well for profiling, and also interferes with alarm().  Because of
 // these issues, unless a specific need is identified, profiler support is
 // disabled under Cygwin.
-//
-// Native Client runtime also does not have signals working.
 extern "C" void ProfileHandlerRegisterThread() {
 }
 
@@ -503,4 +552,4 @@ extern "C" void ProfileHandlerReset() {
 extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) {
 }
 
-#endif  // !defined(OS_CYGWIN) && !defined(__native_client__)
+#endif  // OS_CYGWIN
diff --git a/third_party/tcmalloc/vendor/src/profile-handler.h b/third_party/tcmalloc/vendor/src/profile-handler.h
index 1cbe253..4b078ec 100644
--- a/third_party/tcmalloc/vendor/src/profile-handler.h
+++ b/third_party/tcmalloc/vendor/src/profile-handler.h
@@ -137,6 +137,7 @@ struct ProfileHandlerState {
   int32 frequency;  /* Profiling frequency */
   int32 callback_count;  /* Number of callbacks registered */
   int64 interrupts;  /* Number of interrupts received */
+  bool allowed; /* Profiling is allowed */
 };
 void ProfileHandlerGetState(struct ProfileHandlerState* state);
 
diff --git a/third_party/tcmalloc/vendor/src/profiler.cc b/third_party/tcmalloc/vendor/src/profiler.cc
index a57953e..dfb6aab 100644
--- a/third_party/tcmalloc/vendor/src/profiler.cc
+++ b/third_party/tcmalloc/vendor/src/profiler.cc
@@ -55,8 +55,8 @@ typedef int ucontext_t;   // just to quiet the compiler, mostly
 #endif
 #include <sys/time.h>
 #include <string>
-#include <google/profiler.h>
-#include <google/stacktrace.h>
+#include <gperftools/profiler.h>
+#include <gperftools/stacktrace.h>
 #include "base/commandlineflags.h"
 #include "base/logging.h"
 #include "base/googleinit.h"
@@ -284,7 +284,7 @@ void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext,
   }
 }
 
-#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) && !defined(__native_client__)
+#if !(defined(__CYGWIN__) || defined(__CYGWIN32__))
 
 extern "C" PERFTOOLS_DLL_DECL void ProfilerRegisterThread() {
   ProfileHandlerRegisterThread();
@@ -316,14 +316,12 @@ extern "C" PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(
   CpuProfiler::instance_.GetCurrentState(state);
 }
 
-#else  // !defined(OS_CYGWIN) && !defined(__native_client__)
+#else  // OS_CYGWIN
 
 // ITIMER_PROF doesn't work under cygwin.  ITIMER_REAL is available, but doesn't
 // work as well for profiling, and also interferes with alarm().  Because of
 // these issues, unless a specific need is identified, profiler support is
 // disabled under Cygwin.
-//
-// Native Client runtime also does not have signals working.
 extern "C" void ProfilerRegisterThread() { }
 extern "C" void ProfilerFlush() { }
 extern "C" int ProfilingIsEnabledForAllThreads() { return 0; }
@@ -337,7 +335,7 @@ extern "C" void ProfilerGetCurrentState(ProfilerState* state) {
   memset(state, 0, sizeof(*state));
 }
 
-#endif  // !defined(OS_CYGWIN) && !defined(__native_client__)
+#endif  // OS_CYGWIN
 
 // DEPRECATED routines
 extern "C" PERFTOOLS_DLL_DECL void ProfilerEnable() { }
diff --git a/third_party/tcmalloc/vendor/src/sampler.cc b/third_party/tcmalloc/vendor/src/sampler.cc
index 0ea6df1..0ea6df1 100755..100644
--- a/third_party/tcmalloc/vendor/src/sampler.cc
+++ b/third_party/tcmalloc/vendor/src/sampler.cc
diff --git a/third_party/tcmalloc/vendor/src/sampler.h b/third_party/tcmalloc/vendor/src/sampler.h
index 8e67fb0..8e67fb0 100755..100644
--- a/third_party/tcmalloc/vendor/src/sampler.h
+++ b/third_party/tcmalloc/vendor/src/sampler.h
diff --git a/third_party/tcmalloc/vendor/src/span.cc b/third_party/tcmalloc/vendor/src/span.cc
index 426a6bd..7600945 100644
--- a/third_party/tcmalloc/vendor/src/span.cc
+++ b/third_party/tcmalloc/vendor/src/span.cc
@@ -89,16 +89,6 @@ int DLL_Length(const Span* list) {
   return result;
 }
 
-#if 0  // This isn't used.  If that changes, rewrite to use TCMalloc_Printer.
-void DLL_Print(const char* label, const Span* list) {
-  MESSAGE("%-10s %p:", label, list);
-  for (const Span* s = list->next; s != list; s = s->next) {
-    MESSAGE(" <%p,%"PRIuPTR",%"PRIuPTR">", s, s->start, s->length);
-  }
-  MESSAGE("%s\n", "");  // %s is to get around a compiler error.
-}
-#endif
-
 void DLL_Prepend(Span* list, Span* span) {
   ASSERT(span->next == NULL);
   ASSERT(span->prev == NULL);
diff --git a/third_party/tcmalloc/vendor/src/span.h b/third_party/tcmalloc/vendor/src/span.h
index ab9a796..08db629 100644
--- a/third_party/tcmalloc/vendor/src/span.h
+++ b/third_party/tcmalloc/vendor/src/span.h
@@ -96,11 +96,6 @@ void DLL_Prepend(Span* list, Span* span);
 // Return the length of the linked list. O(n)
 int DLL_Length(const Span* list);
 
-// Print the contents of the list to stderr.
-#if 0  // This isn't used.
-void DLL_Print(const char* label, const Span* list);
-#endif
-
 }  // namespace tcmalloc
 
 #endif  // TCMALLOC_SPAN_H_
diff --git a/third_party/tcmalloc/vendor/src/stack_trace_table.cc b/third_party/tcmalloc/vendor/src/stack_trace_table.cc
index faeca6b..d258a4f 100644
--- a/third_party/tcmalloc/vendor/src/stack_trace_table.cc
+++ b/third_party/tcmalloc/vendor/src/stack_trace_table.cc
@@ -35,7 +35,7 @@
 #include <string.h>                     // for NULL, memset
 #include "base/spinlock.h"              // for SpinLockHolder
 #include "common.h"            // for StackTrace
-#include "internal_logging.h"  // for MESSAGE, ASSERT
+#include "internal_logging.h"  // for ASSERT, Log
 #include "page_heap_allocator.h"  // for PageHeapAllocator
 #include "static_vars.h"       // for Static
 
@@ -93,7 +93,8 @@ void StackTraceTable::AddTrace(const StackTrace& t) {
     bucket_total_++;
     b = Static::bucket_allocator()->New();
     if (b == NULL) {
-      MESSAGE("tcmalloc: could not allocate bucket", sizeof(*b));
+      Log(kLog, __FILE__, __LINE__,
+          "tcmalloc: could not allocate bucket", sizeof(*b));
       error_ = true;
     } else {
       b->hash = h;
@@ -114,8 +115,9 @@ void** StackTraceTable::ReadStackTracesAndClear() {
   const int out_len = bucket_total_ * 3 + depth_total_ + 1;
   void** out = new void*[out_len];
   if (out == NULL) {
-    MESSAGE("tcmalloc: allocation failed for stack traces\n",
-            out_len * sizeof(*out));
+    Log(kLog, __FILE__, __LINE__,
+        "tcmalloc: allocation failed for stack traces",
+        out_len * sizeof(*out));
     return NULL;
   }
 
diff --git a/third_party/tcmalloc/vendor/src/stacktrace.cc b/third_party/tcmalloc/vendor/src/stacktrace.cc
index 175cdf5..d96b4d3 100644
--- a/third_party/tcmalloc/vendor/src/stacktrace.cc
+++ b/third_party/tcmalloc/vendor/src/stacktrace.cc
@@ -53,7 +53,7 @@
 // Some code may do that.
 
 #include <config.h>
-#include <google/stacktrace.h>
+#include <gperftools/stacktrace.h>
 #include "stacktrace_config.h"
 
 #if defined(STACKTRACE_INL_HEADER)
@@ -103,8 +103,8 @@
 # include "stacktrace_libunwind-inl.h"
 # include "stacktrace_generic-inl.h"
 # include "stacktrace_powerpc-inl.h"
-# include "stacktrace_nacl-inl.h"
 # include "stacktrace_win32-inl.h"
+# include "stacktrace_arm-inl.h"
 #else
 # error Cannot calculate stack trace: will need to write for your environment
 #endif
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_arm-inl.h b/third_party/tcmalloc/vendor/src/stacktrace_arm-inl.h
new file mode 100644
index 0000000..5ee1bf9
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/stacktrace_arm-inl.h
@@ -0,0 +1,145 @@
+// Copyright (c) 2011, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Doug Kwan
+// This is inspired by Craig Silverstein's PowerPC stacktrace code.
+//
+
+#ifndef BASE_STACKTRACE_ARM_INL_H_
+#define BASE_STACKTRACE_ARM_INL_H_
+// Note: this file is included into stacktrace.cc more than once.
+// Anything that should only be defined once should be here:
+
+#include <stdint.h>   // for uintptr_t
+#include "base/basictypes.h"  // for NULL
+#include <gperftools/stacktrace.h>
+
+// WARNING:
+// This only works if all your code is in either ARM or THUMB mode.  With
+// interworking, the frame pointer of the caller can either be in r11 (ARM
+// mode) or r7 (THUMB mode).  A callee only saves the frame pointer of its
+// mode in a fixed location on its stack frame.  If the caller is a different
+// mode, there is no easy way to find the frame pointer.  It can either be
+// still in the designated register or saved on stack along with other callee
+// saved registers.
+
+// Given a pointer to a stack frame, locate and return the calling
+// stackframe, or return NULL if no stackframe can be found. Perform sanity
+// checks (the strictness of which is controlled by the boolean parameter
+// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned.
+template<bool STRICT_UNWINDING>
+static void **NextStackFrame(void **old_sp) {
+  void **new_sp = (void**) old_sp[-1];
+
+  // Check that the transition from frame pointer old_sp to frame
+  // pointer new_sp isn't clearly bogus
+  if (STRICT_UNWINDING) {
+    // With the stack growing downwards, older stack frame must be
+    // at a greater address that the current one.
+    if (new_sp <= old_sp) return NULL;
+    // Assume stack frames larger than 100,000 bytes are bogus.
+    if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL;
+  } else {
+    // In the non-strict mode, allow discontiguous stack frames.
+    // (alternate-signal-stacks for example).
+    if (new_sp == old_sp) return NULL;
+    // And allow frames upto about 1MB.
+    if ((new_sp > old_sp)
+        && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL;
+  }
+  if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
+  return new_sp;
+}
+
+// This ensures that GetStackTrace stes up the Link Register properly.
+#ifdef __GNUC__
+void StacktraceArmDummyFunction() __attribute__((noinline));
+void StacktraceArmDummyFunction() { __asm__ volatile(""); }
+#else
+# error StacktraceArmDummyFunction() needs to be ported to this platform.
+#endif
+#endif  // BASE_STACKTRACE_ARM_INL_H_
+
+// Note: this part of the file is included several times.
+// Do not put globals below.
+
+// The following 4 functions are generated from the code below:
+//   GetStack{Trace,Frames}()
+//   GetStack{Trace,Frames}WithContext()
+//
+// These functions take the following args:
+//   void** result: the stack-trace, as an array
+//   int* sizes: the size of each stack frame, as an array
+//               (GetStackFrames* only)
+//   int max_depth: the size of the result (and sizes) array(s)
+//   int skip_count: how many stack pointers to skip before storing in result
+//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+int GET_STACK_TRACE_OR_FRAMES {
+#ifdef __GNUC__
+  void **sp = reinterpret_cast<void**>(__builtin_frame_address(0));
+#else
+# error reading stack point not yet supported on this platform.
+#endif
+
+  // On ARM, the return address is stored in the link register (r14).
+  // This is not saved on the stack frame of a leaf function.  To
+  // simplify code that reads return addresses, we call a dummy
+  // function so that the return address of this function is also
+  // stored in the stack frame.  This works at least for gcc.
+  StacktraceArmDummyFunction();
+
+  int n = 0;
+  while (sp && n < max_depth) {
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<IS_STACK_FRAMES == 0>(sp);
+
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+      result[n] = *sp;
+
+#if IS_STACK_FRAMES
+      if (next_sp > sp) {
+        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
+      } else {
+        // A frame-size of 0 is used to indicate unknown frame size.
+        sizes[n] = 0;
+      }
+#endif
+      n++;
+    }
+    sp = next_sp;
+  }
+  return n;
+}
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_config.h b/third_party/tcmalloc/vendor/src/stacktrace_config.h
index b166ca2..72d108a 100644
--- a/third_party/tcmalloc/vendor/src/stacktrace_config.h
+++ b/third_party/tcmalloc/vendor/src/stacktrace_config.h
@@ -46,11 +46,8 @@
 #ifndef BASE_STACKTRACE_CONFIG_H_
 #define BASE_STACKTRACE_CONFIG_H_
 
-#ifdef __native_client__
-#  define STACKTRACE_INL_HEADER "base/stacktrace_nacl-inl.h"
-
-// i386 and x86_64 case.
-#elif (defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 2
+// First, the i386 and x86_64 case.
+#if (defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 2
 # if !defined(NO_FRAME_POINTER)
 #   define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h"
 #   define STACKTRACE_SKIP_CONTEXT_ROUTINES 1
@@ -71,6 +68,14 @@
 #   define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h"
 # endif
 
+// The ARM case
+#elif defined(__arm__)  && __GNUC__ >= 2
+# if !defined(NO_FRAME_POINTER)
+#   define STACKTRACE_INL_HEADER "stacktrace_arm-inl.h"
+# else
+#   error stacktrace without frame pointer is not supported on ARM
+# endif
+
 // The Windows case -- probably cygwin and mingw will use one of the
 // x86-includes above, but if not, we can fall back to windows intrinsics.
 #elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__)
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_generic-inl.h b/third_party/tcmalloc/vendor/src/stacktrace_generic-inl.h
index 0e72ee7..5a526e2 100644
--- a/third_party/tcmalloc/vendor/src/stacktrace_generic-inl.h
+++ b/third_party/tcmalloc/vendor/src/stacktrace_generic-inl.h
@@ -42,7 +42,7 @@
 
 #include <execinfo.h>
 #include <string.h>
-#include "google/stacktrace.h"
+#include "gperftools/stacktrace.h"
 #endif  // BASE_STACKTRACE_GENERIC_INL_H_
 
 // Note: this part of the file is included several times.
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_libunwind-inl.h b/third_party/tcmalloc/vendor/src/stacktrace_libunwind-inl.h
index a1d5249..82b0cfe 100644
--- a/third_party/tcmalloc/vendor/src/stacktrace_libunwind-inl.h
+++ b/third_party/tcmalloc/vendor/src/stacktrace_libunwind-inl.h
@@ -45,7 +45,7 @@ extern "C" {
 #include <string.h>   // for memset()
 #include <libunwind.h>
 }
-#include "google/stacktrace.h"
+#include "gperftools/stacktrace.h"
 #include "base/logging.h"
 
 // Sometimes, we can try to get a stack trace from within a stack
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_powerpc-inl.h b/third_party/tcmalloc/vendor/src/stacktrace_powerpc-inl.h
index 9a07eea..acf2884 100644
--- a/third_party/tcmalloc/vendor/src/stacktrace_powerpc-inl.h
+++ b/third_party/tcmalloc/vendor/src/stacktrace_powerpc-inl.h
@@ -43,7 +43,7 @@
 
 #include <stdint.h>   // for uintptr_t
 #include <stdlib.h>   // for NULL
-#include <google/stacktrace.h>
+#include <gperftools/stacktrace.h>
 
 // Given a pointer to a stack frame, locate and return the calling
 // stackframe, or return NULL if no stackframe can be found. Perform sanity
@@ -126,16 +126,12 @@ int GET_STACK_TRACE_OR_FRAMES {
 
   int n = 0;
   while (sp && n < max_depth) {
-#if IS_STACK_FRAMES
     // The GetStackFrames routine is called when we are in some
     // informational context (the failure signal handler for example).
     // Use the non-strict unwinding rules to produce a stack trace
-    // that is as complete as possible (even if it contains a few bogus
-    // entries in some rare cases).
-    void **next_sp = NextStackFrame<false>(sp);
-#else
-    void **next_sp = NextStackFrame<true>(sp);
-#endif
+    // that is as complete as possible (even if it contains a few
+    // bogus entries in some rare cases).
+    void **next_sp = NextStackFrame<!IS_STACK_FRAMES>(sp);
 
     if (skip_count > 0) {
       skip_count--;
@@ -145,20 +141,20 @@ int GET_STACK_TRACE_OR_FRAMES {
       // linux ppc64), it's in sp[2].  For SYSV (used by linux ppc),
       // it's in sp[1].
 #if defined(_CALL_AIX) || defined(_CALL_DARWIN)
-      result[n++] = *(sp+2);
+      result[n] = *(sp+2);
 #elif defined(_CALL_SYSV)
-      result[n++] = *(sp+1);
+      result[n] = *(sp+1);
 #elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
       // This check is in case the compiler doesn't define _CALL_AIX/etc.
-      result[n++] = *(sp+2);
+      result[n] = *(sp+2);
 #elif defined(__linux)
       // This check is in case the compiler doesn't define _CALL_SYSV.
-      result[n++] = *(sp+1);
+      result[n] = *(sp+1);
 #else
 #error Need to specify the PPC ABI for your archiecture.
 #endif
 
-#if IS_STACK_FRAME
+#if IS_STACK_FRAMES
       if (next_sp > sp) {
         sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
       } else {
@@ -166,6 +162,7 @@ int GET_STACK_TRACE_OR_FRAMES {
         sizes[n] = 0;
       }
 #endif
+      n++;
     }
     sp = next_sp;
   }
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_win32-inl.h b/third_party/tcmalloc/vendor/src/stacktrace_win32-inl.h
index bbd4c43..2af472d 100644
--- a/third_party/tcmalloc/vendor/src/stacktrace_win32-inl.h
+++ b/third_party/tcmalloc/vendor/src/stacktrace_win32-inl.h
@@ -47,7 +47,7 @@
 // server.
 //
 // This code is inspired by a patch from David Vitek:
-//   http://code.google.com/p/google-perftools/issues/detail?id=83
+//   http://code.google.com/p/gperftools/issues/detail?id=83
 
 #ifndef BASE_STACKTRACE_WIN32_INL_H_
 #define BASE_STACKTRACE_WIN32_INL_H_
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_x86-inl.h b/third_party/tcmalloc/vendor/src/stacktrace_x86-inl.h
index a140ab6..9d76342 100644
--- a/third_party/tcmalloc/vendor/src/stacktrace_x86-inl.h
+++ b/third_party/tcmalloc/vendor/src/stacktrace_x86-inl.h
@@ -64,7 +64,7 @@ typedef ucontext ucontext_t;
 #include "base/vdso_support.h"
 #endif
 
-#include "google/stacktrace.h"
+#include "gperftools/stacktrace.h"
 
 #if defined(__linux__) && defined(__i386__) && defined(__ELF__) && defined(HAVE_MMAP)
 // Count "push %reg" instructions in VDSO __kernel_vsyscall(),
@@ -238,9 +238,14 @@ static void **NextStackFrame(void **old_sp, const void *uc) {
     // In the non-strict mode, allow discontiguous stack frames.
     // (alternate-signal-stacks for example).
     if (new_sp == old_sp) return NULL;
-    // And allow frames upto about 1MB.
-    if ((new_sp > old_sp)
-        && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL;
+    if (new_sp > old_sp) {
+      // And allow frames upto about 1MB.
+      const uintptr_t delta = (uintptr_t)new_sp - (uintptr_t)old_sp;
+      const uintptr_t acceptable_delta = 1000000;
+      if (delta > acceptable_delta) {
+        return NULL;
+      }
+    }
   }
   if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL;
 #ifdef __i386__
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_x86_64-inl.h b/third_party/tcmalloc/vendor/src/stacktrace_x86_64-inl.h
deleted file mode 100644
index 767ef9b..0000000
--- a/third_party/tcmalloc/vendor/src/stacktrace_x86_64-inl.h
+++ /dev/null
@@ -1,151 +0,0 @@
-// Copyright (c) 2005, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// ---
-// Author: Arun Sharma
-//
-// Produce stack trace using libgcc
-
-extern "C" {
-#include <stdlib.h> // for NULL
-#include <unwind.h> // ABI defined unwinder
-#include <string.h> // for memset
-}
-#include "google/stacktrace.h"
-
-typedef struct {
-  void **result;
-  int max_depth;
-  int skip_count;
-  int count;
-} trace_arg_t;
-
-
-// Workaround for the malloc() in _Unwind_Backtrace() issue.
-static _Unwind_Reason_Code nop_backtrace(struct _Unwind_Context *uc, void *opq) {
-  return _URC_NO_REASON;
-}
-
-
-// This code is not considered ready to run until
-// static initializers run so that we are guaranteed
-// that any malloc-related initialization is done.
-static bool ready_to_run = false;
-class StackTraceInit {
- public:
-   StackTraceInit() {
-     // Extra call to force initialization
-     _Unwind_Backtrace(nop_backtrace, NULL);
-     ready_to_run = true;
-   }
-};
-
-static StackTraceInit module_initializer;  // Force initialization
-
-static _Unwind_Reason_Code GetOneFrame(struct _Unwind_Context *uc, void *opq) {
-  trace_arg_t *targ = (trace_arg_t *) opq;
-
-  if (targ->skip_count > 0) {
-    targ->skip_count--;
-  } else {
-    targ->result[targ->count++] = (void *) _Unwind_GetIP(uc);
-  }
-
-  if (targ->count == targ->max_depth)
-    return _URC_END_OF_STACK;
-
-  return _URC_NO_REASON;
-}
-
-// If you change this function, also change GetStackFrames below.
-int GetStackTrace(void** result, int max_depth, int skip_count) {
-  if (!ready_to_run)
-    return 0;
-
-  trace_arg_t targ;
-
-  skip_count += 1;         // Do not include the "GetStackTrace" frame
-
-  targ.result = result;
-  targ.max_depth = max_depth;
-  targ.skip_count = skip_count;
-  targ.count = 0;
-
-  _Unwind_Backtrace(GetOneFrame, &targ);
-
-  return targ.count;
-}
-
-// If you change this function, also change GetStackTrace above:
-//
-// This GetStackFrames routine shares a lot of code with GetStackTrace
-// above. This code could have been refactored into a common routine,
-// and then both GetStackTrace/GetStackFrames could call that routine.
-// There are two problems with that:
-//
-// (1) The performance of the refactored-code suffers substantially - the
-//     refactored needs to be able to record the stack trace when called
-//     from GetStackTrace, and both the stack trace and stack frame sizes,
-//     when called from GetStackFrames - this introduces enough new
-//     conditionals that GetStackTrace performance can degrade by as much
-//     as 50%.
-//
-// (2) Whether the refactored routine gets inlined into GetStackTrace and
-//     GetStackFrames depends on the compiler, and we can't guarantee the
-//     behavior either-way, even with "__attribute__ ((always_inline))"
-//     or "__attribute__ ((noinline))". But we need this guarantee or the
-//     frame counts may be off by one.
-//
-// Both (1) and (2) can be addressed without this code duplication, by
-// clever use of template functions, and by defining GetStackTrace and
-// GetStackFrames as macros that expand to these template functions.
-// However, this approach comes with its own set of problems - namely,
-// macros and  preprocessor trouble - for example,  if GetStackTrace
-// and/or GetStackFrames is ever defined as a member functions in some
-// class, we are in trouble.
-int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
-  if (!ready_to_run)
-    return 0;
-
-  trace_arg_t targ;
-
-  skip_count += 1;         // Do not include the "GetStackFrames" frame
-
-  targ.result = pcs;
-  targ.max_depth = max_depth;
-  targ.skip_count = skip_count;
-  targ.count = 0;
-
-  _Unwind_Backtrace(GetOneFrame, &targ);
-
-  // No implementation for finding out the stack frame sizes yet.
-  memset(sizes, 0, sizeof(*sizes) * targ.count);
-
-  return targ.count;
-}
diff --git a/third_party/tcmalloc/vendor/src/static_vars.cc b/third_party/tcmalloc/vendor/src/static_vars.cc
index 2ca132e..6fc852a 100644
--- a/third_party/tcmalloc/vendor/src/static_vars.cc
+++ b/third_party/tcmalloc/vendor/src/static_vars.cc
@@ -34,6 +34,7 @@
 #include <stddef.h>                     // for NULL
 #include <new>                          // for operator new
 #include "internal_logging.h"  // for CHECK_CONDITION
+#include "common.h"
 #include "sampler.h"           // for Sampler
 
 namespace tcmalloc {
@@ -46,7 +47,7 @@ PageHeapAllocator<StackTrace> Static::stacktrace_allocator_;
 Span Static::sampled_objects_;
 PageHeapAllocator<StackTraceTable::Bucket> Static::bucket_allocator_;
 StackTrace* Static::growth_stacks_ = NULL;
-char Static::pageheap_memory_[sizeof(PageHeap)];
+PageHeap* Static::pageheap_ = NULL;
 
 void Static::InitStaticVars() {
   sizemap_.Init();
@@ -60,7 +61,11 @@ void Static::InitStaticVars() {
   for (int i = 0; i < kNumClasses; ++i) {
     central_cache_[i].Init(i);
   }
-  new ((void*)pageheap_memory_) PageHeap;
+  // It's important to have PageHeap allocated, not in static storage,
+  // so that HeapLeakChecker does not consider all the byte patterns stored
+  // in is caches as pointers that are sources of heap object liveness,
+  // which leads to it missing some memory leaks.
+  pageheap_ = new (MetaDataAlloc(sizeof(PageHeap))) PageHeap;
   DLL_Init(&sampled_objects_);
   Sampler::InitStatics();
 }
diff --git a/third_party/tcmalloc/vendor/src/static_vars.h b/third_party/tcmalloc/vendor/src/static_vars.h
index b21fe2b..185a1d4 100644
--- a/third_party/tcmalloc/vendor/src/static_vars.h
+++ b/third_party/tcmalloc/vendor/src/static_vars.h
@@ -65,9 +65,7 @@ class Static {
   // must be protected by pageheap_lock.
 
   // Page-level allocator.
-  static PageHeap* pageheap() {
-    return reinterpret_cast<PageHeap*>(pageheap_memory_);
-  }
+  static PageHeap* pageheap() { return pageheap_; }
 
   static PageHeapAllocator<Span>* span_allocator() { return &span_allocator_; }
 
@@ -105,10 +103,7 @@ class Static {
   // is stored in trace->stack[kMaxStackDepth-1].
   static StackTrace* growth_stacks_;
 
-  // PageHeap uses a constructor for initialization.  Like the members above,
-  // we can't depend on initialization order, so pageheap is new'd
-  // into this buffer.
-  static char pageheap_memory_[sizeof(PageHeap)];
+  static PageHeap* pageheap_;
 };
 
 }  // namespace tcmalloc
diff --git a/third_party/tcmalloc/vendor/src/symbolize.cc b/third_party/tcmalloc/vendor/src/symbolize.cc
index dfdfb7e..d90c4b8 100644
--- a/third_party/tcmalloc/vendor/src/symbolize.cc
+++ b/third_party/tcmalloc/vendor/src/symbolize.cc
@@ -57,6 +57,7 @@
 #endif
 #include <string>
 #include "base/commandlineflags.h"
+#include "base/logging.h"
 #include "base/sysinfo.h"
 
 using std::string;
@@ -93,6 +94,15 @@ static char* GetProgramInvocationName() {
 #endif
 }
 
+// Prints an error message when you can't run Symbolize().
+static void PrintError(const char* reason) {
+  RAW_LOG(ERROR,
+          "*** WARNING: Cannot convert addresses to symbols in output below.\n"
+          "*** Reason: %s\n"
+          "*** If you cannot fix this, try running pprof directly.\n",
+          reason);
+}
+
 void SymbolTable::Add(const void* addr) {
   symbolization_table_[addr] = "";
 }
@@ -107,13 +117,22 @@ const char* SymbolTable::GetSymbol(const void* addr) {
 // Note that the forking/etc is not thread-safe or re-entrant.  That's
 // ok for the purpose we need -- reporting leaks detected by heap-checker
 // -- but be careful if you decide to use this routine for other purposes.
+// Returns number of symbols read on error.  If can't symbolize, returns 0
+// and emits an error message about why.
 int SymbolTable::Symbolize() {
 #if !defined(HAVE_UNISTD_H)  || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H)
+  PrintError("Perftools does not know how to call a sub-process on this O/S");
   return 0;
 #else
   const char* argv0 = GetProgramInvocationName();
-  if (argv0 == NULL)   // can't call symbolize if we can't figure out our name
+  if (argv0 == NULL) {  // can't call symbolize if we can't figure out our name
+    PrintError("Cannot figure out the name of this executable (argv0)");
     return 0;
+  }
+  if (access(g_pprof_path->c_str(), R_OK) != 0) {
+    PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)");
+    return 0;
+  }
 
   // All this work is to do two-way communication.  ugh.
   int *child_in = NULL;   // file descriptors
@@ -131,6 +150,7 @@ int SymbolTable::Symbolize() {
       for (int j = 0; j < i; j++) {
         close(child_fds[j][0]);
         close(child_fds[j][1]);
+        PrintError("Cannot create a socket pair");
         return 0;
       }
     } else {
@@ -156,6 +176,7 @@ int SymbolTable::Symbolize() {
       close(child_in[1]);
       close(child_out[0]);
       close(child_out[1]);
+      PrintError("Unknown error calling fork()");
       return 0;
     }
     case 0: {  // child
@@ -178,6 +199,8 @@ int SymbolTable::Symbolize() {
       close(child_in[0]);   // child uses the 0's, parent uses the 1's
       close(child_out[0]);  // child uses the 0's, parent uses the 1's
 #ifdef HAVE_POLL_H
+      // Waiting for 1ms seems to give the OS time to notice any errors.
+      poll(0, 0, 1);
       // For maximum safety, we check to make sure the execlp
       // succeeded before trying to write.  (Otherwise we'll get a
       // SIGPIPE.)  For systems without poll.h, we'll just skip this
@@ -185,6 +208,7 @@ int SymbolTable::Symbolize() {
       struct pollfd pfd = { child_in[1], POLLOUT, 0 };
       if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) ||
           (pfd.revents & (POLLHUP|POLLERR))) {
+        PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)");
         return 0;
       }
 #endif
@@ -220,6 +244,7 @@ int SymbolTable::Symbolize() {
                               kSymbolBufferSize - total_bytes_read);
         if (bytes_read < 0) {
           close(child_out[1]);
+          PrintError("Cannot read data from pprof");
           return 0;
         } else if (bytes_read == 0) {
           close(child_out[1]);
@@ -249,6 +274,7 @@ int SymbolTable::Symbolize() {
       return num_symbols;
     }
   }
+  PrintError("Unkown error (should never occur!)");
   return 0;  // shouldn't be reachable
 #endif
 }
diff --git a/third_party/tcmalloc/vendor/src/system-alloc.cc b/third_party/tcmalloc/vendor/src/system-alloc.cc
index a05060a..abfe472 100644
--- a/third_party/tcmalloc/vendor/src/system-alloc.cc
+++ b/third_party/tcmalloc/vendor/src/system-alloc.cc
@@ -48,7 +48,7 @@
 #include <unistd.h>                     // for sbrk, getpagesize, off_t
 #endif
 #include <new>                          // for operator new
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 #include "base/basictypes.h"
 #include "base/commandlineflags.h"
 #include "base/spinlock.h"              // for SpinLockHolder, SpinLock, etc
@@ -61,6 +61,13 @@
 # define MAP_ANONYMOUS MAP_ANON
 #endif
 
+// MADV_FREE is specifically designed for use by malloc(), but only
+// FreeBSD supports it; in linux we fall back to the somewhat inferior
+// MADV_DONTNEED.
+#if !defined(MADV_FREE) && defined(MADV_DONTNEED)
+# define MADV_FREE  MADV_DONTNEED
+#endif
+
 // Solaris has a bug where it doesn't declare madvise() for C++.
 //    http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0
 #if defined(__sun) && defined(__SVR4)
@@ -76,6 +83,10 @@ static const bool kDebugMode = false;
 static const bool kDebugMode = true;
 #endif
 
+// TODO(sanjay): Move the code below into the tcmalloc namespace
+using tcmalloc::kLog;
+using tcmalloc::Log;
+
 // Anonymous namespace to avoid name conflicts on "CheckAddressBits".
 namespace {
 
@@ -103,7 +114,7 @@ union MemoryAligner {
 
 static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
 
-#if defined(HAVE_MMAP) || defined(MADV_DONTNEED)
+#if defined(HAVE_MMAP) || defined(MADV_FREE)
 // Page size is initialized on demand (only needed for mmap-based allocators)
 static size_t pagesize = 0;
 #endif
@@ -420,7 +431,6 @@ void* DefaultSysAllocator::Alloc(size_t size, size_t *actual_size,
       if (result != NULL) {
         return result;
       }
-      TCMalloc_MESSAGE(__FILE__, __LINE__, "%s failed.\n", names_[i]);
       failed_[i] = true;
     }
   }
@@ -484,10 +494,10 @@ void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size,
 }
 
 void TCMalloc_SystemRelease(void* start, size_t length) {
-#ifdef MADV_DONTNEED
+#ifdef MADV_FREE
   if (FLAGS_malloc_devmem_start) {
-    // It's not safe to use MADV_DONTNEED if we've been mapping
-    // /dev/mem for heap memory
+    // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been
+    // mapping /dev/mem for heap memory.
     return;
   }
   if (pagesize == 0) pagesize = getpagesize();
@@ -511,7 +521,7 @@ void TCMalloc_SystemRelease(void* start, size_t length) {
     // Note -- ignoring most return codes, because if this fails it
     // doesn't matter...
     while (madvise(reinterpret_cast<char*>(new_start), new_end - new_start,
-                   MADV_DONTNEED) == -1 &&
+                   MADV_FREE) == -1 &&
            errno == EAGAIN) {
       // NOP
     }
diff --git a/third_party/tcmalloc/vendor/src/tcmalloc.cc b/third_party/tcmalloc/vendor/src/tcmalloc.cc
index 263db81..0a9cb9f 100644
--- a/third_party/tcmalloc/vendor/src/tcmalloc.cc
+++ b/third_party/tcmalloc/vendor/src/tcmalloc.cc
@@ -87,7 +87,7 @@
 //   goes from about 1100 ns to about 300 ns.
 
 #include "config.h"
-#include <google/tcmalloc.h>
+#include <gperftools/tcmalloc.h>
 
 #include <errno.h>                      // for ENOMEM, EINVAL, errno
 #ifdef HAVE_SYS_CDEFS_H
@@ -111,8 +111,8 @@
 #include <new>                          // for nothrow_t (ptr only), etc
 #include <vector>                       // for vector
 
-#include <google/malloc_extension.h>
-#include <google/malloc_hook.h>         // for MallocHook
+#include <gperftools/malloc_extension.h>
+#include <gperftools/malloc_hook.h>         // for MallocHook
 #include "base/basictypes.h"            // for int64
 #include "base/commandlineflags.h"      // for RegisterFlagValidator, etc
 #include "base/dynamic_annotations.h"   // for RunningOnValgrind
@@ -147,6 +147,9 @@
 # define WIN32_DO_PATCHING 1
 #endif
 
+// Some windows file somewhere (at least on cygwin) #define's small (!)
+#undef small
+
 using STL_NAMESPACE::max;
 using STL_NAMESPACE::numeric_limits;
 using STL_NAMESPACE::vector;
@@ -162,6 +165,10 @@ using STL_NAMESPACE::vector;
 #endif
 
 using tcmalloc::AlignmentForSize;
+using tcmalloc::kLog;
+using tcmalloc::kCrash;
+using tcmalloc::kCrashWithStats;
+using tcmalloc::Log;
 using tcmalloc::PageHeap;
 using tcmalloc::PageHeapAllocator;
 using tcmalloc::SizeMap;
@@ -279,16 +286,18 @@ static int tc_new_mode = 0;  // See tc_set_new_mode().
 // required) kind of exception handling for these routines.
 namespace {
 void InvalidFree(void* ptr) {
-  CRASH("Attempt to free invalid pointer: %p\n", ptr);
+  Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr);
 }
 
-size_t InvalidGetSizeForRealloc(void* old_ptr) {
-  CRASH("Attempt to realloc invalid pointer: %p\n", old_ptr);
+size_t InvalidGetSizeForRealloc(const void* old_ptr) {
+  Log(kCrash, __FILE__, __LINE__,
+      "Attempt to realloc invalid pointer", old_ptr);
   return 0;
 }
 
-size_t InvalidGetAllocatedSize(void* ptr) {
-  CRASH("Attempt to get the size of an invalid pointer: %p\n", ptr);
+size_t InvalidGetAllocatedSize(const void* ptr) {
+  Log(kCrash, __FILE__, __LINE__,
+      "Attempt to get the size of an invalid pointer", ptr);
   return 0;
 }
 }  // unnamed namespace
@@ -303,7 +312,9 @@ struct TCMallocStats {
 };
 
 // Get stats into "r".  Also get per-size-class counts if class_count != NULL
-static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
+static void ExtractStats(TCMallocStats* r, uint64_t* class_count,
+                         PageHeap::SmallSpanStats* small_spans,
+                         PageHeap::LargeSpanStats* large_spans) {
   r->central_bytes = 0;
   r->transfer_bytes = 0;
   for (int cl = 0; cl < kNumClasses; ++cl) {
@@ -324,14 +335,30 @@ static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
     ThreadCache::GetThreadStats(&r->thread_bytes, class_count);
     r->metadata_bytes = tcmalloc::metadata_system_bytes();
     r->pageheap = Static::pageheap()->stats();
+    if (small_spans != NULL) {
+      Static::pageheap()->GetSmallSpanStats(small_spans);
+    }
+    if (large_spans != NULL) {
+      Static::pageheap()->GetLargeSpanStats(large_spans);
+    }
   }
 }
 
+static double PagesToMiB(uint64_t pages) {
+  return (pages << kPageShift) / 1048576.0;
+}
+
 // WRITE stats to "out"
 static void DumpStats(TCMalloc_Printer* out, int level) {
   TCMallocStats stats;
   uint64_t class_count[kNumClasses];
-  ExtractStats(&stats, (level >= 2 ? class_count : NULL));
+  PageHeap::SmallSpanStats small;
+  PageHeap::LargeSpanStats large;
+  if (level >= 2) {
+    ExtractStats(&stats, class_count, &small, &large);
+  } else {
+    ExtractStats(&stats, NULL, NULL, NULL);
+  }
 
   static const double MiB = 1048576.0;
 
@@ -404,8 +431,48 @@ static void DumpStats(TCMalloc_Printer* out, int level) {
       }
     }
 
-    SpinLockHolder h(Static::pageheap_lock());
-    Static::pageheap()->Dump(out);
+    // append page heap info
+    int nonempty_sizes = 0;
+    for (int s = 0; s < kMaxPages; s++) {
+      if (small.normal_length[s] + small.returned_length[s] > 0) {
+        nonempty_sizes++;
+      }
+    }
+    out->printf("------------------------------------------------\n");
+    out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n",
+                nonempty_sizes, stats.pageheap.free_bytes / MiB,
+                stats.pageheap.unmapped_bytes / MiB);
+    out->printf("------------------------------------------------\n");
+    uint64_t total_normal = 0;
+    uint64_t total_returned = 0;
+    for (int s = 0; s < kMaxPages; s++) {
+      const int n_length = small.normal_length[s];
+      const int r_length = small.returned_length[s];
+      if (n_length + r_length > 0) {
+        uint64_t n_pages = s * n_length;
+        uint64_t r_pages = s * r_length;
+        total_normal += n_pages;
+        total_returned += r_pages;
+        out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
+                    "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
+                    s,
+                    (n_length + r_length),
+                    PagesToMiB(n_pages + r_pages),
+                    PagesToMiB(total_normal + total_returned),
+                    PagesToMiB(r_pages),
+                    PagesToMiB(total_returned));
+      }
+    }
+
+    total_normal += large.normal_pages;
+    total_returned += large.returned_pages;
+    out->printf(">255   large * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
+                "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
+                static_cast<unsigned int>(large.spans),
+                PagesToMiB(large.normal_pages + large.returned_pages),
+                PagesToMiB(total_normal + total_returned),
+                PagesToMiB(large.returned_pages),
+                PagesToMiB(total_returned));
   }
 }
 
@@ -435,8 +502,9 @@ static void** DumpHeapGrowthStackTraces() {
 
   void** result = new void*[needed_slots];
   if (result == NULL) {
-    MESSAGE("tcmalloc: allocation failed for stack trace slots",
-            needed_slots * sizeof(*result));
+    Log(kLog, __FILE__, __LINE__,
+        "tcmalloc: allocation failed for stack trace slots",
+        needed_slots * sizeof(*result));
     return NULL;
   }
 
@@ -562,7 +630,7 @@ class TCMallocImplementation : public MallocExtension {
 
     if (strcmp(name, "generic.current_allocated_bytes") == 0) {
       TCMallocStats stats;
-      ExtractStats(&stats, NULL);
+      ExtractStats(&stats, NULL, NULL, NULL);
       *value = stats.pageheap.system_bytes
                - stats.thread_bytes
                - stats.central_bytes
@@ -574,7 +642,7 @@ class TCMallocImplementation : public MallocExtension {
 
     if (strcmp(name, "generic.heap_size") == 0) {
       TCMallocStats stats;
-      ExtractStats(&stats, NULL);
+      ExtractStats(&stats, NULL, NULL, NULL);
       *value = stats.pageheap.system_bytes;
       return true;
     }
@@ -588,6 +656,27 @@ class TCMallocImplementation : public MallocExtension {
       return true;
     }
 
+    if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) {
+      TCMallocStats stats;
+      ExtractStats(&stats, NULL, NULL, NULL);
+      *value = stats.central_bytes;
+      return true;
+    }
+
+    if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) {
+      TCMallocStats stats;
+      ExtractStats(&stats, NULL, NULL, NULL);
+      *value = stats.transfer_bytes;
+      return true;
+    }
+
+    if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) {
+      TCMallocStats stats;
+      ExtractStats(&stats, NULL, NULL, NULL);
+      *value = stats.thread_bytes;
+      return true;
+    }
+
     if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) {
       SpinLockHolder l(Static::pageheap_lock());
       *value = Static::pageheap()->stats().free_bytes;
@@ -608,7 +697,7 @@ class TCMallocImplementation : public MallocExtension {
 
     if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
       TCMallocStats stats;
-      ExtractStats(&stats, NULL);
+      ExtractStats(&stats, NULL, NULL, NULL);
       *value = stats.thread_bytes;
       return true;
     }
@@ -689,7 +778,7 @@ class TCMallocImplementation : public MallocExtension {
   // This just calls GetSizeWithCallback, but because that's in an
   // unnamed namespace, we need to move the definition below it in the
   // file.
-  virtual size_t GetAllocatedSize(void* ptr);
+  virtual size_t GetAllocatedSize(const void* ptr);
 
   // This duplicates some of the logic in GetSizeWithCallback, but is
   // faster.  This is important on OS X, where this function is called
@@ -763,42 +852,39 @@ class TCMallocImplementation : public MallocExtension {
     }
 
     // append page heap info
-    int64 page_count_normal[kMaxPages];
-    int64 page_count_returned[kMaxPages];
-    int64 span_count_normal;
-    int64 span_count_returned;
+    PageHeap::SmallSpanStats small;
+    PageHeap::LargeSpanStats large;
     {
       SpinLockHolder h(Static::pageheap_lock());
-      Static::pageheap()->GetClassSizes(page_count_normal,
-                                        page_count_returned,
-                                        &span_count_normal,
-                                        &span_count_returned);
+      Static::pageheap()->GetSmallSpanStats(&small);
+      Static::pageheap()->GetLargeSpanStats(&large);
     }
 
-    // spans: mapped
+    // large spans: mapped
     MallocExtension::FreeListInfo span_info;
     span_info.type = kLargeSpanType;
     span_info.max_object_size = (numeric_limits<size_t>::max)();
     span_info.min_object_size = kMaxPages << kPageShift;
-    span_info.total_bytes_free = span_count_normal << kPageShift;
+    span_info.total_bytes_free = large.normal_pages << kPageShift;
     v->push_back(span_info);
 
-    // spans: unmapped
+    // large spans: unmapped
     span_info.type = kLargeUnmappedSpanType;
-    span_info.total_bytes_free = span_count_returned << kPageShift;
+    span_info.total_bytes_free = large.returned_pages << kPageShift;
     v->push_back(span_info);
 
+    // small spans
     for (int s = 1; s < kMaxPages; s++) {
       MallocExtension::FreeListInfo i;
       i.max_object_size = (s << kPageShift);
       i.min_object_size = ((s - 1) << kPageShift);
 
       i.type = kPageHeapType;
-      i.total_bytes_free = (s << kPageShift) * page_count_normal[s];
+      i.total_bytes_free = (s << kPageShift) * small.normal_length[s];
       v->push_back(i);
 
       i.type = kPageHeapUnmappedType;
-      i.total_bytes_free = (s << kPageShift) * page_count_returned[s];
+      i.total_bytes_free = (s << kPageShift) * small.returned_length[s];
       v->push_back(i);
     }
   }
@@ -824,12 +910,6 @@ TCMallocGuard::TCMallocGuard() {
     tcmalloc::CheckIfKernelSupportsTLS();
 #endif
     ReplaceSystemAlloc();    // defined in libc_override_*.h
-#if defined(__APPLE__)
-    // To break the recursive call of malloc, as malloc -> TCMALLOC_MESSAGE
-    // -> snprintf -> localeconv_l -> malloc, on MacOS.
-    char buf[32];
-    snprintf(buf, sizeof(buf), "%d", tcmallocguard_refcount);
-#endif
     tc_free(tc_malloc(1));
     ThreadCache::InitTSD();
     tc_free(tc_malloc(1));
@@ -1036,7 +1116,15 @@ static inline ThreadCache* GetCacheIfPresent() {
 // It is used primarily by windows code which wants a specialized callback.
 inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
   if (ptr == NULL) return;
-  ASSERT(Static::pageheap() != NULL);  // Should not call free() before malloc()
+  if (Static::pageheap() == NULL) {
+    // We called free() before malloc().  This can occur if the
+    // (system) malloc() is called before tcmalloc is loaded, and then
+    // free() is called after tcmalloc is loaded (and tc_free has
+    // replaced free), but before the global constructor has run that
+    // sets up the tcmalloc data structures.
+    (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
+    return;
+  }
   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
   Span* span = NULL;
   size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
@@ -1088,8 +1176,8 @@ inline void do_free(void* ptr) {
 
 // NOTE: some logic here is duplicated in GetOwnership (above), for
 // speed.  If you change this function, look at that one too.
-inline size_t GetSizeWithCallback(void* ptr,
-                                  size_t (*invalid_getsize_fn)(void*)) {
+inline size_t GetSizeWithCallback(const void* ptr,
+                                  size_t (*invalid_getsize_fn)(const void*)) {
   if (ptr == NULL)
     return 0;
   const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
@@ -1114,7 +1202,7 @@ inline size_t GetSizeWithCallback(void* ptr,
 inline void* do_realloc_with_callback(
     void* old_ptr, size_t new_size,
     void (*invalid_free_fn)(void*),
-    size_t (*invalid_get_size_fn)(void*)) {
+    size_t (*invalid_get_size_fn)(const void*)) {
   // Get the size of the old entry
   const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
 
@@ -1255,7 +1343,7 @@ inline int do_mallopt(int cmd, int value) {
 #ifdef HAVE_STRUCT_MALLINFO
 inline struct mallinfo do_mallinfo() {
   TCMallocStats stats;
-  ExtractStats(&stats, NULL);
+  ExtractStats(&stats, NULL, NULL, NULL);
 
   // Just some of the fields are filled in.
   struct mallinfo info;
@@ -1379,7 +1467,7 @@ void* cpp_memalign(size_t align, size_t size) {
 }  // end unnamed namespace
 
 // As promised, the definition of this function, declared above.
-size_t TCMallocImplementation::GetAllocatedSize(void* ptr) {
+size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) {
   ASSERT(TCMallocImplementation::GetOwnership(ptr)
          != TCMallocImplementation::kNotOwned);
   return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
diff --git a/third_party/tcmalloc/vendor/src/tests/current_allocated_bytes_test.cc b/third_party/tcmalloc/vendor/src/tests/current_allocated_bytes_test.cc
index 8188e7b..e05ec18 100644
--- a/third_party/tcmalloc/vendor/src/tests/current_allocated_bytes_test.cc
+++ b/third_party/tcmalloc/vendor/src/tests/current_allocated_bytes_test.cc
@@ -42,7 +42,7 @@
 #include "config_for_unittests.h"
 #include <stdlib.h>
 #include <stdio.h>
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 #include "base/logging.h"
 
 const char kCurrent[] = "generic.current_allocated_bytes";
diff --git a/third_party/tcmalloc/vendor/src/tests/debugallocation_test.cc b/third_party/tcmalloc/vendor/src/tests/debugallocation_test.cc
index cb458d4..56ae30e 100644
--- a/third_party/tcmalloc/vendor/src/tests/debugallocation_test.cc
+++ b/third_party/tcmalloc/vendor/src/tests/debugallocation_test.cc
@@ -33,7 +33,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <vector>
-#include "google/malloc_extension.h"
+#include "gperftools/malloc_extension.h"
 #include "base/logging.h"
 
 using std::vector;
diff --git a/third_party/tcmalloc/vendor/src/tests/debugallocation_test.sh b/third_party/tcmalloc/vendor/src/tests/debugallocation_test.sh
index faa6c79..faa6c79 100755..100644
--- a/third_party/tcmalloc/vendor/src/tests/debugallocation_test.sh
+++ b/third_party/tcmalloc/vendor/src/tests/debugallocation_test.sh
diff --git a/third_party/tcmalloc/vendor/src/tests/frag_unittest.cc b/third_party/tcmalloc/vendor/src/tests/frag_unittest.cc
index 5ba02bd..1242770 100644
--- a/third_party/tcmalloc/vendor/src/tests/frag_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/frag_unittest.cc
@@ -45,7 +45,7 @@
 #include <vector>
 #include "base/logging.h"
 #include "common.h"
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 
 using std::vector;
 
diff --git a/third_party/tcmalloc/vendor/src/tests/heap-checker-death_unittest.sh b/third_party/tcmalloc/vendor/src/tests/heap-checker-death_unittest.sh
index 4a83fc2..ab4a666 100644
--- a/third_party/tcmalloc/vendor/src/tests/heap-checker-death_unittest.sh
+++ b/third_party/tcmalloc/vendor/src/tests/heap-checker-death_unittest.sh
@@ -84,11 +84,11 @@ Test() {
   output="$TMPDIR/output"
   ALARM $timeout env "$@" $EXE > "$output" 2>&1
   actual_ec=$?
-  ec_ok=$(expr "$actual_ec" : "$expected_ec$" >/dev/null || echo false)
-  matches_ok=$(test -z "$expected_regexp" || \
-               grep -q "$expected_regexp" "$output" || echo false)
-  negmatches_ok=$(test -z "$unexpected_regexp" || \
-                  ! grep -q "$unexpected_regexp" "$output" || echo false)
+  ec_ok=`expr "$actual_ec" : "$expected_ec$" >/dev/null || echo false`
+  matches_ok=`test -z "$expected_regexp" || \
+              grep "$expected_regexp" "$output" >/dev/null 2>&1 || echo false`
+  negmatches_ok=`test -z "$unexpected_regexp" || \
+                 ! grep "$unexpected_regexp" "$output" >/dev/null 2>&1 || echo false`
   if $ec_ok && $matches_ok && $negmatches_ok; then
     echo "PASS"
     return 0  # 0: success
diff --git a/third_party/tcmalloc/vendor/src/tests/heap-checker_unittest.cc b/third_party/tcmalloc/vendor/src/tests/heap-checker_unittest.cc
index 404c9f1..ab326c9 100644
--- a/third_party/tcmalloc/vendor/src/tests/heap-checker_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/heap-checker_unittest.cc
@@ -101,10 +101,10 @@
 #include "base/logging.h"
 #include "base/commandlineflags.h"
 #include "base/thread_lister.h"
-#include <google/heap-checker.h>
+#include <gperftools/heap-checker.h>
 #include "memory_region_map.h"
-#include <google/malloc_extension.h>
-#include <google/stacktrace.h>
+#include <gperftools/malloc_extension.h>
+#include <gperftools/stacktrace.h>
 
 // On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
 // form of the name instead.
@@ -1381,7 +1381,13 @@ int main(int argc, char** argv) {
     RunHidden(NewCallback(MakeALeak, &arr));
     Use(&arr);
     LogHidden("Leaking", arr);
-    if (FLAGS_test_cancel_global_check)  HeapLeakChecker::CancelGlobalCheck();
+    if (FLAGS_test_cancel_global_check) {
+      HeapLeakChecker::CancelGlobalCheck();
+    } else {
+      // Verify we can call NoGlobalLeaks repeatedly without deadlocking
+      HeapLeakChecker::NoGlobalLeaks();
+      HeapLeakChecker::NoGlobalLeaks();
+    }
     return Pass();
       // whole-program leak-check should (with very high probability)
       // catch the leak of arr (10 * sizeof(int) bytes)
@@ -1396,7 +1402,13 @@ int main(int argc, char** argv) {
     Use(&arr2);
     LogHidden("Loop leaking", arr1);
     LogHidden("Loop leaking", arr2);
-    if (FLAGS_test_cancel_global_check)  HeapLeakChecker::CancelGlobalCheck();
+    if (FLAGS_test_cancel_global_check) {
+      HeapLeakChecker::CancelGlobalCheck();
+    } else {
+      // Verify we can call NoGlobalLeaks repeatedly without deadlocking
+      HeapLeakChecker::NoGlobalLeaks();
+      HeapLeakChecker::NoGlobalLeaks();
+    }
     return Pass();
       // whole-program leak-check should (with very high probability)
       // catch the leak of arr1 and arr2 (4 * sizeof(void*) bytes)
diff --git a/third_party/tcmalloc/vendor/src/tests/heap-checker_unittest.sh b/third_party/tcmalloc/vendor/src/tests/heap-checker_unittest.sh
index 765e6c7..765e6c7 100755..100644
--- a/third_party/tcmalloc/vendor/src/tests/heap-checker_unittest.sh
+++ b/third_party/tcmalloc/vendor/src/tests/heap-checker_unittest.sh
diff --git a/third_party/tcmalloc/vendor/src/tests/heap-profiler_unittest.cc b/third_party/tcmalloc/vendor/src/tests/heap-profiler_unittest.cc
index e5dedee..5fd8bb7 100644
--- a/third_party/tcmalloc/vendor/src/tests/heap-profiler_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/heap-profiler_unittest.cc
@@ -49,7 +49,7 @@
 #include <string>
 #include "base/basictypes.h"
 #include "base/logging.h"
-#include <google/heap-profiler.h>
+#include <gperftools/heap-profiler.h>
 
 using std::string;
 
diff --git a/third_party/tcmalloc/vendor/src/tests/heap-profiler_unittest.sh b/third_party/tcmalloc/vendor/src/tests/heap-profiler_unittest.sh
index ad0a1ec..ad0a1ec 100755..100644
--- a/third_party/tcmalloc/vendor/src/tests/heap-profiler_unittest.sh
+++ b/third_party/tcmalloc/vendor/src/tests/heap-profiler_unittest.sh
diff --git a/third_party/tcmalloc/vendor/src/tests/low_level_alloc_unittest.cc b/third_party/tcmalloc/vendor/src/tests/low_level_alloc_unittest.cc
index 4228e12..0e5a48a 100644
--- a/third_party/tcmalloc/vendor/src/tests/low_level_alloc_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/low_level_alloc_unittest.cc
@@ -34,7 +34,7 @@
 #include <map>
 #include "base/low_level_alloc.h"
 #include "base/logging.h"
-#include <google/malloc_hook.h>
+#include <gperftools/malloc_hook.h>
 
 using std::map;
 
diff --git a/third_party/tcmalloc/vendor/src/tests/malloc_extension_c_test.c b/third_party/tcmalloc/vendor/src/tests/malloc_extension_c_test.c
index 57cdbbd..af0e0c1 100644
--- a/third_party/tcmalloc/vendor/src/tests/malloc_extension_c_test.c
+++ b/third_party/tcmalloc/vendor/src/tests/malloc_extension_c_test.c
@@ -40,8 +40,8 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stddef.h>   /* for size_t */
-#include <google/malloc_extension_c.h>
-#include <google/malloc_hook_c.h>
+#include <gperftools/malloc_extension_c.h>
+#include <gperftools/malloc_hook_c.h>
 
 #define FAIL(msg) do {                          \
   fprintf(stderr, "FATAL ERROR: %s\n", msg);    \
diff --git a/third_party/tcmalloc/vendor/src/tests/malloc_extension_test.cc b/third_party/tcmalloc/vendor/src/tests/malloc_extension_test.cc
index 3e57765..58fef7e 100644
--- a/third_party/tcmalloc/vendor/src/tests/malloc_extension_test.cc
+++ b/third_party/tcmalloc/vendor/src/tests/malloc_extension_test.cc
@@ -36,8 +36,8 @@
 #include <stdio.h>
 #include <sys/types.h>
 #include "base/logging.h"
-#include <google/malloc_extension.h>
-#include <google/malloc_extension_c.h>
+#include <gperftools/malloc_extension.h>
+#include <gperftools/malloc_extension_c.h>
 
 using STL_NAMESPACE::vector;
 
diff --git a/third_party/tcmalloc/vendor/src/tests/malloc_hook_test.cc b/third_party/tcmalloc/vendor/src/tests/malloc_hook_test.cc
index b46f6fd..cbf526a 100644
--- a/third_party/tcmalloc/vendor/src/tests/malloc_hook_test.cc
+++ b/third_party/tcmalloc/vendor/src/tests/malloc_hook_test.cc
@@ -42,7 +42,7 @@
 #include <algorithm>
 #include <string>
 #include <vector>
-#include <google/malloc_hook.h>
+#include <gperftools/malloc_hook.h>
 #include "malloc_hook-inl.h"
 #include "base/logging.h"
 #include "base/simple_mutex.h"
diff --git a/third_party/tcmalloc/vendor/src/tests/markidle_unittest.cc b/third_party/tcmalloc/vendor/src/tests/markidle_unittest.cc
index ac9971f..2f150ab 100644
--- a/third_party/tcmalloc/vendor/src/tests/markidle_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/markidle_unittest.cc
@@ -35,7 +35,7 @@
 
 #include "config_for_unittests.h"
 #include "base/logging.h"
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 #include "tests/testutil.h"   // for RunThread()
 
 // Helper routine to do lots of allocations
diff --git a/third_party/tcmalloc/vendor/src/tests/maybe_threads_unittest.sh b/third_party/tcmalloc/vendor/src/tests/maybe_threads_unittest.sh
index 77b3b78..77b3b78 100755..100644
--- a/third_party/tcmalloc/vendor/src/tests/maybe_threads_unittest.sh
+++ b/third_party/tcmalloc/vendor/src/tests/maybe_threads_unittest.sh
diff --git a/third_party/tcmalloc/vendor/src/tests/profile-handler_unittest.cc b/third_party/tcmalloc/vendor/src/tests/profile-handler_unittest.cc
index 84e035c..98cfe6d 100644
--- a/third_party/tcmalloc/vendor/src/tests/profile-handler_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/profile-handler_unittest.cc
@@ -3,6 +3,13 @@
 //         Chris Demetriou (cgd@google.com)
 //
 // This file contains the unit tests for profile-handler.h interface.
+//
+// It is linked into three separate unit tests:
+//     profile-handler_unittest tests basic functionality
+//     profile-handler_disable_test tests that the profiler
+//         is disabled with --install_signal_handlers=false
+//     profile-handler_conflict_test tests that the profiler
+//         is disabled when a SIGPROF handler is registered before InitGoogle.
 
 #include "config.h"
 #include "profile-handler.h"
@@ -17,6 +24,16 @@
 // Some helpful macros for the test class
 #define TEST_F(cls, fn)    void cls :: fn()
 
+// Do we expect the profiler to be enabled?
+DEFINE_bool(test_profiler_enabled, true,
+            "expect profiler to be enabled during tests");
+
+// Should we look at the kernel signal handler settings during the test?
+// Not if we're in conflict_test, because we can't distinguish its nop
+// handler from the real one.
+DEFINE_bool(test_profiler_signal_handler, true,
+            "check profiler signal handler during tests");
+
 namespace {
 
 // TODO(csilvers): error-checking on the pthreads routines
@@ -278,17 +295,24 @@ class ProfileHandlerTest {
     // Check the callback count.
     EXPECT_GT(GetCallbackCount(), 0);
     // Check that the profile timer is enabled.
-    EXPECT_TRUE(IsTimerEnabled());
+    EXPECT_EQ(FLAGS_test_profiler_enabled, IsTimerEnabled());
     // Check that the signal handler is enabled.
-    EXPECT_TRUE(IsSignalEnabled());
+    if (FLAGS_test_profiler_signal_handler) {
+      EXPECT_EQ(FLAGS_test_profiler_enabled, IsSignalEnabled());
+    }
     uint64 interrupts_before = GetInterruptCount();
     // Sleep for a bit and check that tick counter is making progress.
     int old_tick_count = tick_counter;
     Delay(kSleepInterval);
     int new_tick_count = tick_counter;
-    EXPECT_GT(new_tick_count, old_tick_count);
     uint64 interrupts_after = GetInterruptCount();
-    EXPECT_GT(interrupts_after, interrupts_before);
+    if (FLAGS_test_profiler_enabled) {
+      EXPECT_GT(new_tick_count, old_tick_count);
+      EXPECT_GT(interrupts_after, interrupts_before);
+    } else {
+      EXPECT_EQ(new_tick_count, old_tick_count);
+      EXPECT_EQ(interrupts_after, interrupts_before);
+    }
   }
 
   // Verifies that a callback is not receiving profile ticks.
@@ -300,7 +324,9 @@ class ProfileHandlerTest {
     EXPECT_EQ(old_tick_count, new_tick_count);
     // If no callbacks, signal handler and shared timer should be disabled.
     if (GetCallbackCount() == 0) {
-      EXPECT_FALSE(IsSignalEnabled());
+      if (FLAGS_test_profiler_signal_handler) {
+        EXPECT_FALSE(IsSignalEnabled());
+      }
       if (timer_separate_) {
         EXPECT_TRUE(IsTimerEnabled());
       } else {
@@ -313,7 +339,9 @@ class ProfileHandlerTest {
   // timer, if shared, is disabled. Expects the worker to be running.
   void VerifyDisabled() {
     // Check that the signal handler is disabled.
-    EXPECT_FALSE(IsSignalEnabled());
+    if (FLAGS_test_profiler_signal_handler) {
+      EXPECT_FALSE(IsSignalEnabled());
+    }
     // Check that the callback count is 0.
     EXPECT_EQ(0, GetCallbackCount());
     // Check that the timer is disabled if shared, enabled otherwise.
@@ -465,8 +493,10 @@ TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) {
   // correctly enabled.
   RegisterThread();
   EXPECT_EQ(1, GetCallbackCount());
-  EXPECT_TRUE(IsTimerEnabled());
-  EXPECT_TRUE(IsSignalEnabled());
+  EXPECT_EQ(FLAGS_test_profiler_enabled, IsTimerEnabled());
+  if (FLAGS_test_profiler_signal_handler) {
+    EXPECT_EQ(FLAGS_test_profiler_enabled, IsSignalEnabled());
+  }
 }
 
 }  // namespace
diff --git a/third_party/tcmalloc/vendor/src/tests/profiler_unittest.cc b/third_party/tcmalloc/vendor/src/tests/profiler_unittest.cc
index fafc76f..399891b 100644
--- a/third_party/tcmalloc/vendor/src/tests/profiler_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/profiler_unittest.cc
@@ -41,7 +41,7 @@
 #include <unistd.h>                 // for fork()
 #endif
 #include <sys/wait.h>               // for wait()
-#include "google/profiler.h"
+#include "gperftools/profiler.h"
 #include "base/simple_mutex.h"
 #include "tests/testutil.h"
 
diff --git a/third_party/tcmalloc/vendor/src/tests/profiler_unittest.sh b/third_party/tcmalloc/vendor/src/tests/profiler_unittest.sh
index 4668fa7..4668fa7 100755..100644
--- a/third_party/tcmalloc/vendor/src/tests/profiler_unittest.sh
+++ b/third_party/tcmalloc/vendor/src/tests/profiler_unittest.sh
diff --git a/third_party/tcmalloc/vendor/src/tests/sampler_test.cc b/third_party/tcmalloc/vendor/src/tests/sampler_test.cc
index 31c87cd..c55d5dc 100644
--- a/third_party/tcmalloc/vendor/src/tests/sampler_test.cc
+++ b/third_party/tcmalloc/vendor/src/tests/sampler_test.cc
@@ -357,7 +357,7 @@ bool CheckMean(size_t mean, int num_samples) {
   }
   double empirical_mean = total / static_cast<double>(num_samples);
   double expected_sd = mean / pow(num_samples * 1.0, 0.5);
-  return(abs(mean-empirical_mean) < expected_sd * kSigmas);
+  return(fabs(mean-empirical_mean) < expected_sd * kSigmas);
 }
 
 // Prints a sequence so you can look at the distribution
@@ -409,8 +409,8 @@ TEST(Sampler, LargeAndSmallAllocs_CombinedTest) {
                                      size_small, kSamplingInterval);
   LOG(INFO) << StringPrintf("large_allocs_sds = %f\n", large_allocs_sds);
   LOG(INFO) << StringPrintf("small_allocs_sds = %f\n", small_allocs_sds);
-  CHECK_LE(abs(large_allocs_sds), kSigmas);
-  CHECK_LE(abs(small_allocs_sds), kSigmas);
+  CHECK_LE(fabs(large_allocs_sds), kSigmas);
+  CHECK_LE(fabs(small_allocs_sds), kSigmas);
 }
 
 // Tests whether the mean is about right over 1000 samples
diff --git a/third_party/tcmalloc/vendor/src/tests/sampling_test.cc b/third_party/tcmalloc/vendor/src/tests/sampling_test.cc
index c1bd693..8132475 100644
--- a/third_party/tcmalloc/vendor/src/tests/sampling_test.cc
+++ b/third_party/tcmalloc/vendor/src/tests/sampling_test.cc
@@ -41,7 +41,7 @@
 #include <stdlib.h>
 #include <string>
 #include "base/logging.h"
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 
 using std::string;
 
diff --git a/third_party/tcmalloc/vendor/src/tests/sampling_test.sh b/third_party/tcmalloc/vendor/src/tests/sampling_test.sh
index 2a58426..2a58426 100755..100644
--- a/third_party/tcmalloc/vendor/src/tests/sampling_test.sh
+++ b/third_party/tcmalloc/vendor/src/tests/sampling_test.sh
diff --git a/third_party/tcmalloc/vendor/src/stacktrace_nacl-inl.h b/third_party/tcmalloc/vendor/src/tests/simple_compat_test.cc
index 7467e75..824cfcf 100644
--- a/third_party/tcmalloc/vendor/src/stacktrace_nacl-inl.h
+++ b/third_party/tcmalloc/vendor/src/tests/simple_compat_test.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2011, Google Inc.
+// Copyright (c) 2012, Google Inc.
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -28,10 +28,40 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 // ---
-// Author: Ivan Krasin
+// Author: Craig Silverstein
 //
-// Native Client stub for stacktrace.
+// This just verifies that we can compile code that #includes stuff
+// via the backwards-compatibility 'google/' #include-dir.  It does
+// not include config.h on purpose, to better simulate a perftools
+// client.
 
-int GET_STACK_TRACE_OR_FRAMES {
+#include <stddef.h>
+#include <stdio.h>
+#include <google/heap-checker.h>
+#include <google/heap-profiler.h>
+#include <google/malloc_extension.h>
+#include <google/malloc_extension_c.h>
+#include <google/malloc_hook.h>
+#include <google/malloc_hook_c.h>
+#include <google/profiler.h>
+#include <google/stacktrace.h>
+#include <google/tcmalloc.h>
+
+// We don't link in -lprofiler for this test, so be sure not to make
+// any function calls that require the cpu-profiler code.  The
+// heap-profiler is ok.
+
+HeapLeakChecker::Disabler* heap_checker_h;
+void (*heap_profiler_h)(const char*) = &HeapProfilerStart;
+MallocExtension::Ownership malloc_extension_h;
+MallocExtension_Ownership malloc_extension_c_h;
+MallocHook::NewHook* malloc_hook_h;
+MallocHook_NewHook* malloc_hook_c_h;
+ProfilerOptions* profiler_h;
+int (*stacktrace_h)(void**, int, int) = &GetStackTrace;
+void* (*tcmalloc_h)(size_t) = &tc_new;
+
+int main(int argc, char** argv) {
+  printf("PASS\n");
   return 0;
 }
diff --git a/third_party/tcmalloc/vendor/src/tests/stacktrace_unittest.cc b/third_party/tcmalloc/vendor/src/tests/stacktrace_unittest.cc
index 69e20ba..3c9f735 100644
--- a/third_party/tcmalloc/vendor/src/tests/stacktrace_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/stacktrace_unittest.cc
@@ -35,7 +35,7 @@
 #include <stdlib.h>
 #include "base/commandlineflags.h"
 #include "base/logging.h"
-#include <google/stacktrace.h>
+#include <gperftools/stacktrace.h>
 
 namespace {
 
diff --git a/third_party/tcmalloc/vendor/src/tests/system-alloc_unittest.cc b/third_party/tcmalloc/vendor/src/tests/system-alloc_unittest.cc
index 8e4eb36..f0259a1 100644
--- a/third_party/tcmalloc/vendor/src/tests/system-alloc_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/system-alloc_unittest.cc
@@ -42,7 +42,7 @@
 #include <algorithm>
 #include <limits>
 #include "base/logging.h"               // for Check_GEImpl, Check_LTImpl, etc
-#include <google/malloc_extension.h>    // for MallocExtension::instance
+#include <gperftools/malloc_extension.h>    // for MallocExtension::instance
 #include "common.h"                     // for kAddressBits
 
 class ArraySysAllocator : public SysAllocator {
diff --git a/third_party/tcmalloc/vendor/src/tests/tcmalloc_unittest.cc b/third_party/tcmalloc/vendor/src/tests/tcmalloc_unittest.cc
index 84a5889..cfdc79c 100644
--- a/third_party/tcmalloc/vendor/src/tests/tcmalloc_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/tcmalloc_unittest.cc
@@ -88,9 +88,9 @@
 #include <new>
 #include "base/logging.h"
 #include "base/simple_mutex.h"
-#include "google/malloc_hook.h"
-#include "google/malloc_extension.h"
-#include "google/tcmalloc.h"
+#include "gperftools/malloc_hook.h"
+#include "gperftools/malloc_extension.h"
+#include "gperftools/tcmalloc.h"
 #include "thread_cache.h"
 #include "tests/testutil.h"
 
@@ -107,10 +107,12 @@ static bool kOSSupportsMemalign = false;
 static inline void* Memalign(size_t align, size_t size) {
   //LOG(FATAL) << "memalign not supported on windows";
   exit(1);
+  return NULL;
 }
 static inline int PosixMemalign(void** ptr, size_t align, size_t size) {
   //LOG(FATAL) << "posix_memalign not supported on windows";
   exit(1);
+  return -1;
 }
 
 // OS X defines posix_memalign in some OS versions but not others;
@@ -120,10 +122,12 @@ static bool kOSSupportsMemalign = false;
 static inline void* Memalign(size_t align, size_t size) {
   //LOG(FATAL) << "memalign not supported on OS X";
   exit(1);
+  return NULL;
 }
 static inline int PosixMemalign(void** ptr, size_t align, size_t size) {
   //LOG(FATAL) << "posix_memalign not supported on OS X";
   exit(1);
+  return -1;
 }
 
 #else
@@ -1083,6 +1087,7 @@ static int RunAllTests(int argc, char** argv) {
     // Windows has _aligned_malloc.  Let's test that that's captured too.
 #if (defined(_MSC_VER) || defined(__MINGW32__)) && !defined(PERFTOOLS_NO_ALIGNED_MALLOC)
     p1 = _aligned_malloc(sizeof(p1) * 2, 64);
+    CHECK(p1 != NULL);
     VerifyNewHookWasCalled();
     _aligned_free(p1);
     VerifyDeleteHookWasCalled();
diff --git a/third_party/tcmalloc/vendor/src/tests/thread_dealloc_unittest.cc b/third_party/tcmalloc/vendor/src/tests/thread_dealloc_unittest.cc
index 32923e1..e6fd9b3 100644
--- a/third_party/tcmalloc/vendor/src/tests/thread_dealloc_unittest.cc
+++ b/third_party/tcmalloc/vendor/src/tests/thread_dealloc_unittest.cc
@@ -38,7 +38,7 @@
 #include <unistd.h>    // for sleep()
 #endif
 #include "base/logging.h"
-#include <google/malloc_extension.h>
+#include <gperftools/malloc_extension.h>
 #include "tests/testutil.h"   // for RunThread()
 
 // Size/number of objects to allocate per thread (1 MB per thread)
diff --git a/third_party/tcmalloc/vendor/src/thread_cache.cc b/third_party/tcmalloc/vendor/src/thread_cache.cc
index c228c0f..d6dead3 100644
--- a/third_party/tcmalloc/vendor/src/thread_cache.cc
+++ b/third_party/tcmalloc/vendor/src/thread_cache.cc
@@ -32,6 +32,7 @@
 
 #include <config.h>
 #include "thread_cache.h"
+#include <errno.h>
 #include <string.h>                     // for memcpy
 #include <algorithm>                    // for max, min
 #include "base/commandlineflags.h"      // for SpinLockHolder
@@ -85,8 +86,9 @@ bool kernel_supports_tls = false;      // be conservative
 #   include <sys/utsname.h>    // DECL_UNAME checked for <sys/utsname.h> too
     void CheckIfKernelSupportsTLS() {
       struct utsname buf;
-      if (uname(&buf) != 0) {   // should be impossible
-        MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
+      if (uname(&buf) < 0) {   // should be impossible
+        Log(kLog, __FILE__, __LINE__,
+            "uname failed assuming no TLS support (errno)", errno);
         kernel_supports_tls = false;
       } else if (strcasecmp(buf.sysname, "linux") == 0) {
         // The linux case: the first kernel to support TLS was 2.6.0
@@ -98,6 +100,10 @@ bool kernel_supports_tls = false;      // be conservative
           kernel_supports_tls = false;
         else
           kernel_supports_tls = true;
+      } else if (strcasecmp(buf.sysname, "CYGWIN_NT-6.1-WOW64") == 0) {
+        // In my testing, this version of cygwin, at least, would hang
+        // when using TLS.
+        kernel_supports_tls = false;
       } else {        // some other kernel, we'll be optimisitic
         kernel_supports_tls = true;
       }
@@ -260,10 +266,6 @@ void ThreadCache::Scavenge() {
   }
 
   IncreaseCacheLimit();
-
-//   int64 finish = CycleClock::Now();
-//   CycleTimer ct;
-//   MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
 }
 
 void ThreadCache::IncreaseCacheLimit() {
@@ -476,30 +478,6 @@ void ThreadCache::RecomputePerThreadCacheSize() {
   }
   unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
   per_thread_cache_size_ = space;
-  //  TCMalloc_MESSAGE(__FILE__, __LINE__, "Threads %d => cache size %8d\n", n, int(space));
-}
-
-void ThreadCache::Print(TCMalloc_Printer* out) const {
-  for (int cl = 0; cl < kNumClasses; ++cl) {
-    out->printf("      %5" PRIuS " : %4" PRIuS " len; %4d lo; %4"PRIuS
-                " max; %4"PRIuS" overages;\n",
-                Static::sizemap()->ByteSizeForClass(cl),
-                list_[cl].length(),
-                list_[cl].lowwatermark(),
-                list_[cl].max_length(),
-                list_[cl].length_overages());
-  }
-}
-
-void ThreadCache::PrintThreads(TCMalloc_Printer* out) {
-  size_t actual_limit = 0;
-  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
-    h->Print(out);
-    actual_limit += h->max_size_;
-  }
-  out->printf("ThreadCache overall: %"PRIuS ", unclaimed: %"PRIdS
-              ", actual: %"PRIuS"\n",
-              overall_thread_cache_size_, unclaimed_cache_space_, actual_limit);
 }
 
 void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
diff --git a/third_party/tcmalloc/vendor/src/thread_cache.h b/third_party/tcmalloc/vendor/src/thread_cache.h
index 1742d5b..1d0413b 100644
--- a/third_party/tcmalloc/vendor/src/thread_cache.h
+++ b/third_party/tcmalloc/vendor/src/thread_cache.h
@@ -94,7 +94,6 @@ class ThreadCache {
   void Deallocate(void* ptr, size_t size_class);
 
   void Scavenge();
-  void Print(TCMalloc_Printer* out) const;
 
   int GetSamplePeriod();
 
@@ -120,10 +119,6 @@ class ThreadCache {
   // REQUIRES: Static::pageheap_lock is held.
   static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count);
 
-  // Write debugging statistics to 'out'.
-  // REQUIRES: Static::pageheap_lock is held.
-  static void PrintThreads(TCMalloc_Printer* out);
-
   // Sets the total thread cache size to new_size, recomputing the
   // individual thread cache sizes as necessary.
   // REQUIRES: Static::pageheap lock is held.
@@ -209,6 +204,10 @@ class ThreadCache {
       return SLL_Pop(&list_);
     }
 
+    void* Next() {
+      return SLL_Next(&list_);
+    }
+
     void PushRange(int N, void *start, void *end) {
       SLL_PushRange(&list_, start, end);
       length_ += N;
@@ -349,6 +348,12 @@ inline void ThreadCache::Deallocate(void* ptr, size_t cl) {
   FreeList* list = &list_[cl];
   size_ += Static::sizemap()->ByteSizeForClass(cl);
   ssize_t size_headroom = max_size_ - size_ - 1;
+
+  // This catches back-to-back frees of allocs in the same size
+  // class. A more comprehensive (and expensive) test would be to walk
+  // the entire freelist. But this might be enough to find some bugs.
+  ASSERT(ptr != list->Next());
+
   list->Push(ptr);
   ssize_t list_headroom =
       static_cast<ssize_t>(list->max_length()) - list->length();
diff --git a/third_party/tcmalloc/vendor/src/windows/auto_testing_hook.h b/third_party/tcmalloc/vendor/src/windows/auto_testing_hook.h
new file mode 100644
index 0000000..5a04797
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/windows/auto_testing_hook.h
@@ -0,0 +1,155 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Utility for using SideStep with unit tests.
+
+#ifndef CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_
+#define CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "preamble_patcher.h"
+
+#define SIDESTEP_CHK(x)  CHECK(x)
+#define SIDESTEP_EXPECT_TRUE(x)  SIDESTEP_CHK(x)
+
+namespace sidestep {
+
+// Same trick as common/scope_cleanup.h ScopeGuardImplBase
+class AutoTestingHookBase {
+ public:
+  virtual ~AutoTestingHookBase() {}
+};
+
+// This is the typedef you normally use for the class, e.g.
+//
+// AutoTestingHook hook = MakeTestingHook(TargetFunc, HookTargetFunc);
+//
+// The 'hook' variable will then be destroyed when it goes out of scope.
+//
+// NOTE: You must not hold this type as a member of another class.  Its
+// destructor will not get called.
+typedef const AutoTestingHookBase& AutoTestingHook;
+
+// This is the class you must use when holding a hook as a member of another
+// class, e.g.
+//
+// public:
+//  AutoTestingHookHolder holder_;
+//  MyClass() : my_hook_holder(MakeTestingHookHolder(Target, Hook)) {}
+class AutoTestingHookHolder {
+ public:
+  explicit AutoTestingHookHolder(AutoTestingHookBase* hook) : hook_(hook) {}
+  ~AutoTestingHookHolder() { delete hook_; }
+ private:
+  AutoTestingHookHolder() {}  // disallow
+  AutoTestingHookBase* hook_;
+};
+
+// This class helps patch a function, then unpatch it when the object exits
+// scope, and also maintains the pointer to the original function stub.
+//
+// To enable use of the class without having to explicitly provide the
+// type of the function pointers (and instead only providing it
+// implicitly) we use the same trick as ScopeGuard (see
+// common/scope_cleanup.h) uses, so to create a hook you use the MakeHook
+// function rather than a constructor.
+//
+// NOTE:  This function is only safe for e.g. unit tests and _not_ for
+// production code.  See PreamblePatcher class for details.
+template <typename T>
+class AutoTestingHookImpl : public AutoTestingHookBase {
+ public:
+  static AutoTestingHookImpl<T> MakeTestingHook(T target_function,
+                                                T replacement_function,
+                                                bool do_it) {
+    return AutoTestingHookImpl<T>(target_function, replacement_function, do_it);
+  }
+
+  static AutoTestingHookImpl<T>* MakeTestingHookHolder(T target_function,
+                                                       T replacement_function,
+                                                       bool do_it) {
+    return new AutoTestingHookImpl<T>(target_function,
+                                      replacement_function, do_it);
+  }
+
+  ~AutoTestingHookImpl() {
+    if (did_it_) {
+      SIDESTEP_CHK(SIDESTEP_SUCCESS == PreamblePatcher::Unpatch(
+          (void*)target_function_, (void*)replacement_function_,
+          (void*)original_function_));
+    }
+  }
+
+  // Returns a pointer to the original function.  To use this method you will
+  // have to explicitly create an AutoTestingHookImpl of the specific
+  // function pointer type (i.e. not use the AutoTestingHook typedef).
+  T original_function() {
+    return original_function_;
+  }
+
+ private:
+  AutoTestingHookImpl(T target_function, T replacement_function, bool do_it)
+      : target_function_(target_function),
+        original_function_(NULL),
+        replacement_function_(replacement_function),
+        did_it_(do_it) {
+    if (do_it) {
+      SIDESTEP_CHK(SIDESTEP_SUCCESS == PreamblePatcher::Patch(target_function,
+                                                     replacement_function,
+                                                     &original_function_));
+    }
+  }
+
+  T target_function_;  // always valid
+  T original_function_;  // always valid
+  T replacement_function_;  // always valid
+  bool did_it_;  // Remember if we did it or not...
+};
+
+template <typename T>
+inline AutoTestingHookImpl<T> MakeTestingHook(T target,
+                                              T replacement,
+                                              bool do_it) {
+  return AutoTestingHookImpl<T>::MakeTestingHook(target, replacement, do_it);
+}
+
+template <typename T>
+inline AutoTestingHookImpl<T> MakeTestingHook(T target, T replacement) {
+  return AutoTestingHookImpl<T>::MakeTestingHook(target, replacement, true);
+}
+
+template <typename T>
+inline AutoTestingHookImpl<T>* MakeTestingHookHolder(T target, T replacement) {
+  return AutoTestingHookImpl<T>::MakeTestingHookHolder(target, replacement,
+                                                       true);
+}
+
+};  // namespace sidestep
+
+#endif  // CEEE_TESTING_SIDESTEP_AUTO_TESTING_HOOK_H_
diff --git a/third_party/tcmalloc/vendor/src/windows/config.h b/third_party/tcmalloc/vendor/src/windows/config.h
index 7a7a8d7..9d61884 100644
--- a/third_party/tcmalloc/vendor/src/windows/config.h
+++ b/third_party/tcmalloc/vendor/src/windows/config.h
@@ -213,25 +213,25 @@
 #undef NO_MINUS_C_MINUS_O
 
 /* Name of package */
-#define PACKAGE "google-perftools"
+#define PACKAGE "gperftools"
 
 /* Define to the address where bug reports for this package should be sent. */
 #define PACKAGE_BUGREPORT "opensource@google.com"
 
 /* Define to the full name of this package. */
-#define PACKAGE_NAME "google-perftools"
+#define PACKAGE_NAME "gperftools"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "google-perftools 1.8"
+#define PACKAGE_STRING "gperftools 2.0"
 
 /* Define to the one symbol short name of this package. */
-#define PACKAGE_TARNAME "google-perftools"
+#define PACKAGE_TARNAME "gperftools"
 
 /* Define to the home page for this package. */
 #undef PACKAGE_URL
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.8"
+#define PACKAGE_VERSION "2.0"
 
 /* How to access the PC from a struct ucontext */
 #undef PC_FROM_UCONTEXT
diff --git a/third_party/tcmalloc/vendor/src/windows/google/tcmalloc.h b/third_party/tcmalloc/vendor/src/windows/google/tcmalloc.h
new file mode 100644
index 0000000..c7db631
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/windows/google/tcmalloc.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2003, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* The code has moved to gperftools/.  Use that include-directory for
+ * new code.
+ */
+#include <gperftools/tcmalloc.h>
diff --git a/third_party/tcmalloc/vendor/src/windows/gperftools/tcmalloc.h b/third_party/tcmalloc/vendor/src/windows/gperftools/tcmalloc.h
new file mode 100644
index 0000000..db32c53
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/windows/gperftools/tcmalloc.h
@@ -0,0 +1,123 @@
+/* Copyright (c) 2003, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat <opensource@google.com>
+ *         .h.in file by Craig Silverstein <opensource@google.com>
+ */
+
+#ifndef TCMALLOC_TCMALLOC_H_
+#define TCMALLOC_TCMALLOC_H_
+
+#include <stddef.h>                     // for size_t
+#ifdef HAVE_SYS_CDEFS_H
+#include <sys/cdefs.h>   // where glibc defines __THROW
+#endif
+
+// __THROW is defined in glibc systems.  It means, counter-intuitively,
+// "This function will never throw an exception."  It's an optional
+// optimization tool, but we may need to use it to match glibc prototypes.
+#ifndef __THROW    /* I guess we're not on a glibc system */
+# define __THROW   /* __THROW is just an optimization, so ok to make it "" */
+#endif
+
+// Define the version number so folks can check against it
+#define TC_VERSION_MAJOR  2
+#define TC_VERSION_MINOR  0
+#define TC_VERSION_PATCH  ""
+#define TC_VERSION_STRING "gperftools 2.0"
+
+#include <stdlib.h>   // for struct mallinfo, if it's defined
+
+// Annoying stuff for windows -- makes sure clients can import these functions
+#ifndef PERFTOOLS_DLL_DECL
+# ifdef _WIN32
+#   define PERFTOOLS_DLL_DECL  __declspec(dllimport)
+# else
+#   define PERFTOOLS_DLL_DECL
+# endif
+#endif
+
+#ifdef __cplusplus
+namespace std {
+struct nothrow_t;
+}
+
+extern "C" {
+#endif
+  // Returns a human-readable version string.  If major, minor,
+  // and/or patch are not NULL, they are set to the major version,
+  // minor version, and patch-code (a string, usually "").
+  PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor,
+                                            const char** patch) __THROW;
+
+  PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW;
+  PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW;
+  PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) __THROW;
+  PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) __THROW;
+  PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW;
+
+  PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment,
+                                       size_t __size) __THROW;
+  PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr,
+                                           size_t align, size_t size) __THROW;
+  PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) __THROW;
+  PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) __THROW;
+
+  PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW;
+  PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW;
+#if 0
+  PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW;
+#endif
+
+  // This is an alias for MallocExtension::instance()->GetAllocatedSize().
+  // It is equivalent to
+  //    OS X: malloc_size()
+  //    glibc: malloc_usable_size()
+  //    Windows: _msize()
+  PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW;
+
+#ifdef __cplusplus
+  PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW;
+  PERFTOOLS_DLL_DECL void* tc_new(size_t size);
+  PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size,
+                                          const std::nothrow_t&) __THROW;
+  PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW;
+  PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p,
+                                            const std::nothrow_t&) __THROW;
+  PERFTOOLS_DLL_DECL void* tc_newarray(size_t size);
+  PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size,
+                                               const std::nothrow_t&) __THROW;
+  PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
+  PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p,
+                                                 const std::nothrow_t&) __THROW;
+}
+#endif
+
+#endif  // #ifndef TCMALLOC_TCMALLOC_H_
diff --git a/third_party/tcmalloc/vendor/src/windows/google/tcmalloc.h.in b/third_party/tcmalloc/vendor/src/windows/gperftools/tcmalloc.h.in
index 0b53e4e..d09ec953 100644
--- a/third_party/tcmalloc/vendor/src/windows/google/tcmalloc.h.in
+++ b/third_party/tcmalloc/vendor/src/windows/gperftools/tcmalloc.h.in
@@ -51,7 +51,7 @@
 #define TC_VERSION_MAJOR  @TC_VERSION_MAJOR@
 #define TC_VERSION_MINOR  @TC_VERSION_MINOR@
 #define TC_VERSION_PATCH  "@TC_VERSION_PATCH@"
-#define TC_VERSION_STRING "google-perftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@"
+#define TC_VERSION_STRING "gperftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@"
 
 #include <stdlib.h>   // for struct mallinfo, if it's defined
 
diff --git a/third_party/tcmalloc/vendor/src/windows/ia32_opcode_map.cc b/third_party/tcmalloc/vendor/src/windows/ia32_opcode_map.cc
index c9ec18b..ba6a79e 100644
--- a/third_party/tcmalloc/vendor/src/windows/ia32_opcode_map.cc
+++ b/third_party/tcmalloc/vendor/src/windows/ia32_opcode_map.cc
@@ -111,6 +111,25 @@ const Opcode s_first_opcode_byte[] = {
   /* 0x3D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0x3E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0x3F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aas", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#ifdef _M_X64
+  /* REX Prefixes in 64-bit mode. */
+  /* 0x40 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x41 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x42 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x43 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x44 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x45 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x46 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x47 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x48 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x49 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x4A */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x4B */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x4C */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x4D */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x4E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0x4F */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#else
   /* 0x40 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0x41 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0x42 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
@@ -127,6 +146,7 @@ const Opcode s_first_opcode_byte[] = {
   /* 0x4D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0x4E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0x4F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#endif
   /* 0x50 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0x51 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0x52 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
@@ -231,6 +251,16 @@ const Opcode s_first_opcode_byte[] = {
   /* 0xB5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0xB6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0xB7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#ifdef _M_X64
+  /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0xBB */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0xBC */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+  /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V | IOS_64, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#else
   /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
@@ -239,6 +269,7 @@ const Opcode s_first_opcode_byte[] = {
   /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+#endif
   /* 0xC0 */ { 6, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0xC1 */ { 7, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
   /* 0xC2 */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
diff --git a/third_party/tcmalloc/vendor/src/windows/mini_disassembler.cc b/third_party/tcmalloc/vendor/src/windows/mini_disassembler.cc
index 30bdcc1..9e336ba 100644
--- a/third_party/tcmalloc/vendor/src/windows/mini_disassembler.cc
+++ b/third_party/tcmalloc/vendor/src/windows/mini_disassembler.cc
@@ -100,6 +100,12 @@ InstructionType MiniDisassembler::Disassemble(
 void MiniDisassembler::Initialize() {
   operand_is_32_bits_ = operand_default_is_32_bits_;
   address_is_32_bits_ = address_default_is_32_bits_;
+#ifdef _M_X64
+  operand_default_support_64_bits_ = true;
+#else
+  operand_default_support_64_bits_ = false;
+#endif
+  operand_is_64_bits_ = false;
   operand_bytes_ = 0;
   have_modrm_ = false;
   should_decode_modrm_ = false;
@@ -129,6 +135,8 @@ InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
         got_f3_prefix_ = true;
       else if (0x66 == (*start_byte))
         got_66_prefix_ = true;
+      else if (operand_default_support_64_bits_ && (*start_byte) & 0x48)
+        operand_is_64_bits_ = true;
 
       instruction_type = opcode.type_;
       size ++;
@@ -314,8 +322,12 @@ bool MiniDisassembler::ProcessOperand(int flag_operand) {
           // floating point
           succeeded = false;
           break;
-        case OT_V: // Word or doubleword, depending on operand-size attribute.
-          if (operand_is_32_bits_)
+        case OT_V: // Word, doubleword or quadword, depending on operand-size 
+                   // attribute.
+          if (operand_is_64_bits_ && flag_operand & AM_I &&
+              flag_operand & IOS_64)
+            operand_bytes_ += OS_QUAD_WORD;
+          else if (operand_is_32_bits_)
             operand_bytes_ += OS_DOUBLE_WORD;
           else
             operand_bytes_ += OS_WORD;
diff --git a/third_party/tcmalloc/vendor/src/windows/mini_disassembler.h b/third_party/tcmalloc/vendor/src/windows/mini_disassembler.h
index e676232..52daa5d8 100644
--- a/third_party/tcmalloc/vendor/src/windows/mini_disassembler.h
+++ b/third_party/tcmalloc/vendor/src/windows/mini_disassembler.h
@@ -36,6 +36,7 @@
 #ifndef GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_
 #define GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_
 
+#include "config.h"
 #include <windows.h>
 #include "mini_disassembler_types.h"
 
@@ -74,7 +75,7 @@ namespace sidestep {
 // IA-32 Intel� Architecture Software Developer�s Manual Volume 2:
 // Instruction Set Reference for information about operand decoding
 // etc.
-class MiniDisassembler {
+class PERFTOOLS_DLL_DECL MiniDisassembler {
  public:
 
   // Creates a new instance and sets defaults.
@@ -165,6 +166,12 @@ class MiniDisassembler {
   // Default address size is 32 bits if true, 16 bits if false.
   bool address_default_is_32_bits_;
 
+  // Determines if 64 bit operands are supported (x64).
+  bool operand_default_support_64_bits_;
+
+  // Current operand size is 64 bits if true, 32 bits if false.
+  bool operand_is_64_bits_;
+
   // Huge big opcode table based on the IA-32 manual, defined
   // in Ia32OpcodeMap.cc
   static const OpcodeTable s_ia32_opcode_map_[];
diff --git a/third_party/tcmalloc/vendor/src/windows/mini_disassembler_types.h b/third_party/tcmalloc/vendor/src/windows/mini_disassembler_types.h
index 7f8e997..83dee8b 100644
--- a/third_party/tcmalloc/vendor/src/windows/mini_disassembler_types.h
+++ b/third_party/tcmalloc/vendor/src/windows/mini_disassembler_types.h
@@ -143,6 +143,16 @@ enum OperandType {
   OT_ADDRESS_MODE_M = 0x80000000
 };
 
+// Flag that indicates if an immediate operand is 64-bits.
+//
+// The Intel 64 and IA-32 Architecture Software Developer's Manual currently
+// defines MOV as the only instruction supporting a 64-bit immediate operand.
+enum ImmediateOperandSize {
+  IOS_MASK = 0x0000F000,
+  IOS_DEFAULT = 0x0,
+  IOS_64 = 0x00001000
+};
+
 // Everything that's in an Opcode (see below) except the three
 // alternative opcode structs for different prefixes.
 struct SpecificOpcode {
@@ -154,8 +164,8 @@ struct SpecificOpcode {
   InstructionType type_;
 
   // Description of the type of the dest, src and aux operands,
-  // put together from an enOperandType flag and an enAddressingMethod
-  // flag.
+  // put together from enOperandType, enAddressingMethod and 
+  // enImmediateOperandSize flags.
   int flag_dest_;
   int flag_source_;
   int flag_aux_;
diff --git a/third_party/tcmalloc/vendor/src/windows/patch_functions.cc b/third_party/tcmalloc/vendor/src/windows/patch_functions.cc
index f837e7a..7a7e6ad 100644
--- a/third_party/tcmalloc/vendor/src/windows/patch_functions.cc
+++ b/third_party/tcmalloc/vendor/src/windows/patch_functions.cc
@@ -91,7 +91,7 @@
 #include <vector>
 #include <base/logging.h>
 #include "base/spinlock.h"
-#include "google/malloc_hook.h"
+#include "gperftools/malloc_hook.h"
 #include "malloc_hook-inl.h"
 #include "preamble_patcher.h"
 
@@ -181,6 +181,8 @@ class LibcInfo {
     kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow,
     // These are windows-only functions from malloc.h
     k_Msize, k_Expand,
+    // A MS CRT "internal" function, implemented using _calloc_impl
+    k_CallocCrt,
     kNumFunctions
   };
 
@@ -404,7 +406,7 @@ const char* const LibcInfo::function_name_[] = {
   NULL,  // kMangledNewArrayNothrow,
   NULL,  // kMangledDeleteNothrow,
   NULL,  // kMangledDeleteArrayNothrow,
-  "_msize", "_expand",
+  "_msize", "_expand", "_calloc_crt",
 };
 
 // For mingw, I can't patch the new/delete here, because the
@@ -435,6 +437,7 @@ const GenericFnPtr LibcInfo::static_fn_[] = {
 #endif
   (GenericFnPtr)&::_msize,
   (GenericFnPtr)&::_expand,
+  (GenericFnPtr)&::calloc,
 };
 
 template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = {
@@ -457,6 +460,7 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
   (GenericFnPtr)&Perftools_deletearray_nothrow,
   (GenericFnPtr)&Perftools__msize,
   (GenericFnPtr)&Perftools__expand,
+  (GenericFnPtr)&Perftools_calloc,
 };
 
 /*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = {
@@ -822,7 +826,7 @@ void* LibcInfoWithPatchFunctions<T>::Perftools_realloc(
   return do_realloc_with_callback(
       old_ptr, new_size,
       (void (*)(void*))origstub_fn_[kFree],
-      (size_t (*)(void*))origstub_fn_[k_Msize]);
+      (size_t (*)(const void*))origstub_fn_[k_Msize]);
 }
 
 template<int T>
@@ -900,7 +904,7 @@ void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow(
 
 template<int T>
 size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW {
-  return GetSizeWithCallback(ptr, (size_t (*)(void*))origstub_fn_[k_Msize]);
+  return GetSizeWithCallback(ptr, (size_t (*)(const void*))origstub_fn_[k_Msize]);
 }
 
 // We need to define this because internal windows functions like to
diff --git a/third_party/tcmalloc/vendor/src/windows/port.h b/third_party/tcmalloc/vendor/src/windows/port.h
index 300c72d..e9a0206 100644
--- a/third_party/tcmalloc/vendor/src/windows/port.h
+++ b/third_party/tcmalloc/vendor/src/windows/port.h
@@ -65,14 +65,15 @@
 
 /*
  * 4018: signed/unsigned mismatch is common (and ok for signed_i < unsigned_i)
- * 4244: otherwise we get problems when substracting two size_t's to an int
+ * 4244: otherwise we get problems when subtracting two size_t's to an int
  * 4288: VC++7 gets confused when a var is defined in a loop and then after it
  * 4267: too many false positives for "conversion gives possible data loss"
  * 4290: it's ok windows ignores the "throw" directive
  * 4996: Yes, we're ok using "unsafe" functions like vsnprintf and getenv()
+ * 4146: internal_logging.cc intentionally negates an unsigned value
  */
 #ifdef _MSC_VER
-#pragma warning(disable:4018 4244 4288 4267 4290 4996)
+#pragma warning(disable:4018 4244 4288 4267 4290 4996 4146)
 #endif
 
 #ifndef __cplusplus
@@ -414,10 +415,14 @@ inline unsigned int sleep(unsigned int seconds) {
   return 0;
 }
 
+// mingw64 seems to define timespec (though mingw.org mingw doesn't),
+// protected by the _TIMESPEC_DEFINED macro.
+#ifndef _TIMESPEC_DEFINED
 struct timespec {
   int tv_sec;
   int tv_nsec;
 };
+#endif
 
 inline int nanosleep(const struct timespec *req, struct timespec *rem) {
   Sleep(req->tv_sec * 1000 + req->tv_nsec / 1000000);
diff --git a/third_party/tcmalloc/vendor/src/windows/preamble_patcher.cc b/third_party/tcmalloc/vendor/src/windows/preamble_patcher.cc
index 78a4763..b27a95b 100644
--- a/third_party/tcmalloc/vendor/src/windows/preamble_patcher.cc
+++ b/third_party/tcmalloc/vendor/src/windows/preamble_patcher.cc
@@ -29,6 +29,7 @@
  *
  * ---
  * Author: Joi Sigurdsson
+ * Author: Scott Francis
  *
  * Implementation of PreamblePatcher
  */
@@ -46,18 +47,42 @@
 #define ASM_JMP32ABS_0 0xFF
 #define ASM_JMP32ABS_1 0x25
 #define ASM_JMP8REL 0xEB
+#define ASM_JCC32REL_0 0x0F
+#define ASM_JCC32REL_1_MASK 0x80
+#define ASM_NOP 0x90
+// X64 opcodes
+#define ASM_REXW 0x48
+#define ASM_MOVRAX_IMM 0xB8
+#define ASM_JMP 0xFF
+#define ASM_JMP_RAX 0xE0
 
 namespace sidestep {
 
+PreamblePatcher::PreamblePage* PreamblePatcher::preamble_pages_ = NULL;
+long PreamblePatcher::granularity_ = 0;
+long PreamblePatcher::pagesize_ = 0;
+bool PreamblePatcher::initialized_ = false;
+
+static const unsigned int kPreamblePageMagic = 0x4347414D; // "MAGC"
+
 // Handle a special case that we see with functions that point into an
 // IAT table (including functions linked statically into the
 // application): these function already starts with ASM_JMP32*.  For
 // instance, malloc() might be implemented as a JMP to __malloc().
 // This function follows the initial JMPs for us, until we get to the
 // place where the actual code is defined.  If we get to STOP_BEFORE,
-// we return the address before stop_before.
+// we return the address before stop_before.  The stop_before_trampoline
+// flag is used in 64-bit mode.  If true, we will return the address
+// before a trampoline is detected.  Trampolines are defined as:
+//
+//    nop
+//    mov rax, <replacement_function>
+//    jmp rax
+//
+// See PreamblePatcher::RawPatchWithStub for more information.
 void* PreamblePatcher::ResolveTargetImpl(unsigned char* target,
-                                         unsigned char* stop_before) {
+                                         unsigned char* stop_before,
+                                         bool stop_before_trampoline) {
   if (target == NULL)
     return NULL;
   while (1) {
@@ -81,15 +106,26 @@ void* PreamblePatcher::ResolveTargetImpl(unsigned char* target,
       // Visual studio seems to sometimes do it this way instead of the
       // previous way.  Not sure what the rules are, but it was happening
       // with operator new in some binaries.
-      void **new_target_v;
-      SIDESTEP_ASSERT(sizeof(new_target) == 4);
-      memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
+      void** new_target_v;
+      if (kIs64BitBinary) {
+        // In 64-bit mode JMPs are RIP-relative, not absolute
+        int target_offset;
+        memcpy(reinterpret_cast<void*>(&target_offset),
+               reinterpret_cast<void*>(target + 2), 4);
+        new_target_v = reinterpret_cast<void**>(target + target_offset + 6);
+      } else {
+        SIDESTEP_ASSERT(sizeof(new_target) == 4);
+        memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
+      }
       new_target = reinterpret_cast<unsigned char*>(*new_target_v);
     } else {
       break;
     }
     if (new_target == stop_before)
       break;
+    if (stop_before_trampoline && *new_target == ASM_NOP
+        && new_target[1] == ASM_REXW && new_target[2] == ASM_MOVRAX_IMM)
+      break;
     target = new_target;
   }
   return target;
@@ -103,7 +139,7 @@ class DeleteUnsignedCharArray {
 
   ~DeleteUnsignedCharArray() {
     if (array_) {
-      delete [] array_;
+      PreamblePatcher::FreePreambleBlock(array_);
     }
   }
 
@@ -191,9 +227,23 @@ SideStepError PreamblePatcher::RawPatch(void* target_function,
     return SIDESTEP_INVALID_PARAMETER;
   }
 
-  // @see MAX_PREAMBLE_STUB_SIZE for an explanation of how we arrives at
-  // this size
-  unsigned char* preamble_stub = new unsigned char[MAX_PREAMBLE_STUB_SIZE];
+  BOOL succeeded = FALSE;
+
+  // First, deal with a special case that we see with functions that
+  // point into an IAT table (including functions linked statically
+  // into the application): these function already starts with
+  // ASM_JMP32REL.  For instance, malloc() might be implemented as a
+  // JMP to __malloc().  In that case, we replace the destination of
+  // the JMP (__malloc), rather than the JMP itself (malloc).  This
+  // way we get the correct behavior no matter how malloc gets called.
+  void* new_target = ResolveTarget(target_function);
+  if (new_target != target_function) {
+    target_function = new_target;
+  }
+
+  // In 64-bit mode, preamble_stub must be within 2GB of target function
+  // so that if target contains a jump, we can translate it.
+  unsigned char* preamble_stub = AllocPreambleBlockNear(target_function);
   if (!preamble_stub) {
     SIDESTEP_ASSERT(false && "Unable to allocate preamble-stub.");
     return SIDESTEP_INSUFFICIENT_BUFFER;
@@ -202,19 +252,6 @@ SideStepError PreamblePatcher::RawPatch(void* target_function,
   // Frees the array at end of scope.
   DeleteUnsignedCharArray guard_preamble_stub(preamble_stub);
 
-  // Change the protection of the newly allocated preamble stub to
-  // PAGE_EXECUTE_READWRITE. This is required to work with DEP (Data
-  // Execution Prevention) which will cause an exception if code is executed
-  // from a page on which you do not have read access.
-  DWORD old_stub_protect = 0;
-  BOOL succeeded = ::VirtualProtect(preamble_stub, MAX_PREAMBLE_STUB_SIZE,
-                                    PAGE_EXECUTE_READWRITE, &old_stub_protect);
-  if (!succeeded) {
-    SIDESTEP_ASSERT(false &&
-                    "Failed to make page preamble stub read-write-execute.");
-    return SIDESTEP_ACCESS_DENIED;
-  }
-
   SideStepError error_code = RawPatchWithStubAndProtections(
       target_function, replacement_function, preamble_stub,
       MAX_PREAMBLE_STUB_SIZE, NULL);
@@ -260,23 +297,6 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
     return SIDESTEP_INVALID_PARAMETER;
   }
 
-  // We disassemble the preamble of the _stub_ to see how many bytes we
-  // originally copied to the stub.
-  MiniDisassembler disassembler;
-  unsigned int preamble_bytes = 0;
-  while (preamble_bytes < 5) {
-    InstructionType instruction_type =
-        disassembler.Disassemble(
-            reinterpret_cast<unsigned char*>(original_function_stub) +
-            preamble_bytes,
-            preamble_bytes);
-    if (IT_GENERIC != instruction_type) {
-      SIDESTEP_ASSERT(false &&
-                      "Should only have generic instructions in stub!!");
-      return SIDESTEP_UNSUPPORTED_INSTRUCTION;
-    }
-  }
-
   // Before unpatching, target_function should be a JMP to
   // replacement_function.  If it's not, then either it's an error, or
   // we're falling into the case where the original instruction was a
@@ -286,7 +306,8 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
   unsigned char* target = reinterpret_cast<unsigned char*>(target_function);
   target = reinterpret_cast<unsigned char*>(
       ResolveTargetImpl(
-          target, reinterpret_cast<unsigned char*>(replacement_function)));
+          target, reinterpret_cast<unsigned char*>(replacement_function),
+          true));
   // We should end at the function we patched.  When we patch, we insert
   // a ASM_JMP32REL instruction, so look for that as a sanity check.
   if (target[0] != ASM_JMP32REL) {
@@ -295,11 +316,13 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
     return SIDESTEP_INVALID_PARAMETER;
   }
 
+  const unsigned int kRequiredTargetPatchBytes = 5;
+
   // We need to be able to write to a process-local copy of the first
-  // MAX_PREAMBLE_STUB_SIZE bytes of target_function
+  // kRequiredTargetPatchBytes bytes of target_function
   DWORD old_target_function_protect = 0;
-  BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function),
-                                    MAX_PREAMBLE_STUB_SIZE,
+  BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target),
+                                    kRequiredTargetPatchBytes,
                                     PAGE_EXECUTE_READWRITE,
                                     &old_target_function_protect);
   if (!succeeded) {
@@ -308,20 +331,67 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
     return SIDESTEP_ACCESS_DENIED;
   }
 
-  // Replace the first few bytes of the original function with the bytes we
-  // previously moved to the preamble stub.
-  memcpy(reinterpret_cast<void*>(target),
-         original_function_stub, preamble_bytes);
+  unsigned char* preamble_stub = reinterpret_cast<unsigned char*>(
+                                   original_function_stub);
 
-  // Stub is now useless so delete it.
-  // [csilvers: Commented out for perftools because it causes big problems
-  //  when we're unpatching malloc.  We just let this live on as a  leak.]
-  //delete [] reinterpret_cast<unsigned char*>(original_function_stub);
+  // Disassemble the preamble of stub and copy the bytes back to target.
+  // If we've done any conditional jumps in the preamble we need to convert
+  // them back to the orignal REL8 jumps in the target.
+  MiniDisassembler disassembler;
+  unsigned int preamble_bytes = 0;
+  unsigned int target_bytes = 0;
+  while (target_bytes < kRequiredTargetPatchBytes) {
+    unsigned int cur_bytes = 0;
+    InstructionType instruction_type =
+        disassembler.Disassemble(preamble_stub + preamble_bytes, cur_bytes);
+    if (IT_JUMP == instruction_type) {
+      unsigned int jump_bytes = 0;
+      SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION;
+      if (IsNearConditionalJump(preamble_stub + preamble_bytes, cur_bytes) ||
+          IsNearRelativeJump(preamble_stub + preamble_bytes, cur_bytes) ||
+          IsNearAbsoluteCall(preamble_stub + preamble_bytes, cur_bytes) ||
+          IsNearRelativeCall(preamble_stub + preamble_bytes, cur_bytes)) {
+        jump_ret = PatchNearJumpOrCall(preamble_stub + preamble_bytes, 
+                                       cur_bytes, target + target_bytes, 
+                                       &jump_bytes, MAX_PREAMBLE_STUB_SIZE);
+      }
+      if (jump_ret == SIDESTEP_JUMP_INSTRUCTION) {
+        SIDESTEP_ASSERT(false &&
+                        "Found unsupported jump instruction in stub!!");
+        return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+      }
+      target_bytes += jump_bytes;
+    } else if (IT_GENERIC == instruction_type) {
+      if (IsMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes)) {
+        unsigned int mov_bytes = 0;
+        if (PatchMovWithDisplacement(preamble_stub + preamble_bytes, cur_bytes,
+                                     target + target_bytes, &mov_bytes,
+                                     MAX_PREAMBLE_STUB_SIZE)
+                                     != SIDESTEP_SUCCESS) {
+          SIDESTEP_ASSERT(false &&
+                          "Found unsupported generic instruction in stub!!");
+          return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+        }
+      } else {
+        memcpy(reinterpret_cast<void*>(target + target_bytes),
+               reinterpret_cast<void*>(reinterpret_cast<unsigned char*>(
+                   original_function_stub) + preamble_bytes), cur_bytes);
+        target_bytes += cur_bytes;
+      }
+    } else {
+      SIDESTEP_ASSERT(false &&
+                      "Found unsupported instruction in stub!!");
+      return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+    }
+    preamble_bytes += cur_bytes;
+  }
 
-  // Restore the protection of the first MAX_PREAMBLE_STUB_SIZE bytes of
+  FreePreambleBlock(reinterpret_cast<unsigned char*>(original_function_stub));
+
+  // Restore the protection of the first kRequiredTargetPatchBytes bytes of
   // target to what they were before we started goofing around.
   succeeded = ::VirtualProtect(reinterpret_cast<void*>(target),
-                               MAX_PREAMBLE_STUB_SIZE,
+                               kRequiredTargetPatchBytes,
                                old_target_function_protect,
                                &old_target_function_protect);
 
@@ -341,4 +411,274 @@ SideStepError PreamblePatcher::Unpatch(void* target_function,
   return SIDESTEP_SUCCESS;
 }
 
+void PreamblePatcher::Initialize() {
+  if (!initialized_) {
+    SYSTEM_INFO si = { 0 };
+    ::GetSystemInfo(&si);
+    granularity_ = si.dwAllocationGranularity;
+    pagesize_ = si.dwPageSize;
+    initialized_ = true;
+  }
+}
+
+unsigned char* PreamblePatcher::AllocPreambleBlockNear(void* target) {
+  PreamblePage* preamble_page = preamble_pages_;
+  while (preamble_page != NULL) {
+    if (preamble_page->free_ != NULL) {
+      __int64 val = reinterpret_cast<__int64>(preamble_page) -
+          reinterpret_cast<__int64>(target);
+      if ((val > 0 && val + pagesize_ <= INT_MAX) ||
+          (val < 0 && val >= INT_MIN)) {
+        break;
+      }
+    }
+    preamble_page = preamble_page->next_;
+  }
+
+  // The free_ member of the page is used to store the next available block
+  // of memory to use or NULL if there are no chunks available, in which case
+  // we'll allocate a new page.
+  if (preamble_page == NULL || preamble_page->free_ == NULL) {
+    // Create a new preamble page and initialize the free list
+    preamble_page = reinterpret_cast<PreamblePage*>(AllocPageNear(target));
+    SIDESTEP_ASSERT(preamble_page != NULL && "Could not allocate page!");
+    void** pp = &preamble_page->free_;
+    unsigned char* ptr = reinterpret_cast<unsigned char*>(preamble_page) +
+        MAX_PREAMBLE_STUB_SIZE;
+    unsigned char* limit = reinterpret_cast<unsigned char*>(preamble_page) +
+        pagesize_;
+    while (ptr < limit) {
+      *pp = ptr;
+      pp = reinterpret_cast<void**>(ptr);
+      ptr += MAX_PREAMBLE_STUB_SIZE;
+    }
+    *pp = NULL;
+    // Insert the new page into the list
+    preamble_page->magic_ = kPreamblePageMagic;
+    preamble_page->next_ = preamble_pages_;
+    preamble_pages_ = preamble_page;
+  }
+  unsigned char* ret = reinterpret_cast<unsigned char*>(preamble_page->free_);
+  preamble_page->free_ = *(reinterpret_cast<void**>(preamble_page->free_));
+  return ret;
+}
+
+void PreamblePatcher::FreePreambleBlock(unsigned char* block) {
+  SIDESTEP_ASSERT(block != NULL);
+  SIDESTEP_ASSERT(granularity_ != 0);
+  uintptr_t ptr = reinterpret_cast<uintptr_t>(block);
+  ptr -= ptr & (granularity_ - 1);
+  PreamblePage* preamble_page = reinterpret_cast<PreamblePage*>(ptr);
+  SIDESTEP_ASSERT(preamble_page->magic_ == kPreamblePageMagic);
+  *(reinterpret_cast<void**>(block)) = preamble_page->free_;
+  preamble_page->free_ = block;
+}
+
+void* PreamblePatcher::AllocPageNear(void* target) {
+  MEMORY_BASIC_INFORMATION mbi = { 0 };
+  if (!::VirtualQuery(target, &mbi, sizeof(mbi))) {
+    SIDESTEP_ASSERT(false && "VirtualQuery failed on target address");
+    return 0;
+  }
+  if (initialized_ == false) {
+    PreamblePatcher::Initialize();
+    SIDESTEP_ASSERT(initialized_);
+  }
+  void* pv = NULL;
+  unsigned char* allocation_base = reinterpret_cast<unsigned char*>(
+      mbi.AllocationBase);
+  __int64 i = 1;
+  bool high_target = reinterpret_cast<__int64>(target) > UINT_MAX;
+  while (pv == NULL) {
+    __int64 val = reinterpret_cast<__int64>(allocation_base) -
+        (i * granularity_);
+    if (high_target &&
+        reinterpret_cast<__int64>(target) - val > INT_MAX) {
+        // We're further than 2GB from the target
+      break;
+    } else if (val <= NULL) {
+      // Less than 0
+      break;
+    }
+    pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base -
+                            (i++ * granularity_)),
+                        pagesize_, MEM_COMMIT | MEM_RESERVE,
+                        PAGE_EXECUTE_READWRITE);
+  }
+
+  // We couldn't allocate low, try to allocate high
+  if (pv == NULL) {
+    i = 1;
+    // Round up to the next multiple of page granularity
+    allocation_base = reinterpret_cast<unsigned char*>(
+        (reinterpret_cast<__int64>(target) &
+        (~(granularity_ - 1))) + granularity_);
+    while (pv == NULL) {
+      __int64 val = reinterpret_cast<__int64>(allocation_base) +
+          (i * granularity_) - reinterpret_cast<__int64>(target);
+      if (val > INT_MAX || val < 0) {
+        // We're too far or we overflowed
+        break;
+      }
+      pv = ::VirtualAlloc(reinterpret_cast<void*>(allocation_base +
+                              (i++ * granularity_)),
+                          pagesize_, MEM_COMMIT | MEM_RESERVE,
+                          PAGE_EXECUTE_READWRITE);
+    }
+  }
+  return pv;
+}
+
+bool PreamblePatcher::IsShortConditionalJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return (*(target) & 0x70) == 0x70 && instruction_size == 2;
+}
+
+bool PreamblePatcher::IsNearConditionalJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xf && (*(target + 1) & 0x80) == 0x80 &&
+      instruction_size == 6;
+}
+
+bool PreamblePatcher::IsNearRelativeJump(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xe9 && instruction_size == 5;
+}
+
+bool PreamblePatcher::IsNearAbsoluteCall(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xff && (*(target + 1) & 0x10) == 0x10 &&
+      instruction_size == 6;
+}
+
+bool PreamblePatcher::IsNearRelativeCall(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  return *(target) == 0xe8 && instruction_size == 5;
+}
+
+bool PreamblePatcher::IsMovWithDisplacement(
+    unsigned char* target,
+    unsigned int instruction_size) {
+  // In this case, the ModRM byte's mod field will be 0 and r/m will be 101b (5)
+  return instruction_size == 7 && *target == 0x48 && *(target + 1) == 0x8b &&
+      (*(target + 2) >> 6) == 0 && (*(target + 2) & 0x7) == 5;
+}
+
+SideStepError PreamblePatcher::PatchShortConditionalJump(
+    unsigned char* source,
+    unsigned int instruction_size,
+    unsigned char* target,
+    unsigned int* target_bytes,
+    unsigned int target_size) {
+  unsigned char* original_jump_dest = (source + 2) + source[1];
+  unsigned char* stub_jump_from = target + 6;
+  __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+  if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+    SIDESTEP_ASSERT(false &&
+                    "Unable to fix up short jump because target"
+                    " is too far away.");
+    return SIDESTEP_JUMP_INSTRUCTION;
+  }
+
+  *target_bytes = 6;
+  if (target_size > *target_bytes) {
+    // Convert the short jump to a near jump.
+    //
+    // 0f 8x xx xx xx xx = Jcc rel32off
+    unsigned short jmpcode = ((0x80 | (source[0] & 0xf)) << 8) | 0x0f;
+    memcpy(reinterpret_cast<void*>(target),
+           reinterpret_cast<void*>(&jmpcode), 2);
+    memcpy(reinterpret_cast<void*>(target + 2),
+           reinterpret_cast<void*>(&fixup_jump_offset), 4);
+  }
+
+  return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::PatchNearJumpOrCall(
+    unsigned char* source,
+    unsigned int instruction_size,
+    unsigned char* target,
+    unsigned int* target_bytes,
+    unsigned int target_size) {
+  SIDESTEP_ASSERT(instruction_size == 5 || instruction_size == 6);
+  unsigned int jmp_offset_in_instruction = instruction_size == 5 ? 1 : 2;
+  unsigned char* original_jump_dest = reinterpret_cast<unsigned char *>(
+      reinterpret_cast<__int64>(source + instruction_size) +
+      *(reinterpret_cast<int*>(source + jmp_offset_in_instruction)));
+  unsigned char* stub_jump_from = target + instruction_size;
+  __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+  if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+    SIDESTEP_ASSERT(false &&
+                    "Unable to fix up near jump because target"
+                    " is too far away.");
+    return SIDESTEP_JUMP_INSTRUCTION;
+  }
+
+  if ((fixup_jump_offset < SCHAR_MAX && fixup_jump_offset > SCHAR_MIN)) {
+    *target_bytes = 2;
+    if (target_size > *target_bytes) {
+      // If the new offset is in range, use a short jump instead of a near jump.
+      if (source[0] == ASM_JCC32REL_0 &&
+          (source[1] & ASM_JCC32REL_1_MASK) == ASM_JCC32REL_1_MASK) {
+        unsigned short jmpcode = (static_cast<unsigned char>(
+            fixup_jump_offset) << 8) | (0x70 | (source[1] & 0xf));
+        memcpy(reinterpret_cast<void*>(target),
+               reinterpret_cast<void*>(&jmpcode),
+               2);
+      } else {
+        target[0] = ASM_JMP8REL;
+        target[1] = static_cast<unsigned char>(fixup_jump_offset);
+      }
+    }
+  } else {
+    *target_bytes = instruction_size;
+    if (target_size > *target_bytes) {
+      memcpy(reinterpret_cast<void*>(target),
+             reinterpret_cast<void*>(source),
+             jmp_offset_in_instruction);
+      memcpy(reinterpret_cast<void*>(target + jmp_offset_in_instruction),
+             reinterpret_cast<void*>(&fixup_jump_offset),
+             4);
+    }
+  }
+
+  return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::PatchMovWithDisplacement(
+     unsigned char* source,
+     unsigned int instruction_size,
+     unsigned char* target,
+     unsigned int* target_bytes,
+     unsigned int target_size) {
+  SIDESTEP_ASSERT(instruction_size == 7);
+  const int mov_offset_in_instruction = 3; // 0x48 0x8b 0x0d <offset>
+  unsigned char* original_mov_dest = reinterpret_cast<unsigned char*>(
+      reinterpret_cast<__int64>(source + instruction_size) +
+      *(reinterpret_cast<int*>(source + mov_offset_in_instruction)));
+  unsigned char* stub_mov_from = target + instruction_size;
+  __int64 fixup_mov_offset = original_mov_dest - stub_mov_from;
+  if (fixup_mov_offset > INT_MAX || fixup_mov_offset < INT_MIN) {
+    SIDESTEP_ASSERT(false &&
+        "Unable to fix up near MOV because target is too far away.");
+    return SIDESTEP_UNEXPECTED;
+  }
+  *target_bytes = instruction_size;
+  if (target_size > *target_bytes) {
+    memcpy(reinterpret_cast<void*>(target),
+           reinterpret_cast<void*>(source),
+           mov_offset_in_instruction);
+    memcpy(reinterpret_cast<void*>(target + mov_offset_in_instruction),
+           reinterpret_cast<void*>(&fixup_mov_offset),
+           4);
+  }
+  return SIDESTEP_SUCCESS;
+}
+
 };  // namespace sidestep
diff --git a/third_party/tcmalloc/vendor/src/windows/preamble_patcher.h b/third_party/tcmalloc/vendor/src/windows/preamble_patcher.h
index 0028e4e..4fdb7d0 100644
--- a/third_party/tcmalloc/vendor/src/windows/preamble_patcher.h
+++ b/third_party/tcmalloc/vendor/src/windows/preamble_patcher.h
@@ -29,6 +29,7 @@
  *
  * ---
  * Author: Joi Sigurdsson
+ * Author: Scott Francis
  *
  * Definition of PreamblePatcher
  */
@@ -36,6 +37,7 @@
 #ifndef GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_
 #define GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_
 
+#include "config.h"
 #include <windows.h>
 
 // compatibility shim
@@ -47,7 +49,25 @@
 // bytes of the function. Considering the worst case scenario, we need 4
 // bytes + the max instruction size + 5 more bytes for our jump back to
 // the original code. With that in mind, 32 is a good number :)
+#ifdef _M_X64
+// In 64-bit mode we may need more room.  In 64-bit mode all jumps must be
+// within +/-2GB of RIP.  Because of this limitation we may need to use a
+// trampoline to jump to the replacement function if it is further than 2GB
+// away from the target. The trampoline is 14 bytes.
+//
+// So 4 bytes + max instruction size (17 bytes) + 5 bytes to jump back to the
+// original code + trampoline size.  64 bytes is a nice number :-)
+#define MAX_PREAMBLE_STUB_SIZE    (64)
+#else
 #define MAX_PREAMBLE_STUB_SIZE    (32)
+#endif
+
+// Determines if this is a 64-bit binary.
+#ifdef _M_X64
+static const bool kIs64BitBinary = true;
+#else
+static const bool kIs64BitBinary = false;
+#endif
 
 namespace sidestep {
 
@@ -68,6 +88,8 @@ enum SideStepError {
 #define SIDESTEP_TO_HRESULT(error)                      \
   MAKE_HRESULT(SEVERITY_ERROR, FACILITY_NULL, error)
 
+class DeleteUnsignedCharArray;
+
 // Implements a patching mechanism that overwrites the first few bytes of
 // a function preamble with a jump to our hook function, which is then
 // able to call the original function via a specially-made preamble-stub
@@ -126,7 +148,7 @@ enum SideStepError {
 // reuse the result of calling the function with a given parameter, which
 // may mean if you patch the function in between your patch will never get
 // invoked.  See preamble_patcher_test.cc for an example.
-class PreamblePatcher {
+class PERFTOOLS_DLL_DECL PreamblePatcher {
  public:
 
   // This is a typesafe version of RawPatch(), identical in all other
@@ -287,7 +309,55 @@ class PreamblePatcher {
     return (T)ResolveTargetImpl((unsigned char*)target_function, NULL);
   }
 
+  // Allocates a block of memory of size MAX_PREAMBLE_STUB_SIZE that is as
+  // close (within 2GB) as possible to target.  This is done to ensure that 
+  // we can perform a relative jump from target to a trampoline if the 
+  // replacement function is > +-2GB from target.  This means that we only need 
+  // to patch 5 bytes in the target function.
+  //
+  // @param target    Pointer to target function.
+  //
+  // @return  Returns a block of memory of size MAX_PREAMBLE_STUB_SIZE that can
+  //          be used to store a function preamble block.
+  static unsigned char* AllocPreambleBlockNear(void* target);
+
+  // Frees a block allocated by AllocPreambleBlockNear.
+  //
+  // @param block     Block that was returned by AllocPreambleBlockNear.
+  static void FreePreambleBlock(unsigned char* block);
+
  private:
+  friend class DeleteUnsignedCharArray;
+
+   // Used to store data allocated for preamble stubs
+  struct PreamblePage {
+    unsigned int magic_;
+    PreamblePage* next_;
+    // This member points to a linked list of free blocks within the page
+    // or NULL if at the end
+    void* free_;
+  };
+
+  // In 64-bit mode, the replacement function must be within 2GB of the original
+  // target in order to only require 5 bytes for the function patch.  To meet
+  // this requirement we're creating an allocator within this class to
+  // allocate blocks that are within 2GB of a given target. This member is the
+  // head of a linked list of pages used to allocate blocks that are within
+  // 2GB of the target.
+  static PreamblePage* preamble_pages_;
+  
+  // Page granularity
+  static long granularity_;
+
+  // Page size
+  static long pagesize_;
+
+  // Determines if the patcher has been initialized.
+  static bool initialized_;
+
+  // Used to initialize static members.
+  static void Initialize();
+
   // Patches a function by overwriting its first few bytes with
   // a jump to a different function.  This is similar to the RawPatch
   // function except that it uses the stub allocated by the caller
@@ -318,7 +388,7 @@ class PreamblePatcher {
   // @return An error code indicating the result of patching.
   static SideStepError RawPatchWithStubAndProtections(
       void* target_function,
-      void *replacement_function,
+      void* replacement_function,
       unsigned char* preamble_stub,
       unsigned long stub_size,
       unsigned long* bytes_needed);
@@ -348,7 +418,7 @@ class PreamblePatcher {
   //
   // @return An error code indicating the result of patching.
   static SideStepError RawPatchWithStub(void* target_function,
-                                        void *replacement_function,
+                                        void* replacement_function,
                                         unsigned char* preamble_stub,
                                         unsigned long stub_size,
                                         unsigned long* bytes_needed);
@@ -365,12 +435,175 @@ class PreamblePatcher {
   // target_function, we get to the address stop, we return
   // immediately, the address that jumps to stop_before.
   //
+  // @param stop_before_trampoline  When following JMP instructions from 
+  // target_function, stop before a trampoline is detected.  See comment in
+  // PreamblePatcher::RawPatchWithStub for more information.  This parameter 
+  // has no effect in 32-bit mode.
+  //
   // @return Either target_function (the input parameter), or if
   // target_function's body consists entirely of a JMP instruction,
   // the address it JMPs to (or more precisely, the address at the end
   // of a chain of JMPs).
   static void* ResolveTargetImpl(unsigned char* target_function,
-                                 unsigned char* stop_before);
+                                 unsigned char* stop_before,
+                                 bool stop_before_trampoline = false);
+
+  // Helper routine that attempts to allocate a page as close (within 2GB)
+  // as possible to target.
+  //
+  // @param target    Pointer to target function.
+  //
+  // @return   Returns an address that is within 2GB of target.
+  static void* AllocPageNear(void* target);
+
+  // Helper routine that determines if a target instruction is a short
+  // conditional jump.
+  //
+  // @param target            Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a short conditional jump.
+  static bool IsShortConditionalJump(unsigned char* target,
+                                     unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // conditional jump.
+  //
+  // @param target            Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a near conditional jump.
+  static bool IsNearConditionalJump(unsigned char* target,
+                                    unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near
+  // relative jump.
+  //
+  // @param target            Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a near absolute jump.
+  static bool IsNearRelativeJump(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near 
+  // absolute call.
+  //
+  // @param target            Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a near absolute call.
+  static bool IsNearAbsoluteCall(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a near 
+  // absolute call.
+  //
+  // @param target            Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a near absolute call.
+  static bool IsNearRelativeCall(unsigned char* target,
+                                 unsigned int instruction_size);
+
+  // Helper routine that determines if a target instruction is a 64-bit MOV
+  // that uses a RIP-relative displacement.
+  //
+  // @param target            Pointer to instruction.
+  //
+  // @param instruction_size  Size of the instruction in bytes.
+  //
+  // @return  Returns true if the instruction is a MOV with displacement.
+  static bool IsMovWithDisplacement(unsigned char* target,
+                                    unsigned int instruction_size);
+
+  // Helper routine that converts a short conditional jump instruction
+  // to a near conditional jump in a target buffer.  Note that the target
+  // buffer must be within 2GB of the source for the near jump to work.
+  //
+  // A short conditional jump instruction is in the format:
+  // 7x xx = Jcc rel8off
+  //
+  // @param source              Pointer to instruction.
+  //
+  // @param instruction_size    Size of the instruction.
+  //
+  // @param target              Target buffer to write the new instruction.
+  //
+  // @param target_bytes        Pointer to a buffer that contains the size
+  //                            of the target instruction, in bytes.
+  //
+  // @param target_size         Size of the target buffer.
+  //
+  // @return  Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchShortConditionalJump(unsigned char* source,
+                                                 unsigned int instruction_size,
+                                                 unsigned char* target,
+                                                 unsigned int* target_bytes,
+                                                 unsigned int target_size);
+
+  // Helper routine that converts an instruction that will convert various
+  // jump-like instructions to corresponding instructions in the target buffer.
+  // What this routine does is fix up the relative offsets contained in jump
+  // instructions to point back to the original target routine.  Like with
+  // PatchShortConditionalJump, the target buffer must be within 2GB of the
+  // source.
+  //
+  // We currently handle the following instructions:
+  //
+  // E9 xx xx xx xx     = JMP rel32off
+  // 0F 8x xx xx xx xx  = Jcc rel32off
+  // FF /2 xx xx xx xx  = CALL reg/mem32/mem64
+  // E8 xx xx xx xx     = CALL rel32off
+  //
+  // It should not be hard to update this function to support other
+  // instructions that jump to relative targets.
+  //
+  // @param source              Pointer to instruction.
+  //
+  // @param instruction_size    Size of the instruction.
+  //
+  // @param target              Target buffer to write the new instruction.
+  //
+  // @param target_bytes        Pointer to a buffer that contains the size
+  //                            of the target instruction, in bytes.
+  //
+  // @param target_size         Size of the target buffer.
+  //
+  // @return  Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchNearJumpOrCall(unsigned char* source,
+                                           unsigned int instruction_size,
+                                           unsigned char* target,
+                                           unsigned int* target_bytes,
+                                           unsigned int target_size);
+  
+  // Helper routine that patches a 64-bit MOV instruction with a RIP-relative
+  // displacement.  The target buffer must be within 2GB of the source.
+  //
+  // 48 8B 0D XX XX XX XX = MOV rel32off
+  //
+  // @param source              Pointer to instruction.
+  //
+  // @param instruction_size    Size of the instruction.
+  //
+  // @param target              Target buffer to write the new instruction.
+  //
+  // @param target_bytes        Pointer to a buffer that contains the size
+  //                            of the target instruction, in bytes.
+  //
+  // @param target_size         Size of the target buffer.
+  //
+  // @return  Returns SIDESTEP_SUCCESS if successful, otherwise an error.
+  static SideStepError PatchMovWithDisplacement(unsigned char* source,
+                                                unsigned int instruction_size,
+                                                unsigned char* target,
+                                                unsigned int* target_bytes,
+                                                unsigned int target_size);
 };
 
 };  // namespace sidestep
diff --git a/third_party/tcmalloc/vendor/src/windows/preamble_patcher_test.cc b/third_party/tcmalloc/vendor/src/windows/preamble_patcher_test.cc
new file mode 100644
index 0000000..41ab551
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/windows/preamble_patcher_test.cc
@@ -0,0 +1,367 @@
+/* Copyright (c) 2011, Google Inc.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Joi Sigurdsson
+ * Author: Scott Francis
+ *
+ * Unit tests for PreamblePatcher
+ */
+
+#include "config_for_unittests.h"
+#include "preamble_patcher.h"
+#include "mini_disassembler.h"
+#pragma warning(push)
+#pragma warning(disable:4553)
+#include "auto_testing_hook.h"
+#pragma warning(pop)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <tchar.h>
+
+// Turning off all optimizations for this file, since the official build's
+// "Whole program optimization" seems to cause the TestPatchUsingDynamicStub
+// test to crash with an access violation.  We debugged this and found
+// that the optimized access a register that is changed by a call to the hook
+// function.
+#pragma optimize("", off)
+
+// A convenience macro to avoid a lot of casting in the tests.
+// I tried to make this a templated function, but windows complained:
+//     error C2782: 'sidestep::SideStepError `anonymous-namespace'::Unpatch(T,T,T *)' : template parameter 'T' is ambiguous
+//        could be 'int (int)'
+//        or       'int (__cdecl *)(int)'
+// My life isn't long enough to try to figure out how to fix this.
+#define UNPATCH(target_function, replacement_function, original_function_stub) \
+  sidestep::PreamblePatcher::Unpatch((void*)(target_function),          \
+                                     (void*)(replacement_function),     \
+                                     (void*)(original_function))
+
+namespace {
+
+// Function for testing - this is what we patch
+//
+// NOTE:  Because of the way the compiler optimizes this function in
+// release builds, we need to use a different input value every time we
+// call it within a function, otherwise the compiler will just reuse the
+// last calculated incremented value.
+int __declspec(noinline) IncrementNumber(int i) {
+#ifdef _M_X64
+  __int64 i2 = i + 1;
+  return (int) i2;
+#else
+   return i + 1;
+#endif
+}
+
+extern "C" int TooShortFunction(int);
+
+extern "C" int JumpShortCondFunction(int);
+
+extern "C" int JumpNearCondFunction(int);
+
+extern "C" int JumpAbsoluteFunction(int);
+
+extern "C" int CallNearRelativeFunction(int);
+
+typedef int (*IncrementingFunc)(int);
+IncrementingFunc original_function = NULL;
+
+int HookIncrementNumber(int i) {
+  SIDESTEP_ASSERT(original_function != NULL);
+  int incremented_once = original_function(i);
+  return incremented_once + 1;
+}
+
+// For the AutoTestingHook test, we can't use original_function, because
+// all that is encapsulated.
+// This function "increments" by 10, just to set it apart from the other
+// functions.
+int __declspec(noinline) AutoHookIncrementNumber(int i) {
+  return i + 10;
+}
+
+};  // namespace
+
+namespace sidestep {
+
+bool TestDisassembler() {
+   unsigned int instruction_size = 0;
+   sidestep::MiniDisassembler disassembler;
+   void * target = reinterpret_cast<unsigned char *>(IncrementNumber);
+   void * new_target = PreamblePatcher::ResolveTarget(target);
+   if (target != new_target)
+      target = new_target;
+
+   while (1) {
+      sidestep::InstructionType instructionType = disassembler.Disassemble(
+         reinterpret_cast<unsigned char *>(target) + instruction_size,
+         instruction_size);
+      if (sidestep::IT_RETURN == instructionType) {
+         return true;
+      }
+   }
+}
+
+bool TestPatchWithLongJump() {
+  original_function = NULL;
+  void *p = ::VirtualAlloc(reinterpret_cast<void *>(0x0000020000000000), 4096,
+                           MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+  SIDESTEP_EXPECT_TRUE(p != NULL);
+  memset(p, 0xcc, 4096);
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       sidestep::PreamblePatcher::Patch(IncrementNumber,
+                                                        (IncrementingFunc) p,
+                                                        &original_function));
+  SIDESTEP_ASSERT((*original_function)(1) == 2);
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       UNPATCH(IncrementNumber,
+                               (IncrementingFunc)p,
+                               original_function));
+  ::VirtualFree(p, 0, MEM_RELEASE);
+  return true;
+}
+
+bool TestPatchWithPreambleShortCondJump() {
+  original_function = NULL;
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       sidestep::PreamblePatcher::Patch(JumpShortCondFunction,
+                                                        HookIncrementNumber,
+                                                        &original_function));
+  (*original_function)(1);
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       UNPATCH(JumpShortCondFunction,
+                               (void*)HookIncrementNumber,
+                               original_function));
+  return true;
+}
+
+bool TestPatchWithPreambleNearRelativeCondJump() {
+  original_function = NULL;
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       sidestep::PreamblePatcher::Patch(JumpNearCondFunction,
+                                                        HookIncrementNumber,
+                                                        &original_function));
+  (*original_function)(0);
+  (*original_function)(1);
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       UNPATCH(JumpNearCondFunction,
+                               HookIncrementNumber,
+                               original_function));
+  return true;
+}
+
+bool TestPatchWithPreambleAbsoluteJump() {
+  original_function = NULL;
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       sidestep::PreamblePatcher::Patch(JumpAbsoluteFunction,
+                                                        HookIncrementNumber,
+                                                        &original_function));
+  (*original_function)(0);
+  (*original_function)(1);
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       UNPATCH(JumpAbsoluteFunction,
+                               HookIncrementNumber,
+                               original_function));
+  return true;
+}
+
+bool TestPatchWithPreambleNearRelativeCall() {
+  original_function = NULL;
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       sidestep::PreamblePatcher::Patch(
+                                                    CallNearRelativeFunction,
+                                                    HookIncrementNumber,
+                                                    &original_function));
+  (*original_function)(0);
+  (*original_function)(1);
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       UNPATCH(CallNearRelativeFunction,
+                               HookIncrementNumber,
+                               original_function));
+  return true;
+}
+
+bool TestPatchUsingDynamicStub() {
+  original_function = NULL;
+  SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       sidestep::PreamblePatcher::Patch(IncrementNumber,
+                                                        HookIncrementNumber,
+                                                        &original_function));
+  SIDESTEP_EXPECT_TRUE(original_function);
+  SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 4);
+  SIDESTEP_EXPECT_TRUE(original_function(3) == 4);
+
+  // Clearbox test to see that the function has been patched.
+  sidestep::MiniDisassembler disassembler;
+  unsigned int instruction_size = 0;
+  SIDESTEP_EXPECT_TRUE(sidestep::IT_JUMP == disassembler.Disassemble(
+                           reinterpret_cast<unsigned char*>(IncrementNumber),
+                           instruction_size));
+
+  // Since we patched IncrementNumber, its first statement is a
+  // jmp to the hook function.  So verify that we now can not patch
+  // IncrementNumber because it starts with a jump.
+#if 0
+  IncrementingFunc dummy = NULL;
+  // TODO(joi@chromium.org): restore this test once flag is added to
+  // disable JMP following
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_JUMP_INSTRUCTION ==
+                       sidestep::PreamblePatcher::Patch(IncrementNumber,
+                                                        HookIncrementNumber,
+                                                        &dummy));
+
+  // This test disabled because code in preamble_patcher_with_stub.cc
+  // asserts before returning the error code -- so there is no way
+  // to get an error code here, in debug build.
+  dummy = NULL;
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_FUNCTION_TOO_SMALL ==
+                       sidestep::PreamblePatcher::Patch(TooShortFunction,
+                                                        HookIncrementNumber,
+                                                        &dummy));
+#endif
+
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       UNPATCH(IncrementNumber,
+                               HookIncrementNumber,
+                               original_function));
+  return true;
+}
+
+bool PatchThenUnpatch() {
+  original_function = NULL;
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       sidestep::PreamblePatcher::Patch(IncrementNumber,
+                                                        HookIncrementNumber,
+                                                        &original_function));
+  SIDESTEP_EXPECT_TRUE(original_function);
+  SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 3);
+  SIDESTEP_EXPECT_TRUE(original_function(2) == 3);
+
+  SIDESTEP_EXPECT_TRUE(sidestep::SIDESTEP_SUCCESS ==
+                       UNPATCH(IncrementNumber,
+                               HookIncrementNumber,
+                               original_function));
+  original_function = NULL;
+  SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+  return true;
+}
+
+bool AutoTestingHookTest() {
+  SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+
+  // Inner scope, so we can test what happens when the AutoTestingHook
+  // goes out of scope
+  {
+    AutoTestingHook hook = MakeTestingHook(IncrementNumber,
+                                           AutoHookIncrementNumber);
+    (void) hook;
+    SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12);
+  }
+  SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+  return true;
+}
+
+bool AutoTestingHookInContainerTest() {
+  SIDESTEP_EXPECT_TRUE(IncrementNumber(1) == 2);
+
+  // Inner scope, so we can test what happens when the AutoTestingHook
+  // goes out of scope
+  {
+    AutoTestingHookHolder hook(MakeTestingHookHolder(IncrementNumber,
+                                                     AutoHookIncrementNumber));
+    (void) hook;
+    SIDESTEP_EXPECT_TRUE(IncrementNumber(2) == 12);
+  }
+  SIDESTEP_EXPECT_TRUE(IncrementNumber(3) == 4);
+
+  return true;
+}
+
+bool TestPreambleAllocation() {
+  __int64 diff = 0;
+  void* p1 = reinterpret_cast<void*>(0x110000000);
+  void* p2 = reinterpret_cast<void*>(0x810000000);
+  unsigned char* b1 = PreamblePatcher::AllocPreambleBlockNear(p1);
+  SIDESTEP_EXPECT_TRUE(b1 != NULL);
+  diff = reinterpret_cast<__int64>(p1) - reinterpret_cast<__int64>(b1);
+  // Ensure blocks are within 2GB
+  SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN);
+  unsigned char* b2 = PreamblePatcher::AllocPreambleBlockNear(p2);
+  SIDESTEP_EXPECT_TRUE(b2 != NULL);
+  diff = reinterpret_cast<__int64>(p2) - reinterpret_cast<__int64>(b2);
+  SIDESTEP_EXPECT_TRUE(diff <= INT_MAX && diff >= INT_MIN);
+
+  // Ensure we're reusing free blocks
+  unsigned char* b3 = b1;
+  unsigned char* b4 = b2;
+  PreamblePatcher::FreePreambleBlock(b1);
+  PreamblePatcher::FreePreambleBlock(b2);
+  b1 = PreamblePatcher::AllocPreambleBlockNear(p1);
+  SIDESTEP_EXPECT_TRUE(b1 == b3);
+  b2 = PreamblePatcher::AllocPreambleBlockNear(p2);
+  SIDESTEP_EXPECT_TRUE(b2 == b4);
+  PreamblePatcher::FreePreambleBlock(b1);
+  PreamblePatcher::FreePreambleBlock(b2);
+
+  return true;
+}
+
+bool UnitTests() {
+  return TestPatchWithPreambleNearRelativeCall() &&
+      TestPatchWithPreambleAbsoluteJump() &&
+      TestPatchWithPreambleNearRelativeCondJump() && 
+      TestPatchWithPreambleShortCondJump() &&
+      TestDisassembler() && TestPatchWithLongJump() &&
+      TestPatchUsingDynamicStub() && PatchThenUnpatch() &&
+      AutoTestingHookTest() && AutoTestingHookInContainerTest() &&
+      TestPreambleAllocation();
+}
+
+};  // namespace sidestep
+
+int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) {
+  if (size == 0)        // not even room for a \0?
+    return -1;          // not what C99 says to do, but what windows does
+  str[size-1] = '\0';
+  return _vsnprintf(str, size-1, format, ap);
+}
+
+int _tmain(int argc, _TCHAR* argv[])
+{
+  bool ret = sidestep::UnitTests();
+  printf("%s\n", ret ? "PASS" : "FAIL");
+  return ret ? 0 : -1;
+}
+
+#pragma optimize("", on)
diff --git a/third_party/tcmalloc/vendor/src/windows/preamble_patcher_with_stub.cc b/third_party/tcmalloc/vendor/src/windows/preamble_patcher_with_stub.cc
index 4eb391d..b0dc393 100644
--- a/third_party/tcmalloc/vendor/src/windows/preamble_patcher_with_stub.cc
+++ b/third_party/tcmalloc/vendor/src/windows/preamble_patcher_with_stub.cc
@@ -29,6 +29,7 @@
  *
  * ---
  * Author: Joi Sigurdsson
+ * Author: Scott Francis
  *
  * Implementation of PreamblePatcher
  */
@@ -40,12 +41,20 @@
 // Definitions of assembly statements we need
 #define ASM_JMP32REL 0xE9
 #define ASM_INT3 0xCC
+#define ASM_NOP 0x90
+// X64 opcodes
+#define ASM_MOVRAX_IMM 0xB8
+#define ASM_REXW 0x48
+#define ASM_JMP 0xFF
+#define ASM_JMP_RAX 0xE0
+#define ASM_PUSH 0x68
+#define ASM_RET 0xC3
 
 namespace sidestep {
 
 SideStepError PreamblePatcher::RawPatchWithStub(
     void* target_function,
-    void *replacement_function,
+    void* replacement_function,
     unsigned char* preamble_stub,
     unsigned long stub_size,
     unsigned long* bytes_needed) {
@@ -75,23 +84,52 @@ SideStepError PreamblePatcher::RawPatchWithStub(
   // doing it atomically does not help if one of the other threads happens
   // to have its eip in the middle of the bytes you change while you change
   // them.
+  unsigned char* target = reinterpret_cast<unsigned char*>(target_function);
+  unsigned int required_trampoline_bytes = 0;
+  const unsigned int kRequiredStubJumpBytes = 5;
+  const unsigned int kRequiredTargetPatchBytes = 5;
 
-  // First, deal with a special case that we see with functions that
-  // point into an IAT table (including functions linked statically
-  // into the application): these function already starts with
-  // ASM_JMP32REL.  For instance, malloc() might be implemented as a
-  // JMP to __malloc().  In that case, we replace the destination of
-  // the JMP (__malloc), rather than the JMP itself (malloc).  This
-  // way we get the correct behavior no matter how malloc gets called.
-  void *new_target = ResolveTarget(target_function);
-  if (new_target != target_function) {   // we're in the IAT case
-    // I'd like to just say "target = new_target", but I can't,
-    // because the new target will need to have its protections set.
-    return RawPatchWithStubAndProtections(new_target, replacement_function,
-                                          preamble_stub, stub_size,
-                                          bytes_needed);
+  // Initialize the stub with INT3's just in case.
+  if (stub_size) {
+    memset(preamble_stub, 0xcc, stub_size);
+  }
+  if (kIs64BitBinary) {
+    // In 64-bit mode JMP instructions are always relative to RIP.  If the
+    // replacement - target offset is > 2GB, we can't JMP to the replacement
+    // function.  In this case, we're going to use a trampoline - that is,
+    // we're going to do a relative jump to a small chunk of code in the stub
+    // that will then do the absolute jump to the replacement function.  By
+    // doing this, we only need to patch 5 bytes in the target function, as
+    // opposed to patching 12 bytes if we were to do an absolute jump.
+    //
+    // Note that the first byte of the trampoline is a NOP instruction.  This
+    // is used as a trampoline signature that will be detected when unpatching
+    // the function.
+    //
+    // jmp <trampoline>
+    //
+    // trampoline:
+    //    nop
+    //    mov rax, <replacement_function>
+    //    jmp rax
+    //
+    __int64 replacement_target_offset = reinterpret_cast<__int64>(
+        replacement_function) - reinterpret_cast<__int64>(target) - 5;
+    if (replacement_target_offset > INT_MAX
+        || replacement_target_offset < INT_MIN) {
+      // The stub needs to be within 2GB of the target for the trampoline to
+      // work!
+      __int64 trampoline_offset = reinterpret_cast<__int64>(preamble_stub)
+          - reinterpret_cast<__int64>(target) - 5;
+      if (trampoline_offset > INT_MAX || trampoline_offset < INT_MIN) {
+        // We're screwed.
+        SIDESTEP_ASSERT(false 
+                       && "Preamble stub is too far from target to patch.");
+        return SIDESTEP_UNEXPECTED;
+      }
+      required_trampoline_bytes = 13;
+    }
   }
-  unsigned char* target = reinterpret_cast<unsigned char*>(new_target);
 
   // Let's disassemble the preamble of the target function to see if we can
   // patch, and to see how much of the preamble we need to take.  We need 5
@@ -99,42 +137,76 @@ SideStepError PreamblePatcher::RawPatchWithStub(
   // instructions to get 5 bytes.
   MiniDisassembler disassembler;
   unsigned int preamble_bytes = 0;
-  while (preamble_bytes < 5) {
+  unsigned int stub_bytes = 0;
+  while (preamble_bytes < kRequiredTargetPatchBytes) {
+    unsigned int cur_bytes = 0;
     InstructionType instruction_type =
-        disassembler.Disassemble(target + preamble_bytes, preamble_bytes);
+        disassembler.Disassemble(target + preamble_bytes, cur_bytes);
     if (IT_JUMP == instruction_type) {
-      SIDESTEP_ASSERT(false &&
-                      "Unable to patch because there is a jump instruction "
-                      "in the first 5 bytes.");
-      return SIDESTEP_JUMP_INSTRUCTION;
+      unsigned int jump_bytes = 0;
+      SideStepError jump_ret = SIDESTEP_JUMP_INSTRUCTION;
+      if (IsShortConditionalJump(target + preamble_bytes, cur_bytes)) {
+        jump_ret = PatchShortConditionalJump(target + preamble_bytes, cur_bytes,
+                                             preamble_stub + stub_bytes,
+                                             &jump_bytes,
+                                             stub_size - stub_bytes);
+      } else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) ||
+                 IsNearRelativeJump(target + preamble_bytes, cur_bytes) ||
+                 IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) ||
+                 IsNearRelativeCall(target + preamble_bytes, cur_bytes)) {
+         jump_ret = PatchNearJumpOrCall(target + preamble_bytes, cur_bytes,
+                                        preamble_stub + stub_bytes, &jump_bytes,
+                                        stub_size - stub_bytes);
+      }
+      if (jump_ret != SIDESTEP_SUCCESS) {
+        SIDESTEP_ASSERT(false &&
+                        "Unable to patch because there is an unhandled branch "
+                        "instruction in the initial preamble bytes.");
+        return SIDESTEP_JUMP_INSTRUCTION;
+      }
+      stub_bytes += jump_bytes;
     } else if (IT_RETURN == instruction_type) {
       SIDESTEP_ASSERT(false &&
                       "Unable to patch because function is too short");
       return SIDESTEP_FUNCTION_TOO_SMALL;
-    } else if (IT_GENERIC != instruction_type) {
+    } else if (IT_GENERIC == instruction_type) {
+      if (IsMovWithDisplacement(target + preamble_bytes, cur_bytes)) {
+        unsigned int mov_bytes = 0;
+        if (PatchMovWithDisplacement(target + preamble_bytes, cur_bytes,
+                                     preamble_stub + stub_bytes, &mov_bytes,
+                                     stub_size - stub_bytes)
+            != SIDESTEP_SUCCESS) {
+          return SIDESTEP_UNSUPPORTED_INSTRUCTION;
+        }
+        stub_bytes += mov_bytes;
+      } else {
+        memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes),
+               reinterpret_cast<void*>(target + preamble_bytes), cur_bytes);
+        stub_bytes += cur_bytes;
+      }
+    } else {
       SIDESTEP_ASSERT(false &&
                       "Disassembler encountered unsupported instruction "
                       "(either unused or unknown");
       return SIDESTEP_UNSUPPORTED_INSTRUCTION;
     }
+    preamble_bytes += cur_bytes;
   }
 
   if (NULL != bytes_needed)
-    *bytes_needed = preamble_bytes + 5;
+    *bytes_needed = stub_bytes + kRequiredStubJumpBytes
+        + required_trampoline_bytes;
 
   // Inv: cbPreamble is the number of bytes (at least 5) that we need to take
   // from the preamble to have whole instructions that are 5 bytes or more
-  // in size total. The size of the stub required is cbPreamble + size of
-  // jmp (5)
-  if (preamble_bytes + 5 > stub_size) {
+  // in size total. The size of the stub required is cbPreamble +
+  // kRequiredStubJumpBytes (5) + required_trampoline_bytes (0 or 13)
+  if (stub_bytes + kRequiredStubJumpBytes + required_trampoline_bytes
+      > stub_size) {
     SIDESTEP_ASSERT(false);
     return SIDESTEP_INSUFFICIENT_BUFFER;
   }
 
-  // First, copy the preamble that we will overwrite.
-  memcpy(reinterpret_cast<void*>(preamble_stub),
-         reinterpret_cast<void*>(target), preamble_bytes);
-
   // Now, make a jmp instruction to the rest of the target function (minus the
   // preamble bytes we moved into the stub) and copy it into our preamble-stub.
   // find address to jump to, relative to next address after jmp instruction
@@ -144,16 +216,32 @@ SideStepError PreamblePatcher::RawPatchWithStub(
 #endif
   int relative_offset_to_target_rest
       = ((reinterpret_cast<unsigned char*>(target) + preamble_bytes) -
-         (preamble_stub + preamble_bytes + 5));
+         (preamble_stub + stub_bytes + kRequiredStubJumpBytes));
 #ifdef _MSC_VER
 #pragma warning(pop)
 #endif
   // jmp (Jump near, relative, displacement relative to next instruction)
-  preamble_stub[preamble_bytes] = ASM_JMP32REL;
+  preamble_stub[stub_bytes] = ASM_JMP32REL;
   // copy the address
-  memcpy(reinterpret_cast<void*>(preamble_stub + preamble_bytes + 1),
+  memcpy(reinterpret_cast<void*>(preamble_stub + stub_bytes + 1),
          reinterpret_cast<void*>(&relative_offset_to_target_rest), 4);
 
+  if (kIs64BitBinary && required_trampoline_bytes != 0) {
+    // Construct the trampoline
+    unsigned int trampoline_pos = stub_bytes + kRequiredStubJumpBytes;
+    preamble_stub[trampoline_pos] = ASM_NOP;
+    preamble_stub[trampoline_pos + 1] = ASM_REXW;
+    preamble_stub[trampoline_pos + 2] = ASM_MOVRAX_IMM;
+    memcpy(reinterpret_cast<void*>(preamble_stub + trampoline_pos + 3),
+           reinterpret_cast<void*>(&replacement_function),
+           sizeof(void *));
+    preamble_stub[trampoline_pos + 11] = ASM_JMP;
+    preamble_stub[trampoline_pos + 12] = ASM_JMP_RAX;
+
+    // Now update replacement_function to point to the trampoline
+    replacement_function = preamble_stub + trampoline_pos;
+  }
+
   // Inv: preamble_stub points to assembly code that will execute the
   // original function by first executing the first cbPreamble bytes of the
   // preamble, then jumping to the rest of the function.
@@ -177,6 +265,7 @@ SideStepError PreamblePatcher::RawPatchWithStub(
   // complete the jmp instruction
   memcpy(reinterpret_cast<void*>(target + 1),
          reinterpret_cast<void*>(&offset_to_replacement_function), 4);
+
   // Set any remaining bytes that were moved to the preamble-stub to INT3 so
   // as not to cause confusion (otherwise you might see some strange
   // instructions if you look at the disassembly, or even invalid
@@ -184,8 +273,9 @@ SideStepError PreamblePatcher::RawPatchWithStub(
   // some code calls into this portion of the code.  If this happens, it
   // means that this function cannot be patched using this patcher without
   // further thought.
-  if (preamble_bytes > 5) {
-    memset(reinterpret_cast<void*>(target + 5), ASM_INT3, preamble_bytes - 5);
+  if (preamble_bytes > kRequiredTargetPatchBytes) {
+    memset(reinterpret_cast<void*>(target + kRequiredTargetPatchBytes),
+           ASM_INT3, preamble_bytes - kRequiredTargetPatchBytes);
   }
 
   // Inv: The memory pointed to by target_function now points to a relative
@@ -193,7 +283,13 @@ SideStepError PreamblePatcher::RawPatchWithStub(
   // stub contains the first stub_size bytes of the original target
   // function's preamble code, followed by a relative jump back to the next
   // instruction after the first cbPreamble bytes.
-
+  //
+  // In 64-bit mode the memory pointed to by target_function *may* point to a
+  // relative jump instruction that jumps to a trampoline which will then
+  // perform an absolute jump to the replacement function.  The preamble stub
+  // still contains the original target function's preamble code, followed by a
+  // jump back to the instructions after the first preamble bytes.
+  //
   return SIDESTEP_SUCCESS;
 }
 
diff --git a/third_party/tcmalloc/vendor/src/windows/shortproc.asm b/third_party/tcmalloc/vendor/src/windows/shortproc.asm
new file mode 100644
index 0000000..7e8e3d7
--- /dev/null
+++ b/third_party/tcmalloc/vendor/src/windows/shortproc.asm
@@ -0,0 +1,169 @@
+; Copyright (c) 2011, Google Inc.
+; All rights reserved.
+; 
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+; 
+;     * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+;     * Redistributions in binary form must reproduce the above
+; copyright notice, this list of conditions and the following disclaimer
+; in the documentation and/or other materials provided with the
+; distribution.
+;     * Neither the name of Google Inc. nor the names of its
+; contributors may be used to endorse or promote products derived from
+; this software without specific prior written permission.
+; 
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;
+; ---
+; Author: Scott Francis
+;
+; Unit tests for PreamblePatcher
+ 
+.MODEL small
+ 
+.CODE
+
+TooShortFunction PROC
+	ret
+TooShortFunction ENDP
+
+JumpShortCondFunction PROC
+	test cl, 1
+	jnz jumpspot
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+	int 3
+jumpspot:
+	nop
+	nop
+	nop
+	nop
+	mov rax, 1
+	ret
+JumpShortCondFunction ENDP
+
+JumpNearCondFunction PROC
+	test cl, 1
+	jnz jumpspot
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+jumpspot:
+	nop
+	nop
+	mov rax, 1
+	ret
+JumpNearCondFunction ENDP
+
+JumpAbsoluteFunction PROC
+	test cl, 1
+	jmp jumpspot
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+jumpspot:
+	nop
+	nop
+	mov rax, 1
+	ret
+JumpAbsoluteFunction ENDP
+
+CallNearRelativeFunction PROC
+	test cl, 1
+	call TooShortFunction
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	mov rdx, 0ffff1111H
+	nop
+	nop
+	nop
+	ret
+CallNearRelativeFunction ENDP
+
+END
author	dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-03-14 22:31:22 +0000
committer	dmikurube@chromium.org <dmikurube@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-03-14 22:31:22 +0000
commit	e6619c2e162540a42b44602d9ca15c15afd1bb63 (patch)
tree	90a53114df5ab8fb4afdef1ee9324a3e903bc84e /third_party/tcmalloc/vendor/src
parent	449019cafa2e658d0765038a4d5d4e06d781e0ad (diff)
download	chromium_src-e6619c2e162540a42b44602d9ca15c15afd1bb63.zip chromium_src-e6619c2e162540a42b44602d9ca15c15afd1bb63.tar.gz chromium_src-e6619c2e162540a42b44602d9ca15c15afd1bb63.tar.bz2