author | glider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-24 12:07:37 +0000 |
---|---|---|
committer | glider@chromium.org <glider@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-24 12:07:37 +0000 |
commit | 0f3eaca32c31fdbacbedb6638c43984c11fcd191 (patch) | |
tree | 32ec0c611c25a1508b48e973a552f8df52dd9b77 /third_party/tcmalloc | |
parent | f6c3483efa3d0cb4a7f49c2e3fc563100722e21b (diff) | |
Revert 48024 - Reland http://codereview.chromium.org/1735024/show to assess the performance.
Review URL: http://codereview.chromium.org/2164001
TBR=antonm,willchan
Review URL: http://codereview.chromium.org/2155002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@48032 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/tcmalloc')
52 files changed, 1748 insertions, 2154 deletions
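Much of this revert reshapes the dynamic-annotations interface (see the dynamic_annotations.h changes below). For orientation, this is how client code uses the happens-before macros. The sketch makes the header's own reference-counting example compilable; GCC's `__sync` builtins stand in for the comment's hypothetical `AtomicDecrementByOne()`, and the class itself is illustrative, not part of the patch:

```cpp
#include "base/dynamic_annotations.h"

// A compilable take on the reference-counting example from the
// dynamic_annotations.h comment block. The race detector pairs the
// HAPPENS_BEFORE and HAPPENS_AFTER calls that share the same address,
// so the thread that drops the last reference is known to observe
// every other thread's writes before running the destructor.
class RefCounted {
 public:
  RefCounted() : refcount_(1) {}
  void Ref() { __sync_add_and_fetch(&refcount_, 1); }
  void Unref() {
    ANNOTATE_HAPPENS_BEFORE(&refcount_);
    if (__sync_sub_and_fetch(&refcount_, 1) == 0) {
      ANNOTATE_HAPPENS_AFTER(&refcount_);
      delete this;
    }
  }
 private:
  ~RefCounted() {}  // heap-only: destroyed via Unref()
  long refcount_;
};
```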
diff --git a/third_party/tcmalloc/README.chromium b/third_party/tcmalloc/README.chromium
index 1ceda91..a8352a9 100644
--- a/third_party/tcmalloc/README.chromium
+++ b/third_party/tcmalloc/README.chromium
@@ -22,8 +22,9 @@ Contents:
 The current revision is:
-  Last Changed Rev: 94
-  Last Changed Date: 2010-05-08 01:53:24 +0400 (Sat, 08 May 2010)
+  Last Changed Rev: 77
+  Last Changed Date: 2009-10-27 10:30:52 -0700 (Tue, 27 Oct 2009)
+
 HOWTOs:
diff --git a/third_party/tcmalloc/chromium/src/base/basictypes.h b/third_party/tcmalloc/chromium/src/base/basictypes.h
index ab9cdabc..9991413 100644
--- a/third_party/tcmalloc/chromium/src/base/basictypes.h
+++ b/third_party/tcmalloc/chromium/src/base/basictypes.h
@@ -308,14 +308,6 @@ class AssignAttributeStartEnd {
 #endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
-#if defined(HAVE___ATTRIBUTE__) && (defined(__i386__) || defined(__x86_64__))
-# define CACHELINE_SIZE 64
-# define CACHELINE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
-#else
-# define CACHELINE_ALIGNED
-#endif // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__)
-
-
 // The following enum should be used only as a constructor argument to indicate
 // that the variable has static storage class, and that the constructor should
 // do nothing to its state. It indicates to the reader that it is legal to
diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc
index e69de29..c8bbcd7 100644
--- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc
+++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc
@@ -0,0 +1,110 @@
+/* Copyright (c) 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Kostya Serebryany
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "base/dynamic_annotations.h"
+#include "base/sysinfo.h"
+
+// Each function is empty and called (via a macro) only in debug mode.
+// The arguments are captured by dynamic tools at runtime.
+
+extern "C" void AnnotateRWLockCreate(const char *file, int line,
+                                     const volatile void *lock){}
+extern "C" void AnnotateRWLockDestroy(const char *file, int line,
+                                      const volatile void *lock){}
+extern "C" void AnnotateRWLockAcquired(const char *file, int line,
+                                       const volatile void *lock, long is_w){}
+extern "C" void AnnotateRWLockReleased(const char *file, int line,
+                                       const volatile void *lock, long is_w){}
+extern "C" void AnnotateCondVarWait(const char *file, int line,
+                                    const volatile void *cv,
+                                    const volatile void *lock){}
+extern "C" void AnnotateCondVarSignal(const char *file, int line,
+                                      const volatile void *cv){}
+extern "C" void AnnotateCondVarSignalAll(const char *file, int line,
+                                         const volatile void *cv){}
+extern "C" void AnnotatePublishMemoryRange(const char *file, int line,
+                                           const volatile void *address,
+                                           long size){}
+extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line,
+                                             const volatile void *address,
+                                             long size){}
+extern "C" void AnnotatePCQCreate(const char *file, int line,
+                                  const volatile void *pcq){}
+extern "C" void AnnotatePCQDestroy(const char *file, int line,
+                                   const volatile void *pcq){}
+extern "C" void AnnotatePCQPut(const char *file, int line,
+                               const volatile void *pcq){}
+extern "C" void AnnotatePCQGet(const char *file, int line,
+                               const volatile void *pcq){}
+extern "C" void AnnotateNewMemory(const char *file, int line,
+                                  const volatile void *mem,
+                                  long size){}
+extern "C" void AnnotateExpectRace(const char *file, int line,
+                                   const volatile void *mem,
+                                   const char *description){}
+extern "C" void AnnotateBenignRace(const char *file, int line,
+                                   const volatile void *mem,
+                                   const char *description){}
+extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
+                                             const volatile void *mu){}
+extern "C" void AnnotateTraceMemory(const char *file, int line,
+                                    const volatile void *arg){}
+extern "C" void AnnotateThreadName(const char *file, int line,
+                                   const char *name){}
+extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line){}
+extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line){}
+extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line){}
+extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line){}
+extern "C" void AnnotateNoOp(const char *file, int line,
+                             const volatile void *arg){}
+
+static int GetRunningOnValgrind() {
+  const char *running_on_valgrind_str = GetenvBeforeMain("RUNNING_ON_VALGRIND");
+  if (running_on_valgrind_str) {
+    return strcmp(running_on_valgrind_str, "0") != 0;
+  }
+  return 0;
+}
+
+// When running under valgrind, this function will be intercepted
+// and a non-zero value will be returned.
+// Some valgrind-based tools (e.g. callgrind) do not intercept functions,
+// so we also read environment variable.
+extern "C" int RunningOnValgrind() {
+  static int running_on_valgrind = GetRunningOnValgrind();
+  return running_on_valgrind;
+}
diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
index dae1a14..a2a268f 100644
--- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
+++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h
@@ -1,10 +1,10 @@
 /* Copyright (c) 2008, Google Inc.
  * All rights reserved.
- * + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: - * + * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above @@ -14,7 +14,7 @@ * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -31,471 +31,445 @@ * Author: Kostya Serebryany */ -/* This file defines dynamic annotations for use with dynamic analysis - tool such as valgrind, PIN, etc. - - Dynamic annotation is a source code annotation that affects - the generated code (that is, the annotation is not a comment). - Each such annotation is attached to a particular - instruction and/or to a particular object (address) in the program. - - The annotations that should be used by users are macros in all upper-case - (e.g., ANNOTATE_NEW_MEMORY). - - Actual implementation of these macros may differ depending on the - dynamic analysis tool being used. - - See http://code.google.com/p/data-race-test/ for more information. - - This file supports the following dynamic analysis tools: - - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero). - Macros are defined empty. - - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1). - Macros are defined as calls to non-inlinable empty functions - that are intercepted by Valgrind. */ - +// This file defines dynamic annotations for use with dynamic analysis +// tool such as valgrind, PIN, etc. +// +// Dynamic annotation is a source code annotation that affects +// the generated code (that is, the annotation is not a comment). +// Each such annotation is attached to a particular +// instruction and/or to a particular object (address) in the program. +// +// The annotations that should be used by users are macros in all upper-case +// (e.g., ANNOTATE_NEW_MEMORY). +// +// Actual implementation of these macros may differ depending on the +// dynamic analysis tool being used. +// +// This file supports the following dynamic analysis tools: +// - None (NDEBUG is defined). +// Macros are defined empty. +// - Helgrind (NDEBUG is not defined). +// Macros are defined as calls to non-inlinable empty functions +// that are intercepted by helgrind. +// #ifndef BASE_DYNAMIC_ANNOTATIONS_H_ #define BASE_DYNAMIC_ANNOTATIONS_H_ -#ifndef DYNAMIC_ANNOTATIONS_ENABLED -# define DYNAMIC_ANNOTATIONS_ENABLED 0 -#endif - -#if DYNAMIC_ANNOTATIONS_ENABLED != 0 - - /* ------------------------------------------------------------- - Annotations useful when implementing condition variables such as CondVar, - using conditional critical sections (Await/LockWhen) and when constructing - user-defined synchronization mechanisms. - - The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can - be used to define happens-before arcs in user-defined synchronization - mechanisms: the race detector will infer an arc from the former to the - latter when they share the same argument pointer. 
- - Example 1 (reference counting): - - void Unref() { - ANNOTATE_HAPPENS_BEFORE(&refcount_); - if (AtomicDecrementByOne(&refcount_) == 0) { - ANNOTATE_HAPPENS_AFTER(&refcount_); - delete this; - } - } - - Example 2 (message queue): - - void MyQueue::Put(Type *e) { - MutexLock lock(&mu_); - ANNOTATE_HAPPENS_BEFORE(e); - PutElementIntoMyQueue(e); - } - - Type *MyQueue::Get() { - MutexLock lock(&mu_); - Type *e = GetElementFromMyQueue(); - ANNOTATE_HAPPENS_AFTER(e); - return e; - } - - Note: when possible, please use the existing reference counting and message - queue implementations instead of inventing new ones. */ - - /* Report that wait on the condition variable at address "cv" has succeeded - and the lock at address "lock" is held. */ +#include "base/thread_annotations.h" + +// All the annotation macros are in effect only in debug mode. +#ifndef NDEBUG + + // ------------------------------------------------------------- + // Annotations useful when implementing condition variables such as CondVar, + // using conditional critical sections (Await/LockWhen) and when constructing + // user-defined synchronization mechanisms. + // + // The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can + // be used to define happens-before arcs in user-defined synchronization + // mechanisms: the race detector will infer an arc from the former to the + // latter when they share the same argument pointer. + // + // Example 1 (reference counting): + // + // void Unref() { + // ANNOTATE_HAPPENS_BEFORE(&refcount_); + // if (AtomicDecrementByOne(&refcount_) == 0) { + // ANNOTATE_HAPPENS_AFTER(&refcount_); + // delete this; + // } + // } + // + // Example 2 (message queue): + // + // void MyQueue::Put(Type *e) { + // MutexLock lock(&mu_); + // ANNOTATE_HAPPENS_BEFORE(e); + // PutElementIntoMyQueue(e); + // } + // + // Type *MyQueue::Get() { + // MutexLock lock(&mu_); + // Type *e = GetElementFromMyQueue(); + // ANNOTATE_HAPPENS_AFTER(e); + // return e; + // } + // + // Note: when possible, please use the existing reference counting and message + // queue implementations instead of inventing new ones. + + // Report that wait on the condition variable at address "cv" has succeeded + // and the lock at address "lock" is held. #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) - /* Report that wait on the condition variable at "cv" has succeeded. Variant - w/o lock. */ + // Report that wait on the condition variable at "cv" has succeeded. Variant + // w/o lock. #define ANNOTATE_CONDVAR_WAIT(cv) \ AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) - /* Report that we are about to signal on the condition variable at address - "cv". */ + // Report that we are about to signal on the condition variable at address + // "cv". #define ANNOTATE_CONDVAR_SIGNAL(cv) \ AnnotateCondVarSignal(__FILE__, __LINE__, cv) - /* Report that we are about to signal_all on the condition variable at "cv". */ + // Report that we are about to signal_all on the condition variable at "cv". #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) - /* Annotations for user-defined synchronization mechanisms. */ + // Annotations for user-defined synchronization mechanisms. #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) - /* Report that the bytes in the range [pointer, pointer+size) are about - to be published safely. 
The race checker will create a happens-before - arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to - subsequent accesses to this memory. - Note: this annotation may not work properly if the race detector uses - sampling, i.e. does not observe all memory accesses. - */ + // Report that the bytes in the range [pointer, pointer+size) are about + // to be published safely. The race checker will create a happens-before + // arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + // subsequent accesses to this memory. #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) - /* DEPRECATED. Don't use it. */ + // Report that the bytes in the range [pointer, pointer+size) are not shared + // between threads any more and can be safely used by the current thread w/o + // synchronization. The race checker will create a happens-before arc from + // all previous accesses to this memory to this call. + // + // This annotation could be applied to complex objects, such as STL + // containers, with one condition: the accesses to the object itself + // and its internal data should not be separated with any synchronization. + // + // Example that works: + // + // map<int, int> the_map; + // void Thread1() { + // MutexLock lock(&mu); + // // Ok: accesses to the_map and its internal data is not separated by + // // synchronization. + // the_map[1]++; + // } + // void Thread2() { + // { + // MutexLock lock(&mu); + // ... + // // because of some reason we know that the_map will not be used by + // // other threads any more + // ANNOTATE_UNPUBLISH_MEMORY_RANGE(&the_map, sizeof(the_map)); + // } + // the_map->DoSomething(); + // } + // + // Example that does not work (due to the way happens-before arcs are + // represented in some race detectors): + // + // void Thread1() { + // MutexLock lock(&mu); + // int *guts_of_the_map = &(*the_map)[1]; + // // we have some synchronization between access to 'c' and its guts. + // // This will make ANNOTATE_UNPUBLISH_MEMORY_RANGE in Thread2 useless. + // some_other_lock_or_other_synchronization_utility.Lock(); + // (*guts_of_the_map)++; + // ... + // } + // + // void Thread1() { // same as above... #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) - /* DEPRECATED. Don't use it. */ + // This annotation should be used to annotate thread-safe swapping of + // containers. Required only when using hybrid (i.e. not pure happens-before) + // detectors. + // + // This annotation has the same limitation as ANNOTATE_UNPUBLISH_MEMORY_RANGE + // (see above). + // + // Example: + // map<int, int> the_map; + // void Thread1() { + // MutexLock lock(&mu); + // the_map[1]++; + // } + // void Thread2() { + // map<int,int> tmp; + // { + // MutexLock lock(&mu); + // the_map.swap(tmp); + // ANNOTATE_SWAP_MEMORY_RANGE(&the_map, sizeof(the_map)); + // } + // tmp->DoSomething(); + // } #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ do { \ ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ } while (0) - /* Instruct the tool to create a happens-before arc between mu->Unlock() and - mu->Lock(). This annotation may slow down the race detector and hide real - races. Normally it is used only when it would be difficult to annotate each - of the mutex's critical sections individually using the annotations above. - This annotation makes sense only for hybrid race detectors. 
For pure - happens-before detectors this is a no-op. For more details see - http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */ + // Instruct the tool to create a happens-before arc between mu->Unlock() and + // mu->Lock(). This annotation may slow down the race detector and hide real + // races. Normally it is used only when it would be difficult to annotate each + // of the mutex's critical sections individually using the annotations above. + // This annotation makes sense only for hybrid race detectors. For pure + // happens-before detectors this is a no-op. For more details see + // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - /* Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. */ + // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) - /* ------------------------------------------------------------- - Annotations useful when defining memory allocators, or when memory that - was protected in one way starts to be protected in another. */ + // ------------------------------------------------------------- + // Annotations useful when defining memory allocators, or when memory that + // was protected in one way starts to be protected in another. - /* Report that a new memory at "address" of size "size" has been allocated. - This might be used when the memory has been retrieved from a free list and - is about to be reused, or when a the locking discipline for a variable - changes. */ + // Report that a new memory at "address" of size "size" has been allocated. + // This might be used when the memory has been retrieved from a free list and + // is about to be reused, or when a the locking discipline for a variable + // changes. #define ANNOTATE_NEW_MEMORY(address, size) \ AnnotateNewMemory(__FILE__, __LINE__, address, size) - /* ------------------------------------------------------------- - Annotations useful when defining FIFO queues that transfer data between - threads. */ + // ------------------------------------------------------------- + // Annotations useful when defining FIFO queues that transfer data between + // threads. - /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at - address "pcq" has been created. The ANNOTATE_PCQ_* annotations - should be used only for FIFO queues. For non-FIFO queues use - ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). */ + // Report that the producer-consumer queue (such as ProducerConsumerQueue) at + // address "pcq" has been created. The ANNOTATE_PCQ_* annotations + // should be used only for FIFO queues. For non-FIFO queues use + // ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). #define ANNOTATE_PCQ_CREATE(pcq) \ AnnotatePCQCreate(__FILE__, __LINE__, pcq) - /* Report that the queue at address "pcq" is about to be destroyed. */ + // Report that the queue at address "pcq" is about to be destroyed. #define ANNOTATE_PCQ_DESTROY(pcq) \ AnnotatePCQDestroy(__FILE__, __LINE__, pcq) - /* Report that we are about to put an element into a FIFO queue at address - "pcq". */ + // Report that we are about to put an element into a FIFO queue at address + // "pcq". #define ANNOTATE_PCQ_PUT(pcq) \ AnnotatePCQPut(__FILE__, __LINE__, pcq) - /* Report that we've just got an element from a FIFO queue at address "pcq". 
*/ + // Report that we've just got an element from a FIFO queue at address "pcq". #define ANNOTATE_PCQ_GET(pcq) \ AnnotatePCQGet(__FILE__, __LINE__, pcq) - /* ------------------------------------------------------------- - Annotations that suppress errors. It is usually better to express the - program's synchronization using the other annotations, but these can - be used when all else fails. */ - - /* Report that we may have a benign race at "pointer", with size - "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the - point where "pointer" has been allocated, preferably close to the point - where the race happens. See also ANNOTATE_BENIGN_RACE_STATIC. */ - #define ANNOTATE_BENIGN_RACE(pointer, description) \ - AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ - sizeof(*(pointer)), description) - - /* Same as ANNOTATE_BENIGN_RACE(address, description), but applies to - the memory range [address, address+size). */ - #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ - AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) - - /* Request the analysis tool to ignore all reads in the current thread - until ANNOTATE_IGNORE_READS_END is called. - Useful to ignore intentional racey reads, while still checking - other reads and all writes. - See also ANNOTATE_UNPROTECTED_READ. */ + // ------------------------------------------------------------- + // Annotations that suppress errors. It is usually better to express the + // program's synchronization using the other annotations, but these can + // be used when all else fails. + + // Report that we may have a benign race on at "address". + // Insert at the point where "address" has been allocated, preferably close + // to the point where the race happens. + // See also ANNOTATE_BENIGN_RACE_STATIC. + #define ANNOTATE_BENIGN_RACE(address, description) \ + AnnotateBenignRace(__FILE__, __LINE__, address, description) + + // Request the analysis tool to ignore all reads in the current thread + // until ANNOTATE_IGNORE_READS_END is called. + // Useful to ignore intentional racey reads, while still checking + // other reads and all writes. + // See also ANNOTATE_UNPROTECTED_READ. #define ANNOTATE_IGNORE_READS_BEGIN() \ AnnotateIgnoreReadsBegin(__FILE__, __LINE__) - /* Stop ignoring reads. */ + // Stop ignoring reads. #define ANNOTATE_IGNORE_READS_END() \ AnnotateIgnoreReadsEnd(__FILE__, __LINE__) - /* Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ + // Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. #define ANNOTATE_IGNORE_WRITES_BEGIN() \ AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - /* Stop ignoring writes. */ + // Stop ignoring writes. #define ANNOTATE_IGNORE_WRITES_END() \ AnnotateIgnoreWritesEnd(__FILE__, __LINE__) - /* Start ignoring all memory accesses (reads and writes). */ + // Start ignoring all memory accesses (reads and writes). #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ do {\ ANNOTATE_IGNORE_READS_BEGIN();\ ANNOTATE_IGNORE_WRITES_BEGIN();\ }while(0)\ - /* Stop ignoring all memory accesses. */ + // Stop ignoring all memory accesses. #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ do {\ ANNOTATE_IGNORE_WRITES_END();\ ANNOTATE_IGNORE_READS_END();\ }while(0)\ - /* Enable (enable!=0) or disable (enable==0) race detection for all threads. - This annotation could be useful if you want to skip expensive race analysis - during some period of program execution, e.g. during initialization. 
*/ - #define ANNOTATE_ENABLE_RACE_DETECTION(enable) \ - AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) + // ------------------------------------------------------------- + // Annotations useful for debugging. - /* ------------------------------------------------------------- - Annotations useful for debugging. */ - - /* Request to trace every access to "address". */ + // Request to trace every access to "address". #define ANNOTATE_TRACE_MEMORY(address) \ AnnotateTraceMemory(__FILE__, __LINE__, address) - /* Report the current thread name to a race detector. */ + // Report the current thread name to a race detector. #define ANNOTATE_THREAD_NAME(name) \ AnnotateThreadName(__FILE__, __LINE__, name) - /* ------------------------------------------------------------- - Annotations useful when implementing locks. They are not - normally needed by modules that merely use locks. - The "lock" argument is a pointer to the lock object. */ + // ------------------------------------------------------------- + // Annotations useful when implementing locks. They are not + // normally needed by modules that merely use locks. + // The "lock" argument is a pointer to the lock object. - /* Report that a lock has been created at address "lock". */ + // Report that a lock has been created at address "lock". #define ANNOTATE_RWLOCK_CREATE(lock) \ AnnotateRWLockCreate(__FILE__, __LINE__, lock) - /* Report that the lock at address "lock" is about to be destroyed. */ + // Report that the lock at address "lock" is about to be destroyed. #define ANNOTATE_RWLOCK_DESTROY(lock) \ AnnotateRWLockDestroy(__FILE__, __LINE__, lock) - /* Report that the lock at address "lock" has been acquired. - is_w=1 for writer lock, is_w=0 for reader lock. */ + // Report that the lock at address "lock" has been acquired. + // is_w=1 for writer lock, is_w=0 for reader lock. #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) - /* Report that the lock at address "lock" is about to be released. */ + // Report that the lock at address "lock" is about to be released. #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) - /* ------------------------------------------------------------- - Annotations useful when implementing barriers. They are not - normally needed by modules that merely use barriers. - The "barrier" argument is a pointer to the barrier object. */ - - /* Report that the "barrier" has been initialized with initial "count". - If 'reinitialization_allowed' is true, initialization is allowed to happen - multiple times w/o calling barrier_destroy() */ - #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ - AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ - reinitialization_allowed) - - /* Report that we are about to enter barrier_wait("barrier"). */ - #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ - AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) - - /* Report that we just exited barrier_wait("barrier"). */ - #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ - AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) - - /* Report that the "barrier" has been destroyed. */ - #define ANNOTATE_BARRIER_DESTROY(barrier) \ - AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) - - /* ------------------------------------------------------------- - Annotations useful for testing race detectors. 
*/ + // ------------------------------------------------------------- + // Annotations useful for testing race detectors. - /* Report that we expect a race on the variable at "address". - Use only in unit tests for a race detector. */ + // Report that we expect a race on the variable at "address". + // Use only in unit tests for a race detector. #define ANNOTATE_EXPECT_RACE(address, description) \ AnnotateExpectRace(__FILE__, __LINE__, address, description) - /* A no-op. Insert where you like to test the interceptors. */ + // A no-op. Insert where you like to test the interceptors. #define ANNOTATE_NO_OP(arg) \ AnnotateNoOp(__FILE__, __LINE__, arg) - /* Force the race detector to flush its state. The actual effect depends on - * the implementation of the detector. */ - #define ANNOTATE_FLUSH_STATE() \ - AnnotateFlushState(__FILE__, __LINE__) - - -#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ - - #define ANNOTATE_RWLOCK_CREATE(lock) /* empty */ - #define ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ - #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ - #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ - #define ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ - #define ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ - #define ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ - #define ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ - #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ - #define ANNOTATE_CONDVAR_WAIT(cv) /* empty */ - #define ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ - #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ - #define ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ - #define ANNOTATE_HAPPENS_AFTER(obj) /* empty */ - #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ - #define ANNOTATE_PCQ_CREATE(pcq) /* empty */ - #define ANNOTATE_PCQ_DESTROY(pcq) /* empty */ - #define ANNOTATE_PCQ_PUT(pcq) /* empty */ - #define ANNOTATE_PCQ_GET(pcq) /* empty */ - #define ANNOTATE_NEW_MEMORY(address, size) /* empty */ - #define ANNOTATE_EXPECT_RACE(address, description) /* empty */ - #define ANNOTATE_BENIGN_RACE(address, description) /* empty */ - #define ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ - #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ - #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ - #define ANNOTATE_TRACE_MEMORY(arg) /* empty */ - #define ANNOTATE_THREAD_NAME(name) /* empty */ - #define ANNOTATE_IGNORE_READS_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_READS_END() /* empty */ - #define ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_WRITES_END() /* empty */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ - #define ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ - #define ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ - #define ANNOTATE_NO_OP(arg) /* empty */ - #define ANNOTATE_FLUSH_STATE() /* empty */ - -#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ - -/* Use the macros above rather than using these functions directly. 
*/ -#ifdef __cplusplus -extern "C" { -#endif -void AnnotateRWLockCreate(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockDestroy(const char *file, int line, - const volatile void *lock); -void AnnotateRWLockAcquired(const char *file, int line, - const volatile void *lock, long is_w); -void AnnotateRWLockReleased(const char *file, int line, - const volatile void *lock, long is_w); -void AnnotateBarrierInit(const char *file, int line, - const volatile void *barrier, long count, - long reinitialization_allowed); -void AnnotateBarrierWaitBefore(const char *file, int line, - const volatile void *barrier); -void AnnotateBarrierWaitAfter(const char *file, int line, - const volatile void *barrier); -void AnnotateBarrierDestroy(const char *file, int line, - const volatile void *barrier); -void AnnotateCondVarWait(const char *file, int line, - const volatile void *cv, - const volatile void *lock); -void AnnotateCondVarSignal(const char *file, int line, - const volatile void *cv); -void AnnotateCondVarSignalAll(const char *file, int line, - const volatile void *cv); -void AnnotatePublishMemoryRange(const char *file, int line, - const volatile void *address, - long size); -void AnnotateUnpublishMemoryRange(const char *file, int line, +#else // NDEBUG is defined + + #define ANNOTATE_RWLOCK_CREATE(lock) // empty + #define ANNOTATE_RWLOCK_DESTROY(lock) // empty + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) // empty + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) // empty + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) // empty + #define ANNOTATE_CONDVAR_WAIT(cv) // empty + #define ANNOTATE_CONDVAR_SIGNAL(cv) // empty + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) // empty + #define ANNOTATE_HAPPENS_BEFORE(obj) // empty + #define ANNOTATE_HAPPENS_AFTER(obj) // empty + #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_PCQ_CREATE(pcq) // empty + #define ANNOTATE_PCQ_DESTROY(pcq) // empty + #define ANNOTATE_PCQ_PUT(pcq) // empty + #define ANNOTATE_PCQ_GET(pcq) // empty + #define ANNOTATE_NEW_MEMORY(address, size) // empty + #define ANNOTATE_EXPECT_RACE(address, description) // empty + #define ANNOTATE_BENIGN_RACE(address, description) // empty + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty + #define ANNOTATE_TRACE_MEMORY(arg) // empty + #define ANNOTATE_THREAD_NAME(name) // empty + #define ANNOTATE_IGNORE_READS_BEGIN() // empty + #define ANNOTATE_IGNORE_READS_END() // empty + #define ANNOTATE_IGNORE_WRITES_BEGIN() // empty + #define ANNOTATE_IGNORE_WRITES_END() // empty + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() // empty + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() // empty + #define ANNOTATE_NO_OP(arg) // empty + +#endif // NDEBUG + +// Use the macros above rather than using these functions directly. 
+extern "C" void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock); +extern "C" void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock); +extern "C" void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w); +extern "C" void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w); +extern "C" void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock); +extern "C" void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv); +extern "C" void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv); +extern "C" void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +extern "C" void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotateNewMemory(const char *file, int line, const volatile void *address, long size); -void AnnotatePCQCreate(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQDestroy(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQPut(const char *file, int line, - const volatile void *pcq); -void AnnotatePCQGet(const char *file, int line, - const volatile void *pcq); -void AnnotateNewMemory(const char *file, int line, - const volatile void *address, - long size); -void AnnotateExpectRace(const char *file, int line, - const volatile void *address, - const char *description); -void AnnotateBenignRace(const char *file, int line, - const volatile void *address, - const char *description); -void AnnotateBenignRaceSized(const char *file, int line, - const volatile void *address, - long size, - const char *description); -void AnnotateMutexIsUsedAsCondVar(const char *file, int line, - const volatile void *mu); -void AnnotateTraceMemory(const char *file, int line, - const volatile void *arg); -void AnnotateThreadName(const char *file, int line, - const char *name); -void AnnotateIgnoreReadsBegin(const char *file, int line); -void AnnotateIgnoreReadsEnd(const char *file, int line); -void AnnotateIgnoreWritesBegin(const char *file, int line); -void AnnotateIgnoreWritesEnd(const char *file, int line); -void AnnotateEnableRaceDetection(const char *file, int line, int enable); -void AnnotateNoOp(const char *file, int line, - const volatile void *arg); -void AnnotateFlushState(const char *file, int line); - -/* Return non-zero value if running under valgrind. - - If "valgrind.h" is included into dynamic_annotations.c, - the regular valgrind mechanism will be used. - See http://valgrind.org/docs/manual/manual-core-adv.html about - RUNNING_ON_VALGRIND and other valgrind "client requests". 
- The file "valgrind.h" may be obtained by doing - svn co svn://svn.valgrind.org/valgrind/trunk/include - - If for some reason you can't use "valgrind.h" or want to fake valgrind, - there are two ways to make this function return non-zero: - - Use environment variable: export RUNNING_ON_VALGRIND=1 - - Make your tool intercept the function RunningOnValgrind() and - change its return value. - */ -int RunningOnValgrind(void); - -#ifdef __cplusplus -} -#endif - -#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus) - - /* ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. - - Instead of doing - ANNOTATE_IGNORE_READS_BEGIN(); - ... = x; - ANNOTATE_IGNORE_READS_END(); - one can use - ... = ANNOTATE_UNPROTECTED_READ(x); */ +extern "C" void AnnotateExpectRace(const char *file, int line, + const volatile void *address, + const char *description); +extern "C" void AnnotateBenignRace(const char *file, int line, + const volatile void *address, + const char *description); +extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu); +extern "C" void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg); +extern "C" void AnnotateThreadName(const char *file, int line, + const char *name); +extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line); +extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line); +extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line); +extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line); +extern "C" void AnnotateNoOp(const char *file, int line, + const volatile void *arg); + +#ifndef NDEBUG + + // ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. + // + // Instead of doing + // ANNOTATE_IGNORE_READS_BEGIN(); + // ... = x; + // ANNOTATE_IGNORE_READS_END(); + // one can use + // ... = ANNOTATE_UNPROTECTED_READ(x); template <class T> - inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) { + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + NO_THREAD_SAFETY_ANALYSIS { ANNOTATE_IGNORE_READS_BEGIN(); T res = x; ANNOTATE_IGNORE_READS_END(); return res; } - /* Apply ANNOTATE_BENIGN_RACE_SIZED to a static variable. */ + + // Apply ANNOTATE_BENIGN_RACE to a static variable. #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ namespace { \ class static_var ## _annotator { \ public: \ static_var ## _annotator() { \ - ANNOTATE_BENIGN_RACE_SIZED(&static_var, \ - sizeof(static_var), \ + ANNOTATE_BENIGN_RACE(&static_var, \ # static_var ": " description); \ } \ }; \ static static_var ## _annotator the ## static_var ## _annotator;\ } -#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ +#else // !NDEBUG #define ANNOTATE_UNPROTECTED_READ(x) (x) - #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */ + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) // empty + +#endif // !NDEBUG + +// Return non-zero value if running under valgrind. 
+extern "C" int RunningOnValgrind(); -#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ -#endif /* BASE_DYNAMIC_ANNOTATIONS_H_ */ +#endif // BASE_DYNAMIC_ANNOTATIONS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc index 7ca3953a..2bbce54 100644 --- a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc +++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc @@ -210,9 +210,8 @@ static const intptr_t kMagicUnallocated = ~kMagicAllocated; namespace { class ArenaLock { public: - explicit ArenaLock(LowLevelAlloc::Arena *arena) - EXCLUSIVE_LOCK_FUNCTION(arena->mu) - : left_(false), mask_valid_(false), arena_(arena) { + explicit ArenaLock(LowLevelAlloc::Arena *arena) : + left_(false), mask_valid_(false), arena_(arena) { if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { // We've decided not to support async-signal-safe arena use until // there a demonstrated need. Here's how one could do it though @@ -229,7 +228,7 @@ namespace { this->arena_->mu.Lock(); } ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); } - void Leave() UNLOCK_FUNCTION(arena_->mu) { + void Leave() { this->arena_->mu.Unlock(); #if 0 if (this->mask_valid_) { diff --git a/third_party/tcmalloc/chromium/src/base/vdso_support.cc b/third_party/tcmalloc/chromium/src/base/vdso_support.cc index fce7c2c..ddaca37 100644 --- a/third_party/tcmalloc/chromium/src/base/vdso_support.cc +++ b/third_party/tcmalloc/chromium/src/base/vdso_support.cc @@ -42,8 +42,8 @@ #include <fcntl.h> #include "base/atomicops.h" // for MemoryBarrier -#include "base/linux_syscall_support.h" #include "base/logging.h" +#include "base/linux_syscall_support.h" #include "base/dynamic_annotations.h" #include "base/basictypes.h" // for COMPILE_ASSERT diff --git a/third_party/tcmalloc/chromium/src/central_freelist.cc b/third_party/tcmalloc/chromium/src/central_freelist.cc index 5b7dfbb..674ff9b 100644 --- a/third_party/tcmalloc/chromium/src/central_freelist.cc +++ b/third_party/tcmalloc/chromium/src/central_freelist.cc @@ -266,7 +266,8 @@ void CentralFreeList::Populate() { Span* span; { SpinLockHolder h(Static::pageheap_lock()); - span = Static::pageheap()->New(npages, size_class_, kPageSize); + span = Static::pageheap()->New(npages); + if (span) Static::pageheap()->RegisterSizeClass(span, size_class_); } if (span == NULL) { MESSAGE("tcmalloc: allocation failed", npages << kPageShift); @@ -274,6 +275,12 @@ void CentralFreeList::Populate() { return; } ASSERT(span->length == npages); + // Cache sizeclass info eagerly. Locking is not necessary. + // (Instead of being eager, we could just replace any stale info + // about this span, but that seems to be no better in practice.) + for (int i = 0; i < npages; i++) { + Static::pageheap()->CacheSizeClass(span->start + i, size_class_); + } // Split the block into pieces and add to the free-list // TODO: coloring of objects to avoid cache conflicts? 
diff --git a/third_party/tcmalloc/chromium/src/common.h b/third_party/tcmalloc/chromium/src/common.h index f9557c9..53a0a0b 100644 --- a/third_party/tcmalloc/chromium/src/common.h +++ b/third_party/tcmalloc/chromium/src/common.h @@ -62,7 +62,6 @@ static const size_t kPageSize = 1 << kPageShift; static const size_t kMaxSize = 8u * kPageSize; static const size_t kAlignment = 8; static const size_t kNumClasses = 61; -static const size_t kLargeSizeClass = 0; // Maximum length we allow a per-thread free-list to have before we // move objects from it into the corresponding central free-list. We diff --git a/third_party/tcmalloc/chromium/src/config.h.in b/third_party/tcmalloc/chromium/src/config.h.in index 49bbf0d..1ad2642 100644 --- a/third_party/tcmalloc/chromium/src/config.h.in +++ b/third_party/tcmalloc/chromium/src/config.h.in @@ -132,7 +132,7 @@ /* Define to 1 if you have the <sys/types.h> header file. */ #undef HAVE_SYS_TYPES_H -/* <sys/ucontext.h> is broken on redhat 7 */ +/* Define to 1 if you have the <sys/ucontext.h> header file. */ #undef HAVE_SYS_UCONTEXT_H /* Define to 1 if you have the <sys/wait.h> header file. */ @@ -150,9 +150,6 @@ /* Define to 1 if you have the <unwind.h> header file. */ #undef HAVE_UNWIND_H -/* Define to 1 if you have the <valgrind.h> header file. */ -#undef HAVE_VALGRIND_H - /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ diff --git a/third_party/tcmalloc/chromium/src/config_linux.h b/third_party/tcmalloc/chromium/src/config_linux.h index 9786b3e..398f303 100644 --- a/third_party/tcmalloc/chromium/src/config_linux.h +++ b/third_party/tcmalloc/chromium/src/config_linux.h @@ -136,7 +136,7 @@ /* Define to 1 if compiler supports __thread */ #define HAVE_TLS 1 -/* <sys/ucontext.h> is broken on redhat 7 */ +/* Define to 1 if you have the <ucontext.h> header file. */ #define HAVE_UCONTEXT_H 1 /* Define to 1 if you have the <unistd.h> header file. */ @@ -145,9 +145,6 @@ /* Define to 1 if you have the <unwind.h> header file. */ #define HAVE_UNWIND_H 1 -/* Define to 1 if you have the <valgrind.h> header file. */ -#undef HAVE_VALGRIND_H - /* define if your compiler has __attribute__ */ #define HAVE___ATTRIBUTE__ 1 diff --git a/third_party/tcmalloc/chromium/src/config_win.h b/third_party/tcmalloc/chromium/src/config_win.h index 236bd6b..30daf4f 100644 --- a/third_party/tcmalloc/chromium/src/config_win.h +++ b/third_party/tcmalloc/chromium/src/config_win.h @@ -255,12 +255,10 @@ // --------------------------------------------------------------------- // Extra stuff not found in config.h.in -// This must be defined before the windows.h is included. We need at -// least 0x0400 for mutex.h to have access to TryLock, and at least -// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx. -// (This latter is an optimization we could take out if need be.) +// This must be defined before the windows.h is included. It's needed +// for mutex.h, to give access to the TryLock method. 
#ifndef _WIN32_WINNT -# define _WIN32_WINNT 0x0501 +# define _WIN32_WINNT 0x0400 #endif // We want to make sure not to ever try to #include heap-checker.h diff --git a/third_party/tcmalloc/chromium/src/debugallocation.cc b/third_party/tcmalloc/chromium/src/debugallocation.cc index 949fbe9..1a9ddcb 100644 --- a/third_party/tcmalloc/chromium/src/debugallocation.cc +++ b/third_party/tcmalloc/chromium/src/debugallocation.cc @@ -1010,7 +1010,7 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) __THROW ATTRIBUTE_SECTION(google_malloc); -void* operator new(size_t size) throw (std::bad_alloc) +void* operator new(size_t size) ATTRIBUTE_SECTION(google_malloc); void* operator new(size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1018,7 +1018,7 @@ void operator delete(void* p) __THROW ATTRIBUTE_SECTION(google_malloc); void operator delete(void* p, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); -void* operator new[](size_t size) throw (std::bad_alloc) +void* operator new[](size_t size) ATTRIBUTE_SECTION(google_malloc); void* operator new[](size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); @@ -1176,12 +1176,12 @@ extern "C" void* pvalloc(size_t size) __THROW { return p; } -extern "C" int mallopt(int cmd, int value) __THROW { +extern "C" int mallopt(int cmd, int value) { return BASE_MALLOPT(cmd, value); } #ifdef HAVE_STRUCT_MALLINFO -extern "C" struct mallinfo mallinfo(void) __THROW { +extern "C" struct mallinfo mallinfo(void) { return BASE_MALLINFO(); } #endif @@ -1239,7 +1239,7 @@ inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) { } } -void* operator new(size_t size) throw (std::bad_alloc) { +void* operator new(size_t size) { void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, false); MallocHook::InvokeNewHook(ptr, size); if (ptr == NULL) { @@ -1259,8 +1259,7 @@ void operator delete(void* ptr) __THROW { DebugDeallocate(ptr, MallocBlock::kNewType); } -// Some STL implementations explicitly invoke this. -// It is completely equivalent to a normal delete (delete never throws). +// Compilers use this, though I can't see how it differs from normal delete. void operator delete(void* ptr, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kNewType); @@ -1270,7 +1269,7 @@ void operator delete(void* ptr, const std::nothrow_t&) __THROW { // Alloc/free stuff for debug operator new[] & friends -void* operator new[](size_t size) throw (std::bad_alloc) { +void* operator new[](size_t size) { void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, false); MallocHook::InvokeNewHook(ptr, size); if (ptr == NULL) { @@ -1290,8 +1289,7 @@ void operator delete[](void* ptr) __THROW { DebugDeallocate(ptr, MallocBlock::kArrayNewType); } -// Some STL implementations explicitly invoke this. -// It is completely equivalent to a normal delete (delete never throws). +// Compilers use this, though I can't see how it differs from normal delete. void operator delete[](void* ptr, const std::nothrow_t&) __THROW { MallocHook::InvokeDeleteHook(ptr); DebugDeallocate(ptr, MallocBlock::kArrayNewType); @@ -1361,22 +1359,17 @@ class DebugMallocImplementation : public ParentImplementation { static DebugMallocImplementation debug_malloc_implementation; REGISTER_MODULE_INITIALIZER(debugallocation, { - // Either we or valgrind will control memory management. We - // register our extension if we're the winner. 
- if (RunningOnValgrind()) { - // Let Valgrind uses its own malloc (so don't register our extension). - } else { - MallocExtension::Register(&debug_malloc_implementation); - // When the program exits, check all blocks still in the free - // queue for corruption. - atexit(DanglingWriteChecker); - } + MallocExtension::Register(&debug_malloc_implementation); + + // When the program exits, check all blocks still in the free queue for + // corruption. + atexit(DanglingWriteChecker); }); #ifdef TCMALLOC_FOR_DEBUGALLOCATION // Redefine malloc_stats to use tcmalloc's implementation: -extern "C" void malloc_stats(void) __THROW { +extern "C" void malloc_stats(void) { do_malloc_stats(); } diff --git a/third_party/tcmalloc/chromium/src/google/heap-profiler.h b/third_party/tcmalloc/chromium/src/google/heap-profiler.h index 57cb97a..5efaf64 100644 --- a/third_party/tcmalloc/chromium/src/google/heap-profiler.h +++ b/third_party/tcmalloc/chromium/src/google/heap-profiler.h @@ -71,13 +71,12 @@ extern "C" { */ PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix); -/* Returns non-zero if we are currently profiling the heap. (Returns - * an int rather than a bool so it's usable from C.) This is true +/* Returns true if we are currently profiling the heap. This is true * between calls to HeapProfilerStart() and HeapProfilerStop(), and * also if the program has been run with HEAPPROFILER, or some other * way to turn on whole-program profiling. */ -int IsHeapProfilerRunning(); +bool IsHeapProfilerRunning(); /* Stop heap profiling. Can be restarted again with HeapProfilerStart(), * but the currently accumulated profiling information will be cleared. diff --git a/third_party/tcmalloc/chromium/src/google/profiler.h b/third_party/tcmalloc/chromium/src/google/profiler.h index a6883f4..74b936f 100644 --- a/third_party/tcmalloc/chromium/src/google/profiler.h +++ b/third_party/tcmalloc/chromium/src/google/profiler.h @@ -108,15 +108,13 @@ struct ProfilerOptions { void *filter_in_thread_arg; }; -/* Start profiling and write profile info into fname, discarding any - * existing profiling data in that file. +/* Start profiling and write profile info into fname. * * This is equivalent to calling ProfilerStartWithOptions(fname, NULL). */ PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname); -/* Start profiling and write profile into fname, discarding any - * existing profiling data in that file. +/* Start profiling and write profile into fname. * * The profiler is configured using the options given by 'options'. * Options which are not specified are given default values. diff --git a/third_party/tcmalloc/chromium/src/google/stacktrace.h b/third_party/tcmalloc/chromium/src/google/stacktrace.h index fd186d6..8188ce3 100644 --- a/third_party/tcmalloc/chromium/src/google/stacktrace.h +++ b/third_party/tcmalloc/chromium/src/google/stacktrace.h @@ -49,23 +49,23 @@ // Skips the most recent "skip_count" stack frames (also skips the // frame generated for the "GetStackFrames" routine itself), and then // records the pc values for up to the next "max_depth" frames in -// "result", and the corresponding stack frame sizes in "sizes". -// Returns the number of values recorded in "result"/"sizes". +// "pcs", and the corresponding stack frame sizes in "sizes". Returns +// the number of values recorded in "pcs"/"sizes". 
// // Example: // main() { foo(); } // foo() { bar(); } // bar() { -// void* result[10]; +// void* pcs[10]; // int sizes[10]; -// int depth = GetStackFrames(result, sizes, 10, 1); +// int depth = GetStackFrames(pcs, sizes, 10, 1); // } // // The GetStackFrames call will skip the frame for "bar". It will // return 2 and will produce pc values that map to the following // procedures: -// result[0] foo -// result[1] main +// pcs[0] foo +// pcs[1] main // (Actually, there may be a few more entries after "main" to account for // startup procedures.) // And corresponding stack frame sizes will also be recorded: @@ -76,15 +76,15 @@ // be identified. // // This routine may return fewer stack frame entries than are -// available. Also note that "result" and "sizes" must both be non-NULL. -extern PERFTOOLS_DLL_DECL int GetStackFrames(void** result, int* sizes, int max_depth, +// available. Also note that "pcs" and "sizes" must both be non-NULL. +extern PERFTOOLS_DLL_DECL int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count); // Same as above, but to be used from a signal handler. The "uc" parameter // should be the pointer to ucontext_t which was passed as the 3rd parameter // to sa_sigaction signal handler. It may help the unwinder to get a // better stack trace under certain conditions. The "uc" may safely be NULL. -extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** result, int* sizes, int max_depth, +extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** pcs, int* sizes, int max_depth, int skip_count, const void *uc); // This is similar to the GetStackFrames routine, except that it returns diff --git a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in index fbb70ab..e5c873d 100644 --- a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in +++ b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in @@ -60,8 +60,7 @@ #endif #ifdef __cplusplus -#include <new> // for std::nothrow_t - +#include <new> // for nothrow_t extern "C" { #endif // Returns a human-readable version string. If major, minor, @@ -92,15 +91,16 @@ extern "C" { #ifdef __cplusplus PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); - PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, - const std::nothrow_t&) __THROW; PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; - PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, - const std::nothrow_t&) __THROW; PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; + + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) __THROW; PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW; - PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) __THROW; PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW; } diff --git a/third_party/tcmalloc/chromium/src/heap-checker.cc b/third_party/tcmalloc/chromium/src/heap-checker.cc index 2779c97..82a7adb 100644 --- a/third_party/tcmalloc/chromium/src/heap-checker.cc +++ b/third_party/tcmalloc/chromium/src/heap-checker.cc @@ -159,23 +159,6 @@ DEFINE_bool(heap_check_test_pointer_alignment, "Set to true to check if the found leak can be due to " "use of unaligned pointers"); -// Alignment at which all pointers in memory are supposed to be located; -// use 1 if any alignment is ok. 
-// heap_check_test_pointer_alignment flag guides if we try the value of 1. -// The larger it can be, the lesser is the chance of missing real leaks. -// -// sizeof(void)* is correct. However gold (the new linker) has a bug where it -// sometimes places global pointers on 4-byte boundaries, even when pointers -// are 8 bytes long. While we are fixing the linker, degrade to 4-byte -// alignment on all targets. http://b/1226481 -// -static const size_t kPointerSourceAlignment = sizeof(void*); -DEFINE_int32(heap_check_pointer_source_alignment, - EnvToInt("HEAP_CHECK_POINTER_SOURCE_ALIGNMENT", - kPointerSourceAlignment), - "Alignment at which all pointers in memory are supposed to be " - "located. Use 1 if any alignment is ok."); - // A reasonable default to handle pointers inside of typical class objects: // Too low and we won't be able to traverse pointers to normally-used // nested objects and base parts of multiple-inherited objects. @@ -262,6 +245,13 @@ static bool constructor_heap_profiling = false; static const int heap_checker_info_level = 0; //---------------------------------------------------------------------- + +// Alignment at which all pointers in memory are supposed to be located; +// use 1 if any alignment is ok. +// heap_check_test_pointer_alignment flag guides if we try the value of 1. +// The larger it can be, the lesser is the chance of missing real leaks. +static const size_t kPointerSourceAlignment = sizeof(void*); + // Cancel our InitialMallocHook_* if present. static void CancelInitialMallocHooks(); // defined below @@ -494,7 +484,7 @@ HeapLeakChecker::Disabler::Disabler() { // in a thread-safe manner. int counter = get_thread_disable_counter(); set_thread_disable_counter(counter + 1); - RAW_VLOG(10, "Increasing thread disable counter to %d", counter + 1); + RAW_VLOG(1, "Increasing thread disable counter to %d", counter + 1); } HeapLeakChecker::Disabler::~Disabler() { @@ -502,7 +492,7 @@ HeapLeakChecker::Disabler::~Disabler() { RAW_DCHECK(counter > 0, ""); if (counter > 0) { set_thread_disable_counter(counter - 1); - RAW_VLOG(10, "Decreasing thread disable counter to %d", counter); + RAW_VLOG(1, "Decreasing thread disable counter to %d", counter); } else { RAW_VLOG(0, "Thread disable counter underflow : %d", counter); } @@ -535,7 +525,7 @@ static void NewHook(const void* ptr, size_t size) { if (ptr != NULL) { const int counter = get_thread_disable_counter(); const bool ignore = (counter > 0); - RAW_VLOG(16, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, + RAW_VLOG(7, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, int(counter)); { SpinLockHolder l(&heap_checker_lock); if (size > max_heap_object_size) max_heap_object_size = size; @@ -550,17 +540,17 @@ static void NewHook(const void* ptr, size_t size) { } } } - RAW_VLOG(17, "Alloc Recorded: %p of %"PRIuS"", ptr, size); + RAW_VLOG(8, "Alloc Recorded: %p of %"PRIuS"", ptr, size); } } static void DeleteHook(const void* ptr) { if (ptr != NULL) { - RAW_VLOG(16, "Recording Free %p", ptr); + RAW_VLOG(7, "Recording Free %p", ptr); { SpinLockHolder l(&heap_checker_lock); if (heap_checker_on) heap_profile->RecordFree(ptr); } - RAW_VLOG(17, "Free Recorded: %p", ptr); + RAW_VLOG(8, "Free Recorded: %p", ptr); } } @@ -594,7 +584,7 @@ static StackDirection stack_direction = UNKNOWN_DIRECTION; static void RegisterStackLocked(const void* top_ptr) { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - RAW_VLOG(10, "Thread stack at %p", top_ptr); + RAW_VLOG(1, "Thread stack at %p", top_ptr); 
uintptr_t top = AsInt(top_ptr); stack_tops->insert(top); // add for later use @@ -608,12 +598,12 @@ static void RegisterStackLocked(const void* top_ptr) { if (MemoryRegionMap::FindAndMarkStackRegion(top, ®ion)) { // Make the proper portion of the stack live: if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { - RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", top_ptr, region.end_addr - top); live_objects->push_back(AllocObject(top_ptr, region.end_addr - top, THREAD_DATA)); } else { // GROWS_TOWARDS_HIGH_ADDRESSES - RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", AsPtr(region.start_addr), top - region.start_addr); live_objects->push_back(AllocObject(AsPtr(region.start_addr), @@ -629,7 +619,7 @@ static void RegisterStackLocked(const void* top_ptr) { uintptr_t start = AsInt(span->ptr); uintptr_t end = start + span->size; if (start <= top && top < end) { - RAW_VLOG(11, "Stack at %p is inside /proc/self/maps chunk %p..%p", + RAW_VLOG(2, "Stack at %p is inside /proc/self/maps chunk %p..%p", top_ptr, AsPtr(start), AsPtr(end)); // Shrink start..end region by chopping away the memory regions in // MemoryRegionMap that land in it to undo merging of regions @@ -650,17 +640,17 @@ static void RegisterStackLocked(const void* top_ptr) { } } if (stack_start != start || stack_end != end) { - RAW_VLOG(11, "Stack at %p is actually inside memory chunk %p..%p", + RAW_VLOG(2, "Stack at %p is actually inside memory chunk %p..%p", top_ptr, AsPtr(stack_start), AsPtr(stack_end)); } // Make the proper portion of the stack live: if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { - RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", top_ptr, stack_end - top); live_objects->push_back( AllocObject(top_ptr, stack_end - top, THREAD_DATA)); } else { // GROWS_TOWARDS_HIGH_ADDRESSES - RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", AsPtr(stack_start), top - stack_start); live_objects->push_back( AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA)); @@ -733,14 +723,14 @@ static void MakeDisabledLiveCallbackLocked( // and the rest of the region where the stack lives can well // contain outdated stack variables which are not live anymore, // hence should not be treated as such. - RAW_VLOG(11, "Not %s-disabling %"PRIuS" bytes at %p" + RAW_VLOG(2, "Not %s-disabling %"PRIuS" bytes at %p" ": have stack inside: %p", (stack_disable ? "stack" : "range"), info.object_size, ptr, AsPtr(*iter)); return; } } - RAW_VLOG(11, "%s-disabling %"PRIuS" bytes at %p", + RAW_VLOG(2, "%s-disabling %"PRIuS" bytes at %p", (stack_disable ? "Stack" : "Range"), info.object_size, ptr); live_objects->push_back(AllocObject(ptr, info.object_size, MUST_BE_ON_HEAP)); @@ -765,7 +755,7 @@ static void RecordGlobalDataLocked(uintptr_t start_address, // Ignore non-writeable regions. if (strchr(permissions, 'w') == NULL) return; if (filename == NULL || *filename == '\0') filename = "UNNAMED"; - RAW_VLOG(11, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, + RAW_VLOG(2, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, filename, start_address, end_address); (*library_live_objects)[filename]. push_back(AllocObject(AsPtr(start_address), @@ -824,12 +814,12 @@ void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, // does not call user code. 
} if (depth) { - RAW_VLOG(10, "Disabling allocations from %s at depth %d:", library, depth); + RAW_VLOG(1, "Disabling allocations from %s at depth %d:", library, depth); DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth); if (IsLibraryNamed(library, "/libpthread") || IsLibraryNamed(library, "/libdl") || IsLibraryNamed(library, "/ld")) { - RAW_VLOG(10, "Global memory regions made by %s will be live data", + RAW_VLOG(1, "Global memory regions made by %s will be live data", library); if (global_region_caller_ranges == NULL) { global_region_caller_ranges = @@ -946,7 +936,7 @@ static enum { va_list /*ap*/) { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); thread_listing_status = CALLBACK_STARTED; - RAW_VLOG(11, "Found %d threads (from pid %d)", num_threads, getpid()); + RAW_VLOG(2, "Found %d threads (from pid %d)", num_threads, getpid()); if (FLAGS_heap_check_ignore_global_live) { UseProcMapsLocked(RECORD_GLOBAL_DATA); @@ -961,7 +951,7 @@ static enum { // the leak checking thread itself is handled // specially via self_thread_stack, not here: if (thread_pids[i] == self_thread_pid) continue; - RAW_VLOG(11, "Handling thread with pid %d", thread_pids[i]); + RAW_VLOG(2, "Handling thread with pid %d", thread_pids[i]); #if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER) i386_regs thread_regs; #define sys_ptrace(r, p, a, d) syscall(SYS_ptrace, (r), (p), (a), (d)) @@ -977,7 +967,7 @@ static enum { // register pointers still being in the registers and not on the stack): for (void** p = reinterpret_cast<void**>(&thread_regs); p < reinterpret_cast<void**>(&thread_regs + 1); ++p) { - RAW_VLOG(12, "Thread register %p", *p); + RAW_VLOG(3, "Thread register %p", *p); thread_registers.push_back(*p); } } else { @@ -992,7 +982,7 @@ static enum { if (thread_registers.size()) { // Make thread registers be live heap data sources. 
// we rely here on the fact that vector is in one memory chunk: - RAW_VLOG(11, "Live registers at %p of %"PRIuS" bytes", + RAW_VLOG(2, "Live registers at %p of %"PRIuS" bytes", &thread_registers[0], thread_registers.size() * sizeof(void*)); live_objects->push_back(AllocObject(&thread_registers[0], thread_registers.size() * sizeof(void*), @@ -1015,7 +1005,7 @@ static const void* self_thread_stack_top; void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { RAW_DCHECK(heap_checker_lock.IsHeld(), ""); RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); - RAW_VLOG(11, "Handling self thread with pid %d", self_thread_pid); + RAW_VLOG(2, "Handling self thread with pid %d", self_thread_pid); // Register our own stack: // Important that all stack ranges (including the one here) @@ -1029,7 +1019,7 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin(); object != ignored_objects->end(); ++object) { const void* ptr = AsPtr(object->first); - RAW_VLOG(11, "Ignored live object at %p of %"PRIuS" bytes", + RAW_VLOG(2, "Ignored live object at %p of %"PRIuS" bytes", ptr, object->second); live_objects-> push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP)); @@ -1142,10 +1132,10 @@ void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { } } // Now get and use live_objects from the final version of l->second: - if (VLOG_IS_ON(11)) { + if (VLOG_IS_ON(2)) { for (LiveObjectsStack::const_iterator i = l->second.begin(); i != l->second.end(); ++i) { - RAW_VLOG(11, "Library live region at %p of %"PRIuPTR" bytes", + RAW_VLOG(2, "Library live region at %p of %"PRIuPTR" bytes", i->ptr, i->size); } } @@ -1250,7 +1240,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { RAW_LOG(ERROR, "Thread stacks not found for %d threads. " "Will likely report false leak positives.", r); } else { - RAW_VLOG(11, "Thread stacks appear to be found for all threads"); + RAW_VLOG(2, "Thread stacks appear to be found for all threads"); } } else { RAW_LOG(WARNING, "Not looking for thread stacks; " @@ -1266,7 +1256,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { IgnoreNonThreadLiveObjectsLocked(); } if (live_objects_total) { - RAW_VLOG(10, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", + RAW_VLOG(1, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", live_objects_total, live_bytes_total); } // Free these: we made them here and heap_profile never saw them @@ -1276,8 +1266,7 @@ void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { } // Alignment at which we should consider pointer positions -// in IgnoreLiveObjectsLocked. Will normally use the value of -// FLAGS_heap_check_pointer_source_alignment. +// in IgnoreLiveObjectsLocked. Use 1 if any alignment is ok. static size_t pointer_source_alignment = kPointerSourceAlignment; // Global lock for HeapLeakChecker::DoNoLeaks // to protect pointer_source_alignment. 
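The hunks above make kPointerSourceAlignment a plain compile-time constant again and drop the --heap_check_pointer_source_alignment flag: pointer_source_alignment is the stride at which the leak checker's scan treats words of memory as candidate pointers, and --heap_check_test_pointer_alignment simply retries the scan with a stride of 1. A minimal sketch of such a scan, assuming a power-of-two alignment; LooksLikeHeapPointer is a hypothetical stand-in for the real HaveOnHeapLocked()/MarkAsLive() lookup:

#include <cstddef>
#include <cstdint>
#include <cstring>

static bool LooksLikeHeapPointer(uintptr_t value);  // hypothetical stand-in

static void ScanForPointers(const char* object, size_t size, size_t alignment) {
  uintptr_t addr = reinterpret_cast<uintptr_t>(object);
  // Round up to the first position inside the object with the given alignment.
  addr = (addr + alignment - 1) & ~uintptr_t(alignment - 1);
  const uintptr_t end = reinterpret_cast<uintptr_t>(object) + size;
  for (; addr + sizeof(void*) <= end; addr += alignment) {
    uintptr_t candidate;
    memcpy(&candidate, reinterpret_cast<const void*>(addr), sizeof(candidate));
    if (LooksLikeHeapPointer(candidate)) {
      // The real checker would mark the pointed-to heap object live here.
    }
  }
}

With alignment == sizeof(void*) only naturally aligned words are visited; with alignment == 1 pointers stored at odd offsets are also caught, at roughly a word-size multiple of the scanning cost.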
@@ -1325,7 +1314,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); live_object_count += 1; live_byte_count += size; } - RAW_VLOG(13, "Looking for heap pointers in %p of %"PRIuS" bytes", + RAW_VLOG(4, "Looking for heap pointers in %p of %"PRIuS" bytes", object, size); const char* const whole_object = object; size_t const whole_size = size; @@ -1362,7 +1351,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); if (can_be_on_heap) { const void* ptr = reinterpret_cast<const void*>(addr); // Too expensive (inner loop): manually uncomment when debugging: - // RAW_VLOG(17, "Trying pointer to %p at %p", ptr, object); + // RAW_VLOG(8, "Trying pointer to %p at %p", ptr, object); size_t object_size; if (HaveOnHeapLocked(&ptr, &object_size) && heap_profile->MarkAsLive(ptr)) { @@ -1371,15 +1360,15 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); // a heap object which is in fact leaked. // I.e. in very rare and probably not repeatable/lasting cases // we might miss some real heap memory leaks. - RAW_VLOG(14, "Found pointer to %p of %"PRIuS" bytes at %p " + RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p " "inside %p of size %"PRIuS"", ptr, object_size, object, whole_object, whole_size); - if (VLOG_IS_ON(15)) { + if (VLOG_IS_ON(6)) { // log call stacks to help debug how come something is not a leak HeapProfileTable::AllocInfo alloc; - if (!heap_profile->FindAllocDetails(ptr, &alloc)) { - RAW_LOG(FATAL, "FindAllocDetails failed on ptr %p", ptr); - } + bool r = heap_profile->FindAllocDetails(ptr, &alloc); + r = r; // suppress compiler warning in non-debug mode + RAW_DCHECK(r, ""); // sanity RAW_LOG(INFO, "New live %p object's alloc stack:", ptr); for (int i = 0; i < alloc.stack_depth; ++i) { RAW_LOG(INFO, " @ %p", alloc.call_stack[i]); @@ -1397,7 +1386,7 @@ static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); live_objects_total += live_object_count; live_bytes_total += live_byte_count; if (live_object_count) { - RAW_VLOG(10, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", + RAW_VLOG(1, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", live_object_count, live_byte_count, name, name2); } } @@ -1419,7 +1408,7 @@ void HeapLeakChecker::IgnoreObject(const void* ptr) { if (!HaveOnHeapLocked(&ptr, &object_size)) { RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr); } else { - RAW_VLOG(10, "Going to ignore live object at %p of %"PRIuS" bytes", + RAW_VLOG(1, "Going to ignore live object at %p of %"PRIuS" bytes", ptr, object_size); if (ignored_objects == NULL) { ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap))) @@ -1445,7 +1434,7 @@ void HeapLeakChecker::UnIgnoreObject(const void* ptr) { if (object != ignored_objects->end() && object_size == object->second) { ignored_objects->erase(object); found = true; - RAW_VLOG(10, "Now not going to ignore live object " + RAW_VLOG(1, "Now not going to ignore live object " "at %p of %"PRIuS" bytes", ptr, object_size); } } @@ -1494,7 +1483,7 @@ void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) { const HeapProfileTable::Stats& t = heap_profile->total(); const size_t start_inuse_bytes = t.alloc_size - t.free_size; const size_t start_inuse_allocs = t.allocs - t.frees; - RAW_VLOG(10, "Start check \"%s\" profile: %"PRIuS" bytes " + RAW_VLOG(1, "Start check \"%s\" profile: %"PRIuS" bytes " "in %"PRIuS" objects", name_, start_inuse_bytes, start_inuse_allocs); } else { @@ -1623,7 +1612,7 @@ bool 
HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { { // Heap activity in other threads is paused during this function // (i.e. until we got all profile difference info). - SpinLockHolder hl(&heap_checker_lock); + SpinLockHolder l(&heap_checker_lock); if (heap_checker_on == false) { if (name_ != NULL) { // leak checking enabled when created the checker RAW_LOG(WARNING, "Heap leak checker got turned off after checker " @@ -1660,8 +1649,6 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { // Make the heap profile, other threads are locked out. HeapProfileTable::Snapshot* base = reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_); - RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); - pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; IgnoreAllLiveObjectsLocked(&a_local_var); leaks = heap_profile->NonLiveSnapshot(base); @@ -1681,28 +1668,23 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { initial_allocs, Allocator::alloc_count()); } } else if (FLAGS_heap_check_test_pointer_alignment) { - if (pointer_source_alignment == 1) { - RAW_LOG(WARNING, "--heap_check_test_pointer_alignment has no effect: " - "--heap_check_pointer_source_alignment was already set to 1"); + // Try with reduced pointer aligment + pointer_source_alignment = 1; + IgnoreAllLiveObjectsLocked(&a_local_var); + HeapProfileTable::Snapshot* leaks_wo_align = + heap_profile->NonLiveSnapshot(base); + pointer_source_alignment = kPointerSourceAlignment; + if (leaks_wo_align->Empty()) { + RAW_LOG(WARNING, "Found no leaks without pointer alignment: " + "something might be placing pointers at " + "unaligned addresses! This needs to be fixed."); } else { - // Try with reduced pointer aligment - pointer_source_alignment = 1; - IgnoreAllLiveObjectsLocked(&a_local_var); - HeapProfileTable::Snapshot* leaks_wo_align = - heap_profile->NonLiveSnapshot(base); - pointer_source_alignment = FLAGS_heap_check_pointer_source_alignment; - if (leaks_wo_align->Empty()) { - RAW_LOG(WARNING, "Found no leaks without pointer alignment: " - "something might be placing pointers at " - "unaligned addresses! This needs to be fixed."); - } else { - RAW_LOG(INFO, "Found leaks without pointer alignment as well: " - "unaligned pointers must not be the cause of leaks."); - RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " - "to diagnose the leaks."); - } - heap_profile->ReleaseSnapshot(leaks_wo_align); + RAW_LOG(INFO, "Found leaks without pointer alignment as well: " + "unaligned pointers must not be the cause of leaks."); + RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " + "to diagnose the leaks."); } + heap_profile->ReleaseSnapshot(leaks_wo_align); } if (leaks != NULL) { @@ -1759,7 +1741,7 @@ bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { SuggestPprofCommand(pprof_file); { - SpinLockHolder hl(&heap_checker_lock); + SpinLockHolder l(&heap_checker_lock); heap_profile->ReleaseSnapshot(leaks); Allocator::Free(pprof_file); } @@ -1892,7 +1874,6 @@ static bool internal_init_start_has_run = false; } // Set all flags - RAW_DCHECK(FLAGS_heap_check_pointer_source_alignment > 0, ""); if (FLAGS_heap_check == "minimal") { // The least we can check. 
FLAGS_heap_check_before_constructors = false; // from after main
@@ -2062,7 +2043,7 @@ bool HeapLeakChecker::NoGlobalLeaks() {
// we never delete or change main_heap_checker once it's set:
HeapLeakChecker* main_hc = GlobalChecker();
if (main_hc) {
- RAW_VLOG(10, "Checking for whole-program memory leaks");
+ RAW_VLOG(1, "Checking for whole-program memory leaks");
// The program is over, so it's safe to symbolize addresses (which
// requires a fork) because no serious work is expected to be done
// after this. Symbolizing is really useful -- knowing what
@@ -2184,7 +2165,7 @@ void HeapLeakChecker::BeforeConstructorsLocked() {
RAW_CHECK(heap_profile == NULL, "");
heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable)))
HeapProfileTable(&Allocator::Allocate, &Allocator::Free);
- RAW_VLOG(10, "Starting tracking the heap");
+ RAW_VLOG(1, "Starting tracking the heap");
heap_checker_on = true;
}
@@ -2348,7 +2329,7 @@ void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address,
value.start_address = AsInt(start_address);
value.max_depth = max_depth;
if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) {
- RAW_VLOG(10, "Disabling leak checking in stack traces "
+ RAW_VLOG(1, "Disabling leak checking in stack traces "
"under frame addresses between %p..%p",
start_address, end_address);
} else { // check that this is just a verbatim repetition
@@ -2371,7 +2352,7 @@ inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr,
const uintptr_t addr = AsInt(*ptr);
if (heap_profile->FindInsideAlloc(
*ptr, max_heap_object_size, ptr, object_size)) {
- RAW_VLOG(16, "Got pointer into %p at +%"PRIuPTR" offset",
+ RAW_VLOG(7, "Got pointer into %p at +%"PRIuPTR" offset",
*ptr, addr - AsInt(*ptr));
return true;
}
diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.cc b/third_party/tcmalloc/chromium/src/heap-profile-table.cc
index ecaf75f..66e4f20 100644
--- a/third_party/tcmalloc/chromium/src/heap-profile-table.cc
+++ b/third_party/tcmalloc/chromium/src/heap-profile-table.cc
@@ -99,7 +99,7 @@ const char HeapProfileTable::kFileExt[] = ".heap";
//----------------------------------------------------------------------
static const int kHashTableSize = 179999; // Size for table_.
-/*static*/ const int HeapProfileTable::kMaxStackDepth;
+/*static*/ const int HeapProfileTable::kMaxStackDepth = 32;
//----------------------------------------------------------------------
diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.h b/third_party/tcmalloc/chromium/src/heap-profile-table.h
index c9bee15..5403257 100644
--- a/third_party/tcmalloc/chromium/src/heap-profile-table.h
+++ b/third_party/tcmalloc/chromium/src/heap-profile-table.h
@@ -52,8 +52,8 @@ class HeapProfileTable {
// Extension to be used for heap profile files.
static const char kFileExt[];
- // Longest stack trace we record.
- static const int kMaxStackDepth = 32;
+ // Longest stack trace we record. Defined in the .cc file.
+ static const int kMaxStackDepth; // data types ---------------------------- diff --git a/third_party/tcmalloc/chromium/src/heap-profiler.cc b/third_party/tcmalloc/chromium/src/heap-profiler.cc index 3055f4ce..a1c643a9 100644 --- a/third_party/tcmalloc/chromium/src/heap-profiler.cc +++ b/third_party/tcmalloc/chromium/src/heap-profiler.cc @@ -524,9 +524,9 @@ extern "C" void HeapProfilerStart(const char* prefix) { filename_prefix[prefix_length] = '\0'; } -extern "C" int IsHeapProfilerRunning() { +extern "C" bool IsHeapProfilerRunning() { SpinLockHolder l(&heap_lock); - return is_on ? 1 : 0; // return an int, because C code doesn't have bool + return is_on; } extern "C" void HeapProfilerStop() { diff --git a/third_party/tcmalloc/chromium/src/internal_logging.h b/third_party/tcmalloc/chromium/src/internal_logging.h index 731b2d9..0cb9ba2 100644 --- a/third_party/tcmalloc/chromium/src/internal_logging.h +++ b/third_party/tcmalloc/chromium/src/internal_logging.h @@ -119,9 +119,7 @@ do { \ #ifndef NDEBUG #define ASSERT(cond) CHECK_CONDITION(cond) #else -#define ASSERT(cond) \ - do { \ - } while (0 && (cond)) +#define ASSERT(cond) ((void) 0) #endif // Print into buffer diff --git a/third_party/tcmalloc/chromium/src/malloc_extension.cc b/third_party/tcmalloc/chromium/src/malloc_extension.cc index c2f8b54..4ce262f 100644 --- a/third_party/tcmalloc/chromium/src/malloc_extension.cc +++ b/third_party/tcmalloc/chromium/src/malloc_extension.cc @@ -187,10 +187,7 @@ MallocExtension* MallocExtension::instance() { void MallocExtension::Register(MallocExtension* implementation) { perftools_pthread_once(&module_init, InitModule); // When running under valgrind, our custom malloc is replaced with - // valgrind's one and malloc extensions will not work. (Note: - // callers should be responsible for checking that they are the - // malloc that is really being run, before calling Register. This - // is just here as an extra sanity check.) + // valgrind's one and malloc extensions will not work. 
if (!RunningOnValgrind()) { current_instance = implementation; } diff --git a/third_party/tcmalloc/chromium/src/malloc_hook.cc b/third_party/tcmalloc/chromium/src/malloc_hook.cc index 4315b86..2a7f542 100644 --- a/third_party/tcmalloc/chromium/src/malloc_hook.cc +++ b/third_party/tcmalloc/chromium/src/malloc_hook.cc @@ -326,8 +326,8 @@ extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth, return 0; for (int i = 0; i < depth; ++i) { // stack[0] is our immediate caller if (InHookCaller(stack[i])) { - RAW_VLOG(10, "Found hooked allocator at %d: %p <- %p", - i, stack[i], stack[i+1]); + RAW_VLOG(4, "Found hooked allocator at %d: %p <- %p", + i, stack[i], stack[i+1]); i += 1; // skip hook caller frame depth -= i; // correct depth if (depth > max_depth) depth = max_depth; diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.cc b/third_party/tcmalloc/chromium/src/memory_region_map.cc index f6bed45..05fdc06 100644 --- a/third_party/tcmalloc/chromium/src/memory_region_map.cc +++ b/third_party/tcmalloc/chromium/src/memory_region_map.cc @@ -181,7 +181,7 @@ static MemoryRegionMap::RegionSetRep regions_rep; static bool recursive_insert = false; void MemoryRegionMap::Init(int max_stack_depth) { - RAW_VLOG(10, "MemoryRegionMap Init"); + RAW_VLOG(2, "MemoryRegionMap Init"); RAW_CHECK(max_stack_depth >= 0, ""); // Make sure we don't overflow the memory in region stacks: RAW_CHECK(max_stack_depth <= kMaxStackDepth, @@ -192,7 +192,7 @@ void MemoryRegionMap::Init(int max_stack_depth) { if (client_count_ > 1) { // not first client: already did initialization-proper Unlock(); - RAW_VLOG(10, "MemoryRegionMap Init increment done"); + RAW_VLOG(2, "MemoryRegionMap Init increment done"); return; } // Set our hooks and make sure no other hooks existed: @@ -217,17 +217,17 @@ void MemoryRegionMap::Init(int max_stack_depth) { // recursive_insert = false; as InsertRegionLocked will also construct // regions_ on demand for us. 
Unlock(); - RAW_VLOG(10, "MemoryRegionMap Init done"); + RAW_VLOG(2, "MemoryRegionMap Init done"); } bool MemoryRegionMap::Shutdown() { - RAW_VLOG(10, "MemoryRegionMap Shutdown"); + RAW_VLOG(2, "MemoryRegionMap Shutdown"); Lock(); RAW_CHECK(client_count_ > 0, ""); client_count_ -= 1; if (client_count_ != 0) { // not last client; need not really shutdown Unlock(); - RAW_VLOG(10, "MemoryRegionMap Shutdown decrement done"); + RAW_VLOG(2, "MemoryRegionMap Shutdown decrement done"); return true; } CheckMallocHooks(); // we assume no other hooks @@ -244,7 +244,7 @@ bool MemoryRegionMap::Shutdown() { RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used"); } Unlock(); - RAW_VLOG(10, "MemoryRegionMap Shutdown done"); + RAW_VLOG(2, "MemoryRegionMap Shutdown done"); return deleted_arena; } @@ -336,7 +336,7 @@ bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top, Lock(); const Region* region = DoFindRegionLocked(stack_top); if (region != NULL) { - RAW_VLOG(10, "Stack at %p is inside region %p..%p", + RAW_VLOG(2, "Stack at %p is inside region %p..%p", reinterpret_cast<void*>(stack_top), reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); @@ -361,7 +361,7 @@ MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { } inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { - RAW_VLOG(12, "Inserting region %p..%p from %p", + RAW_VLOG(4, "Inserting region %p..%p from %p", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr), reinterpret_cast<void*>(region.caller())); @@ -385,10 +385,10 @@ inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { // This inserts and allocates permanent storage for region // and its call stack data: it's safe to do it now: regions_->insert(region); - RAW_VLOG(12, "Inserted region %p..%p :", + RAW_VLOG(4, "Inserted region %p..%p :", reinterpret_cast<void*>(region.start_addr), reinterpret_cast<void*>(region.end_addr)); - if (VLOG_IS_ON(12)) LogAllLocked(); + if (VLOG_IS_ON(4)) LogAllLocked(); } // These variables are local to MemoryRegionMap::InsertRegionLocked() @@ -425,7 +425,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { // and taken into account when the recursion unwinds. 
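// A minimal sketch of the save-and-replay pattern described above, with
// simplified control flow (illustrative only, not the exact code below):
//
//   if (recursive_insert) {                // re-entered from the insert below
//     saved_regions[saved_regions_count++] = region;  // park it for later
//   } else {
//     recursive_insert = true;             // guard against re-entry
//     regions_->insert(region);            // may allocate and re-enter above
//     recursive_insert = false;
//     while (saved_regions_count > 0)      // replay the parked inserts
//       regions_->insert(saved_regions[--saved_regions_count]);
//   }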
// Do the insert:
if (recursive_insert) { // recursion: save in saved_regions
- RAW_VLOG(12, "Saving recursive insert of region %p..%p from %p",
+ RAW_VLOG(4, "Saving recursive insert of region %p..%p from %p",
reinterpret_cast<void*>(region.start_addr),
reinterpret_cast<void*>(region.end_addr),
reinterpret_cast<void*>(region.caller()));
@@ -436,7 +436,7 @@ inline void MemoryRegionMap::InsertRegionLocked(const Region& region) {
saved_regions[saved_regions_count++] = region;
} else { // not a recursive call
if (regions_ == NULL) { // init regions_
- RAW_VLOG(12, "Initializing region set");
+ RAW_VLOG(4, "Initializing region set");
regions_ = regions_rep.region_set();
recursive_insert = true;
new(regions_) RegionSet();
@@ -470,7 +470,7 @@ void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) {
max_stack_depth_, kStripFrames + 1) : 0;
region.set_call_stack_depth(depth); // record stack info fully
- RAW_VLOG(10, "New global region %p..%p from %p",
+ RAW_VLOG(2, "New global region %p..%p from %p",
reinterpret_cast<void*>(region.start_addr),
reinterpret_cast<void*>(region.end_addr),
reinterpret_cast<void*>(region.caller()));
@@ -499,7 +499,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
// An exact match, so it's safe to remove.
--saved_regions_count;
--put_pos;
- RAW_VLOG(10, ("Insta-Removing saved region %p..%p; "
+ RAW_VLOG(2, ("Insta-Removing saved region %p..%p; "
"now have %d saved regions"),
reinterpret_cast<void*>(start_addr),
reinterpret_cast<void*>(end_addr),
@@ -523,7 +523,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
uintptr_t start_addr = reinterpret_cast<uintptr_t>(start);
uintptr_t end_addr = start_addr + size;
// subtract start_addr, end_addr from all the regions
- RAW_VLOG(10, "Removing global region %p..%p; have %"PRIuS" regions",
+ RAW_VLOG(2, "Removing global region %p..%p; have %"PRIuS" regions",
reinterpret_cast<void*>(start_addr),
reinterpret_cast<void*>(end_addr),
regions_->size());
@@ -533,12 +533,12 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
for (RegionSet::iterator region = regions_->lower_bound(sample);
region != regions_->end() && region->start_addr < end_addr;
/*noop*/) {
- RAW_VLOG(13, "Looking at region %p..%p",
+ RAW_VLOG(5, "Looking at region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
if (start_addr <= region->start_addr &&
region->end_addr <= end_addr) { // full deletion
- RAW_VLOG(12, "Deleting region %p..%p",
+ RAW_VLOG(4, "Deleting region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
RegionSet::iterator d = region;
@@ -547,7 +547,7 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
continue;
} else if (region->start_addr < start_addr &&
end_addr < region->end_addr) { // cutting-out split
- RAW_VLOG(12, "Splitting region %p..%p in two",
+ RAW_VLOG(4, "Splitting region %p..%p in two",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr));
// Make another region for the start portion:
@@ -560,13 +560,13 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) {
const_cast<Region&>(*region).set_start_addr(end_addr);
} else if (end_addr > region->start_addr &&
start_addr <= region->start_addr) { // cut from start
- RAW_VLOG(12, "Start-chopping region %p..%p",
+ RAW_VLOG(4, "Start-chopping region %p..%p",
reinterpret_cast<void*>(region->start_addr),
reinterpret_cast<void*>(region->end_addr)); const_cast<Region&>(*region).set_start_addr(end_addr); } else if (start_addr > region->start_addr && start_addr < region->end_addr) { // cut from end - RAW_VLOG(12, "End-chopping region %p..%p", + RAW_VLOG(4, "End-chopping region %p..%p", reinterpret_cast<void*>(region->start_addr), reinterpret_cast<void*>(region->end_addr)); // Can't just modify region->end_addr (it's the sorting key): @@ -582,11 +582,11 @@ void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { } ++region; } - RAW_VLOG(12, "Removed region %p..%p; have %"PRIuS" regions", + RAW_VLOG(4, "Removed region %p..%p; have %"PRIuS" regions", reinterpret_cast<void*>(start_addr), reinterpret_cast<void*>(end_addr), regions_->size()); - if (VLOG_IS_ON(12)) LogAllLocked(); + if (VLOG_IS_ON(4)) LogAllLocked(); Unlock(); } @@ -596,7 +596,7 @@ void MemoryRegionMap::MmapHook(const void* result, int fd, off_t offset) { // TODO(maxim): replace all 0x%"PRIxS" by %p when RAW_VLOG uses a safe // snprintf reimplementation that does not malloc to pretty-print NULL - RAW_VLOG(10, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " + RAW_VLOG(2, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " "prot %d flags %d fd %d offs %lld", reinterpret_cast<uintptr_t>(result), size, reinterpret_cast<uint64>(start), prot, flags, fd, @@ -607,7 +607,7 @@ void MemoryRegionMap::MmapHook(const void* result, } void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) { - RAW_VLOG(10, "MUnmap of %p %"PRIuS"", ptr, size); + RAW_VLOG(2, "MUnmap of %p %"PRIuS"", ptr, size); if (size != 0) { RecordRegionRemoval(ptr, size); } @@ -617,7 +617,7 @@ void MemoryRegionMap::MremapHook(const void* result, const void* old_addr, size_t old_size, size_t new_size, int flags, const void* new_addr) { - RAW_VLOG(10, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " + RAW_VLOG(2, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " "to %"PRIuS" flags %d new_addr=0x%"PRIxPTR, (uintptr_t)result, (uintptr_t)old_addr, old_size, new_size, flags, @@ -631,7 +631,7 @@ void MemoryRegionMap::MremapHook(const void* result, extern "C" void* __sbrk(ptrdiff_t increment); // defined in libc void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) { - RAW_VLOG(10, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); + RAW_VLOG(2, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); if (result != reinterpret_cast<void*>(-1)) { if (increment > 0) { void* new_end = sbrk(0); diff --git a/third_party/tcmalloc/chromium/src/page_heap.cc b/third_party/tcmalloc/chromium/src/page_heap.cc index a256b64..31130e9 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.cc +++ b/third_party/tcmalloc/chromium/src/page_heap.cc @@ -61,65 +61,50 @@ PageHeap::PageHeap() } } -// Returns the minimum number of pages necessary to ensure that an -// allocation of size n can be aligned to the given alignment. -static Length AlignedAllocationSize(Length n, size_t alignment) { - ASSERT(alignment >= kPageSize); - return n + tcmalloc::pages(alignment - kPageSize); -} - -Span* PageHeap::New(Length n, size_t sc, size_t align) { +Span* PageHeap::New(Length n) { ASSERT(Check()); ASSERT(n > 0); - if (align < kPageSize) { - align = kPageSize; - } - - Length aligned_size = AlignedAllocationSize(n, align); - // Find first size >= n that has a non-empty list - for (Length s = aligned_size; s < kMaxPages; s++) { + for (Length s = n; s < kMaxPages; s++) { Span* ll = &free_[s].normal; // If we're lucky, ll is non-empty, meaning it has a suitable span. 
if (!DLL_IsEmpty(ll)) { ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST); - return Carve(ll->next, n, sc, align); + return Carve(ll->next, n); } // Alternatively, maybe there's a usable returned span. ll = &free_[s].returned; if (!DLL_IsEmpty(ll)) { ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST); - return Carve(ll->next, n, sc, align); + return Carve(ll->next, n); } // Still no luck, so keep looking in larger classes. } - Span* result = AllocLarge(n, sc, align); + Span* result = AllocLarge(n); if (result != NULL) return result; // Grow the heap and try again - if (!GrowHeap(aligned_size)) { + if (!GrowHeap(n)) { ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); ASSERT(Check()); return NULL; } - return AllocLarge(n, sc, align); + return AllocLarge(n); } -Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) { - // Find the best span (closest to n in size). +Span* PageHeap::AllocLarge(Length n) { + // find the best span (closest to n in size). // The following loops implements address-ordered best-fit. Span *best = NULL; - Length aligned_size = AlignedAllocationSize(n, align); - // Search through normal list for (Span* span = large_.normal.next; span != &large_.normal; span = span->next) { - if (span->length >= aligned_size) { + if (span->length >= n) { if ((best == NULL) || (span->length < best->length) || ((span->length == best->length) && (span->start < best->start))) { @@ -133,7 +118,7 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) { for (Span* span = large_.returned.next; span != &large_.returned; span = span->next) { - if (span->length >= aligned_size) { + if (span->length >= n) { if ((best == NULL) || (span->length < best->length) || ((span->length == best->length) && (span->start < best->start))) { @@ -143,18 +128,19 @@ Span* PageHeap::AllocLarge(Length n, size_t sc, size_t align) { } } - return best == NULL ? NULL : Carve(best, n, sc, align); + return best == NULL ? NULL : Carve(best, n); } Span* PageHeap::Split(Span* span, Length n) { ASSERT(0 < n); ASSERT(n < span->length); - ASSERT((span->location != Span::IN_USE) || span->sizeclass == 0); + ASSERT(span->location == Span::IN_USE); + ASSERT(span->sizeclass == 0); Event(span, 'T', n); const int extra = span->length - n; Span* leftover = NewSpan(span->start + n, extra); - leftover->location = span->location; + ASSERT(leftover->location == Span::IN_USE); Event(leftover, 'U', extra); RecordSpan(leftover); pagemap_.set(span->start + n - 1, span); // Update map from pageid to span @@ -175,71 +161,43 @@ void PageHeap::DecommitSpan(Span* span) { stats_.committed_bytes -= span->length << kPageShift; } -Span* PageHeap::Carve(Span* span, Length n, size_t sc, size_t align) { +Span* PageHeap::Carve(Span* span, Length n) { ASSERT(n > 0); ASSERT(span->location != Span::IN_USE); - ASSERT(align >= kPageSize); const int old_location = span->location; - - Length align_pages = align >> kPageShift; RemoveFromFreeList(span); - - if (span->start & (align_pages - 1)) { - Length skip_for_alignment = align_pages - (span->start & (align_pages - 1)); - Span* aligned = Split(span, skip_for_alignment); - - // The next span of |span| was just splitted -- no need to - // coalesce them. The previous span of |span| was not previously coalesced - // with |span|, i.e. is NULL or has location other than |old_location|. 
- const PageID p = span->start; - const Length n = span->length; - Span* prev = GetDescriptor(p-1); - ASSERT(prev == NULL || - prev->location == Span::IN_USE || - prev->location != old_location); - PrependToFreeList(span); // Skip coalescing - no candidates possible - span = aligned; - } + span->location = Span::IN_USE; + Event(span, 'A', n); const int extra = span->length - n; ASSERT(extra >= 0); if (extra > 0) { - Span* leftover = Split(span, n); + Span* leftover = NewSpan(span->start + n, extra); + leftover->location = old_location; + Event(leftover, 'S', extra); + RecordSpan(leftover); + // The previous span of |leftover| was just splitted -- no need to // coalesce them. The next span of |leftover| was not previously coalesced - // with |span|, i.e. is NULL or has location other than |old_location|. + // with |span|, i.e. is NULL or has got location other than |old_location|. const PageID p = leftover->start; const Length len = leftover->length; Span* next = GetDescriptor(p+len); ASSERT (next == NULL || next->location == Span::IN_USE || next->location != leftover->location); - PrependToFreeList(leftover); - } - + PrependToFreeList(leftover); // Skip coalescing - no candidates possible + span->length = n; + pagemap_.set(span->start + n - 1, span); + } ASSERT(Check()); if (old_location == Span::ON_RETURNED_FREELIST) { // We need to recommit this address space. CommitSpan(span); } - - span->location = Span::IN_USE; - span->sizeclass = sc; - Event(span, 'A', n); - - // Cache sizeclass info eagerly. Locking is not necessary. - // (Instead of being eager, we could just replace any stale info - // about this span, but that seems to be no better in practice.) - CacheSizeClass(span->start, sc); - - if (sc != kLargeSizeClass) { - for (Length i = 1; i < n; i++) { - pagemap_.set(span->start + i, span); - CacheSizeClass(span->start + i, sc); - } - } - + ASSERT(span->location == Span::IN_USE); + ASSERT(span->length == n); ASSERT(stats_.unmapped_bytes+ stats_.committed_bytes==stats_.system_bytes); return span; } @@ -421,6 +379,18 @@ Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { return released_pages; } +void PageHeap::RegisterSizeClass(Span* span, size_t sc) { + // Associate span object with all interior pages as well + ASSERT(span->location == Span::IN_USE); + ASSERT(GetDescriptor(span->start) == span); + ASSERT(GetDescriptor(span->start+span->length-1) == span); + Event(span, 'C', sc); + span->sizeclass = sc; + for (Length i = 1; i < span->length-1; i++) { + pagemap_.set(span->start+i, span); + } +} + static double MB(uint64_t bytes) { return bytes / 1048576.0; } diff --git a/third_party/tcmalloc/chromium/src/page_heap.h b/third_party/tcmalloc/chromium/src/page_heap.h index 63f21b2..52acedb 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.h +++ b/third_party/tcmalloc/chromium/src/page_heap.h @@ -101,49 +101,21 @@ class PERFTOOLS_DLL_DECL PageHeap { public: PageHeap(); - // Allocate a run of "n" pages. Returns NULL if out of memory. - // Caller should not pass "n == 0" -- instead, n should have been - // rounded up already. The span will be used for allocating objects - // with the specifled sizeclass sc (sc must be zero for large - // objects). The first page of the span will be aligned to the value - // specified by align, which must be a power of two. - Span* New(Length n, size_t sc, size_t align); + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. 
+ Span* New(Length n); // Delete the span "[p, p+n-1]". // REQUIRES: span was returned by earlier call to New() and // has not yet been deleted. void Delete(Span* span); - // Gets either the size class of addr, if it is a small object, or it's span. - // Return: - // if addr is invalid: - // leave *out_sc and *out_span unchanged and return false; - // if addr is valid and has a small size class: - // *out_sc = the size class - // *out_span = <undefined> - // return true - // if addr is valid and has a large size class: - // *out_sc = kLargeSizeClass - // *out_span = the span pointer - // return true - bool GetSizeClassOrSpan(void* addr, size_t* out_sc, Span** out_span) { - const PageID p = reinterpret_cast<uintptr_t>(addr) >> kPageShift; - size_t cl = GetSizeClassIfCached(p); - Span* span = NULL; - - if (cl != kLargeSizeClass) { - ASSERT(cl == GetDescriptor(p)->sizeclass); - } else { - span = GetDescriptor(p); - if (!span) { - return false; - } - cl = span->sizeclass; - } - *out_span = span; - *out_sc = cl; - return true; - } + // Mark an allocated span as being used for small objects of the + // specified size-class. + // REQUIRES: span was returned by an earlier call to New() + // and has not yet been deleted. + void RegisterSizeClass(Span* span, size_t sc); // Split an allocated span into two spans: one of length "n" pages // followed by another span of length "span->length - n" pages. @@ -151,29 +123,14 @@ class PERFTOOLS_DLL_DECL PageHeap { // Returns a pointer to the second span. // // REQUIRES: "0 < n < span->length" - // REQUIRES: a) the span is free or b) sizeclass == 0 + // REQUIRES: span->location == IN_USE + // REQUIRES: span->sizeclass == 0 Span* Split(Span* span, Length n); // Return the descriptor for the specified page. Returns NULL if // this PageID was not allocated previously. inline Span* GetDescriptor(PageID p) const { - Span* ret = reinterpret_cast<Span*>(pagemap_.get(p)); -#ifndef NDEBUG - if (ret != NULL && ret->location == Span::IN_USE) { - size_t cl = GetSizeClassIfCached(p); - // Three cases: - // - The object is not cached - // - The object is cached correctly - // - It is a large object and we're not looking at the first - // page. This happens in coalescing. - ASSERT(cl == kLargeSizeClass || cl == ret->sizeclass || - (ret->start != p && ret->sizeclass == kLargeSizeClass)); - // If the object is sampled, it must have be kLargeSizeClass - ASSERT(ret->sizeclass == kLargeSizeClass || !ret->sample); - } -#endif - - return ret; + return reinterpret_cast<Span*>(pagemap_.get(p)); } // Dump state to stderr @@ -277,7 +234,7 @@ class PERFTOOLS_DLL_DECL PageHeap { // length exactly "n" and mark it as non-free so it can be returned // to the client. After all that, decrease free_pages_ by n and // return span. - Span* Carve(Span* span, Length n, size_t sc, size_t align); + Span* Carve(Span* span, Length n); void RecordSpan(Span* span) { pagemap_.set(span->start, span); @@ -288,7 +245,7 @@ class PERFTOOLS_DLL_DECL PageHeap { // Allocate a large span of length == n. If successful, returns a // span of exactly the specified length. Else, returns NULL. - Span* AllocLarge(Length n, size_t sc, size_t align); + Span* AllocLarge(Length n); // Coalesce span with neighboring spans if possible, prepend to // appropriate free list, and adjust stats. 
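The page_heap.cc and page_heap.h hunks above back out the aligned-span allocation path: New() and Carve() lose their sizeclass and alignment parameters, AllocLarge() no longer over-allocates for alignment, and size-class registration returns as the separate RegisterSizeClass() step. A minimal sketch of the arithmetic the removed path relied on, with simplified types and an assumed 8K page size (the real kPageShift may differ); align must be a power of two that is a multiple of kPageSize:

#include <cstddef>
#include <cstdint>

typedef size_t Length;                // a span length, counted in pages
static const size_t kPageShift = 13;  // assumption: 8K pages
static const size_t kPageSize = 1 << kPageShift;

// Pages to request so that an n-page, align-byte-aligned run can always be
// carved out of the resulting span, wherever that span happens to start.
Length AlignedAllocationSize(Length n, size_t align) {
  return n + ((align - kPageSize) >> kPageShift);
}

// Pages to skip at the front of a span starting at page number `start` so
// that the remainder begins on an align-byte boundary.
Length SkipForAlignment(uintptr_t start, size_t align) {
  const Length align_pages = align >> kPageShift;
  const uintptr_t misfit = start & (align_pages - 1);
  return misfit == 0 ? 0 : align_pages - misfit;
}

In the worst case a span starts one page past an alignment boundary, so align/kPageSize - 1 extra pages are always enough to reach the next boundary, which is exactly what AlignedAllocationSize reserves.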
diff --git a/third_party/tcmalloc/chromium/src/page_heap_allocator.h b/third_party/tcmalloc/chromium/src/page_heap_allocator.h index 3f75939..20e1ab1 100644 --- a/third_party/tcmalloc/chromium/src/page_heap_allocator.h +++ b/third_party/tcmalloc/chromium/src/page_heap_allocator.h @@ -44,7 +44,7 @@ class PageHeapAllocator { // allocated and their constructors might not have run by the time some // other static variable tries to allocate memory. void Init() { - ASSERT(sizeof(T) <= kAllocIncrement); + ASSERT(kAlignedSize <= kAllocIncrement); inuse_ = 0; free_area_ = NULL; free_avail_ = 0; @@ -60,9 +60,8 @@ class PageHeapAllocator { result = free_list_; free_list_ = *(reinterpret_cast<void**>(result)); } else { - if (free_avail_ < sizeof(T)) { - // Need more room. We assume that MetaDataAlloc returns - // suitably aligned memory. + if (free_avail_ < kAlignedSize) { + // Need more room free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); if (free_area_ == NULL) { CRASH("FATAL ERROR: Out of memory trying to allocate internal " @@ -72,8 +71,8 @@ class PageHeapAllocator { free_avail_ = kAllocIncrement; } result = free_area_; - free_area_ += sizeof(T); - free_avail_ -= sizeof(T); + free_area_ += kAlignedSize; + free_avail_ -= kAlignedSize; } inuse_++; return reinterpret_cast<T*>(result); @@ -91,6 +90,10 @@ class PageHeapAllocator { // How much to allocate from system at a time static const int kAllocIncrement = 128 << 10; + // Aligned size of T + static const size_t kAlignedSize + = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); + // Free area from which to carve new objects char* free_area_; size_t free_avail_; diff --git a/third_party/tcmalloc/chromium/src/pprof b/third_party/tcmalloc/chromium/src/pprof index 8aff380..fec0c9e 100755 --- a/third_party/tcmalloc/chromium/src/pprof +++ b/third_party/tcmalloc/chromium/src/pprof @@ -89,10 +89,11 @@ my %obj_tool_map = ( ); my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local my $GV = "gv"; -my $KCACHEGRIND = "kcachegrind"; my $PS2PDF = "ps2pdf"; # These are used for dynamic profiles -my $URL_FETCHER = "curl -s"; +my $WGET = "wget"; +my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets +my $CURL = "curl"; # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -106,12 +107,6 @@ my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; -# These are the web pages that can be named on the command line. -# All the alternatives must begin with /. -my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . - "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . 
- "$FILTEREDPROFILE_PAGE)"; - # default binary name my $UNKNOWN_BINARY = "(unknown)"; @@ -180,14 +175,12 @@ Output type: --text Generate text report --callgrind Generate callgrind format to stdout --gv Generate Postscript and display - --web Generate SVG and display --list=<regexp> Generate source listing of matching routines --disasm=<regexp> Generate disassembly of matching routines --symbols Print demangled symbol names found at given addresses --dot Generate DOT file to stdout --ps Generate Postcript to stdout --pdf Generate PDF to stdout - --svg Generate SVG to stdout --gif Generate GIF to stdout --raw Generate symbolized pprof data (useful with remote fetch) @@ -230,8 +223,6 @@ pprof /bin/ls ls.prof Enters "interactive" mode pprof --text /bin/ls ls.prof Outputs one line per procedure -pprof --web /bin/ls ls.prof - Displays annotated call-graph in web browser pprof --gv /bin/ls ls.prof Displays annotated call-graph via 'gv' pprof --gv --focus=Mutex /bin/ls ls.prof @@ -242,9 +233,6 @@ pprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() pprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() - -pprof http://localhost:1234/ - Enters "interactive" mode pprof --text localhost:1234 Outputs one line per procedure for localhost:1234 pprof --raw localhost:1234 > ./local.raw @@ -304,12 +292,10 @@ sub Init() { $main::opt_disasm = ""; $main::opt_symbols = 0; $main::opt_gv = 0; - $main::opt_web = 0; $main::opt_dot = 0; $main::opt_ps = 0; $main::opt_pdf = 0; $main::opt_gif = 0; - $main::opt_svg = 0; $main::opt_raw = 0; $main::opt_nodecount = 80; @@ -344,16 +330,13 @@ sub Init() { # Are we using $SYMBOL_PAGE? $main::use_symbol_page = 0; - # Files returned by TempName. - %main::tempnames = (); - # Type of profile we are dealing with # Supported types: - # cpu - # heap - # growth - # contention - $main::profile_type = ''; # Empty type means "unknown" + # cpu + # heap + # growth + # contention + $main::profile_type = ''; # Empty type means "unknown" GetOptions("help!" => \$main::opt_help, "version!" => \$main::opt_version, @@ -372,11 +355,9 @@ sub Init() { "disasm=s" => \$main::opt_disasm, "symbols!" => \$main::opt_symbols, "gv!" => \$main::opt_gv, - "web!" => \$main::opt_web, "dot!" => \$main::opt_dot, "ps!" => \$main::opt_ps, "pdf!" => \$main::opt_pdf, - "svg!" => \$main::opt_svg, "gif!" => \$main::opt_gif, "raw!" => \$main::opt_raw, "interactive!" => \$main::opt_interactive, @@ -399,8 +380,8 @@ sub Init() { "tools=s" => \$main::opt_tools, "test!" => \$main::opt_test, "debug!" => \$main::opt_debug, - # Undocumented flags used only by unittests: - "test_stride=i" => \$main::opt_test_stride, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, ) || usage("Invalid option(s)"); # Deal with the standard --help and --version @@ -452,11 +433,9 @@ sub Init() { ($main::opt_disasm eq '' ? 0 : 1) + ($main::opt_symbols == 0 ? 
0 : 1) + $main::opt_gv + - $main::opt_web + $main::opt_dot + $main::opt_ps + $main::opt_pdf + - $main::opt_svg + $main::opt_gif + $main::opt_raw + $main::opt_interactive + @@ -531,6 +510,20 @@ sub Init() { ConfigureObjTools($main::prog) } + # Check what flags our commandline utilities support + if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) { + my @lines = <TFILE>; + if (grep(/unrecognized/, @lines) > 0) { + # grep found 'unrecognized' token from WGET, clear WGET flags + $WGET_FLAGS = ""; + } + close(TFILE); + } + # TODO(csilvers): check all the other binaries and objtools to see + # if they are installed and what flags they support, and store that + # in a data structure here, rather than scattering these tests about. + # Then, ideally, rewrite code to use wget OR curl OR GET or ... + # Break the opt_list_prefix into the prefix_list array @prefix_list = split (',', $main::opt_lib_prefix); @@ -641,24 +634,9 @@ sub Main() { } else { if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), ""); - } elsif ($main::opt_web) { - my $tmp = TempName($main::next_tmpfile, "svg"); - RunWeb($tmp); - # The command we run might hand the file name off - # to an already running browser instance and then exit. - # Normally, we'd remove $tmp on exit (right now), - # but fork a child to remove $tmp a little later, so that the - # browser has time to load it first. - delete $main::tempnames{$tmp}; - if (fork() == 0) { - sleep 5; - unlink($tmp); - exit(0); - } + RunGV(PsTempName($main::next_tmpfile), ""); } } else { - cleanup(); exit(1); } } @@ -689,7 +667,7 @@ sub ReadlineMightFail { sub RunGV { my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background + my $bg = shift; # "" or " &" if we should run in background if (!system("$GV --version >/dev/null 2>&1")) { # Options using double dash are supported by this gv version. # Also, turn on noantialias to better handle bug in gv for @@ -704,41 +682,6 @@ sub RunGV { } } -sub RunWeb { - my $fname = shift; - print STDERR "Loading web page file:///$fname\n"; - - if (`uname` =~ /Darwin/) { - # OS X: open will use standard preference for SVG files. - system("/usr/bin/open", $fname); - return; - } - - # Some kind of Unix; try generic symlinks, then specific browsers. - # (Stop once we find one.) - # Works best if the browser is already running. - my @alt = ( - "/etc/alternatives/gnome-www-browser", - "/etc/alternatives/x-www-browser", - "google-chrome", - "firefox", - ); - foreach my $b (@alt) { - if (system($b, $fname) == 0) { - return; - } - } - - print STDERR "Could not load web browser.\n"; -} - -sub RunKcachegrind { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n"; - system("$KCACHEGRIND " . $fname . $bg); -} - ##### Interactive helper routines ##### @@ -746,11 +689,10 @@ sub InteractiveMode { $| = 1; # Make output unbuffered for interactive mode my ($orig_profile, $symbols, $libs, $total) = @_; - print STDERR "Welcome to pprof! For help, type 'help'.\n"; + print "Welcome to pprof! For help, type 'help'.\n"; - # Use ReadLine if it's installed and input comes from a console. - if ( -t STDIN && - !ReadlineMightFail() && + # Use ReadLine if it's installed. 
+ if ( !ReadlineMightFail() && defined(eval {require Term::ReadLine}) ) { my $term = new Term::ReadLine 'pprof'; while ( defined ($_ = $term->readline('(pprof) '))) { @@ -761,7 +703,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print STDERR "(pprof) "; + print "(pprof) "; $_ = <STDIN>; last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -785,13 +727,13 @@ sub InteractiveCommand { my($orig_profile, $symbols, $libs, $total, $command) = @_; $_ = $command; # just to make future m//'s easier if (!defined($_)) { - print STDERR "\n"; + print "\n"; return 0; } - if (m/^\s*quit/) { + if (m/^ *quit/) { return 0; } - if (m/^\s*help/) { + if (m/^ *help/) { InteractiveHelpMessage(); return 1; } @@ -803,7 +745,7 @@ sub InteractiveCommand { $main::opt_gv = 0; $main::opt_cum = 0; - if (m/^\s*(text|top)(\d*)\s*(.*)/) { + if (m/^ *(text|top)(\d*) *(.*)/) { $main::opt_text = 1; my $line_limit = ($2 ne "") ? int($2) : 10; @@ -822,24 +764,7 @@ sub InteractiveCommand { PrintText($symbols, $flat, $cumulative, $total, $line_limit); return 1; } - if (m/^\s*callgrind\s*([^ \n]*)/) { - $main::opt_callgrind = 1; - - # Get derived profiles - my $calls = ExtractCalls($symbols, $orig_profile); - my $filename = $1; - if ( $1 eq '' ) { - $filename = TempName($main::next_tmpfile, "callgrind"); - } - PrintCallgrind($calls, $filename); - if ( $1 eq '' ) { - RunKcachegrind($filename, " & "); - $main::next_tmpfile++; - } - - return 1; - } - if (m/^\s*list\s*(.+)/) { + if (m/^ *list *(.+)/) { $main::opt_list = 1; my $routine; @@ -856,7 +781,7 @@ sub InteractiveCommand { PrintListing($libs, $flat, $cumulative, $routine); return 1; } - if (m/^\s*disasm\s*(.+)/) { + if (m/^ *disasm *(.+)/) { $main::opt_disasm = 1; my $routine; @@ -874,18 +799,12 @@ sub InteractiveCommand { PrintDisassembly($libs, $flat, $cumulative, $routine, $total); return 1; } - if (m/^\s*(gv|web)\s*(.*)/) { - $main::opt_gv = 0; - $main::opt_web = 0; - if ($1 eq "gv") { - $main::opt_gv = 1; - } elsif ($1 eq "web") { - $main::opt_web = 1; - } + if (m/^ *gv *(.*)/) { + $main::opt_gv = 1; my $focus; my $ignore; - ($focus, $ignore) = ParseInteractiveArgs($2); + ($focus, $ignore) = ParseInteractiveArgs($1); # Process current profile to account for various settings my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); @@ -896,19 +815,11 @@ sub InteractiveCommand { my $cumulative = CumulativeProfile($reduced); if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), " &"); - } elsif ($main::opt_web) { - RunWeb(TempName($main::next_tmpfile, "svg")); - } + RunGV(PsTempName($main::next_tmpfile), " &"); $main::next_tmpfile++; } return 1; } - if (m/^\s*$/) { - return 1; - } - print STDERR "Unknown command: try 'help'.\n"; return 1; } @@ -945,7 +856,7 @@ sub ProcessProfile { } sub InteractiveHelpMessage { - print STDERR <<ENDOFHELP; + print <<ENDOFHELP; Interactive pprof mode Commands: @@ -957,14 +868,6 @@ Commands: the "focus" regular expression matches a routine name on the stack trace. - web - web [focus] [-ignore1] [-ignore2] - Like GV, but displays profile in your web browser instead of using - Ghostview. Works best if your web browser is already running. - To change the browser that gets used: - On Linux, set the /etc/alternatives/gnome-www-browser symlink. - On OS X, change the Finder association for SVG files. 
- list [routine_regexp] [-ignore1] [-ignore2] Show source listing of routines whose names match "routine_regexp" @@ -979,10 +882,6 @@ Commands: Show disassembly of routines whose names match "routine_regexp", annotated with sample counts. - callgrind - callgrind [filename] - Generates callgrind file. If no filename is given, kcachegrind is called. - help - This listing quit or ^D - End pprof @@ -1014,19 +913,16 @@ sub ParseInteractiveArgs { } } if ($ignore ne "") { - print STDERR "Ignoring samples in call stacks that match '$ignore'\n"; + print "Ignoring samples in call stacks that match '$ignore'\n"; } return ($focus, $ignore); } ##### Output code ##### -sub TempName { +sub PsTempName { my $fnum = shift; - my $ext = shift; - my $file = "$main::tmpfile_ps.$fnum.$ext"; - $main::tempnames{$file} = 1; - return $file; + return "$main::tmpfile_ps" . "." . "$fnum" . ".ps"; } # Print profile data in packed binary format (64-bit) to standard out @@ -1149,15 +1045,7 @@ sub PrintText { # Print the call graph in a way that's suiteable for callgrind. sub PrintCallgrind { my $calls = shift; - my $filename; - if ($main::opt_interactive) { - $filename = shift; - print STDERR "Writing callgrind file to '$filename'.\n" - } else { - $filename = "&STDOUT"; - } - open(CG, ">".$filename ); - printf CG ("events: Hits\n\n"); + printf("events: Hits\n\n"); foreach my $call ( map { $_->[0] } sort { $a->[1] cmp $b ->[1] || $a->[2] <=> $b->[2] } @@ -1169,15 +1057,13 @@ sub PrintCallgrind { my ( $caller_file, $caller_line, $caller_function, $callee_file, $callee_line, $callee_function ) = ( $1, $2, $3, $5, $6, $7 ); - - - printf CG ("fl=$caller_file\nfn=$caller_function\n"); + printf("fl=$caller_file\nfn=$caller_function\n"); if (defined $6) { - printf CG ("cfl=$callee_file\n"); - printf CG ("cfn=$callee_function\n"); - printf CG ("calls=$count $callee_line\n"); + printf("cfl=$callee_file\n"); + printf("cfn=$callee_function\n"); + printf("calls=$count $callee_line\n"); } - printf CG ("$caller_line $count\n\n"); + printf("$caller_line $count\n\n"); } } @@ -1499,7 +1385,7 @@ sub SourceLine { return undef; } my $lines = []; - push(@{$lines}, ""); # So we can use 1-based line numbers as indices + push(@{$lines}, ""); # So we can use 1-based line numbers as indices while (<FILE>) { push(@{$lines}, $_); } @@ -1591,8 +1477,8 @@ sub PrintDisassembledFunction { # Find run of instructions for this range of source lines my $first_inst = $i; while (($i <= $#instructions) && - ($instructions[$i]->[2] >= $first_line) && - ($instructions[$i]->[2] <= $last_line)) { + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { $e = $instructions[$i]; $flat_sum{$e->[2]} += $flat_count[$i]; $cum_sum{$e->[2]} += $cum_count[$i]; @@ -1604,16 +1490,16 @@ sub PrintDisassembledFunction { for (my $l = $first_line; $l <= $last_line; $l++) { my $line = SourceLine($current_file, $l); if (!defined($line)) { - $line = "?\n"; + $line = "?\n"; next; } else { $line =~ s/^\s+//; } printf("%6s %6s %5d: %s", - UnparseAlt($flat_sum{$l}), - UnparseAlt($cum_sum{$l}), - $l, - $line); + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); } # Print disassembly @@ -1630,9 +1516,9 @@ sub PrintDisassembledFunction { while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments printf("%6s %6s %8s: %6s\n", - UnparseAlt($flat_count[$x]), - UnparseAlt($cum_count[$x]), - $address, + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + $address, $d); } } @@ -1656,7 +1542,7 @@ sub PrintDot { # Find nodes to 
include my @list = (sort { abs(GetEntry($cumulative, $b)) <=> abs(GetEntry($cumulative, $a)) - || $a cmp $b } + || $a cmp $b } keys(%{$cumulative})); my $last = $nodecount - 1; if ($last > $#list) { @@ -1668,6 +1554,7 @@ sub PrintDot { } if ($last < 0) { print STDERR "No nodes to print\n"; + cleanup(); return 0; } @@ -1680,14 +1567,11 @@ sub PrintDot { # Open DOT output file my $output; if ($main::opt_gv) { - $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); + $output = "| $DOT -Tps2 >" . PsTempName($main::next_tmpfile); } elsif ($main::opt_ps) { $output = "| $DOT -Tps2"; } elsif ($main::opt_pdf) { $output = "| $DOT -Tps2 | $PS2PDF - -"; - } elsif ($main::opt_web || $main::opt_svg) { - # We need to post-process the SVG, so write to a temporary file always. - $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg"); } elsif ($main::opt_gif) { $output = "| $DOT -Tgif"; } else { @@ -1798,10 +1682,7 @@ sub PrintDot { my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); if ($fraction > 1) { $fraction = 1; } my $w = $fraction * 2; - if ($w < 1 && ($main::opt_web || $main::opt_svg)) { - # SVG output treats line widths < 1 poorly. - $w = 1; - } + #if ($w < 1) { $w = 1; } # Dot sometimes segfaults if given edge weights that are too large, so # we cap the weights at a large value @@ -1825,312 +1706,11 @@ sub PrintDot { } print DOT ("}\n"); - close(DOT); - - if ($main::opt_web || $main::opt_svg) { - # Rewrite SVG to be more usable inside web browser. - RewriteSvg(TempName($main::next_tmpfile, "svg")); - } + close(DOT); return 1; } -sub RewriteSvg { - my $svgfile = shift; - - open(SVG, $svgfile) || die "open temp svg: $!"; - my @svg = <SVG>; - close(SVG); - unlink $svgfile; - my $svg = join('', @svg); - - # Dot's SVG output is - # - # <svg width="___" height="___" - # viewBox="___" xmlns=...> - # <g id="graph0" transform="..."> - # ... - # </g> - # </svg> - # - # Change it to - # - # <svg width="100%" height="100%" - # xmlns=...> - # $svg_javascript - # <g id="viewport" transform="translate(0,0)"> - # <g id="graph0" transform="..."> - # ... - # </g> - # </g> - # </svg> - - # Fix width, height; drop viewBox. - $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/; - - # Insert script, viewport <g> above first <g> - my $svg_javascript = SvgJavascript(); - my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n"; - $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/; - - # Insert final </g> above </svg>. - $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; - $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/; - - if ($main::opt_svg) { - # --svg: write to standard output. - print $svg; - } else { - # Write back to temporary file. - open(SVG, ">$svgfile") || die "open $svgfile: $!"; - print SVG $svg; - close(SVG); - } -} - -sub SvgJavascript { - return <<'EOF'; -<script type="text/ecmascript"><![CDATA[ -// SVGPan -// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/ -// Local modification: if(true || ...) below to force panning, never moving. 
- -/** - * SVGPan library 1.2 - * ==================== - * - * Given an unique existing element with id "viewport", including the - * the library into any SVG adds the following capabilities: - * - * - Mouse panning - * - Mouse zooming (using the wheel) - * - Object dargging - * - * Known issues: - * - * - Zooming (while panning) on Safari has still some issues - * - * Releases: - * - * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui - * Fixed a bug with browser mouse handler interaction - * - * 1.1, Wed Feb 3 17:39:33 GMT 2010, Zeng Xiaohui - * Updated the zoom code to support the mouse wheel on Safari/Chrome - * - * 1.0, Andrea Leofreddi - * First release - * - * This code is licensed under the following BSD license: - * - * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without modification, are - * permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, this list of - * conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, this list - * of conditions and the following disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * The views and conclusions contained in the software and documentation are those of the - * authors and should not be interpreted as representing official policies, either expressed - * or implied, of Andrea Leofreddi. - */ - -var root = document.documentElement; - -var state = 'none', stateTarget, stateOrigin, stateTf; - -setupHandlers(root); - -/** - * Register handlers - */ -function setupHandlers(root){ - setAttributes(root, { - "onmouseup" : "add(evt)", - "onmousedown" : "handleMouseDown(evt)", - "onmousemove" : "handleMouseMove(evt)", - "onmouseup" : "handleMouseUp(evt)", - //"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element - }); - - if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0) - window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari - else - window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others - - var g = svgDoc.getElementById("svg"); - g.width = "100%"; - g.height = "100%"; -} - -/** - * Instance an SVGPoint object with given event coordinates. - */ -function getEventPoint(evt) { - var p = root.createSVGPoint(); - - p.x = evt.clientX; - p.y = evt.clientY; - - return p; -} - -/** - * Sets the current transform matrix of an element. 
- */ -function setCTM(element, matrix) { - var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")"; - - element.setAttribute("transform", s); -} - -/** - * Dumps a matrix to a string (useful for debug). - */ -function dumpMatrix(matrix) { - var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n 0, 0, 1 ]"; - - return s; -} - -/** - * Sets attributes of an element. - */ -function setAttributes(element, attributes){ - for (i in attributes) - element.setAttributeNS(null, i, attributes[i]); -} - -/** - * Handle mouse move event. - */ -function handleMouseWheel(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var delta; - - if(evt.wheelDelta) - delta = evt.wheelDelta / 3600; // Chrome/Safari - else - delta = evt.detail / -90; // Mozilla - - var z = 1 + delta; // Zoom factor: 0.9/1.1 - - var g = svgDoc.getElementById("viewport"); - - var p = getEventPoint(evt); - - p = p.matrixTransform(g.getCTM().inverse()); - - // Compute new scale matrix in current mouse position - var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y); - - setCTM(g, g.getCTM().multiply(k)); - - stateTf = stateTf.multiply(k.inverse()); -} - -/** - * Handle mouse move event. - */ -function handleMouseMove(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var g = svgDoc.getElementById("viewport"); - - if(state == 'pan') { - // Pan mode - var p = getEventPoint(evt).matrixTransform(stateTf); - - setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y)); - } else if(state == 'move') { - // Move mode - var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse()); - - setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM())); - - stateOrigin = p; - } -} - -/** - * Handle click event. - */ -function handleMouseDown(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - var g = svgDoc.getElementById("viewport"); - - if(true || evt.target.tagName == "svg") { - // Pan mode - state = 'pan'; - - stateTf = g.getCTM().inverse(); - - stateOrigin = getEventPoint(evt).matrixTransform(stateTf); - } else { - // Move mode - state = 'move'; - - stateTarget = evt.target; - - stateTf = g.getCTM().inverse(); - - stateOrigin = getEventPoint(evt).matrixTransform(stateTf); - } -} - -/** - * Handle mouse button release event. 
- */ -function handleMouseUp(evt) { - if(evt.preventDefault) - evt.preventDefault(); - - evt.returnValue = false; - - var svgDoc = evt.target.ownerDocument; - - if(state == 'pan' || state == 'move') { - // Quit pan mode - state = ''; - } -} - -]]></script> -EOF -} - # Translate a stack of addresses into a stack of symbols sub TranslateStack { my $symbols = shift; @@ -2226,7 +1806,7 @@ sub Unparse { } } } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds } else { return sprintf("%d", $num); } @@ -2367,42 +1947,42 @@ sub RemoveUninterestingFrames { 'malloc', 'free', 'memalign', - 'posix_memalign', + 'posix_memalign', 'pvalloc', 'valloc', 'realloc', - 'tc_calloc', + 'tc_calloc', 'tc_cfree', 'tc_malloc', 'tc_free', 'tc_memalign', - 'tc_posix_memalign', + 'tc_posix_memalign', 'tc_pvalloc', 'tc_valloc', 'tc_realloc', - 'tc_new', - 'tc_delete', - 'tc_newarray', - 'tc_deletearray', - 'tc_new_nothrow', - 'tc_newarray_nothrow', - 'do_malloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', '::do_malloc', # new name -- got moved to an unnamed ns '::do_malloc_or_cpp_alloc', 'DoSampledAllocation', - 'simple_alloc::allocate', - '__malloc_alloc_template::allocate', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', '__builtin_delete', '__builtin_new', '__builtin_vec_delete', '__builtin_vec_new', 'operator new', 'operator new[]', - # These mark the beginning/end of our custom sections - '__start_google_malloc', - '__stop_google_malloc', - '__start_malloc_hook', - '__stop_malloc_hook') { + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { $skip{$name} = 1; $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything } @@ -2419,11 +1999,11 @@ sub RemoveUninterestingFrames { # TODO(dpeng): this should not be necessary; it's taken # care of by the general 2nd-pc mechanism below. foreach my $name ('ProfileData::Add', # historical - 'ProfileData::prof_handler', # historical - 'CpuProfiler::prof_handler', + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', '__FRAME_END__', - '__pthread_sighandler', - '__restore') { + '__pthread_sighandler', + '__restore') { $skip{$name} = 1; } } else { @@ -2462,10 +2042,10 @@ sub RemoveUninterestingFrames { my @path = (); foreach my $a (@addrs) { if (exists($symbols->{$a})) { - my $func = $symbols->{$a}->[0]; - if ($skip{$func} || ($func =~ m/$skip_regexp/)) { - next; - } + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + next; + } } push(@path, $a); } @@ -2490,8 +2070,8 @@ sub ReduceProfile { # To avoid double-counting due to recursion, skip a stack-trace # entry if it has already been seen if (!$seen{$e}) { - $seen{$e} = 1; - push(@path, $e); + $seen{$e} = 1; + push(@path, $e); } } my $reduced_path = join("\n", @path); @@ -2685,11 +2265,28 @@ sub AddEntries { AddEntry($profile, (join "\n", @k), $count); } +sub IsSymbolizedProfileFile { + my $file_name = shift; + + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + # Check if the file contains a symbol-section marker. 
+ open(TFILE, "<$file_name"); + my @lines = <TFILE>; + my $result = grep(/^--- *$symbol_marker/, @lines); + close(TFILE); + return $result > 0; +} + ##### Code to profile a server dynamically ##### sub CheckSymbolPage { my $url = SymbolPageURL(); - open(SYMBOL, "$URL_FETCHER '$url' |"); + open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |"); my $line = <SYMBOL>; $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines close(SYMBOL); @@ -2708,45 +2305,33 @@ sub CheckSymbolPage { sub IsProfileURL { my $profile_name = shift; - if (-f $profile_name) { - printf STDERR "Using local file $profile_name.\n"; - return 0; - } - return 1; + my ($host, $port, $path) = ParseProfileURL($profile_name); + return defined($host) and defined($port) and defined($path); } sub ParseProfileURL { my $profile_name = shift; - - if (!defined($profile_name) || $profile_name eq "") { - return (); - } - - # Split profile URL - matches all non-empty strings, so no test. - $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; - - my $proto = $1 || "http://"; - my $hostport = $2; - my $prefix = $3; - my $profile = $4 || "/"; - - my $host = $hostport; - $host =~ s/:.*//; - - my $baseurl = "$proto$hostport$prefix"; - return ($host, $baseurl, $profile); + if (defined($profile_name) && + $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|.*($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) { + # $6 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after + # the hostname, as long as that everything is the empty string, + # a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc. + # So "$6 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "". + return ($2, $3, $6 || $5); + } + return (); } # We fetch symbols from the first profile argument. sub SymbolPageURL { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - return "$baseURL$SYMBOL_PAGE"; + my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); + return "http://$host:$port$SYMBOL_PAGE"; } sub FetchProgramName() { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - my $url = "$baseURL$PROGRAM_NAME_PAGE"; - my $command_line = "$URL_FETCHER '$url'"; + my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); + my $url = "http://$host:$port$PROGRAM_NAME_PAGE"; + my $command_line = "$WGET $WGET_FLAGS -qO- '$url'"; open(CMDLINE, "$command_line |") or error($command_line); my $cmdline = <CMDLINE>; $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2763,7 +2348,7 @@ sub FetchProgramName() { # curl. Redirection happens on borg hosts. sub ResolveRedirectionForCurl { my $url = shift; - my $command_line = "$URL_FETCHER --head '$url'"; + my $command_line = "$CURL -s --head '$url'"; open(CMDLINE, "$command_line |") or error($command_line); while (<CMDLINE>) { s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2775,20 +2360,6 @@ sub ResolveRedirectionForCurl { return $url; } -# Add a timeout flat to URL_FETCHER -sub AddFetchTimeout { - my $fetcher = shift; - my $timeout = shift; - if (defined($timeout)) { - if ($fetcher =~ m/\bcurl -s/) { - $fetcher .= sprintf(" --max-time %d", $timeout); - } elsif ($fetcher =~ m/\brpcget\b/) { - $fetcher .= sprintf(" --deadline=%d", $timeout); - } - } - return $fetcher; -} - # Reads a symbol map from the file handle name given as $1, returning # the resulting symbol map. Also processes variables relating to symbols. 
# Currently, the only variable processed is 'binary=<value>' which updates @@ -2833,6 +2404,7 @@ sub FetchSymbols { my $pcset = shift; my $symbol_map = shift; + my %seen = (); my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq @@ -2842,16 +2414,12 @@ sub FetchSymbols { open(POSTFILE, ">$main::tmpfile_sym"); print POSTFILE $post_data; close(POSTFILE); - + my $url = SymbolPageURL(); - - my $command_line; - if ($URL_FETCHER =~ m/\bcurl -s/) { - $url = ResolveRedirectionForCurl($url); - $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'"; - } else { - $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'"; - } + # Here we use curl for sending data via POST since old + # wget doesn't have --post-file option. + $url = ResolveRedirectionForCurl($url); + my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'"; # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. my $cppfilt = $obj_tool_map{"c++filt"}; open(SYMBOL, "$command_line | $cppfilt |") or error($command_line); @@ -2896,10 +2464,10 @@ sub BaseName { sub MakeProfileBaseName { my ($binary_name, $profile_name) = @_; - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my ($host, $port, $path) = ParseProfileURL($profile_name); my $binary_shortname = BaseName($binary_name); - return sprintf("%s.%s.%s", - $binary_shortname, $main::op_time, $host); + return sprintf("%s.%s.%s-port%s", + $binary_shortname, $main::op_time, $host, $port); } sub FetchDynamicProfile { @@ -2911,7 +2479,7 @@ sub FetchDynamicProfile { if (!IsProfileURL($profile_name)) { return $profile_name; } else { - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my ($host, $port, $path) = ParseProfileURL($profile_name); if ($path eq "" || $path eq "/") { # Missing type specifier defaults to cpu-profile $path = $PROFILE_PAGE; @@ -2919,28 +2487,37 @@ sub FetchDynamicProfile { my $profile_file = MakeProfileBaseName($binary_name, $profile_name); - my $url = "$baseURL$path"; - my $fetch_timeout = undef; - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { - if ($path =~ m/[?]/) { - $url .= "&"; + my $url; + my $wget_timeout; + if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) { + if ($path =~ m/$PROFILE_PAGE/) { + $url = sprintf("http://$host:$port$path?seconds=%d", + $main::opt_seconds); } else { - $url .= "?"; + if ($profile_name =~ m/[?]/) { + $profile_name .= "&" + } else { + $profile_name .= "?" + } + $url = sprintf("http://$profile_name" . "seconds=%d", + $main::opt_seconds); } - $url .= sprintf("seconds=%d", $main::opt_seconds); - $fetch_timeout = $main::opt_seconds * 1.01 + 60; + $wget_timeout = sprintf("--timeout=%d", + int($main::opt_seconds * 1.01 + 60)); } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; - $profile_file .= $suffix; + $profile_file .= "$suffix"; + $url = "http://$host:$port$path"; + $wget_timeout = ""; } my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof"); - if (! 
-d $profile_dir) { + if (!(-d $profile_dir)) { mkdir($profile_dir) - || die("Unable to create profile directory $profile_dir: $!\n"); + || die("Unable to create profile directory $profile_dir: $!\n"); } my $tmp_profile = "$profile_dir/.tmp.$profile_file"; my $real_profile = "$profile_dir/$profile_file"; @@ -2949,15 +2526,14 @@ sub FetchDynamicProfile { return $real_profile; } - my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout); - my $cmd = "$fetcher '$url' > '$tmp_profile'"; - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/){ + my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'"; + if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){ print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; if ($encourage_patience) { print STDERR "Be patient...\n"; } } else { - print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; + print STDERR "Fetching $path profile from $host:$port to\n ${real_profile}\n"; } (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); @@ -3004,7 +2580,6 @@ sub FetchDynamicProfilesRecurse { } else { $position = 1 | ($position << 1); TryCollectProfile($maxlevel, $level, $position); - cleanup(); exit(0); } } @@ -3028,69 +2603,22 @@ sub TryCollectProfile { # Provide a small streaming-read module to handle very large # cpu-profile files. Stream in chunks along a sliding window. -# Provides an interface to get one 'slot', correctly handling -# endian-ness differences. A slot is one 32-bit or 64-bit word -# (depending on the input profile). We tell endianness and bit-size -# for the profile by looking at the first 8 bytes: in cpu profiles, -# the second slot is always 3 (we'll accept anything that's not 0). BEGIN { package CpuProfileStream; sub new { - my ($class, $file, $fname) = @_; - my $self = { file => $file, - base => 0, - stride => 512 * 1024, # must be a multiple of bitsize/8 - slots => [], - unpack_code => "", # N for big-endian, V for little + my ($class, $file) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of |long| + slots => [] }; bless $self, $class; # Let unittests adjust the stride if ($main::opt_test_stride > 0) { $self->{stride} = $main::opt_test_stride; } - # Read the first two slots to figure out bitsize and endianness. - my $slots = $self->{slots}; - my $str; - read($self->{file}, $str, 8); - # Set the global $address_length based on what we see here. - # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). - $address_length = ($str eq (chr(0)x8)) ? 16 : 8; - if ($address_length == 8) { - if (substr($str, 6, 2) eq chr(0)x2) { - $self->{unpack_code} = 'V'; # Little-endian. - } elsif (substr($str, 4, 2) eq chr(0)x2) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**16\n"); - } - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # If we're a 64-bit profile, make sure we're a 64-bit-capable - # perl. Otherwise, each slot will be represented as a float - # instead of an int64, losing precision and making all the - # 64-bit addresses right. We *could* try to handle this with - # software emulation of 64-bit ints, but that's added complexity - # for no clear benefit (yet). We use 'Q' to test for 64-bit-ness; - # perl docs say it's only available on 64-bit perl systems. - my $has_q = 0; - eval { $has_q = pack("Q", "1") ? 
1 : 1; }; - if (!$has_q) { - ::error("$fname: need a 64-bit perl to process this 64-bit profile.\n"); - } - read($self->{file}, $str, 8); - if (substr($str, 4, 4) eq chr(0)x4) { - # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. - $self->{unpack_code} = 'V'; # Little-endian. - } elsif (substr($str, 0, 4) eq chr(0)x4) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**32\n"); - } - my @pair = unpack($self->{unpack_code} . "*", $str); - # Since we know one of the pair is 0, it's fine to just add them. - @$slots = (0, $pair[0] + $pair[1]); - } + $self->overflow(); return $self; } @@ -3101,25 +2629,7 @@ BEGIN { $self->{base} += $#$slots + 1; # skip over data we're replacing my $str; read($self->{file}, $str, $self->{stride}); - if ($address_length == 8) { # the 32-bit case - # This is the easy case: unpack provides 32-bit unpacking primitives. - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # We need to unpack 32 bits at a time and combine. - my @b32_values = unpack($self->{unpack_code} . "*", $str); - my @b64_values = (); - for (my $i = 0; $i < $#b32_values; $i += 2) { - # TODO(csilvers): if this is a 32-bit perl, the math below - # could end up in a too-large int, which perl will promote - # to a double, losing necessary precision. Deal with that. - if ($self->{unpack_code} eq 'V') { # little-endian - push(@b64_values, $b32_values[$i] + $b32_values[$i+1] * (2**32)); - } else { - push(@b64_values, $b32_values[$i] * (2**32) + $b32_values[$i+1]); - } - } - @$slots = @b64_values; - } + @$slots = unpack("L*", $str); } # Access the i-th long in the file (logically), or -1 at EOF. @@ -3128,16 +2638,16 @@ BEGIN { my $slots = $self->{slots}; while ($#$slots >= 0) { if ($idx < $self->{base}) { - # The only time we expect a reference to $slots[$i - something] - # after referencing $slots[$i] is reading the very first header. - # Since $stride > |header|, that shouldn't cause any lookback - # errors. And everything after the header is sequential. - print STDERR "Unexpected look-back reading CPU profile"; - return -1; # shrug, don't know what better to return + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return } elsif ($idx > $self->{base} + $#$slots) { - $self->overflow(); + $self->overflow(); } else { - return $slots->[$idx - $self->{base}]; + return $slots->[$idx - $self->{base}]; } } # If we get here, $slots is [], which means we've reached EOF @@ -3145,44 +2655,6 @@ BEGIN { } } -# Return the next line from the profile file, assuming it's a text -# line (which in this case means, doesn't start with a NUL byte). If -# it's not a text line, return "". At EOF, return undef, like perl does. -# Input file should be in binmode. 
-sub ReadProfileLine { - local *PROFILE = shift; - my $firstchar = ""; - my $line = ""; - read(PROFILE, $firstchar, 1); - seek(PROFILE, -1, 1); # unread the firstchar - if ($firstchar eq "\0") { - return ""; - } - $line = <PROFILE>; - if (defined($line)) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - } - return $line; -} - -sub IsSymbolizedProfileFile { - my $file_name = shift; - if (!(-e $file_name) || !(-r $file_name)) { - return 0; - } - # Check if the file contains a symbol-section marker. - open(TFILE, "<$file_name"); - binmode TFILE; - my $firstline = ReadProfileLine(*TFILE); - close(TFILE); - if (!$firstline) { - return 0; - } - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - return $firstline =~ /^--- *$symbol_marker/; -} - # Parse profile generated by common/profiler.cc and return a reference # to a map: # $result->{version} Version number of profile file @@ -3217,17 +2689,28 @@ sub ReadProfile { # whole firstline, since it may be gigabytes(!) of data. open(PROFILE, "<$fname") || error("$fname: $!\n"); binmode PROFILE; # New perls do UTF-8 processing - my $header = ReadProfileLine(*PROFILE); - if (!defined($header)) { # means "at EOF" - error("Profile is empty.\n"); + my $firstchar = ""; + my $header = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar ne "\0") { + $header = <PROFILE>; + $header =~ s/\r//g; # turn windows-looking lines into unix-looking lines } my $symbols; if ($header =~ m/^--- *$symbol_marker/o) { - # Read the symbol section of the symbolized profile file. + # read the symbol section of the symbolized profile file $symbols = ReadSymbols(*PROFILE{IO}); - # Read the next line to get the header for the remaining profile. - $header = ReadProfileLine(*PROFILE) || ""; + + # read the next line to get the header for the remaining profile + $header = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar ne "\0") { + $header = <PROFILE>; + $header =~ s/\r//g; + } } my $result; @@ -3269,33 +2752,6 @@ sub ReadProfile { return $result; } -# Subtract one from caller pc so we map back to call instr. -# However, don't do this if we're reading a symbolized profile -# file, in which case the subtract-one was done when the file -# was written. -# -# We apply the same logic to all readers, though ReadCPUProfile uses an -# independent implementation. -sub FixCallerAddresses { - my $stack = shift; - if ($main::use_symbolized_profile) { - return $stack; - } else { - $stack =~ /(\s)/; - my $delimiter = $1; - my @addrs = split(' ', $stack); - my @fixedaddrs; - $#fixedaddrs = $#addrs; - if ($#addrs >= 0) { - $fixedaddrs[0] = $addrs[0]; - } - for (my $i = 1; $i <= $#addrs; $i++) { - $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); - } - return join $delimiter, @fixedaddrs; - } -} - # CPU profile reader sub ReadCPUProfile { my $prog = shift; @@ -3307,7 +2763,10 @@ sub ReadCPUProfile { my $pcs = {}; # Parse string into array of slots. - my $slots = CpuProfileStream->new(*PROFILE, $fname); + # L! cannot be used because with a native 64-bit build, it will cause + # 1) a valid 64-bit profile to use the 32-bit codepath, and + # 2) a valid 32-bit profile to be unrecognized. + my $slots = CpuProfileStream->new(*PROFILE); # Read header. 
The current header version is a 5-element structure # containing: @@ -3316,50 +2775,108 @@ sub ReadCPUProfile { # 2: format version (0) # 3: sampling period (usec) # 4: unused padding (always 0) + # The header words are 32-bit or 64-bit depending on the ABI of the program + # that generated the profile. In the 64-bit case, since our x86-architecture + # machines are little-endian, the actual value of each of these elements is + # in the first 32-bit word, and the second is always zero. The @slots array + # above was read as a sequence of 32-bit words in both cases, so we need to + # explicitly check for both cases. A typical slot sequence for each is: + # 32-bit: 0 3 0 100 0 + # 64-bit: 0 0 3 0 0 0 100 0 0 0 + # if ($slots->get(0) != 0 ) { error("$fname: not a profile file, or old format profile file\n"); } - $i = 2 + $slots->get(1); - $version = $slots->get(2); - $period = $slots->get(3); - # Do some sanity checking on these header values. - if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { - error("$fname: not a profile file, or corrupted profile file\n"); - } - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $d = $slots->get($i++); - if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? - my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); - print STDERR "At index $i (address $addr):\n"; - error("$fname: stack trace depth >= 2**32\n"); - } - if ($slots->get($i) == 0) { - # End of profile data marker + if ($slots->get(1) >= 3) { + # Normal 32-bit header: + $version = $slots->get(2); + $period = $slots->get(3); + $i = 2 + $slots->get(1); + $address_length = 8; + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($slots->get($i) == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = sprintf("%08x", $slots->get($i+$j)); + $pcs->{$pc} = 1; + push @k, $pc; + } + + AddEntry($profile, (join "\n", @k), $n); $i += $d; - last; } - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pc = $slots->get($i+$j); - # Subtract one from caller pc so we map back to call instr. - # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. - if ($j > 0 && !$main::use_symbolized_profile) { - $pc--; + # Normal 64-bit header: All entries are doubled in size. The first + # word (little-endian) should contain the real value, the second should + # be zero. 
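The two header layouts spelled out in the comment above are easier to see outside the stream reader. Below is a rough C++ sketch of the same detection logic — DetectProfileWordSize is a hypothetical standalone helper, not part of this change, and it assumes the file has already been read as a sequence of 32-bit words in host order:

#include <cstdint>
#include <vector>

// Slots of a CPU profile, already split into 32-bit words.
// 32-bit header (5 slots):  0, hdr_words(>=3), version, period, 0
// 64-bit header: every slot doubled, with the high word always zero.
int DetectProfileWordSize(const std::vector<uint32_t>& s) {
  if (s.size() >= 5 && s[0] == 0 && s[1] >= 3)
    return 32;                       // normal 32-bit header
  if (s.size() >= 10 && s[0] == 0 && s[1] == 0 && s[2] >= 3 &&
      s[3] == 0 && s[5] == 0 && s[7] == 0)
    return 64;                       // doubled slots, zero high words
  return 0;                          // not a recognized CPU profile
}

This mirrors the Perl checks in the hunk: slot 1 holding a plausible header word count means a 32-bit profile, while a 64-bit profile must show zeros in the odd-numbered high-word slots.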
+ } elsif ($slots->get(1) != 0 || + $slots->get(2) < 3 || + $slots->get(3) != 0 || + $slots->get(5) != 0 || + $slots->get(7) != 0) { + error("$fname: not a profile file, or old format profile file\n"); + } else { + $version = $slots->get(4); + $period = $slots->get(6); + $i = 4 + 2 * $slots->get(2); + $address_length = 16; + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $nhi = $slots->get($i++); + # Huge counts may coerce to floating point, keeping scale, not precision + if ($nhi != 0) { $n += $nhi*(2**32); } + my $d = $slots->get($i++); + if ($slots->get($i++) != 0) { + my $addr = sprintf("%o", 4 * $i); + print STDERR "At index $i ($addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); } - $pc = sprintf("%0*x", $address_length, $pc); - $pcs->{$pc} = 1; - push @k, $pc; - } + if ($slots->get($i) == 0 && $slots->get($i+1) == 0) { + # End of profile data marker + $i += 2 * $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pclo = $slots->get($i++); + my $pchi = $slots->get($i++); + if ($pclo == -1 || $pchi == -1) { + error("$fname: Unexpected EOF when reading stack of depth $d\n"); + } + + # Subtract one from caller pc so we map back to call instr. + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. + if ($j > 0 && !$main::use_symbolized_profile) { + if ($pclo == 0) { + $pchi--; + $pclo = 0xffffffff; + } else { + $pclo--; + } + } - AddEntry($profile, (join "\n", @k), $n); - $i += $d; + my $pc = sprintf("%08x%08x", $pchi, $pclo); + $pcs->{$pc} = 1; + push @k, $pc; + } + AddEntry($profile, (join "\n", @k), $n); + } } # Parse map @@ -3430,18 +2947,18 @@ sub ReadHeapProfile { # found for profiles generated locally, and the others for # remote profiles. 
if (($type eq "heapprofile") || ($type !~ /heap/) ) { - # No need to adjust for the sampling rate with heap-profiler-derived data - $sampling_algorithm = 0; + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; } elsif ($type =~ /_v2/) { - $sampling_algorithm = 2; # version 2 sampling + $sampling_algorithm = 2; # version 2 sampling if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period); - } + $sample_adjustment = int($sample_period); + } } else { - $sampling_algorithm = 1; # version 1 sampling + $sampling_algorithm = 1; # version 1 sampling if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period)/2; - } + $sample_adjustment = int($sample_period)/2; + } } } else { # We detect whether or not this is a remote-heap profile by checking @@ -3453,7 +2970,7 @@ sub ReadHeapProfile { my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); if (($n1 == $n2) && ($s1 == $s2)) { # This is likely to be a remote-heap based sample profile - $sampling_algorithm = 1; + $sampling_algorithm = 1; } } } @@ -3467,7 +2984,7 @@ sub ReadHeapProfile { print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; } else { printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", - $sample_adjustment); + $sample_adjustment); } if ($sampling_algorithm > 1) { # We don't bother printing anything for the original version (version 1) @@ -3484,7 +3001,7 @@ sub ReadHeapProfile { if (/^MAPPED_LIBRARIES:/) { # Read the /proc/self/maps data while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines + s/\r//g; # turn windows-looking lines into unix-looking lines $map .= $_; } last; @@ -3494,7 +3011,7 @@ sub ReadHeapProfile { # Read /proc/self/maps data as formatted by DumpAddressMap() my $buildvar = ""; while (<PROFILE>) { - s/\r//g; # turn windows-looking lines into unix-looking lines + s/\r//g; # turn windows-looking lines into unix-looking lines # Parse "build=<dir>" specification if supplied if (m/^\s*build=(.*)\n/) { $buildvar = $1; @@ -3549,7 +3066,7 @@ sub ReadHeapProfile { } my @counts = ($n1, $s1, $n2, $s2); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + AddEntries($profile, $pcs, $stack, $counts[$index]); } } @@ -3569,7 +3086,7 @@ sub ReadSynchProfile { my $profile = {}; my $pcs = {}; my $sampling_period = 1; - my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $cyclespernanosec = 2.8; # Default assumption for old binaries my $seen_clockrate = 0; my $line; @@ -3595,7 +3112,7 @@ sub ReadSynchProfile { $count *= $sampling_period; my @values = ($cycles, $count, $cycles / $count); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); + AddEntries($profile, $pcs, $stack, $values[$index]); } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { @@ -3610,7 +3127,7 @@ sub ReadSynchProfile { # Adjust for sampling done by application $cycles *= $sampling_period; - AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); + AddEntries($profile, $pcs, $stack, $cycles); } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { my ($variable, $value) = ($1,$2); @@ -3791,8 +3308,8 @@ sub ParseTextSectionHeaderFromOtool { } elsif ($line =~ /segname (\w+)/) { $segname = $1; } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && - $sectname eq "__text" && - $segname eq "__TEXT")) { + $sectname eq "__text" && + $segname eq "__TEXT")) { next; } 
elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { $vma = $1; @@ -3852,7 +3369,7 @@ sub ParseLibraries { my $finish; my $offset; my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*)?)$/i) { # Full line from /proc/self/maps. Example: # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so $start = HexExtend($1); @@ -4158,7 +3675,7 @@ sub MapToSymbols { if ($debug) { print("---- $image ---\n"); } for (my $i = 0; $i <= $#{$pclist}; $i++) { # addr2line always reads hex addresses, and does not need '0x' prefix. - if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } + if ($debug) { printf("%s\n", $pclist->[$i]); } printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); if (defined($sep_address)) { printf ADDRESSES ("%s\n", $sep_address); @@ -4210,7 +3727,7 @@ sub MapToSymbols { $symbols->{$pcstr} = $sym; } unshift(@{$sym}, $function, $filelinenum, $fullfunction); - if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } if (!defined($sep_address)) { # Inlining is off, se this entry ends immediately $count++; @@ -4266,7 +3783,7 @@ sub MapSymbolsWithNM { } return 1; } - + sub ShortFunctionName { my $function = shift; while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types @@ -4313,8 +3830,6 @@ sub ConfigureObjTools { if ($file_type =~ /Mach-O/) { # OS X uses otool to examine Mach-O files, rather than objdump. $obj_tool_map{"otool"} = "otool"; - $obj_tool_map{"addr2line"} = "false"; # no addr2line - $obj_tool_map{"objdump"} = "false"; # no objdump } # Go fill in %obj_tool_map with the pathnames to use: @@ -4361,8 +3876,9 @@ sub ConfigureTool { sub cleanup { unlink($main::tmpfile_sym); - unlink(keys %main::tempnames); - + for (my $i = 0; $i < $main::next_tmpfile; $i++) { + unlink(PsTempName($i)); + } # We leave any collected profiles in $HOME/pprof in case the user wants # to look at them later. We print a message informing them of this. if ((scalar(@main::profile_files) > 0) && @@ -4405,7 +3921,7 @@ sub GetProcedureBoundariesViaNm { my $routine = ""; while (<NM>) { s/\r//g; # turn windows-looking lines into unix-looking lines - if (m/^\s*([0-9a-f]+) (.) (..*)/) { + if (m/^([0-9a-f]+) (.) (..*)/) { my $start_val = $1; my $type = $2; my $this_routine = $3; @@ -4426,12 +3942,12 @@ sub GetProcedureBoundariesViaNm { # we'll just go ahead and process the first entry (which never # got touched in the queue), and ignore the others. if ($start_val eq $last_start && $type =~ /t/i) { - # We are the 'T' symbol at this address, replace previous symbol. - $routine = $this_routine; - next; + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; } elsif ($start_val eq $last_start) { - # We're not the 'T' symbol at this address, so ignore us. - next; + # We're not the 'T' symbol at this address, so ignore us. + next; } if ($this_routine eq $sep_symbol) { @@ -4446,7 +3962,7 @@ sub GetProcedureBoundariesViaNm { if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($start_val)]; + HexExtend($start_val)]; } $last_start = $start_val; $routine = $this_routine; @@ -4465,8 +3981,9 @@ sub GetProcedureBoundariesViaNm { # TODO(csilvers): do better here. 
if (defined($routine) && $routine =~ m/$regexp/) { $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($last_start)]; + HexExtend($last_start)]; } + return $symbol_table; } @@ -4512,13 +4029,9 @@ sub GetProcedureBoundaries { # -D to at least get *exported* symbols. If we can't use --demangle, # we use c++filt instead, if it exists on this system. my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - "$nm -D -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - # 6nm is for Go binaries - "6nm $image 2>/dev/null | sort", - ); - + " $image 2>/dev/null $cppfilt_flag", + "$nm -D -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag"); # If the executable is an MS Windows PDB-format executable, we'll # have set up obj_tool_map("nm_pdb"). In this case, we actually # want to use both unix nm and windows-specific nm_pdb, since @@ -4750,3 +4263,4 @@ sub RunUnitTests { } exit ($error_count); } + diff --git a/third_party/tcmalloc/chromium/src/span.h b/third_party/tcmalloc/chromium/src/span.h index b3483ca..ab9a796 100644 --- a/third_party/tcmalloc/chromium/src/span.h +++ b/third_party/tcmalloc/chromium/src/span.h @@ -60,10 +60,6 @@ struct Span { int value[64]; #endif - void* start_ptr() { - return reinterpret_cast<void*>(start << kPageShift); - } - // What freelist the span is on: IN_USE if on none, or normal or returned enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST }; }; diff --git a/third_party/tcmalloc/chromium/src/stacktrace.cc b/third_party/tcmalloc/chromium/src/stacktrace.cc index 68cb865..d158eea 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace.cc +++ b/third_party/tcmalloc/chromium/src/stacktrace.cc @@ -57,45 +57,7 @@ #include "stacktrace_config.h" #if defined(STACKTRACE_INL_HEADER) - -#define IS_STACK_FRAMES 0 -#define IS_WITH_CONTEXT 0 -#define GET_STACK_TRACE_OR_FRAMES \ - GetStackTrace(void **result, int max_depth, int skip_count) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 1 -#define IS_WITH_CONTEXT 0 -#define GET_STACK_TRACE_OR_FRAMES \ - GetStackFrames(void **result, int *sizes, int max_depth, int skip_count) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 0 -#define IS_WITH_CONTEXT 1 -#define GET_STACK_TRACE_OR_FRAMES \ - GetStackTraceWithContext(void **result, int max_depth, \ - int skip_count, const void *ucp) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - -#define IS_STACK_FRAMES 1 -#define IS_WITH_CONTEXT 1 -#define GET_STACK_TRACE_OR_FRAMES \ - GetStackFramesWithContext(void **result, int *sizes, int max_depth, \ - int skip_count, const void *ucp) -#include STACKTRACE_INL_HEADER -#undef IS_STACK_FRAMES -#undef IS_WITH_CONTEXT -#undef GET_STACK_TRACE_OR_FRAMES - +# include STACKTRACE_INL_HEADER #elif 0 // This is for the benefit of code analysis tools that may have // trouble with the computed #include above. 
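The block removed from stacktrace.cc above drives one shared implementation file: the same -inl header is included four times with different IS_STACK_FRAMES / IS_WITH_CONTEXT / GET_STACK_TRACE_OR_FRAMES settings, so a single function body expands into all four entry points. A compressed single-file sketch of that pattern (names are illustrative; the real code re-includes a header rather than reusing a macro):

#include <cstdio>

// Shared body; tcmalloc keeps this in an -inl.h and #includes it
// repeatedly, with a macro selecting the variant-only code.
#define STACK_BODY {                                        \
    int n = 0;                                              \
    /* ... unwind loop would fill result[0..n) here ... */  \
    IF_FRAMES(sizes[n] = 0;)  /* only in GetStackFrames */  \
    (void)result; (void)max_depth; (void)skip_count;        \
    return n;                                               \
  }

#define IF_FRAMES(x)
int GetStackTrace(void** result, int max_depth, int skip_count) STACK_BODY
#undef IF_FRAMES

#define IF_FRAMES(x) x
int GetStackFrames(void** result, int* sizes, int max_depth,
                   int skip_count) STACK_BODY
#undef IF_FRAMES

int main() {
  void* pcs[8]; int sizes[8];
  std::printf("%d %d\n", GetStackTrace(pcs, 8, 0),
              GetStackFrames(pcs, sizes, 8, 0));
  return 0;
}

The revert trades this single-body scheme back for the duplicated GetStackTrace/GetStackFrames pairs whose rationale is repeated in the comments of each restored -inl.h below.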
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_config.h b/third_party/tcmalloc/chromium/src/stacktrace_config.h index 18f16ab..b58ab1d 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_config.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_config.h @@ -53,7 +53,6 @@ # define STACKTRACE_SKIP_CONTEXT_ROUTINES 1 # elif defined(HAVE_LIBUNWIND_H) // a proxy for having libunwind installed # define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h" -# define STACKTRACE_USES_LIBUNWIND 1 # elif defined(__linux) # error Cannnot calculate stack trace: need either libunwind or frame-pointers (see INSTALL file) # else diff --git a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h index 0e72ee7..490cd9d 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h @@ -34,32 +34,57 @@ // // Note: The glibc implementation may cause a call to malloc. // This can cause a deadlock in HeapProfiler. - -#ifndef BASE_STACKTRACE_GENERIC_INL_H_ -#define BASE_STACKTRACE_GENERIC_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - #include <execinfo.h> #include <string.h> #include "google/stacktrace.h" -#endif // BASE_STACKTRACE_GENERIC_INL_H_ -// Note: this part of the file is included several times. -// Do not put globals below. +// If you change this function, also change GetStackFrames below. +int GetStackTrace(void** result, int max_depth, int skip_count) { + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + + size = backtrace(stack, kStackLength); + skip_count++; // we want to skip the current frame as well + int result_count = size - skip_count; + if (result_count < 0) + result_count = 0; + if (result_count > max_depth) + result_count = max_depth; + for (int i = 0; i < result_count; i++) + result[i] = stack[i + skip_count]; + + return result_count; +} -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. // -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -int GET_STACK_TRACE_OR_FRAMES { +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". 
But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { static const int kStackLength = 64; void * stack[kStackLength]; int size; @@ -72,12 +97,10 @@ int GET_STACK_TRACE_OR_FRAMES { if (result_count > max_depth) result_count = max_depth; for (int i = 0; i < result_count; i++) - result[i] = stack[i + skip_count]; + pcs[i] = stack[i + skip_count]; -#if IS_STACK_FRAMES // No implementation for finding out the stack frame sizes yet. memset(sizes, 0, sizeof(*sizes) * result_count); -#endif return result_count; } diff --git a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h index a1d5249..d9d829a 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h @@ -32,11 +32,6 @@ // // Produce stack trace using libunwind -#ifndef BASE_STACKTRACE_LIBINWIND_INL_H_ -#define BASE_STACKTRACE_LIBINWIND_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - // We only need local unwinder. #define UNW_LOCAL_ONLY @@ -57,30 +52,12 @@ extern "C" { // cases, we return 0 to indicate the situation. static __thread int recursive; -#endif // BASE_STACKTRACE_LIBINWIND_INL_H_ - -// Note: this part of the file is included several times. -// Do not put globals below. - -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() -// -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -int GET_STACK_TRACE_OR_FRAMES { +// If you change this function, also change GetStackFrames below. 
+int GetStackTrace(void** result, int max_depth, int skip_count) { void *ip; int n = 0; unw_cursor_t cursor; unw_context_t uc; -#if IS_STACK_FRAMES - unw_word_t sp = 0, next_sp = 0; -#endif if (recursive) { return 0; @@ -90,39 +67,90 @@ int GET_STACK_TRACE_OR_FRAMES { unw_getcontext(&uc); int ret = unw_init_local(&cursor, &uc); assert(ret >= 0); - skip_count++; // Do not include current frame - - while (skip_count--) { - if (unw_step(&cursor) <= 0) { - goto out; - } -#if IS_STACK_FRAMES - if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { - goto out; - } -#endif - } + skip_count++; // Do not include the "GetStackTrace" frame while (n < max_depth) { if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { break; } -#if IS_STACK_FRAMES - sizes[n] = 0; -#endif - result[n++] = ip; + if (skip_count > 0) { + skip_count--; + } else { + result[n++] = ip; + } if (unw_step(&cursor) <= 0) { break; } -#if IS_STACK_FRAMES + } + --recursive; + return n; +} + +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. +// +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { + void *ip; + int n = 0; + unw_cursor_t cursor; + unw_context_t uc; + unw_word_t sp = 0, next_sp = 0; + + if (recursive) { + return 0; + } + ++recursive; + + unw_getcontext(&uc); + RAW_CHECK(unw_init_local(&cursor, &uc) >= 0, "unw_init_local failed"); + skip_count++; // Do not include the "GetStackFrames" frame + + while (skip_count--) { + if (unw_step(&cursor) <= 0 || + unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) { + goto out; + } + } + while (n < max_depth) { sp = next_sp; - if (unw_get_reg(&cursor, UNW_REG_SP, &next_sp) , 0) { + if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) + break; + if (unw_step(&cursor) <= 0 || + unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { + // We couldn't step any further (possibly because we reached _start). + // Provide the last good PC we've got, and get out. 
+ sizes[n] = 0; + pcs[n++] = ip; break; } - sizes[n - 1] = next_sp - sp; -#endif + sizes[n] = next_sp - sp; + pcs[n++] = ip; } -out: + out: --recursive; return n; } diff --git a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h index 9a07eea..5631e49 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h @@ -36,11 +36,6 @@ // http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK // Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882 -#ifndef BASE_STACKTRACE_POWERPC_INL_H_ -#define BASE_STACKTRACE_POWERPC_INL_H_ -// Note: this file is included into stacktrace.cc more than once. -// Anything that should only be defined once should be here: - #include <stdint.h> // for uintptr_t #include <stdlib.h> // for NULL #include <google/stacktrace.h> @@ -76,23 +71,9 @@ static void **NextStackFrame(void **old_sp) { // This ensures that GetStackTrace stes up the Link Register properly. void StacktracePowerPCDummyFunction() __attribute__((noinline)); void StacktracePowerPCDummyFunction() { __asm__ volatile(""); } -#endif // BASE_STACKTRACE_POWERPC_INL_H_ - -// Note: this part of the file is included several times. -// Do not put globals below. -// The following 4 functions are generated from the code below: -// GetStack{Trace,Frames}() -// GetStack{Trace,Frames}WithContext() -// -// These functions take the following args: -// void** result: the stack-trace, as an array -// int* sizes: the size of each stack frame, as an array -// (GetStackFrames* only) -// int max_depth: the size of the result (and sizes) array(s) -// int skip_count: how many stack pointers to skip before storing in result -// void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only) -int GET_STACK_TRACE_OR_FRAMES { +// If you change this function, also change GetStackFrames below. +int GetStackTrace(void** result, int max_depth, int skip_count) { void **sp; // Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther) // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a @@ -114,29 +95,11 @@ int GET_STACK_TRACE_OR_FRAMES { // This routine forces the compiler (at least gcc) to push it anyway. StacktracePowerPCDummyFunction(); -#if IS_STACK_FRAMES - // Note we do *not* increment skip_count here for the SYSV ABI. If - // we did, the list of stack frames wouldn't properly match up with - // the list of return addresses. Note this means the top pc entry - // is probably bogus for linux/ppc (and other SYSV-ABI systems). -#else // The LR save area is used by the callee, so the top entry is bogus. skip_count++; -#endif int n = 0; while (sp && n < max_depth) { -#if IS_STACK_FRAMES - // The GetStackFrames routine is called when we are in some - // informational context (the failure signal handler for example). - // Use the non-strict unwinding rules to produce a stack trace - // that is as complete as possible (even if it contains a few bogus - // entries in some rare cases). - void **next_sp = NextStackFrame<false>(sp); -#else - void **next_sp = NextStackFrame<true>(sp); -#endif - if (skip_count > 0) { skip_count--; } else { @@ -157,15 +120,85 @@ int GET_STACK_TRACE_OR_FRAMES { #else #error Need to specify the PPC ABI for your archiecture. #endif + } + // Use strict unwinding rules. 
+    sp = NextStackFrame<true>(sp);
+  }
+  return n;
+}
+
+// If you change this function, also change GetStackTrace above:
+//
+// This GetStackFrames routine shares a lot of code with GetStackTrace
+// above.  This code could have been refactored into a common routine,
+// and then both GetStackTrace/GetStackFrames could call that routine.
+// There are two problems with that:
+//
+// (1) The performance of the refactored code suffers substantially - the
+//     refactored routine needs to be able to record the stack trace when
+//     called from GetStackTrace, and both the stack trace and stack frame
+//     sizes when called from GetStackFrames - this introduces enough new
+//     conditionals that GetStackTrace performance can degrade by as much
+//     as 50%.
+//
+// (2) Whether the refactored routine gets inlined into GetStackTrace and
+//     GetStackFrames depends on the compiler, and we can't guarantee the
+//     behavior either way, even with "__attribute__ ((always_inline))"
+//     or "__attribute__ ((noinline))".  But we need this guarantee or the
+//     frame counts may be off by one.
+//
+// Both (1) and (2) can be addressed without this code duplication, by
+// clever use of template functions, and by defining GetStackTrace and
+// GetStackFrames as macros that expand to these template functions.
+// However, this approach comes with its own set of problems - namely,
+// macro and preprocessor trouble - for example, if GetStackTrace
+// and/or GetStackFrames is ever defined as a member function in some
+// class, we are in trouble.
+int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) {
+  void **sp;
+#ifdef __APPLE__
+  __asm__ volatile ("mr %0,r1" : "=r" (sp));
+#else
+  __asm__ volatile ("mr %0,1" : "=r" (sp));
+#endif
-#if IS_STACK_FRAMES
+  StacktracePowerPCDummyFunction();
+  // Note we do *not* increment skip_count here for the SYSV ABI.  If
+  // we did, the list of stack frames wouldn't properly match up with
+  // the list of return addresses.  Note this means the top pc entry
+  // is probably bogus for linux/ppc (and other SYSV-ABI systems).
+
+  int n = 0;
+  while (sp && n < max_depth) {
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<false>(sp);
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+#if defined(_CALL_AIX) || defined(_CALL_DARWIN)
+      pcs[n++] = *(sp+2);
+#elif defined(_CALL_SYSV)
+      pcs[n++] = *(sp+1);
+#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__))
+      // This check is in case the compiler doesn't define _CALL_AIX/etc.
+      pcs[n++] = *(sp+2);
+#elif defined(__linux)
+      // This check is in case the compiler doesn't define _CALL_SYSV.
+      pcs[n++] = *(sp+1);
+#else
+#error Need to specify the PPC ABI for your architecture.
+#endif
       if (next_sp > sp) {
         sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
       } else {
         // A frame-size of 0 is used to indicate unknown frame size.
         sizes[n] = 0;
       }
-#endif
+      n++;
     }
     sp = next_sp;
   }
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
index bbd4c43..892cd7c 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h
@@ -49,11 +49,6 @@
 // This code is inspired by a patch from David Vitek:
 //   http://code.google.com/p/google-perftools/issues/detail?id=83
 
-#ifndef BASE_STACKTRACE_WIN32_INL_H_
-#define BASE_STACKTRACE_WIN32_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
-
 #include "config.h"
 #include <windows.h>    // for GetProcAddress and GetModuleHandle
 #include <assert.h>
@@ -87,5 +82,3 @@ PERFTOOLS_DLL_DECL int GetStackFrames(void** /* pcs */,
   assert(0 == "Not yet implemented");
   return 0;
 }
-
-#endif  // BASE_STACKTRACE_WIN32_INL_H_
diff --git a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
index 6753fdb..05701e7 100644
--- a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
+++ b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h
@@ -31,13 +31,17 @@
 // Author: Sanjay Ghemawat
 //
 // Produce stack trace
-
-#ifndef BASE_STACKTRACE_X86_INL_H_
-#define BASE_STACKTRACE_X86_INL_H_
-// Note: this file is included into stacktrace.cc more than once.
-// Anything that should only be defined once should be here:
+//
+// NOTE: there is code duplication between
+// GetStackTrace, GetStackTraceWithContext, GetStackFrames and
+// GetStackFramesWithContext.  If you update one, update them all.
+//
+// There is no easy way to avoid this, because inlining
+// interferes with skip_count, and there is no portable
+// way to turn inlining off, or force it always on.
 
 #include "config.h"
+
 #include <stdlib.h>   // for NULL
 #include <assert.h>
 #if defined(HAVE_SYS_UCONTEXT_H)
@@ -186,8 +190,8 @@ static void **NextStackFrame(void **old_sp, const void *uc) {
       const ucontext_t *ucv = static_cast<const ucontext_t *>(uc);
       // This kernel does not use frame pointer in its VDSO code,
       // and so %ebp is not suitable for unwinding.
-      void **const reg_ebp =
-          reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_EBP]);
+      const void **const reg_ebp =
+          reinterpret_cast<const void **>(ucv->uc_mcontext.gregs[REG_EBP]);
       const unsigned char *const reg_eip =
          reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]);
       if (new_sp == reg_ebp &&
@@ -265,24 +269,209 @@ static void **NextStackFrame(void **old_sp, const void *uc) {
   return new_sp;
 }
 
-#endif  // BASE_STACKTRACE_X86_INL_H_
+// If you change this function, see NOTE at the top of file.
+// Same as above, but with signal ucontext_t pointer.
+int GetStackTraceWithContext(void** result,
+                             int max_depth,
+                             int skip_count,
+                             const void *uc) {
+  void **sp;
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
+  // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
+  // It's always correct on llvm, and the techniques below aren't (in
+  // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
+  // so we also prefer __builtin_frame_address when running under llvm.
+  sp = reinterpret_cast<void**>(__builtin_frame_address(0));
+#elif defined(__i386__)
+  // Stack frame format:
+  //    sp[0]   pointer to previous frame
+  //    sp[1]   caller address
+  //    sp[2]   first argument
+  //    ...
+  // NOTE: This will break under llvm, since result is a copy and not in sp[2]
+  sp = (void **)&result - 2;
+#elif defined(__x86_64__)
+  unsigned long rbp;
+  // Move the value of the register %rbp into the local variable rbp.
+  // We need 'volatile' to prevent this instruction from getting moved
+  // around during optimization to before function prologue is done.
+  // An alternative way to achieve this
+  // would be (before this __asm__ instruction) to call Noop() defined as
+  //   static void Noop() __attribute__ ((noinline));  // prevent inlining
+  //   static void Noop() { asm(""); }  // prevent optimizing-away
+  __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
+  // Arguments are passed in registers on x86-64, so we can't just
+  // offset from &result
+  sp = (void **) rbp;
+#else
+# error Using stacktrace_x86-inl.h on a non x86 architecture!
+#endif
+
+  int n = 0;
+  while (sp && n < max_depth) {
+    if (*(sp+1) == reinterpret_cast<void *>(0)) {
+      // In 64-bit code, we often see a frame that
+      // points to itself and has a return address of 0.
+      break;
+    }
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+      result[n++] = *(sp+1);
+    }
+    // Use strict unwinding rules.
+    sp = NextStackFrame<true, true>(sp, uc);
+  }
+  return n;
+}
+
+int GetStackTrace(void** result, int max_depth, int skip_count) {
+  void **sp;
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
+  // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
+  // It's always correct on llvm, and the techniques below aren't (in
+  // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
+  // so we also prefer __builtin_frame_address when running under llvm.
+  sp = reinterpret_cast<void**>(__builtin_frame_address(0));
+#elif defined(__i386__)
+  // Stack frame format:
+  //    sp[0]   pointer to previous frame
+  //    sp[1]   caller address
+  //    sp[2]   first argument
+  //    ...
+  // NOTE: This will break under llvm, since result is a copy and not in sp[2]
+  sp = (void **)&result - 2;
+#elif defined(__x86_64__)
+  unsigned long rbp;
+  // Move the value of the register %rbp into the local variable rbp.
+  // We need 'volatile' to prevent this instruction from getting moved
+  // around during optimization to before function prologue is done.
+  // An alternative way to achieve this
+  // would be (before this __asm__ instruction) to call Noop() defined as
+  //   static void Noop() __attribute__ ((noinline));  // prevent inlining
+  //   static void Noop() { asm(""); }  // prevent optimizing-away
+  __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
+  // Arguments are passed in registers on x86-64, so we can't just
+  // offset from &result
+  sp = (void **) rbp;
+#else
+# error Using stacktrace_x86-inl.h on a non x86 architecture!
+#endif
-
-// Note: this part of the file is included several times.
-// Do not put globals below.
+  int n = 0;
+  while (sp && n < max_depth) {
+    if (*(sp+1) == reinterpret_cast<void *>(0)) {
+      // In 64-bit code, we often see a frame that
+      // points to itself and has a return address of 0.
+      break;
+    }
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+      result[n++] = *(sp+1);
+    }
+    // Use strict unwinding rules.
+    sp = NextStackFrame<true, false>(sp, NULL);
+  }
+  return n;
+}
-
-// The following 4 functions are generated from the code below:
-//   GetStack{Trace,Frames}()
-//   GetStack{Trace,Frames}WithContext()
+// If you change this function, see NOTE at the top of file.
+//
+// This GetStackFrames routine shares a lot of code with GetStackTrace
+// above.  This code could have been refactored into a common routine,
+// and then both GetStackTrace/GetStackFrames could call that routine.
+// There are two problems with that:
 //
-// These functions take the following args:
-//   void** result: the stack-trace, as an array
-//   int* sizes: the size of each stack frame, as an array
-//               (GetStackFrames* only)
-//   int max_depth: the size of the result (and sizes) array(s)
-//   int skip_count: how many stack pointers to skip before storing in result
-//   void* ucp: a ucontext_t* (GetStack{Trace,Frames}WithContext only)
+// (1) The performance of the refactored code suffers substantially - the
+//     refactored routine needs to be able to record the stack trace when
+//     called from GetStackTrace, and both the stack trace and stack frame
+//     sizes when called from GetStackFrames - this introduces enough new
+//     conditionals that GetStackTrace performance can degrade by as much
+//     as 50%.
+//
+// (2) Whether the refactored routine gets inlined into GetStackTrace and
+//     GetStackFrames depends on the compiler, and we can't guarantee the
+//     behavior either way, even with "__attribute__ ((always_inline))"
+//     or "__attribute__ ((noinline))".  But we need this guarantee or the
+//     frame counts may be off by one.
+//
+// Both (1) and (2) can be addressed without this code duplication, by
+// clever use of template functions, and by defining GetStackTrace and
+// GetStackFrames as macros that expand to these template functions.
+// However, this approach comes with its own set of problems - namely,
+// macro and preprocessor trouble - for example, if GetStackTrace
+// and/or GetStackFrames is ever defined as a member function in some
+// class, we are in trouble.
+int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) {
+  void **sp;
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
+  // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
+  // It's always correct on llvm, and the techniques below aren't (in
+  // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]),
+  // so we also prefer __builtin_frame_address when running under llvm.
+  sp = reinterpret_cast<void**>(__builtin_frame_address(0));
+#elif defined(__i386__)
+  // Stack frame format:
+  //    sp[0]   pointer to previous frame
+  //    sp[1]   caller address
+  //    sp[2]   first argument
+  //    ...
+  sp = (void **)&pcs - 2;
+#elif defined(__x86_64__)
+  unsigned long rbp;
+  // Move the value of the register %rbp into the local variable rbp.
+  // We need 'volatile' to prevent this instruction from getting moved
+  // around during optimization to before function prologue is done.
+  // An alternative way to achieve this
+  // would be (before this __asm__ instruction) to call Noop() defined as
+  //   static void Noop() __attribute__ ((noinline));  // prevent inlining
+  //   static void Noop() { asm(""); }  // prevent optimizing-away
+  __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp));
+  // Arguments are passed in registers on x86-64, so we can't just
+  // offset from &result
+  sp = (void **) rbp;
+#else
+# error Using stacktrace_x86-inl.h on a non x86 architecture!
+#endif
+
+  int n = 0;
+  while (sp && n < max_depth) {
+    if (*(sp+1) == reinterpret_cast<void *>(0)) {
+      // In 64-bit code, we often see a frame that
+      // points to itself and has a return address of 0.
+      break;
+    }
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<false, false>(sp, NULL);
+    if (skip_count > 0) {
+      skip_count--;
+    } else {
+      pcs[n] = *(sp+1);
+      if (next_sp > sp) {
+        sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
+      } else {
+        // A frame-size of 0 is used to indicate unknown frame size.
+        sizes[n] = 0;
+      }
+      n++;
+    }
+    sp = next_sp;
+  }
+  return n;
+}
-int GET_STACK_TRACE_OR_FRAMES {
+// If you change this function, see NOTE at the top of file.
+// Same as above, but with signal ucontext_t pointer.
+int GetStackFramesWithContext(void** pcs,
+                              int* sizes,
+                              int max_depth,
+                              int skip_count,
+                              const void *uc) {
   void **sp;
 #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__
   // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8.
@@ -322,22 +511,22 @@ int GET_STACK_TRACE_OR_FRAMES {
       // points to itself and has a return address of 0.
       break;
     }
-#if !IS_WITH_CONTEXT
-    const void *const ucp = NULL;
-#endif
-    void **next_sp = NextStackFrame<!IS_STACK_FRAMES, IS_WITH_CONTEXT>(sp, ucp);
+    // The GetStackFrames routine is called when we are in some
+    // informational context (the failure signal handler for example).
+    // Use the non-strict unwinding rules to produce a stack trace
+    // that is as complete as possible (even if it contains a few bogus
+    // entries in some rare cases).
+    void **next_sp = NextStackFrame<false, true>(sp, uc);
     if (skip_count > 0) {
       skip_count--;
     } else {
-      result[n] = *(sp+1);
-#if IS_STACK_FRAMES
+      pcs[n] = *(sp+1);
       if (next_sp > sp) {
         sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp;
       } else {
         // A frame-size of 0 is used to indicate unknown frame size.
         sizes[n] = 0;
       }
-#endif
       n++;
     }
     sp = next_sp;
diff --git a/third_party/tcmalloc/chromium/src/symbolize.cc b/third_party/tcmalloc/chromium/src/symbolize.cc
index ff45e3e..9dd890e 100644
--- a/third_party/tcmalloc/chromium/src/symbolize.cc
+++ b/third_party/tcmalloc/chromium/src/symbolize.cc
@@ -87,40 +87,16 @@ int SymbolTable::Symbolize() {
 #else
   // All this work is to do two-way communication.  ugh.
   extern char* program_invocation_name;  // gcc provides this
-  int *child_in = NULL;   // file descriptors
-  int *child_out = NULL;  // for now, we don't worry about child_err
-  int child_fds[5][2];    // socketpair may be called up to five times below
-
-  // The client program may close its stdin and/or stdout and/or stderr
-  // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
-  // In this case the communication between the forked processes may be broken
-  // if either the parent or the child tries to close or duplicate these
-  // descriptors. The loop below produces two pairs of file descriptors, each
-  // greater than 2 (stderr).
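An aside on the guard being deleted in this hunk: POSIX hands out the lowest free descriptor number, so a program that has closed stdin, stdout, or stderr can receive 0, 1, or 2 from socketpair(), and the later stdio redirection in the forked child would then clobber the communication channel. A minimal, self-contained sketch of that guard follows; the function name is illustrative and not part of this patch.

  #include <sys/socket.h>
  #include <unistd.h>

  // Returns true and fills fds[] with a socketpair whose descriptors are
  // both above 2, so they cannot collide with stdin/stdout/stderr.
  static bool MakeFdPairAbove2(int fds[2]) {
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == -1)
      return false;                 // out of descriptors or similar failure
    if (fds[0] > 2 && fds[1] > 2)
      return true;                  // safe: neither fd is a stdio descriptor
    close(fds[0]);                  // unlucky: got 0, 1 or 2; discard both
    close(fds[1]);                  // and let the caller retry (the deleted
    return false;                   // loop below retried up to five times)
  }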
-  for (int i = 0; i < 5; i++) {
-    if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
-      for (int j = 0; j < i; j++) {
-        close(child_fds[j][0]);
-        close(child_fds[j][1]);
-        return 0;
-      }
-    } else {
-      if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
-        if (child_in == NULL) {
-          child_in = child_fds[i];
-        } else {
-          child_out = child_fds[i];
-          for (int j = 0; j < i; j++) {
-            if (child_fds[j] == child_in) continue;
-            close(child_fds[j][0]);
-            close(child_fds[j][1]);
-          }
-          break;
-        }
-      }
-    }
-  }
+  int child_in[2];   // file descriptors
+  int child_out[2];  // for now, we don't worry about child_err
+  if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_in) == -1) {
+    return 0;
+  }
+  if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_out) == -1) {
+    close(child_in[0]);
+    close(child_in[1]);
+    return 0;
   }
-
   switch (fork()) {
     case -1: {  // error
       close(child_in[0]);
diff --git a/third_party/tcmalloc/chromium/src/system-alloc.cc b/third_party/tcmalloc/chromium/src/system-alloc.cc
index 29bed80..21d9b43 100644
--- a/third_party/tcmalloc/chromium/src/system-alloc.cc
+++ b/third_party/tcmalloc/chromium/src/system-alloc.cc
@@ -78,7 +78,7 @@ union MemoryAligner {
   void* p;
   double d;
   size_t s;
-} CACHELINE_ALIGNED;
+};
 
 static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
 
@@ -150,10 +150,6 @@ bool RegisterSystemAllocator(SysAllocator *a, int priority) {
 
 void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
                               size_t alignment) {
-#ifndef HAVE_SBRK
-  failed_ = true;
-  return NULL;
-#else
   // Check if we should use sbrk allocation.
   // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized
   // state) and eventually gets initialized to the specified value.  Note
@@ -168,16 +164,16 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
   // a strict check here
   if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL;
 
-  // This doesn't overflow because TCMalloc_SystemAlloc has already
-  // tested for overflow at the alignment boundary.
-  size = ((size + alignment - 1) / alignment) * alignment;
-
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
+  // could theoretically return the "extra" bytes here, but this
+  // is simple and correct.
   if (actual_size) {
     *actual_size = size;
   }
 
+  // This doesn't overflow because TCMalloc_SystemAlloc has already
+  // tested for overflow at the alignment boundary.
+  size = ((size + alignment - 1) / alignment) * alignment;
+
   // Check that we're not asking for so much more memory that we'd
   // wrap around the end of the virtual address space.  (This seems
   // like something sbrk() should check for us, and indeed opensolaris
@@ -220,7 +216,6 @@ void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size,
     ptr += alignment - (ptr & (alignment-1));
   }
   return reinterpret_cast<void*>(ptr);
-#endif  // HAVE_SBRK
 }
 
 void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) {
@@ -243,6 +238,12 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
     return NULL;
   }
 
+  // could theoretically return the "extra" bytes here, but this
+  // is simple and correct.
+  if (actual_size) {
+    *actual_size = size;
+  }
+
   // Enforce page alignment
   if (pagesize == 0) pagesize = getpagesize();
   if (alignment < pagesize) alignment = pagesize;
@@ -252,12 +253,6 @@ void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size,
   }
   size = aligned_size;
 
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
   // Ask for extra memory if alignment > pagesize
   size_t extra = 0;
   if (alignment > pagesize) {
@@ -333,6 +328,12 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
     initialized = true;
   }
 
+  // could theoretically return the "extra" bytes here, but this
+  // is simple and correct.
+  if (actual_size) {
+    *actual_size = size;
+  }
+
   // Enforce page alignment
   if (pagesize == 0) pagesize = getpagesize();
   if (alignment < pagesize) alignment = pagesize;
@@ -342,12 +343,6 @@ void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size,
   }
   size = aligned_size;
 
-  // "actual_size" indicates that the bytes from the returned pointer
-  // p up to and including (p + actual_size - 1) have been allocated.
-  if (actual_size) {
-    *actual_size = size;
-  }
-
   // Ask for extra memory if alignment > pagesize
   size_t extra = 0;
   if (alignment > pagesize) {
diff --git a/third_party/tcmalloc/chromium/src/system-alloc.h b/third_party/tcmalloc/chromium/src/system-alloc.h
index 8d982ef..60affed 100644
--- a/third_party/tcmalloc/chromium/src/system-alloc.h
+++ b/third_party/tcmalloc/chromium/src/system-alloc.h
@@ -48,11 +48,7 @@
 // may optionally return more bytes than asked for (i.e. return an
 // entire "huge" page if a huge page allocator is in use).
 //
-// The returned pointer is a multiple of "alignment" if non-zero. The
-// returned pointer will always be aligned suitably for holding a
-// void*, double, or size_t. In addition, if this platform defines
-// CACHELINE_ALIGNED, the return pointer will always be cacheline
-// aligned.
+// The returned pointer is a multiple of "alignment" if non-zero.
 //
 // Returns NULL when out of memory.
 extern void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes,
diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
index 79825ce..6acead8 100644
--- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
+++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
@@ -228,9 +228,8 @@ extern "C" {
       ATTRIBUTE_SECTION(google_malloc);
   void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW
       ATTRIBUTE_SECTION(google_malloc);
-  // Surprisingly, standard C++ library implementations use a
-  // nothrow-delete internally.  See, eg:
-  //   http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
+  // Surprisingly, compilers use a nothrow-delete internally.  See, eg:
+  //   http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
   void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW
       ATTRIBUTE_SECTION(google_malloc);
   void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW
@@ -254,9 +253,9 @@ extern "C" {
 // NOTE: we make many of these symbols weak, but do so in the makefile
 //       (via objcopy -W) and not here.  That ends up being more portable.
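For context on the ALIAS macro used in the hunk below: GCC's alias attribute makes one symbol name resolve to another symbol's definition, which is how tcmalloc forwards operator new/delete to its tc_* entry points. A minimal standalone sketch, with hypothetical names that are not from this patch:

  // impl.cc - compile with gcc or clang on an ELF target.
  extern "C" int impl(int x) { return x + 1; }   // the real definition

  // "api" becomes another name for impl's code; no forwarding thunk
  // is emitted, so the alias has zero call overhead.
  extern "C" int api(int x) __attribute__((alias("impl")));

  // Callers of api(41) and impl(41) run the same machine code.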
 # define ALIAS(x) __attribute__ ((alias (x)))
-void* operator new(size_t size) throw (std::bad_alloc) ALIAS("tc_new");
+void* operator new(size_t size) ALIAS("tc_new");
 void operator delete(void* p) __THROW ALIAS("tc_delete");
-void* operator new[](size_t size) throw (std::bad_alloc) ALIAS("tc_newarray");
+void* operator new[](size_t size) ALIAS("tc_newarray");
 void operator delete[](void* p) __THROW ALIAS("tc_deletearray");
 void* operator new(size_t size, const std::nothrow_t&) __THROW
     ALIAS("tc_new_nothrow");
@@ -265,7 +264,7 @@ void* operator new[](size_t size, const std::nothrow_t&) __THROW
 void operator delete(void* size, const std::nothrow_t&) __THROW
     ALIAS("tc_delete_nothrow");
 void operator delete[](void* size, const std::nothrow_t&) __THROW
-    ALIAS("tc_deletearray_nothrow");
+  ALIAS("tc_deletearray_nothrow");
 extern "C" {
   void* malloc(size_t size) __THROW   ALIAS("tc_malloc");
   void  free(void* ptr) __THROW       ALIAS("tc_free");
@@ -805,17 +804,7 @@ TCMallocGuard::TCMallocGuard() {
     tc_free(tc_malloc(1));
     ThreadCache::InitTSD();
     tc_free(tc_malloc(1));
-    // Either we, or debugallocation.cc, or valgrind will control memory
-    // management.  We register our extension if we're the winner.
-#ifdef TCMALLOC_FOR_DEBUGALLOCATION
-    // Let debugallocation register its extension.
-#else
-    if (RunningOnValgrind()) {
-      // Let Valgrind use its own malloc (so don't register our extension).
-    } else {
-      MallocExtension::Register(new TCMallocImplementation);
-    }
-#endif
+    MallocExtension::Register(new TCMallocImplementation);
   }
 }
 
@@ -837,28 +826,7 @@ static TCMallocGuard module_enter_exit_hook;
 // Helpers for the exported routines below
 //-------------------------------------------------------------------
 
-static inline void* CheckedMallocResult(void *result) {
-  Span* fetched_span;
-  size_t cl;
-
-  if (result != NULL) {
-    ASSERT(Static::pageheap()->GetSizeClassOrSpan(result, &cl, &fetched_span));
-  }
-
-  return result;
-}
-
-static inline void* SpanToMallocResult(Span *span) {
-  Span* fetched_span = NULL;
-  size_t cl = 0;
-  ASSERT(Static::pageheap()->GetSizeClassOrSpan(span->start_ptr(),
-                                                &cl, &fetched_span));
-  ASSERT(cl == kLargeSizeClass);
-  ASSERT(span == fetched_span);
-  return span->start_ptr();
-}
-
-static void* DoSampledAllocation(size_t size) {
+static Span* DoSampledAllocation(size_t size) {
   // Grab the stack trace outside the heap lock
   StackTrace tmp;
   tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1);
@@ -866,8 +834,7 @@ static void* DoSampledAllocation(size_t size) {
 
   SpinLockHolder h(Static::pageheap_lock());
   // Allocate span
-  Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size),
-                                       kLargeSizeClass, kPageSize);
+  Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
   if (span == NULL) {
     return NULL;
   }
@@ -884,7 +851,26 @@ static void* DoSampledAllocation(size_t size) {
   span->objects = stack;
   tcmalloc::DLL_Prepend(Static::sampled_objects(), span);
 
-  return SpanToMallocResult(span);
+  return span;
+}
+
+static inline bool CheckCachedSizeClass(void *ptr) {
+  PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
+  return cached_value == 0 ||
+      cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
+}
+
+static inline void* CheckedMallocResult(void *result)
+{
+  ASSERT(result == 0 || CheckCachedSizeClass(result));
+  return result;
+}
+
+static inline void* SpanToMallocResult(Span *span) {
+  Static::pageheap()->CacheSizeClass(span->start, 0);
+  return
+      CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
 }
 
 // Copy of FLAGS_tcmalloc_large_alloc_report_threshold with
@@ -930,39 +916,24 @@ inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) {
   return tc_new_mode ? cpp_memalign(align, size) : do_memalign(align, size);
 }
 
-// Must be called with the page lock held.
-inline bool should_report_large(Length num_pages) {
-  const int64 threshold = large_alloc_threshold;
-  if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
-    // Increase the threshold by 1/8 every time we generate a report.
-    // We cap the threshold at 8GB to avoid overflow problems.
-    large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
-                             ? threshold + threshold/8 : 8ll<<30);
-    return true;
-  }
-  return false;
-}
-
 // Helper for do_malloc().
-inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
-  void* result;
-  bool report_large;
-
-  Length num_pages = tcmalloc::pages(size);
-  size = num_pages << kPageShift;
-
-  if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
-    result = DoSampledAllocation(size);
-
-    SpinLockHolder h(Static::pageheap_lock());
-    report_large = should_report_large(num_pages);
-  } else {
+inline void* do_malloc_pages(Length num_pages) {
+  Span *span;
+  bool report_large = false;
+  {
     SpinLockHolder h(Static::pageheap_lock());
-    Span* span = Static::pageheap()->New(num_pages, kLargeSizeClass, kPageSize);
-    result = (span == NULL ? NULL : SpanToMallocResult(span));
-    report_large = should_report_large(num_pages);
+    span = Static::pageheap()->New(num_pages);
+    const int64 threshold = large_alloc_threshold;
+    if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
+      // Increase the threshold by 1/8 every time we generate a report.
+      // We cap the threshold at 8GB to avoid overflow problems.
+      large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
+                               ? threshold + threshold/8 : 8ll<<30);
+      report_large = true;
+    }
   }
 
+  void* result = (span == NULL ? NULL : SpanToMallocResult(span));
   if (report_large) {
     ReportLargeAlloc(num_pages, result);
   }
@@ -974,19 +945,17 @@ inline void* do_malloc(size_t size) {
   // The following call forces module initialization
   ThreadCache* heap = ThreadCache::GetCache();
 
-  if (size <= kMaxSize) {
-    size_t cl = Static::sizemap()->SizeClass(size);
-    size = Static::sizemap()->class_to_size(cl);
-
-    if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
-      ret = DoSampledAllocation(size);
-    } else {
-      // The common case, and also the simplest.  This just pops the
-      // size-appropriate freelist, after replenishing it if it's empty.
-      ret = CheckedMallocResult(heap->Allocate(size, cl));
+  if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
+    Span* span = DoSampledAllocation(size);
+    if (span != NULL) {
+      ret = SpanToMallocResult(span);
     }
+  } else if (size <= kMaxSize) {
+    // The common case, and also the simplest.  This just pops the
+    // size-appropriate freelist, after replenishing it if it's empty.
+    ret = CheckedMallocResult(heap->Allocate(size));
   } else {
-    ret = do_malloc_pages(heap, size);
+    ret = do_malloc_pages(tcmalloc::pages(size));
   }
   if (ret == NULL) errno = ENOMEM;
   return ret;
@@ -1014,22 +983,28 @@ static inline ThreadCache* GetCacheIfPresent() {
 inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
   if (ptr == NULL) return;
   ASSERT(Static::pageheap() != NULL);  // Should not call free() before malloc()
-  Span* span;
-  size_t cl;
-
-  if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
-    // result can be false because the pointer passed in is invalid
-    // (not something returned by malloc or friends), or because the
-    // pointer was allocated with some other allocator besides
-    // tcmalloc.  The latter can happen if tcmalloc is linked in via
-    // a dynamic library, but is not listed last on the link line.
-    // In that case, libraries after it on the link line will
-    // allocate with libc malloc, but free with tcmalloc's free.
-    (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
-    return;
+  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  Span* span = NULL;
+  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
+
+  if (cl == 0) {
+    span = Static::pageheap()->GetDescriptor(p);
+    if (!span) {
+      // span can be NULL because the pointer passed in is invalid
+      // (not something returned by malloc or friends), or because the
+      // pointer was allocated with some other allocator besides
+      // tcmalloc.  The latter can happen if tcmalloc is linked in via
+      // a dynamic library, but is not listed last on the link line.
+      // In that case, libraries after it on the link line will
+      // allocate with libc malloc, but free with tcmalloc's free.
+      (*invalid_free_fn)(ptr);  // Decide how to handle the bad free request
+      return;
+    }
+    cl = span->sizeclass;
+    Static::pageheap()->CacheSizeClass(p, cl);
   }
-
-  if (cl != kLargeSizeClass) {
+  if (cl != 0) {
+    ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
     ThreadCache* heap = GetCacheIfPresent();
     if (heap != NULL) {
       heap->Deallocate(ptr, cl);
@@ -1040,7 +1015,8 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
     }
   } else {
     SpinLockHolder h(Static::pageheap_lock());
-    ASSERT(span != NULL && ptr == span->start_ptr());
+    ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
+    ASSERT(span != NULL && span->start == p);
     if (span->sample) {
       tcmalloc::DLL_Remove(span);
       Static::stacktrace_allocator()->Delete(
@@ -1060,17 +1036,20 @@ inline size_t GetSizeWithCallback(void* ptr,
                                   size_t (*invalid_getsize_fn)(void*)) {
   if (ptr == NULL)
     return 0;
-
-  Span* span;
-  size_t cl;
-  if (!Static::pageheap()->GetSizeClassOrSpan(ptr, &cl, &span)) {
-    return (*invalid_getsize_fn)(ptr);
-  }
-
-  if (cl != kLargeSizeClass) {
+  const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
+  size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
+  if (cl != 0) {
     return Static::sizemap()->ByteSizeForClass(cl);
   } else {
-    return span->length << kPageShift;
+    Span *span = Static::pageheap()->GetDescriptor(p);
+    if (span == NULL) {  // means we do not own this memory
+      return (*invalid_getsize_fn)(ptr);
+    } else if (span->sizeclass != 0) {
+      Static::pageheap()->CacheSizeClass(p, span->sizeclass);
+      return Static::sizemap()->ByteSizeForClass(span->sizeclass);
+    } else {
+      return span->length << kPageShift;
+    }
   }
 }
 
@@ -1157,18 +1136,47 @@ void* do_memalign(size_t align, size_t size) {
     }
     if (cl < kNumClasses) {
       ThreadCache* heap = ThreadCache::GetCache();
-      size = Static::sizemap()->class_to_size(cl);
-      return CheckedMallocResult(heap->Allocate(size, cl));
+      return CheckedMallocResult(heap->Allocate(
+                                     Static::sizemap()->class_to_size(cl)));
     }
   }
 
   // We will allocate directly from the page heap
   SpinLockHolder h(Static::pageheap_lock());
 
-  // Any page-level allocation will be fine
-  Span* span = Static::pageheap()->New(tcmalloc::pages(size),
-                                       kLargeSizeClass, align);
-  return span == NULL ? NULL : SpanToMallocResult(span);
+  if (align <= kPageSize) {
+    // Any page-level allocation will be fine
+    // TODO: We could put the rest of this page in the appropriate
+    // TODO: cache but it does not seem worth it.
+    Span* span = Static::pageheap()->New(tcmalloc::pages(size));
+    return span == NULL ? NULL : SpanToMallocResult(span);
+  }
+
+  // Allocate extra pages and carve off an aligned portion
+  const Length alloc = tcmalloc::pages(size + align);
+  Span* span = Static::pageheap()->New(alloc);
+  if (span == NULL) return NULL;
+
+  // Skip starting portion so that we end up aligned
+  Length skip = 0;
+  while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
+    skip++;
+  }
+  ASSERT(skip < alloc);
+  if (skip > 0) {
+    Span* rest = Static::pageheap()->Split(span, skip);
+    Static::pageheap()->Delete(span);
+    span = rest;
+  }
+
+  // Skip trailing portion that we do not need to return
+  const Length needed = tcmalloc::pages(size);
+  ASSERT(span->length >= needed);
+  if (span->length > needed) {
+    Span* trailer = Static::pageheap()->Split(span, needed);
+    Static::pageheap()->Delete(trailer);
+  }
+  return SpanToMallocResult(span);
 }
 
 // Helpers for use by exported routines below:
@@ -1384,7 +1392,8 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
   return p;
 }
 
-extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(
    size_t size, const std::nothrow_t&) __THROW {
   void* p = cpp_alloc(size, true);
   MallocHook::InvokeNewHook(p, size);
   return p;
@@ -1395,10 +1404,10 @@ extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW {
   do_free(p);
 }
 
-// Standard C++ library implementations define and use this
-// (via ::operator delete(ptr, nothrow)).
+// Compilers define and use this (via ::operator delete(ptr, nothrow)).
 // But it's really the same as normal delete, so we just do the same thing.
-extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(
    void* p, const std::nothrow_t&) __THROW {
   MallocHook::InvokeDeleteHook(p);
   do_free(p);
 }
@@ -1414,8 +1423,8 @@ extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) {
   return p;
 }
 
-extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
-    __THROW {
+extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(
    size_t size, const std::nothrow_t&) __THROW {
   void* p = cpp_alloc(size, true);
   MallocHook::InvokeNewHook(p, size);
   return p;
@@ -1426,7 +1435,8 @@ extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW {
   do_free(p);
 }
 
-extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW {
+extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(
    void* p, const std::nothrow_t&) __THROW {
   MallocHook::InvokeDeleteHook(p);
   do_free(p);
 }
diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
index c482187..ca00e36 100644
--- a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
+++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc
@@ -75,14 +75,7 @@ static int test_counter = 0;  // incremented every time the macro is called
 // This flag won't be compiled in in opt mode.
 DECLARE_int32(max_free_queue_size);
 
-// Test match as well as mismatch rules:
 TEST(DebugAllocationTest, DeallocMismatch) {
-  // malloc can be matched only by free
-  // new can be matched only by delete and delete(nothrow)
-  // new[] can be matched only by delete[] and delete[](nothrow)
-  // new(nothrow) can be matched only by delete and delete(nothrow)
-  // new(nothrow)[] can be matched only by delete[] and delete[](nothrow)
-
   // Allocate with malloc.
   {
     int* x = static_cast<int*>(malloc(sizeof(*x)));
@@ -95,41 +88,17 @@ TEST(DebugAllocationTest, DeallocMismatch) {
   // Allocate with new.
   {
     int* x = new int;
-    int* y = new int;
     IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
     IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]");
     delete x;
-    ::operator delete(y, std::nothrow);
   }
 
   // Allocate with new[].
   {
     int* x = new int[1];
-    int* y = new int[1];
-    IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
-    IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete");
-    delete [] x;
-    ::operator delete[](y, std::nothrow);
-  }
-
-  // Allocate with new(nothrow).
-  {
-    int* x = new(std::nothrow) int;
-    int* y = new(std::nothrow) int;
-    IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
-    IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]");
-    delete x;
-    ::operator delete(y, std::nothrow);
-  }
-
-  // Allocate with new(nothrow)[].
-  {
-    int* x = new(std::nothrow) int[1];
-    int* y = new(std::nothrow) int[1];
     IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free");
     IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete");
     delete [] x;
-    ::operator delete[](y, std::nothrow);
   }
 }
diff --git a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
index 4a83fc2..9f0c08c 100644
--- a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
+++ b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh
@@ -139,13 +139,13 @@ EARLY_MSG="Starting tracking the heap$"
 
 Test 60 0 "$EARLY_MSG" "" \
   HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
-  PERFTOOLS_VERBOSE=10 || exit 5
+  PERFTOOLS_VERBOSE=1 || exit 5
 Test 60 0 "MemoryRegionMap Init$" "" \
   HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
-  PERFTOOLS_VERBOSE=11 || exit 6
+  PERFTOOLS_VERBOSE=2 || exit 6
 Test 60 0 "" "$EARLY_MSG" \
   HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \
-  PERFTOOLS_VERBOSE=-11 || exit 7
+  PERFTOOLS_VERBOSE=-2 || exit 7
 
 # These invocations should fail with very high probability,
 # rather than return 0 or hang (1 == exit(1), 134 == abort(), 139 = SIGSEGV):
@@ -162,10 +162,10 @@ Test 60 1 "MakeALeak" "" \
 
 # Test that very early log messages are present and controllable:
 Test 60 1 "Starting tracking the heap$" "" \
-  HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=10 \
+  HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=1 \
   || exit 11
 Test 60 1 "" "Starting tracking the heap" \
-  HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-10 \
+  HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-1 \
  || exit 12
 
 cd /    # so we're not in TMPDIR when we delete it
diff --git a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
index fd444da..9120b78 100644
--- a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
+++ b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc
@@ -26,7 +26,7 @@ static void TestPageHeap_Stats() {
   CheckStats(ph, 0, 0, 0);
 
   // Allocate a span 's1'
-  tcmalloc::Span* s1 = ph->New(256, kLargeSizeClass, kPageSize);
+  tcmalloc::Span* s1 = ph->New(256);
   CheckStats(ph, 256, 0, 0);
 
   // Split span 's1' into 's1', 's2'.  Delete 's2'
diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
index 19371b7..1908b03 100644
--- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
+++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc
@@ -56,11 +56,12 @@ static void test_other_thread() {
 
   int i, m;
   char b[128];
-  MutexLock ml(&mutex);
   for (m = 0; m < 1000000; ++m) {          // run millions of times
     for (i = 0; i < g_iters; ++i ) {
+      MutexLock ml(&mutex);
       result ^= i;
     }
+    MutexLock ml(&mutex);
     snprintf(b, sizeof(b), "%d", result);  // get some libc action
   }
 #endif
@@ -69,11 +70,12 @@ static void test_other_thread() {
 static void test_main_thread() {
   int i, m;
   char b[128];
-  MutexLock ml(&mutex);
   for (m = 0; m < 1000000; ++m) {          // run millions of times
     for (i = 0; i < g_iters; ++i ) {
+      MutexLock ml(&mutex);
       result ^= i;
     }
+    MutexLock ml(&mutex);
     snprintf(b, sizeof(b), "%d", result);  // get some libc action
   }
 }
diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
index 4668fa7..5766f2e 100644
--- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
+++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh
@@ -206,27 +206,28 @@ CPUPROFILE="$TMPDIR/p5" "$PROFILER2" 50 || RegisterFailure
 CPUPROFILE="$TMPDIR/p6" "$PROFILER2" 100 || RegisterFailure
 VerifySimilar p5 "$PROFILER2_REALNAME" p6 "$PROFILER2_REALNAME" 2
 
-CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 30 || RegisterFailure
-CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 60 || RegisterFailure
+# When we compile with threads, things take a lot longer even when we only use 1
+CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 10 || RegisterFailure
+CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 20 || RegisterFailure
 VerifySimilar p5b "$PROFILER3_REALNAME" p5c "$PROFILER3_REALNAME" 2
 
 # Now try what happens when we use threads
-"$PROFILER3" 30 2 "$TMPDIR/p7" || RegisterFailure
-"$PROFILER3" 60 2 "$TMPDIR/p8" || RegisterFailure
+"$PROFILER3" 5 2 "$TMPDIR/p7" || RegisterFailure
+"$PROFILER3" 10 2 "$TMPDIR/p8" || RegisterFailure
 VerifySimilar p7 "$PROFILER3_REALNAME" p8 "$PROFILER3_REALNAME" 2
 
-"$PROFILER4" 30 2 "$TMPDIR/p9" || RegisterFailure
-"$PROFILER4" 60 2 "$TMPDIR/p10" || RegisterFailure
+"$PROFILER4" 5 2 "$TMPDIR/p9" || RegisterFailure
+"$PROFILER4" 10 2 "$TMPDIR/p10" || RegisterFailure
 VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2
 
 # More threads!
-"$PROFILER4" 25 3 "$TMPDIR/p9" || RegisterFailure
-"$PROFILER4" 50 3 "$TMPDIR/p10" || RegisterFailure
+"$PROFILER4" 2 3 "$TMPDIR/p9" || RegisterFailure
+"$PROFILER4" 4 3 "$TMPDIR/p10" || RegisterFailure
 VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2
 
 # Compare how much time the main thread takes compared to the other threads
 # Recall the main thread runs twice as long as the other threads, by design.
-"$PROFILER4" 20 4 "$TMPDIR/p11" || RegisterFailure +"$PROFILER4" 2 4 "$TMPDIR/p11" || RegisterFailure VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 # Test symbol save and restore @@ -235,14 +236,14 @@ VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 >"$TMPDIR/p13" 2>/dev/null || RegisterFailure VerifyIdentical p12 "$PROFILER1_REALNAME" p13 "" || RegisterFailure -"$PROFILER3" 30 2 "$TMPDIR/p14" || RegisterFailure +"$PROFILER3" 5 2 "$TMPDIR/p14" || RegisterFailure "$PPROF" $PPROF_FLAGS "$PROFILER3_REALNAME" "$TMPDIR/p14" --raw \ >"$TMPDIR/p15" 2>/dev/null || RegisterFailure VerifyIdentical p14 "$PROFILER3_REALNAME" p15 "" || RegisterFailure # Test using ITIMER_REAL instead of ITIMER_PROF. -env CPUPROFILE_REALTIME=1 "$PROFILER3" 30 2 "$TMPDIR/p16" || RegisterFailure -env CPUPROFILE_REALTIME=1 "$PROFILER3" 60 2 "$TMPDIR/p17" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 5 2 "$TMPDIR/p16" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 10 2 "$TMPDIR/p17" || RegisterFailure VerifySimilar p16 "$PROFILER3_REALNAME" p17 "$PROFILER3_REALNAME" 2 diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc index 6b2ec26..25bfd6a 100644 --- a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc @@ -977,7 +977,7 @@ static int RunAllTests(int argc, char** argv) { } // This code stresses some of the memory allocation via STL. - // It may call operator delete(void*, nothrow_t). + // In particular, it calls operator delete(void*, nothrow_t). fprintf(LOGSTREAM, "Testing STL use\n"); { std::vector<int> v; diff --git a/third_party/tcmalloc/chromium/src/thread_cache.h b/third_party/tcmalloc/chromium/src/thread_cache.h index 1165447..4c6a233 100644 --- a/third_party/tcmalloc/chromium/src/thread_cache.h +++ b/third_party/tcmalloc/chromium/src/thread_cache.h @@ -79,9 +79,7 @@ class ThreadCache { // Total byte size in cache size_t Size() const { return size_; } - // Allocate an object of the given size and class. The size given - // must be the same as the size of the class in the size map. - void* Allocate(size_t size, size_t cl); + void* Allocate(size_t size); void Deallocate(void* ptr, size_t size_class); void Scavenge(); @@ -295,18 +293,15 @@ class ThreadCache { // across all ThreadCaches. Protected by Static::pageheap_lock. static ssize_t unclaimed_cache_space_; - // This class is laid out with the most frequently used fields - // first so that hot elements are placed on the same cache line. + // Warning: the offset of list_ affects performance. On general + // principles, we don't like list_[x] to span multiple L1 cache + // lines. However, merely placing list_ at offset 0 here seems to + // cause cache conflicts. size_t size_; // Combined size of data size_t max_size_; // size_ > max_size_ --> Scavenge() - - // We sample allocations, biased by the size of the allocation - Sampler sampler_; // A sampler - - FreeList list_[kNumClasses]; // Array indexed by size-class - pthread_t tid_; // Which thread owns it + FreeList list_[kNumClasses]; // Array indexed by size-class bool in_setspecific_; // In call to pthread_setspecific? // Allocate a new heap. REQUIRES: Static::pageheap_lock is held. @@ -318,10 +313,9 @@ class ThreadCache { static void DeleteCache(ThreadCache* heap); static void RecomputePerThreadCacheSize(); - // Ensure that this class is cacheline-aligned. 
-  // performance, as false sharing would negate many of the benefits
-  // of a per-thread cache.
-} CACHELINE_ALIGNED;
+  // We sample allocations, biased by the size of the allocation
+  Sampler sampler_;              // A sampler
+};
 
 // Allocator for thread heaps
 // This is logically part of the ThreadCache class, but MSVC, at
@@ -337,15 +331,15 @@ inline bool ThreadCache::SampleAllocation(size_t k) {
   return sampler_.SampleAllocation(k);
 }
 
-inline void* ThreadCache::Allocate(size_t size, size_t cl) {
+inline void* ThreadCache::Allocate(size_t size) {
   ASSERT(size <= kMaxSize);
-  ASSERT(size == Static::sizemap()->ByteSizeForClass(cl));
-
+  const size_t cl = Static::sizemap()->SizeClass(size);
+  const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl);
   FreeList* list = &list_[cl];
   if (list->empty()) {
-    return FetchFromCentralCache(cl, size);
+    return FetchFromCentralCache(cl, alloc_size);
   }
-  size_ -= size;
+  size_ -= alloc_size;
   return list->Pop();
 }
 
diff --git a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
index 5c65a03..97b614b 100644
--- a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
+++ b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c
@@ -48,12 +48,6 @@
 #define SEARCH_CAP (1024*1024)
 #define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols"
 
-void usage() {
-  fprintf(stderr, "usage: "
-          "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n");
-  fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n");
-}
-
 int main(int argc, char *argv[]) {
   DWORD  error;
   HANDLE process;
@@ -80,11 +74,10 @@ int main(int argc, char *argv[]) {
       }
       filename = argv[i+1];
       i++;     /* to skip over filename too */
-    } else if (strcmp(argv[i], "--help") == 0) {
-      usage();
-      exit(0);
     } else {
-      usage();
+      fprintf(stderr, "usage: "
+              "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n");
+      fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n");
       exit(1);
     }
   }
diff --git a/third_party/tcmalloc/chromium/src/windows/config.h b/third_party/tcmalloc/chromium/src/windows/config.h
index b5d9bb6..99de82c 100644
--- a/third_party/tcmalloc/chromium/src/windows/config.h
+++ b/third_party/tcmalloc/chromium/src/windows/config.h
@@ -261,12 +261,10 @@
 // ---------------------------------------------------------------------
 // Extra stuff not found in config.h.in
 
-// This must be defined before the windows.h is included.  We need at
-// least 0x0400 for mutex.h to have access to TryLock, and at least
-// 0x0501 for patch_functions.cc to have access to GetModuleHandleEx.
-// (This latter is an optimization we could take out if need be.)
+// This must be defined before the windows.h is included.  It's needed
+// for mutex.h, to give access to the TryLock method.
 #ifndef _WIN32_WINNT
-# define _WIN32_WINNT 0x0501
+# define _WIN32_WINNT 0x0400
 #endif
 
 // We want to make sure not to ever try to #include heap-checker.h
diff --git a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
index 663b7f9..4b97b15 100644
--- a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
+++ b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h
@@ -61,8 +61,7 @@
 #endif
 
 #ifdef __cplusplus
-#include <new>        // for std::nothrow_t
-
+#include <new>        // for nothrow_t
 extern "C" {
 #endif
   // Returns a human-readable version string.  If major, minor,
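Between these two hunks, a quick note: the declarations that follow rely on std::nothrow_t from the <new> header included just above. A tiny usage sketch of the nothrow allocation forms these functions back (illustrative only, not from this patch):

  #include <new>       // for std::nothrow

  static bool TryAlloc() {
    int* p = new (std::nothrow) int[1024];   // returns NULL on failure
    if (p == 0) return false;                // handled without an exception
    delete [] p;
    return true;
  }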
@@ -93,15 +92,16 @@ extern "C" {
 #ifdef __cplusplus
   PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW;
   PERFTOOLS_DLL_DECL void* tc_new(size_t size);
-  PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size,
-                                          const std::nothrow_t&) __THROW;
   PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW;
-  PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p,
-                                            const std::nothrow_t&) __THROW;
   PERFTOOLS_DLL_DECL void* tc_newarray(size_t size);
+  PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
+
+  PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size,
+                                          const std::nothrow_t&) __THROW;
   PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size,
                                                const std::nothrow_t&) __THROW;
-  PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW;
+  PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p,
+                                            const std::nothrow_t&) __THROW;
   PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p,
                                                  const std::nothrow_t&) __THROW;
 }
diff --git a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
index 9beb21d..726d345 100644
--- a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
+++ b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c
@@ -180,10 +180,6 @@ static void ShowSymbolInfo(HANDLE process, ULONG64 module_base) {
 #endif
 }
 
-void usage() {
-  fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n");
-}
-
 int main(int argc, char *argv[]) {
   DWORD  error;
   HANDLE process;
@@ -199,15 +195,12 @@ int main(int argc, char *argv[]) {
   for (i = 1; i < argc; i++) {
     if (strcmp(argv[i], "--demangle") == 0 || strcmp(argv[i], "-C") == 0) {
       symopts |= SYMOPT_UNDNAME;
-    } else if (strcmp(argv[i], "--help") == 0) {
-      usage();
-      exit(0);
     } else {
       break;
     }
   }
   if (i != argc - 1) {
-    usage();
+    fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n");
     exit(1);
   }
   filename = argv[i];
diff --git a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
index deb841b..c1ed37f 100644
--- a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
+++ b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc
@@ -83,7 +83,6 @@
 #endif
 
 #include <windows.h>
-#include <stdio.h>
 #include <malloc.h>       // for _msize and _expand
 #include <Psapi.h>        // for EnumProcessModules, GetModuleInformation, etc.
 #include <set>
@@ -97,6 +96,8 @@
 
 // The maximum number of modules we allow to be in one executable
 const int kMaxModules = 8182;
+// The maximum size of a module's basename
+const int kMaxModuleNameSize = 256;
 
 // These are hard-coded, unfortunately. :-( They are also probably
 // compiler specific.  See get_mangled_names.cc, in this directory,
@@ -144,15 +145,13 @@ class LibcInfo {
   LibcInfo() {
     memset(this, 0, sizeof(*this));  // easiest way to initialize the array
   }
+  bool SameAs(const LibcInfo& that) const;
+  bool SameAsModuleEntry(const ModuleEntryCopy& module_entry) const;
+
+  bool patched() const { return is_valid() && module_name_[0] != '\0'; }
+  const char* module_name() const { return is_valid() ? module_name_ : ""; }
 
-  bool patched() const { return is_valid(); }
   void set_is_valid(bool b) { is_valid_ = b; }
-  // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx:
-  // "The load address of a module (lpBaseOfDll) is the same as the HMODULE
-  // value."
-  HMODULE hmodule() const {
-    return reinterpret_cast<HMODULE>(const_cast<void*>(module_base_address_));
-  }
 
   // Populates all the windows_fn_[] vars based on our module info.
  // Returns false if windows_fn_ is all NULL's, because there's
@@ -168,6 +167,7 @@ class LibcInfo {
     memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_));
     this->module_base_address_ = that.module_base_address_;
     this->module_base_size_ = that.module_base_size_;
+    memcpy(this->module_name_, that.module_name_, sizeof(module_name_));
   }
 
   enum {
@@ -207,6 +207,7 @@ class LibcInfo {
 
   const void *module_base_address_;
   size_t module_base_size_;
+  char module_name_[kMaxModuleNameSize];
 
  public:
   // These shouldn't have to be public, since only subclasses of
@@ -284,8 +285,10 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
 
 // This is a subset of MODULEENTRY32, that we need for patching.
 struct ModuleEntryCopy {
-  LPVOID modBaseAddr;     // the same as hmodule
+  LPVOID modBaseAddr;
   DWORD modBaseSize;
+  HMODULE hModule;
+  TCHAR szModule[kMaxModuleNameSize];
   // This is not part of MODULEENTRY32, but is needed to avoid making
   // windows syscalls while we're holding patch_all_modules_lock (see
   // lock-inversion comments at patch_all_modules_lock definition, below).
@@ -294,16 +297,26 @@ struct ModuleEntryCopy {
   ModuleEntryCopy() {
     modBaseAddr = NULL;
     modBaseSize = 0;
+    hModule = NULL;
+    strcpy(szModule, "<executable>");
     for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
       rgProcAddresses[i] = LibcInfo::static_fn(i);
   }
-  ModuleEntryCopy(const MODULEINFO& mi) {
+  ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) {
     this->modBaseAddr = mi.lpBaseOfDll;
     this->modBaseSize = mi.SizeOfImage;
+    this->hModule = hmodule;
+    // TODO(csilvers): we could make more efficient by calling these
+    // lazily (not until the vars are needed, which is often never).
+    // However, there's tricky business with calling windows functions
+    // inside the patch_all_modules_lock (see the lock inversion
+    // comments with the patch_all_modules_lock definition, below), so
+    // it's safest to do it all here, where no lock is needed.
+    ::GetModuleBaseNameA(hprocess, hmodule,
+                         this->szModule, sizeof(this->szModule));
     for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++)
-      rgProcAddresses[i] = (GenericFnPtr)::GetProcAddress(
-          reinterpret_cast<const HMODULE>(mi.lpBaseOfDll),
-          LibcInfo::function_name(i));
+      rgProcAddresses[i] =
+          (GenericFnPtr)::GetProcAddress(hModule, LibcInfo::function_name(i));
   }
 };
 
@@ -466,6 +479,18 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
   { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary },
 };
 
+bool LibcInfo::SameAs(const LibcInfo& that) const {
+  return (is_valid() &&
+          module_base_address_ == that.module_base_address_ &&
+          module_base_size_ == that.module_base_size_);
+}
+
+bool LibcInfo::SameAsModuleEntry(const ModuleEntryCopy& module_entry) const {
+  return (is_valid() &&
+          module_base_address_ == module_entry.modBaseAddr &&
+          module_base_size_ == module_entry.modBaseSize);
+}
+
 bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
   // First, store the location of the function to patch before
   // patching it.  If none of these functions are found in the module,
@@ -527,9 +552,10 @@ bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) {
   CHECK(windows_fn_[kFree]);
   CHECK(windows_fn_[kRealloc]);
 
-  // OK, we successfully populated.  Let's store our member information.
+  // OK, we successfully patched.  Let's store our member information.
module_base_address_ = module_entry.modBaseAddr; module_base_size_ = module_entry.modBaseSize; + strcpy(module_name_, module_entry.szModule); return true; } @@ -610,6 +636,14 @@ void WindowsInfo::Unpatch() { // You should hold the patch_all_modules_lock when calling this. void PatchOneModuleLocked(const LibcInfo& me_info) { + // Double-check we haven't seen this module before. + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (g_module_libcs[i]->SameAs(me_info)) { + fprintf(stderr, "%s:%d: FATAL PERFTOOLS ERROR: %s double-patched somehow.\n", + __FILE__, __LINE__, g_module_libcs[i]->module_name()); + CHECK(false); + } + } // If we don't already have info on this module, let's add it. This // is where we're sad that each libcX has a different type, so we // can't use an array; instead, we have to use a switch statement. @@ -652,70 +686,52 @@ void PatchMainExecutableLocked() { // patch_all_modules_lock, inside PatchAllModules(). static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); -// last_loaded: The set of modules that were loaded the last time -// PatchAllModules was called. This is an optimization for only -// looking at modules that were added or removed from the last call. -static std::set<HMODULE> *g_last_loaded; - // Iterates over all the modules currently loaded by the executable, -// according to windows, and makes sure they're all patched. Most -// modules will already be in loaded_modules, meaning we have already -// loaded and either patched them or determined they did not need to -// be patched. Others will not, which means we need to patch them -// (if necessary). Finally, we have to go through the existing -// g_module_libcs and see if any of those are *not* in the modules -// currently loaded by the executable. If so, we need to invalidate -// them. Returns true if we did any work (patching or invalidating), -// false if we were a noop. May update loaded_modules as well. -// NOTE: you must hold the patch_all_modules_lock to access loaded_modules. +// and makes sure they're all patched. For ones that aren't, we patch +// them in. We also check that every module we had patched in the +// past is still loaded, and update internal data structures if so. +// We return true if this PatchAllModules did any work, false else. bool PatchAllModules() { std::vector<ModuleEntryCopy> modules; bool made_changes = false; const HANDLE hCurrentProcess = GetCurrentProcess(); - DWORD num_modules = 0; + MODULEINFO mi; + DWORD cbNeeded = 0; HMODULE hModules[kMaxModules]; // max # of modules we support in one process - if (!::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), - &num_modules)) { - num_modules = 0; - } - // EnumProcessModules actually set the bytes written into hModules, - // so we need to divide to make num_modules actually be a module-count. 
- num_modules /= sizeof(*hModules); - if (num_modules >= kMaxModules) { - printf("PERFTOOLS ERROR: Too many modules in this executable to try" - " to patch them all (if you need to, raise kMaxModules in" - " patch_functions.cc).\n"); - num_modules = kMaxModules; + if (::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), + &cbNeeded)) { + for (int i = 0; i < cbNeeded / sizeof(*hModules); ++i) { + if (i >= kMaxModules) { + printf("PERFTOOLS ERROR: Too many modules in this executable to try" + " to patch them all (if you need to, raise kMaxModules in" + " patch_functions.cc).\n"); + break; + } + if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) + modules.push_back(ModuleEntryCopy(hCurrentProcess, hModules[i], mi)); + } } - // Now we handle the unpatching of modules we have in g_module_libcs - // but that were not found in EnumProcessModules. We need to - // invalidate them. To speed that up, we store the EnumProcessModules - // output in a set. - // At the same time, we prepare for the adding of new modules, by - // removing from hModules all the modules we know we've already - // patched (or decided don't need to be patched). At the end, - // hModules will hold only the modules that we need to consider patching. - std::set<HMODULE> currently_loaded_modules; + // Now do the actual patching and unpatching. { SpinLockHolder h(&patch_all_modules_lock); - if (!g_last_loaded) g_last_loaded = new std::set<HMODULE>; - // At the end of this loop, currently_loaded_modules contains the - // full list of EnumProcessModules, and hModules just the ones we - // haven't handled yet. - for (int i = 0; i < num_modules; ) { - currently_loaded_modules.insert(hModules[i]); - if (g_last_loaded->count(hModules[i]) > 0) { - hModules[i] = hModules[--num_modules]; // replace element i with tail - } else { - i++; // keep element i - } - } - // Now we do the unpatching/invalidation. for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { - if (g_module_libcs[i]->patched() && - currently_loaded_modules.count(g_module_libcs[i]->hmodule()) == 0) { + if (!g_module_libcs[i]->is_valid()) + continue; + bool still_loaded = false; + for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); + it != modules.end(); ++it) { + if (g_module_libcs[i]->SameAsModuleEntry(*it)) { + // Both g_module_libcs[i] and it are still valid. Mark it by + // removing it from the vector; mark g_module_libcs[i] by + // setting a bool. + modules.erase(it); + still_loaded = true; + break; + } + } + if (!still_loaded) { // Means g_module_libcs[i] is no longer loaded (no me32 matched). // We could call Unpatch() here, but why bother? The module // has gone away, so nobody is going to call into it anyway. @@ -723,28 +739,14 @@ bool PatchAllModules() { made_changes = true; } } - // Update the loaded module cache. - g_last_loaded->swap(currently_loaded_modules); - } - - // Now that we know what modules are new, let's get the info we'll - // need to patch them. Note this *cannot* be done while holding the - // lock, since it needs to make windows calls (see the lock-inversion - // comments before the definition of patch_all_modules_lock). - MODULEINFO mi; - for (int i = 0; i < num_modules; i++) { - if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) - modules.push_back(ModuleEntryCopy(mi)); - } - // Now we can do the patching of new modules. 
- { - SpinLockHolder h(&patch_all_modules_lock); - for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); + // We've handled all the g_module_libcs. Now let's handle the rest + // of the module-entries: those that haven't already been loaded. + for (std::vector<ModuleEntryCopy>::const_iterator it = modules.begin(); it != modules.end(); ++it) { LibcInfo libc_info; if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines - PatchOneModuleLocked(libc_info); + PatchOneModuleLocked(libc_info); // updates num_patched_modules made_changes = true; } } @@ -757,10 +759,6 @@ bool PatchAllModules() { made_changes = true; } } - // TODO(csilvers): for this to be reliable, we need to also take - // into account if we *would* have patched any modules had they not - // already been loaded. (That is, made_changes should ignore - // g_last_loaded.) return made_changes; } @@ -768,9 +766,59 @@ bool PatchAllModules() { } // end unnamed namespace // --------------------------------------------------------------------- -// Now that we've done all the patching machinery, let's actually -// define the functions we're patching in. Mostly these are -// simple wrappers around the do_* routines in tcmalloc.cc. +// PatchWindowsFunctions() +// This is the function that is exposed to the outside world. +// It should be called before the program becomes multi-threaded, +// since main_executable_windows.Patch() is not thread-safe. +// --------------------------------------------------------------------- + +void PatchWindowsFunctions() { + // This does the libc patching in every module, and the main executable. + PatchAllModules(); + main_executable_windows.Patch(); +} + +#if 0 +// It's possible to unpatch all the functions when we are exiting. + +// The idea is to handle properly windows-internal data that is +// allocated before PatchWindowsFunctions is called. If all +// destruction happened in reverse order from construction, then we +// could call UnpatchWindowsFunctions at just the right time, so that +// that early-allocated data would be freed using the windows +// allocation functions rather than tcmalloc. The problem is that +// windows allocates some structures lazily, so it would allocate them +// late (using tcmalloc) and then try to deallocate them late as well. +// So instead of unpatching, we just modify all the tcmalloc routines +// so they call through to the libc rountines if the memory in +// question doesn't seem to have been allocated with tcmalloc. I keep +// this unpatch code around for reference. + +void UnpatchWindowsFunctions() { + // We need to go back to the system malloc/etc at global destruct time, + // so objects that were constructed before tcmalloc, using the system + // malloc, can destroy themselves using the system free. This depends + // on DLLs unloading in the reverse order in which they load! + // + // We also go back to the default HeapAlloc/etc, just for consistency. + // Who knows, it may help avoid weird bugs in some situations. 
+ main_executable_windows.Unpatch(); + main_executable.Unpatch(); + if (libc1.is_valid()) libc1.Unpatch(); + if (libc2.is_valid()) libc2.Unpatch(); + if (libc3.is_valid()) libc3.Unpatch(); + if (libc4.is_valid()) libc4.Unpatch(); + if (libc5.is_valid()) libc5.Unpatch(); + if (libc6.is_valid()) libc6.Unpatch(); + if (libc7.is_valid()) libc7.Unpatch(); + if (libc8.is_valid()) libc8.Unpatch(); +} +#endif + +// --------------------------------------------------------------------- +// Now that we've done all the patching machinery, let's end the file +// by actually defining the functions we're patching in. Mostly these +// are simple wrappers around the do_* routines in tcmalloc.cc. // // In fact, we #include tcmalloc.cc to get at the tcmalloc internal // do_* functions, the better to write our own hook functions. @@ -981,107 +1029,19 @@ BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) { lpBaseAddress); } -// g_load_map holds a copy of windows' refcount for how many times -// each currently loaded module has been loaded and unloaded. We use -// it as an optimization when the same module is loaded more than -// once: as long as the refcount stays above 1, we don't need to worry -// about patching because it's already patched. Likewise, we don't -// need to unpatch until the refcount drops to 0. load_map is -// maintained in LoadLibraryExW and FreeLibrary, and only covers -// modules explicitly loaded/freed via those interfaces. -static std::map<HMODULE, int>* g_load_map = NULL; - HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, HANDLE hFile, DWORD dwFlags) { - HMODULE rv; - // Check to see if the modules is already loaded, flag 0 gets a - // reference if it was loaded. If it was loaded no need to call - // PatchAllModules, just increase the reference count to match - // what GetModuleHandleExW does internally inside windows. - if (::GetModuleHandleExW(0, lpFileName, &rv)) { - return rv; - } else { - // Not already loaded, so load it. - rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) - function_info_[kLoadLibraryExW].origstub_fn)( - lpFileName, hFile, dwFlags); - // This will patch any newly loaded libraries, if patching needs - // to be done. - PatchAllModules(); - - return rv; - } + HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) + function_info_[kLoadLibraryExW].origstub_fn)( + lpFileName, hFile, dwFlags); + PatchAllModules(); + return rv; } BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) { BOOL rv = ((BOOL (WINAPI *)(HMODULE)) function_info_[kFreeLibrary].origstub_fn)(hLibModule); - - // Check to see if the module is still loaded by passing the base - // address and seeing if it comes back with the same address. If it - // is the same address it's still loaded, so the FreeLibrary() call - // was a noop, and there's no need to redo the patching. - HMODULE owner = NULL; - BOOL result = ::GetModuleHandleExW( - (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | - GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT), - (LPCWSTR)hLibModule, - &owner); - if (result && owner == hLibModule) - return rv; - PatchAllModules(); // this will fix up the list of patched libraries return rv; } - - -// --------------------------------------------------------------------- -// PatchWindowsFunctions() -// This is the function that is exposed to the outside world. -// It should be called before the program becomes multi-threaded, -// since main_executable_windows.Patch() is not thread-safe. 
-// --------------------------------------------------------------------- - -void PatchWindowsFunctions() { - // This does the libc patching in every module, and the main executable. - PatchAllModules(); - main_executable_windows.Patch(); -} - -#if 0 -// It's possible to unpatch all the functions when we are exiting. - -// The idea is to handle properly windows-internal data that is -// allocated before PatchWindowsFunctions is called. If all -// destruction happened in reverse order from construction, then we -// could call UnpatchWindowsFunctions at just the right time, so that -// that early-allocated data would be freed using the windows -// allocation functions rather than tcmalloc. The problem is that -// windows allocates some structures lazily, so it would allocate them -// late (using tcmalloc) and then try to deallocate them late as well. -// So instead of unpatching, we just modify all the tcmalloc routines -// so they call through to the libc rountines if the memory in -// question doesn't seem to have been allocated with tcmalloc. I keep -// this unpatch code around for reference. - -void UnpatchWindowsFunctions() { - // We need to go back to the system malloc/etc at global destruct time, - // so objects that were constructed before tcmalloc, using the system - // malloc, can destroy themselves using the system free. This depends - // on DLLs unloading in the reverse order in which they load! - // - // We also go back to the default HeapAlloc/etc, just for consistency. - // Who knows, it may help avoid weird bugs in some situations. - main_executable_windows.Unpatch(); - main_executable.Unpatch(); - if (libc1.is_valid()) libc1.Unpatch(); - if (libc2.is_valid()) libc2.Unpatch(); - if (libc3.is_valid()) libc3.Unpatch(); - if (libc4.is_valid()) libc4.Unpatch(); - if (libc5.is_valid()) libc5.Unpatch(); - if (libc6.is_valid()) libc6.Unpatch(); - if (libc7.is_valid()) libc7.Unpatch(); - if (libc8.is_valid()) libc8.Unpatch(); -} -#endif |
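
For readers tracing the header hunk at the top of this section: the tc_*_nothrow entry points mirror C++'s operator new(std::nothrow), reporting allocation failure by returning NULL instead of throwing std::bad_alloc. A minimal sketch of the usual forwarding pattern, not the actual tcmalloc implementation; do_malloc_sketch and do_free_sketch are hypothetical stand-ins for tcmalloc's internal do_* routines:

#include <stddef.h>
#include <stdlib.h>
#include <new>

// Hypothetical stand-ins for tcmalloc's internal allocator routines.
static void* do_malloc_sketch(size_t size) { return malloc(size); }
static void  do_free_sketch(void* p)       { free(p); }

// Throwing variant: the shape of tc_new / operator new(size_t).
void* tc_new_sketch(size_t size) {
  void* p = do_malloc_sketch(size);
  if (p == NULL) throw std::bad_alloc();   // out of memory
  return p;
}

// Nothrow variant: the shape of tc_new_nothrow; failure is a NULL return.
void* tc_new_nothrow_sketch(size_t size, const std::nothrow_t&) throw() {
  return do_malloc_sketch(size);           // NULL on failure, never throws
}

void tc_delete_sketch(void* p) throw() { do_free_sketch(p); }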
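
The new ModuleEntryCopy constructor above deliberately snapshots each module's base name and proc addresses eagerly, so no windows call happens while patch_all_modules_lock is held. A self-contained sketch of the same enumeration pattern (Windows-only; link against psapi.lib; the bound and buffer sizes here are ours, not from the patch):

#include <windows.h>
#include <psapi.h>
#include <stdio.h>

// Enumerate the current process's modules and print the per-module data
// that ModuleEntryCopy captures: base address, image size, and basename.
int main() {
  const int kMaxLocalModules = 256;        // hypothetical local bound
  HMODULE mods[kMaxLocalModules];
  DWORD bytes_needed = 0;
  HANDLE process = GetCurrentProcess();
  if (!EnumProcessModules(process, mods, sizeof(mods), &bytes_needed))
    return 1;
  // EnumProcessModules reports bytes written, not a module count.
  DWORD count = bytes_needed / sizeof(*mods);
  if (count > kMaxLocalModules) count = kMaxLocalModules;
  for (DWORD i = 0; i < count; ++i) {
    MODULEINFO mi;
    char name[256];
    if (GetModuleInformation(process, mods[i], &mi, sizeof(mi)) &&
        GetModuleBaseNameA(process, mods[i], name, sizeof(name))) {
      printf("%-30s base=%p size=%lu\n",
             name, mi.lpBaseOfDll, (unsigned long)mi.SizeOfImage);
    }
  }
  return 0;
}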
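
The deleted g_last_loaded logic was an optimization: cache the set of HMODULEs seen on the previous PatchAllModules pass, and only do patch/unpatch work for modules that appeared or vanished since then. A standalone sketch of that set-diff bookkeeping, under the assumption that ProcessNewModule and InvalidateModule (left as comments here) stand in for the real patching work:

#include <windows.h>
#include <set>
#include <vector>

// Snapshot of the modules seen on the previous pass; allocated lazily,
// like the g_last_loaded pointer this revert removes.
static std::set<HMODULE>* g_last_seen = NULL;

// Returns true if any module appeared or disappeared since last time.
bool DiffModules(const std::vector<HMODULE>& now_loaded) {
  if (g_last_seen == NULL) g_last_seen = new std::set<HMODULE>;
  std::set<HMODULE> current(now_loaded.begin(), now_loaded.end());
  bool changed = false;
  // Newly appeared modules: in `current` but not in the last snapshot.
  for (std::set<HMODULE>::iterator it = current.begin();
       it != current.end(); ++it) {
    if (g_last_seen->count(*it) == 0) {
      /* ProcessNewModule(*it); */         // hypothetical patch hook
      changed = true;
    }
  }
  // Vanished modules: in the last snapshot but no longer loaded.
  for (std::set<HMODULE>::iterator it = g_last_seen->begin();
       it != g_last_seen->end(); ++it) {
    if (current.count(*it) == 0) {
      /* InvalidateModule(*it); */         // hypothetical unpatch hook
      changed = true;
    }
  }
  g_last_seen->swap(current);              // keep the snapshot for next time
  return changed;
}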
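
Similarly, the deleted Perftools_LoadLibraryExW body short-circuited repeated loads: GetModuleHandleExW with dwFlags == 0 both tests whether the module is already loaded and bumps its reference count, matching what a real LoadLibrary would have done, so a full re-patch pass can be skipped. A sketch of that shortcut outside the interception machinery, where RepatchEverything is a hypothetical stand-in for PatchAllModules:

#include <windows.h>

static void RepatchEverything() { /* stand-in for PatchAllModules() */ }

HMODULE LoadAndMaybePatch(LPCWSTR file_name, HANDLE file, DWORD flags) {
  HMODULE handle = NULL;
  // dwFlags == 0: returns the handle and increments the refcount if the
  // module is already mapped into the process.
  if (::GetModuleHandleExW(0, file_name, &handle))
    return handle;          // already loaded; nothing new to patch
  handle = ::LoadLibraryExW(file_name, file, flags);
  if (handle != NULL)
    RepatchEverything();    // a new module may carry its own libc to patch
  return handle;
}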
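
The companion check deleted from Perftools_FreeLibrary asked windows whether the module is still mapped after the free: if GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS resolves the old base address back to the same HMODULE, the FreeLibrary only dropped a reference count and the patch bookkeeping is still valid. A sketch of the same test, with FixupPatches as a hypothetical stand-in for the PatchAllModules cleanup pass:

#include <windows.h>

static void FixupPatches() { /* stand-in for PatchAllModules() */ }

BOOL FreeAndMaybeUnpatch(HMODULE module) {
  BOOL rv = ::FreeLibrary(module);
  HMODULE owner = NULL;
  BOOL still_loaded = ::GetModuleHandleExW(
      GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
      GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
      (LPCWSTR)module,      // treated as an address because of FROM_ADDRESS
      &owner);
  if (!(still_loaded && owner == module))
    FixupPatches();         // the module really went away; clean up bookkeeping
  return rv;
}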