Add redundancy to detect double frees in TCMalloc

I don't think I want to ship this to dev or stable, but I think it may be valuable to try on our canary builds. Added a single byte or word (tranparently) to all allocations, and wrote a unique value into that location after each allocation. When free() is called, we validate the flag, and then mark the block as not being allocated. Any time a block fails to validate, we crash, as this means the object either overran its allocated region (or suffered memory corruption), or else (more likely) a double free took place. We have two distinct crash stacks for the two distinct validation problems (corrupt vs double free). ALthough I haven't been able to demonstrate a problem on the bots, I'm pretty psyched to try to find something in the field with this change. The perf loss is very low, and I think that the likely cause for TCMalloc cross-linked-list corruption (double frees) will be detected (I can hope!) R=mbelshe BUG=75921 Review URL: http://codereview.chromium.org/6683029 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@78369 0039d316-1c4b-4281-b951-d872f2087c98
author: jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-03-16 15:31:39 +0000
committer: jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-03-16 15:31:39 +0000
commit: 3b0be97b587797d06198472242f814980872b1b0 (patch)
tree: 63455b1280c95009e9207f0c076d5b95673fc827
parent: 71de819b6409c3fc9886c0d07e2e6216d1a52703 (diff)
download: chromium_src-3b0be97b587797d06198472242f814980872b1b0.zip
chromium_src-3b0be97b587797d06198472242f814980872b1b0.tar.gz
chromium_src-3b0be97b587797d06198472242f814980872b1b0.tar.bz2
2 files changed, 181 insertions, 2 deletions
diff --git a/base/allocator/allocator_shim.cc b/base/allocator/allocator_shim.cc
index f11164c..0bee3d3 100644
--- a/base/allocator/allocator_shim.cc
+++ b/base/allocator/allocator_shim.cc
@@ -41,7 +41,7 @@ typedef enum {
 // See SetupSubprocessAllocator() to specify a default secondary (subprocess)
 // allocator.
 // TODO(jar): Switch to using TCMALLOC for the renderer as well.
-static Allocator allocator = WINHEAP;
+static Allocator allocator = TCMALLOC;
 
 // The names of the environment variables that can optionally control the
 // selection of the allocator.  The primary may be used to control overall
diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc
index 794af94..f6bb50c 100644
--- a/third_party/tcmalloc/chromium/src/tcmalloc.cc
+++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc
@@ -150,6 +150,14 @@ using tcmalloc::ThreadCache;
 # define __THROW   // __THROW is just an optimization, so ok to make it ""
 #endif
 
+// ---- Double free debug declarations
+static size_t ExcludeSpaceForMark(size_t size);
+static void AddRoomForMark(size_t* size);
+static void ExcludeMarkFromSize(size_t* new_size);
+static void MarkAllocatedRegion(void* ptr);
+static void ValidateAllocatedRegion(void* ptr, size_t cl);
+// ---- End Double free debug declarations
+
 DECLARE_int64(tcmalloc_sample_parameter);
 DECLARE_double(tcmalloc_release_rate);
 
@@ -850,6 +858,7 @@ static inline bool CheckCachedSizeClass(void *ptr) {
 
 static inline void* CheckedMallocResult(void *result) {
   ASSERT(result == NULL || CheckCachedSizeClass(result));
+  MarkAllocatedRegion(result);
   return result;
 }
 
@@ -969,6 +978,8 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
 }
 
 inline void* do_malloc(size_t size) {
+  AddRoomForMark(&size);
+
   void* ret = NULL;
 
   // The following call forces module initialization
@@ -979,6 +990,7 @@ inline void* do_malloc(size_t size) {
 
     if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
       ret = DoSampledAllocation(size);
+      MarkAllocatedRegion(ret);
     } else {
       // The common case, and also the simplest. This just pops the
       // size-appropriate freelist, after replenishing it if it's empty.
@@ -986,6 +998,7 @@ inline void* do_malloc(size_t size) {
     }
   } else {
     ret = do_malloc_pages(heap, size);
+    MarkAllocatedRegion(ret);
   }
   if (ret == NULL) errno = ENOMEM;
   return ret;
@@ -1033,6 +1046,9 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
     cl = span->sizeclass;
     Static::pageheap()->CacheSizeClass(p, cl);
   }
+
+  ValidateAllocatedRegion(ptr, cl);
+
   if (cl != 0) {
     ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
     ThreadCache* heap = GetCacheIfPresent();
@@ -1089,6 +1105,7 @@ inline void* do_realloc_with_callback(
     void* old_ptr, size_t new_size,
     void (*invalid_free_fn)(void*),
     size_t (*invalid_get_size_fn)(void*)) {
+  AddRoomForMark(&new_size);
   // Get the size of the old entry
   const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
 
@@ -1107,6 +1124,7 @@ inline void* do_realloc_with_callback(
     if (new_size > old_size && new_size < lower_bound_to_grow) {
       new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
     }
+    ExcludeMarkFromSize(&new_size);  // do_malloc will add space if needed.
     if (new_ptr == NULL) {
       // Either new_size is not a tiny increment, or last do_malloc failed.
       new_ptr = do_malloc_or_cpp_alloc(new_size);
@@ -1125,6 +1143,7 @@ inline void* do_realloc_with_callback(
   } else {
     // We still need to call hooks to report the updated size:
     MallocHook::InvokeDeleteHook(old_ptr);
+    ExcludeMarkFromSize(&new_size);
     MallocHook::InvokeNewHook(old_ptr, new_size);
     return old_ptr;
   }
@@ -1145,6 +1164,8 @@ inline void* do_realloc(void* old_ptr, size_t new_size) {
 void* do_memalign(size_t align, size_t size) {
   ASSERT((align & (align - 1)) == 0);
   ASSERT(align > 0);
+  // Marked in CheckMallocResult(), which is also inside SpanToMallocResult(). 
+  AddRoomForMark(&size);
   if (size + align < size) return NULL;         // Overflow
 
   if (Static::pageheap() == NULL) ThreadCache::InitModule();
@@ -1347,7 +1368,8 @@ void* cpp_memalign(size_t align, size_t size) {
 
 // As promised, the definition of this function, declared above.
 size_t TCMallocImplementation::GetAllocatedSize(void* ptr) {
-  return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
+  return ExcludeSpaceForMark(
+      GetSizeWithCallback(ptr, &InvalidGetAllocatedSize));
 }
 
 void TCMallocImplementation::MarkThreadBusy() {
@@ -1564,3 +1586,160 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller)
 }
 void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
 #endif  // #ifndef TCMALLOC_FOR_DEBUGALLOCATION
+
+// ---Double free() debugging implementation -----------------------------------
+// We will put a mark at the extreme end of each allocation block.  We make
+// sure that we always allocate enough "extra memory" that we can fit in the
+// mark, and still provide the requested usable region.  If ever that mark is
+// not as expected, then we know that the user is corrupting memory beyond their
+// request size, or that they have called free a second time without having
+// the memory allocated (again).  This allows us to spot most double free()s,
+// but some can "slip by" or confuse our logic if the caller reallocates memory
+// (for a second use) before performing an evil double-free of a first
+// allocation
+
+// This code can be optimized, but for now, it is written to be most easily
+// understood, and flexible (since it is evolving a bit). Potential
+// optimizations include using other calculated data, such as class size, or
+// allocation size, which is known in the code above, but then is recalculated
+// below.  Another potential optimization would be careful manual inlining of
+// code, but I *think* that the compile will probably do this for me, and I've
+// been careful to avoid aliasing issues that might make a compiler back-off.
+
+// Evolution includes experimenting with different marks, to minimize the chance
+// that a mark would be misunderstood (missed corruption).  The marks are meant
+// to be hashed encoding of the location, so that they can't be copied over a
+// different region (by accident) without being detected (most of the time).
+
+#define TCMALLOC_VALIDATION
+
+#if !defined(TCMALLOC_VALIDATION)
+
+static size_t ExcludeSpaceForMark(size_t size) { return size; (}
+static void AddRoomForMark(size_t* size) {}
+static void ExcludeMarkFromSize(size_t* new_size) {}
+static void MarkAllocatedRegion(void* ptr) {}
+static void ValidateAllocatedRegion(void* ptr, size_t cl) {}
+
+#else  // TCMALLOC_VALIDATION
+
+static void DieFromDoubleFree() {
+  char* p = NULL;
+  p++;
+  *p += 1;  // Segv.
+}
+
+static size_t DieFromBadFreePointer(void* unused) {
+  char* p = NULL;
+  p += 2;
+  *p += 2;  // Segv.
+  return 0;
+}
+
+static void DieFromMemoryCorruption() {
+  char* p = NULL;
+  p += 3;
+  *p += 3;  // Segv.
+}
+
+// We can either do byte marking, or whole word marking based on the following
+// define.  char is as small as we can get, and word marking probably provides
+// more than enough bits that we won't miss a corruption. Any sized integral
+// type can be used, but we just define two examples.
+
+//  #define TCMALLOC_SMALL_VALIDATION
+#if defined (TCMALLOC_SMALL_VALIDATION)
+
+typedef char MarkType;  // char saves memory... int is more complete.
+static const MarkType kAllocationMarkMask = static_cast<MarkType>(0x36);
+
+#else 
+
+typedef int MarkType;  // char saves memory... int is more complete.
+static const MarkType kAllocationMarkMask = static_cast<MarkType>(0xE1AB9536);
+
+#endif
+
+// TODO(jar): See if use of reference rather than pointer gets better inlining,
+// or if macro is needed.  My fear is that taking address map preclude register
+// allocation :-(.
+inline static void AddRoomForMark(size_t* size) {
+  *size += sizeof(kAllocationMarkMask);
+}
+
+inline static void ExcludeMarkFromSize(size_t* new_size) {
+  *new_size -= sizeof(kAllocationMarkMask);
+}
+
+inline static size_t ExcludeSpaceForMark(size_t size) {
+  return size - sizeof(kAllocationMarkMask);  // Lie about size when asked.
+}
+
+inline static MarkType* GetMarkLocation(void* ptr) {
+  size_t class_size = GetSizeWithCallback(ptr, DieFromBadFreePointer);
+  ASSERT(class_size % sizeof(kAllocationMarkMask) == 0);
+  size_t last_index = (class_size / sizeof(kAllocationMarkMask)) - 1;
+  return static_cast<MarkType*>(ptr) + last_index;
+}
+
+// We hash in the mark location plus the pointer so that we effectively mix in
+// the size of the block.  This means that if a span is used for different sizes
+// that the mark will be different. It would be good to hash in the size (which
+// we effectively get by using both mark location and pointer), but even better
+// would be to also include the class, as it concisely contains the entropy
+// found in the size (when we don't have large allocation), and there is less
+// risk of losing those bits to truncation. It would probably be good to combine
+// the high bits of size (capturing info about large blocks) with the class
+// (which is a 6 bit number).
+inline static MarkType GetMarkValue(void* ptr, MarkType* mark) {
+  void* ptr2 = static_cast<void*>(mark);
+  size_t offset1 = static_cast<char*>(ptr) - static_cast<char*>(NULL);
+  size_t offset2 = static_cast<char*>(ptr2) - static_cast<char*>(NULL);
+  static const int kInvariantBits = 2;
+  ASSERT((offset1 >> kInvariantBits) << kInvariantBits == offset1);
+  // Note: low bits of both offsets are invariants due to alignment.  High bits
+  // of both offsets are the same (unless we have a large allocation).  Avoid
+  // XORing high bits together, as they will cancel for most small allocations.
+
+  MarkType ret = kAllocationMarkMask;
+  // Using a little shift, we can safely XOR together both offsets.
+  ret ^= static_cast<MarkType>(offset1 >> kInvariantBits) ^
+         static_cast<MarkType>(offset2);
+  if (sizeof(ret) == 1) {
+    // Try to bring some high level bits into the mix.
+    ret += static_cast<MarkType>(offset1 >> 8) ^
+           static_cast<MarkType>(offset1 >> 16) ^
+           static_cast<MarkType>(offset1 >> 24) ;
+  }
+  // Hash in high bits on a 64 bit architecture.
+  if (sizeof(size_t) == 8 && sizeof(ret) == 4)
+    ret += offset1 >> 16;
+  if (ret == 0)
+    ret = kAllocationMarkMask;  // Avoid common pattern of all zeros.
+  return ret;
+}
+
+// TODO(jar): Use the passed in TCmalloc Class Index to calculate mark location
+// faster.  The current implementation calls general functions, which have to
+// recalculate this in order to get the Class Size.  This is a slow and wasteful
+// recomputation... but it is much more readable this way (for now).
+static void ValidateAllocatedRegion(void* ptr, size_t cl) {
+  if (ptr == NULL) return;
+  MarkType* mark = GetMarkLocation(ptr);
+  MarkType allocated_mark = GetMarkValue(ptr, mark);
+  MarkType current_mark = *mark;
+
+  if (current_mark == ~allocated_mark)
+    DieFromDoubleFree();
+  if (current_mark != allocated_mark)
+    DieFromMemoryCorruption();
+  *mark = ~allocated_mark;  //  Distinctively not allocated.
+}
+
+static void MarkAllocatedRegion(void* ptr) {
+  if (ptr == NULL) return;
+  MarkType* mark = GetMarkLocation(ptr);
+  *mark = GetMarkValue(ptr, mark);
+}
+
+#endif  // TCMALLOC_VALIDATION
author	jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-03-16 15:31:39 +0000
committer	jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2011-03-16 15:31:39 +0000
commit	3b0be97b587797d06198472242f814980872b1b0 (patch)
tree	63455b1280c95009e9207f0c076d5b95673fc827
parent	71de819b6409c3fc9886c0d07e2e6216d1a52703 (diff)
download	chromium_src-3b0be97b587797d06198472242f814980872b1b0.zip chromium_src-3b0be97b587797d06198472242f814980872b1b0.tar.gz chromium_src-3b0be97b587797d06198472242f814980872b1b0.tar.bz2