diff options
author | jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-16 15:31:39 +0000 |
---|---|---|
committer | jar@chromium.org <jar@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-03-16 15:31:39 +0000 |
commit | 3b0be97b587797d06198472242f814980872b1b0 (patch) | |
tree | 63455b1280c95009e9207f0c076d5b95673fc827 | |
parent | 71de819b6409c3fc9886c0d07e2e6216d1a52703 (diff) | |
download | chromium_src-3b0be97b587797d06198472242f814980872b1b0.zip chromium_src-3b0be97b587797d06198472242f814980872b1b0.tar.gz chromium_src-3b0be97b587797d06198472242f814980872b1b0.tar.bz2 |
Add redundancy to detect double frees in TCMalloc
I don't think I want to ship this to dev or stable, but
I think it may be valuable to try on our canary
builds.
Added a single byte or word (tranparently) to all
allocations, and wrote a unique value into that
location after each allocation. When free() is
called, we validate the flag, and then mark
the block as not being allocated.
Any time a block fails to validate, we crash,
as this means the object either overran its
allocated region (or suffered memory corruption),
or else (more likely) a double free took place.
We have two distinct crash stacks for the
two distinct validation problems (corrupt vs
double free).
ALthough I haven't been able to demonstrate a
problem on the bots, I'm pretty psyched to try
to find something in the field with this change.
The perf loss is very low, and I think that the
likely cause for TCMalloc cross-linked-list
corruption (double frees) will be detected
(I can hope!)
R=mbelshe
BUG=75921
Review URL: http://codereview.chromium.org/6683029
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@78369 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | base/allocator/allocator_shim.cc | 2 | ||||
-rw-r--r-- | third_party/tcmalloc/chromium/src/tcmalloc.cc | 181 |
2 files changed, 181 insertions, 2 deletions
diff --git a/base/allocator/allocator_shim.cc b/base/allocator/allocator_shim.cc index f11164c..0bee3d3 100644 --- a/base/allocator/allocator_shim.cc +++ b/base/allocator/allocator_shim.cc @@ -41,7 +41,7 @@ typedef enum { // See SetupSubprocessAllocator() to specify a default secondary (subprocess) // allocator. // TODO(jar): Switch to using TCMALLOC for the renderer as well. -static Allocator allocator = WINHEAP; +static Allocator allocator = TCMALLOC; // The names of the environment variables that can optionally control the // selection of the allocator. The primary may be used to control overall diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc index 794af94..f6bb50c 100644 --- a/third_party/tcmalloc/chromium/src/tcmalloc.cc +++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc @@ -150,6 +150,14 @@ using tcmalloc::ThreadCache; # define __THROW // __THROW is just an optimization, so ok to make it "" #endif +// ---- Double free debug declarations +static size_t ExcludeSpaceForMark(size_t size); +static void AddRoomForMark(size_t* size); +static void ExcludeMarkFromSize(size_t* new_size); +static void MarkAllocatedRegion(void* ptr); +static void ValidateAllocatedRegion(void* ptr, size_t cl); +// ---- End Double free debug declarations + DECLARE_int64(tcmalloc_sample_parameter); DECLARE_double(tcmalloc_release_rate); @@ -850,6 +858,7 @@ static inline bool CheckCachedSizeClass(void *ptr) { static inline void* CheckedMallocResult(void *result) { ASSERT(result == NULL || CheckCachedSizeClass(result)); + MarkAllocatedRegion(result); return result; } @@ -969,6 +978,8 @@ inline void* do_malloc_pages(ThreadCache* heap, size_t size) { } inline void* do_malloc(size_t size) { + AddRoomForMark(&size); + void* ret = NULL; // The following call forces module initialization @@ -979,6 +990,7 @@ inline void* do_malloc(size_t size) { if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { ret = DoSampledAllocation(size); + MarkAllocatedRegion(ret); } else { // The common case, and also the simplest. This just pops the // size-appropriate freelist, after replenishing it if it's empty. @@ -986,6 +998,7 @@ inline void* do_malloc(size_t size) { } } else { ret = do_malloc_pages(heap, size); + MarkAllocatedRegion(ret); } if (ret == NULL) errno = ENOMEM; return ret; @@ -1033,6 +1046,9 @@ inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) { cl = span->sizeclass; Static::pageheap()->CacheSizeClass(p, cl); } + + ValidateAllocatedRegion(ptr, cl); + if (cl != 0) { ASSERT(!Static::pageheap()->GetDescriptor(p)->sample); ThreadCache* heap = GetCacheIfPresent(); @@ -1089,6 +1105,7 @@ inline void* do_realloc_with_callback( void* old_ptr, size_t new_size, void (*invalid_free_fn)(void*), size_t (*invalid_get_size_fn)(void*)) { + AddRoomForMark(&new_size); // Get the size of the old entry const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); @@ -1107,6 +1124,7 @@ inline void* do_realloc_with_callback( if (new_size > old_size && new_size < lower_bound_to_grow) { new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow); } + ExcludeMarkFromSize(&new_size); // do_malloc will add space if needed. if (new_ptr == NULL) { // Either new_size is not a tiny increment, or last do_malloc failed. new_ptr = do_malloc_or_cpp_alloc(new_size); @@ -1125,6 +1143,7 @@ inline void* do_realloc_with_callback( } else { // We still need to call hooks to report the updated size: MallocHook::InvokeDeleteHook(old_ptr); + ExcludeMarkFromSize(&new_size); MallocHook::InvokeNewHook(old_ptr, new_size); return old_ptr; } @@ -1145,6 +1164,8 @@ inline void* do_realloc(void* old_ptr, size_t new_size) { void* do_memalign(size_t align, size_t size) { ASSERT((align & (align - 1)) == 0); ASSERT(align > 0); + // Marked in CheckMallocResult(), which is also inside SpanToMallocResult(). + AddRoomForMark(&size); if (size + align < size) return NULL; // Overflow if (Static::pageheap() == NULL) ThreadCache::InitModule(); @@ -1347,7 +1368,8 @@ void* cpp_memalign(size_t align, size_t size) { // As promised, the definition of this function, declared above. size_t TCMallocImplementation::GetAllocatedSize(void* ptr) { - return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize); + return ExcludeSpaceForMark( + GetSizeWithCallback(ptr, &InvalidGetAllocatedSize)); } void TCMallocImplementation::MarkThreadBusy() { @@ -1564,3 +1586,160 @@ static void *MemalignOverride(size_t align, size_t size, const void *caller) } void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; #endif // #ifndef TCMALLOC_FOR_DEBUGALLOCATION + +// ---Double free() debugging implementation ----------------------------------- +// We will put a mark at the extreme end of each allocation block. We make +// sure that we always allocate enough "extra memory" that we can fit in the +// mark, and still provide the requested usable region. If ever that mark is +// not as expected, then we know that the user is corrupting memory beyond their +// request size, or that they have called free a second time without having +// the memory allocated (again). This allows us to spot most double free()s, +// but some can "slip by" or confuse our logic if the caller reallocates memory +// (for a second use) before performing an evil double-free of a first +// allocation + +// This code can be optimized, but for now, it is written to be most easily +// understood, and flexible (since it is evolving a bit). Potential +// optimizations include using other calculated data, such as class size, or +// allocation size, which is known in the code above, but then is recalculated +// below. Another potential optimization would be careful manual inlining of +// code, but I *think* that the compile will probably do this for me, and I've +// been careful to avoid aliasing issues that might make a compiler back-off. + +// Evolution includes experimenting with different marks, to minimize the chance +// that a mark would be misunderstood (missed corruption). The marks are meant +// to be hashed encoding of the location, so that they can't be copied over a +// different region (by accident) without being detected (most of the time). + +#define TCMALLOC_VALIDATION + +#if !defined(TCMALLOC_VALIDATION) + +static size_t ExcludeSpaceForMark(size_t size) { return size; (} +static void AddRoomForMark(size_t* size) {} +static void ExcludeMarkFromSize(size_t* new_size) {} +static void MarkAllocatedRegion(void* ptr) {} +static void ValidateAllocatedRegion(void* ptr, size_t cl) {} + +#else // TCMALLOC_VALIDATION + +static void DieFromDoubleFree() { + char* p = NULL; + p++; + *p += 1; // Segv. +} + +static size_t DieFromBadFreePointer(void* unused) { + char* p = NULL; + p += 2; + *p += 2; // Segv. + return 0; +} + +static void DieFromMemoryCorruption() { + char* p = NULL; + p += 3; + *p += 3; // Segv. +} + +// We can either do byte marking, or whole word marking based on the following +// define. char is as small as we can get, and word marking probably provides +// more than enough bits that we won't miss a corruption. Any sized integral +// type can be used, but we just define two examples. + +// #define TCMALLOC_SMALL_VALIDATION +#if defined (TCMALLOC_SMALL_VALIDATION) + +typedef char MarkType; // char saves memory... int is more complete. +static const MarkType kAllocationMarkMask = static_cast<MarkType>(0x36); + +#else + +typedef int MarkType; // char saves memory... int is more complete. +static const MarkType kAllocationMarkMask = static_cast<MarkType>(0xE1AB9536); + +#endif + +// TODO(jar): See if use of reference rather than pointer gets better inlining, +// or if macro is needed. My fear is that taking address map preclude register +// allocation :-(. +inline static void AddRoomForMark(size_t* size) { + *size += sizeof(kAllocationMarkMask); +} + +inline static void ExcludeMarkFromSize(size_t* new_size) { + *new_size -= sizeof(kAllocationMarkMask); +} + +inline static size_t ExcludeSpaceForMark(size_t size) { + return size - sizeof(kAllocationMarkMask); // Lie about size when asked. +} + +inline static MarkType* GetMarkLocation(void* ptr) { + size_t class_size = GetSizeWithCallback(ptr, DieFromBadFreePointer); + ASSERT(class_size % sizeof(kAllocationMarkMask) == 0); + size_t last_index = (class_size / sizeof(kAllocationMarkMask)) - 1; + return static_cast<MarkType*>(ptr) + last_index; +} + +// We hash in the mark location plus the pointer so that we effectively mix in +// the size of the block. This means that if a span is used for different sizes +// that the mark will be different. It would be good to hash in the size (which +// we effectively get by using both mark location and pointer), but even better +// would be to also include the class, as it concisely contains the entropy +// found in the size (when we don't have large allocation), and there is less +// risk of losing those bits to truncation. It would probably be good to combine +// the high bits of size (capturing info about large blocks) with the class +// (which is a 6 bit number). +inline static MarkType GetMarkValue(void* ptr, MarkType* mark) { + void* ptr2 = static_cast<void*>(mark); + size_t offset1 = static_cast<char*>(ptr) - static_cast<char*>(NULL); + size_t offset2 = static_cast<char*>(ptr2) - static_cast<char*>(NULL); + static const int kInvariantBits = 2; + ASSERT((offset1 >> kInvariantBits) << kInvariantBits == offset1); + // Note: low bits of both offsets are invariants due to alignment. High bits + // of both offsets are the same (unless we have a large allocation). Avoid + // XORing high bits together, as they will cancel for most small allocations. + + MarkType ret = kAllocationMarkMask; + // Using a little shift, we can safely XOR together both offsets. + ret ^= static_cast<MarkType>(offset1 >> kInvariantBits) ^ + static_cast<MarkType>(offset2); + if (sizeof(ret) == 1) { + // Try to bring some high level bits into the mix. + ret += static_cast<MarkType>(offset1 >> 8) ^ + static_cast<MarkType>(offset1 >> 16) ^ + static_cast<MarkType>(offset1 >> 24) ; + } + // Hash in high bits on a 64 bit architecture. + if (sizeof(size_t) == 8 && sizeof(ret) == 4) + ret += offset1 >> 16; + if (ret == 0) + ret = kAllocationMarkMask; // Avoid common pattern of all zeros. + return ret; +} + +// TODO(jar): Use the passed in TCmalloc Class Index to calculate mark location +// faster. The current implementation calls general functions, which have to +// recalculate this in order to get the Class Size. This is a slow and wasteful +// recomputation... but it is much more readable this way (for now). +static void ValidateAllocatedRegion(void* ptr, size_t cl) { + if (ptr == NULL) return; + MarkType* mark = GetMarkLocation(ptr); + MarkType allocated_mark = GetMarkValue(ptr, mark); + MarkType current_mark = *mark; + + if (current_mark == ~allocated_mark) + DieFromDoubleFree(); + if (current_mark != allocated_mark) + DieFromMemoryCorruption(); + *mark = ~allocated_mark; // Distinctively not allocated. +} + +static void MarkAllocatedRegion(void* ptr) { + if (ptr == NULL) return; + MarkType* mark = GetMarkLocation(ptr); + *mark = GetMarkValue(ptr, mark); +} + +#endif // TCMALLOC_VALIDATION |