diff options
49 files changed, 2047 insertions, 840 deletions
diff --git a/base/allocator/allocator.gyp b/base/allocator/allocator.gyp index fe1aae8..6ea94c8 100644 --- a/base/allocator/allocator.gyp +++ b/base/allocator/allocator.gyp @@ -145,12 +145,10 @@ '<(tcmalloc_dir)/src/static_vars.h', '<(tcmalloc_dir)/src/symbolize.cc', '<(tcmalloc_dir)/src/symbolize.h', - '<(tcmalloc_dir)/src/symbolize_linux.cc', '<(tcmalloc_dir)/src/system-alloc.cc', '<(tcmalloc_dir)/src/system-alloc.h', '<(tcmalloc_dir)/src/tcmalloc.cc', '<(tcmalloc_dir)/src/tcmalloc_guard.h', - '<(tcmalloc_dir)/src/tcmalloc_linux.cc', '<(tcmalloc_dir)/src/thread_cache.cc', '<(tcmalloc_dir)/src/thread_cache.h', '<(tcmalloc_dir)/src/windows/config.h', @@ -186,10 +184,6 @@ # Included by allocator_shim.cc for maximal inlining. 'generic_allocators.cc', 'win_allocator.cc', - '<(tcmalloc_dir)/src/tcmalloc.cc', - - # Unneeded on Windows, symbolize_linux.cc used there instead. - '<(tcmalloc_dir)/src/symbolize.cc', # We simply don't use these, but list them above so that IDE # users can view the full available source for reference, etc. @@ -291,9 +285,8 @@ '<(tcmalloc_dir)/src/system-alloc.cc', '<(tcmalloc_dir)/src/system-alloc.h', - # don't use linux forked version - '<(tcmalloc_dir)/src/tcmalloc_linux.cc', - '<(tcmalloc_dir)/src/symbolize_linux.cc', + # included by allocator_shim.cc + '<(tcmalloc_dir)/src/tcmalloc.cc', # heap-profiler/checker/cpuprofiler '<(tcmalloc_dir)/src/base/thread_lister.c', @@ -327,17 +320,6 @@ '<(jemalloc_dir)/qr.h', '<(jemalloc_dir)/rb.h', - # TODO(willchan): Return to using this when page_heap_linux.cc - # becomes unnecessary. - '<(tcmalloc_dir)/src/page_heap.cc', - ], - # TODO(willchan): This is actually just a branched copy of the - # vanilla upstream page_heap.cc. The current forked copy of - # page_heap.cc has Windows-specific code in it so Linux can't - # use it. These need to be refactored so we can track changes - # to the upstream page_heap.cc without duplication. 
- 'sources': [ - '<(tcmalloc_dir)/src/page_heap_linux.cc', ], 'cflags!': [ '-fvisibility=hidden', diff --git a/third_party/tcmalloc/chromium/src/base/basictypes.h b/third_party/tcmalloc/chromium/src/base/basictypes.h index e4d4140..9991413 100644 --- a/third_party/tcmalloc/chromium/src/base/basictypes.h +++ b/third_party/tcmalloc/chromium/src/base/basictypes.h @@ -240,7 +240,7 @@ struct CompileAssert { # define HAVE_ATTRIBUTE_SECTION_START 1 #elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__) -# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__DATA, " #name))) +# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name))) #include <mach-o/getsect.h> #include <mach-o/dyld.h> @@ -251,18 +251,32 @@ class AssignAttributeStartEnd { if (_dyld_present()) { for (int i = _dyld_image_count() - 1; i >= 0; --i) { const mach_header* hdr = _dyld_get_image_header(i); - uint32_t len; - *pstart = getsectdatafromheader(hdr, "__DATA", name, &len); - if (*pstart) { // NULL if not defined in this dynamic library - *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc - *pend = *pstart + len; - return; +#ifdef MH_MAGIC_64 + if (hdr->magic == MH_MAGIC_64) { + uint64_t len; + *pstart = getsectdatafromheader_64((mach_header_64*)hdr, + "__TEXT", name, &len); + if (*pstart) { // NULL if not defined in this dynamic library + *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc + *pend = *pstart + len; + return; + } + } +#endif + if (hdr->magic == MH_MAGIC) { + uint32_t len; + *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len); + if (*pstart) { // NULL if not defined in this dynamic library + *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc + *pend = *pstart + len; + return; + } } } } // If we get here, not defined in a dll at all. See if defined statically. unsigned long len; // don't ask me why this type isn't uint32_t too... 
- *pstart = getsectdata("__DATA", name, &len); + *pstart = getsectdata("__TEXT", name, &len); *pend = *pstart + len; } }; diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h index 5995ac4..a2a268f 100644 --- a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h +++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h @@ -203,9 +203,16 @@ } while (0) // Instruct the tool to create a happens-before arc between mu->Unlock() and - // mu->Lock(). This annotation may slow down the race detector; normally it - // is used only when it would be difficult to annotate each of the mutex's - // critical sections individually using the annotations above. + // mu->Lock(). This annotation may slow down the race detector and hide real + // races. Normally it is used only when it would be difficult to annotate each + // of the mutex's critical sections individually using the annotations above. + // This annotation makes sense only for hybrid race detectors. For pure + // happens-before detectors this is a no-op. For more details see + // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. 
#define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) @@ -357,6 +364,7 @@ #define ANNOTATE_NEW_MEMORY(address, size) // empty #define ANNOTATE_EXPECT_RACE(address, description) // empty #define ANNOTATE_BENIGN_RACE(address, description) // empty + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty #define ANNOTATE_TRACE_MEMORY(arg) // empty #define ANNOTATE_THREAD_NAME(name) // empty diff --git a/third_party/tcmalloc/chromium/src/base/logging.h b/third_party/tcmalloc/chromium/src/base/logging.h index 3313d97..4d5e30c 100644 --- a/third_party/tcmalloc/chromium/src/base/logging.h +++ b/third_party/tcmalloc/chromium/src/base/logging.h @@ -136,17 +136,26 @@ enum { DEBUG_MODE = 1 }; #define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) #define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) -// A synonym for CHECK_* that is used in some unittests. +// Synonyms for CHECK_* that are used in some unittests. #define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2) #define EXPECT_NE(val1, val2) CHECK_NE(val1, val2) #define EXPECT_LE(val1, val2) CHECK_LE(val1, val2) #define EXPECT_LT(val1, val2) CHECK_LT(val1, val2) #define EXPECT_GE(val1, val2) CHECK_GE(val1, val2) #define EXPECT_GT(val1, val2) CHECK_GT(val1, val2) +#define ASSERT_EQ(val1, val2) EXPECT_EQ(val1, val2) +#define ASSERT_NE(val1, val2) EXPECT_NE(val1, val2) +#define ASSERT_LE(val1, val2) EXPECT_LE(val1, val2) +#define ASSERT_LT(val1, val2) EXPECT_LT(val1, val2) +#define ASSERT_GE(val1, val2) EXPECT_GE(val1, val2) +#define ASSERT_GT(val1, val2) EXPECT_GT(val1, val2) // As are these variants. 
#define EXPECT_TRUE(cond) CHECK(cond) #define EXPECT_FALSE(cond) CHECK(!(cond)) #define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) +#define ASSERT_TRUE(cond) EXPECT_TRUE(cond) +#define ASSERT_FALSE(cond) EXPECT_FALSE(cond) +#define ASSERT_STREQ(a, b) EXPECT_STREQ(a, b) // Used for (libc) functions that return -1 and set errno #define CHECK_ERR(invocation) PCHECK((invocation) != -1) diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc index 900ae4e..2bbce54 100644 --- a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc +++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc @@ -67,7 +67,7 @@ namespace { // first. Valid in both allocated and unallocated blocks intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this LowLevelAlloc::Arena *arena; // pointer to parent arena - void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*) + void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*) } header; // Next two fields: in unallocated blocks: freelist skiplist data @@ -197,15 +197,57 @@ struct LowLevelAlloc::Arena { // pointer. static struct LowLevelAlloc::Arena default_arena; -// A non-malloc-hooked arena: used only to allocate metadata for arenas that +// Non-malloc-hooked arenas: used only to allocate metadata for arenas that // do not want malloc hook reporting, so that for them there's no malloc hook // reporting even during arena creation. 
static struct LowLevelAlloc::Arena unhooked_arena; +static struct LowLevelAlloc::Arena unhooked_async_sig_safe_arena; // magic numbers to identify allocated and unallocated blocks static const intptr_t kMagicAllocated = 0x4c833e95; static const intptr_t kMagicUnallocated = ~kMagicAllocated; +namespace { + class ArenaLock { + public: + explicit ArenaLock(LowLevelAlloc::Arena *arena) : + left_(false), mask_valid_(false), arena_(arena) { + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + // We've decided not to support async-signal-safe arena use until + // there is a demonstrated need. Here's how one could do it though + // (would need to be made more portable). +#if 0 + sigset_t all; + sigfillset(&all); + this->mask_valid_ = + (pthread_sigmask(SIG_BLOCK, &all, &this->mask_) == 0); +#else + RAW_CHECK(false, "We do not yet support async-signal-safe arena."); +#endif + } + this->arena_->mu.Lock(); + } + ~ArenaLock() { RAW_CHECK(this->left_, "haven't left Arena region"); } + void Leave() { + this->arena_->mu.Unlock(); +#if 0 + if (this->mask_valid_) { + pthread_sigmask(SIG_SETMASK, &this->mask_, 0); + } +#endif + this->left_ = true; + } + private: + bool left_; // whether left region + bool mask_valid_; +#if 0 + sigset_t mask_; // old mask of blocked signals +#endif + LowLevelAlloc::Arena *arena_; + DISALLOW_COPY_AND_ASSIGN(ArenaLock); + }; +} // anonymous namespace + // create an appropriate magic number for an object at "ptr" // "magic" should be kMagicAllocated or kMagicUnallocated inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) { @@ -235,6 +277,8 @@ static void ArenaInit(LowLevelAlloc::Arena *arena) { // Default arena should be hooked, e.g. for heap-checker to trace // pointer chains through objects in the default arena. 
arena->flags = LowLevelAlloc::kCallMallocHook; + } else if (arena == &unhooked_async_sig_safe_arena) { + arena->flags = LowLevelAlloc::kAsyncSignalSafe; } else { arena->flags = 0; // other arenas' flags may be overridden by client, // but unhooked_arena will have 0 in 'flags'. @@ -246,9 +290,12 @@ static void ArenaInit(LowLevelAlloc::Arena *arena) { LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags, Arena *meta_data_arena) { RAW_CHECK(meta_data_arena != 0, "must pass a valid arena"); - if (meta_data_arena == &default_arena && - (flags & LowLevelAlloc::kCallMallocHook) == 0) { - meta_data_arena = &unhooked_arena; + if (meta_data_arena == &default_arena) { + if ((flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + meta_data_arena = &unhooked_async_sig_safe_arena; + } else if ((flags & LowLevelAlloc::kCallMallocHook) == 0) { + meta_data_arena = &unhooked_arena; + } } // Arena(0) uses the constructor for non-static contexts Arena *result = @@ -262,9 +309,9 @@ LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags, bool LowLevelAlloc::DeleteArena(Arena *arena) { RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena, "may not delete default arena"); - arena->mu.Lock(); + ArenaLock section(arena); bool empty = (arena->allocation_count == 0); - arena->mu.Unlock(); + section.Leave(); if (empty) { while (arena->freelist.next[0] != 0) { AllocList *region = arena->freelist.next[0]; @@ -279,7 +326,13 @@ bool LowLevelAlloc::DeleteArena(Arena *arena) { "empty arena has non-page-aligned block size"); RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0, "empty arena has non-page-aligned block"); - RAW_CHECK(munmap(region, size) == 0, + int munmap_result; + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) == 0) { + munmap_result = munmap(region, size); + } else { + munmap_result = MallocHook::UnhookedMUnmap(region, size); + } + RAW_CHECK(munmap_result == 0, "LowLevelAlloc::DeleteArena: munmap failed address"); } Free(arena); @@ 
-363,21 +416,21 @@ void LowLevelAlloc::Free(void *v) { if ((arena->flags & kCallMallocHook) != 0) { MallocHook::InvokeDeleteHook(v); } - arena->mu.Lock(); + ArenaLock section(arena); AddToFreelist(v, arena); RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free"); arena->allocation_count--; - arena->mu.Unlock(); + section.Leave(); } } // allocates and returns a block of size bytes, to be freed with Free() // L < arena->mu -void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { +static void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { void *result = 0; if (request != 0) { AllocList *s; // will point to region that satisfies request - arena->mu.Lock(); + ArenaLock section(arena); ArenaInit(arena); // round up with header size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup); @@ -399,8 +452,14 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { // mmap generous 64K chunks to decrease // the chances/impact of fragmentation: size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16); - void *new_pages = mmap(0, new_pages_size, - PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + void *new_pages; + if ((arena->flags & LowLevelAlloc::kAsyncSignalSafe) != 0) { + new_pages = MallocHook::UnhookedMMap(0, new_pages_size, + PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } else { + new_pages = mmap(0, new_pages_size, + PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + } RAW_CHECK(new_pages != MAP_FAILED, "mmap error"); arena->mu.Lock(); s = reinterpret_cast<AllocList *>(new_pages); @@ -425,7 +484,7 @@ void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { s->header.magic = Magic(kMagicAllocated, &s->header); RAW_CHECK(s->header.arena == arena, ""); arena->allocation_count++; - arena->mu.Unlock(); + section.Leave(); result = &s->levels; } ANNOTATE_NEW_MEMORY(result, request); diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.h 
b/third_party/tcmalloc/chromium/src/base/low_level_alloc.h index 0df1298..393b3d2 100644 --- a/third_party/tcmalloc/chromium/src/base/low_level_alloc.h +++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.h @@ -72,14 +72,20 @@ class LowLevelAlloc { // meta_data_arena; the DefaultArena() can be passed for meta_data_arena. // These values may be ored into flags: enum { - // Calls to Alloc() and Free() will be reported - // via the MallocHook interface. - // The DefaultArena() has this flag on. - // NewArena(flags, DefaultArena()) w/o this bit in 'flags', - // on the other hand, will not cause any MallocHook - // calls even during the NewArena call itself - // (it will in fact use meta_data_arena different from DefaultArena()). - kCallMallocHook = 0x0001 + // Report calls to Alloc() and Free() via the MallocHook interface. + // Set in the DefaultArena. + kCallMallocHook = 0x0001, + + // Make calls to Alloc(), Free() be async-signal-safe. Not set in + // DefaultArena(). + kAsyncSignalSafe = 0x0002, + + // When used with DefaultArena(), the NewArena() and DeleteArena() calls + // obey the flags given explicitly in the NewArena() call, even if those + // flags differ from the settings in DefaultArena(). So the call + // NewArena(kAsyncSignalSafe, DefaultArena()) is itself async-signal-safe, + // as well as generating an arena that provides async-signal-safe + // Alloc/Free. 
}; static Arena *NewArena(int32 flags, Arena *meta_data_arena); diff --git a/third_party/tcmalloc/chromium/src/base/spinlock.h b/third_party/tcmalloc/chromium/src/base/spinlock.h index dda59a6..9e633c4 100644 --- a/third_party/tcmalloc/chromium/src/base/spinlock.h +++ b/third_party/tcmalloc/chromium/src/base/spinlock.h @@ -134,7 +134,7 @@ class LOCKABLE SpinLock { void SlowLock(); void SlowUnlock(int64 wait_timestamp); - DISALLOW_EVIL_CONSTRUCTORS(SpinLock); + DISALLOW_COPY_AND_ASSIGN(SpinLock); }; // Corresponding locker object that arranges to acquire a spinlock for diff --git a/third_party/tcmalloc/chromium/src/base/sysinfo.cc b/third_party/tcmalloc/chromium/src/base/sysinfo.cc index a2bc2a9..7af0495 100644 --- a/third_party/tcmalloc/chromium/src/base/sysinfo.cc +++ b/third_party/tcmalloc/chromium/src/base/sysinfo.cc @@ -100,11 +100,33 @@ // Some non-trivial getenv-related functions. // ---------------------------------------------------------------------- +// It's not safe to call getenv() in the malloc hooks, because they +// might be called extremely early, before libc is done setting up +// correctly. In particular, the thread library may not be done +// setting up errno. So instead, we use the built-in __environ array +// if it exists, and otherwise read /proc/self/environ directly, using +// system calls to read the file, and thus avoid setting errno. +// /proc/self/environ has a limit of how much data it exports (around +// 8K), so it's not an ideal solution. const char* GetenvBeforeMain(const char* name) { +#if defined(HAVE___ENVIRON) // if we have it, it's declared in unistd.h + const int namelen = strlen(name); + for (char** p = __environ; *p; p++) { + if (!memcmp(*p, name, namelen) && (*p)[namelen] == '=') // it's a match + return *p + namelen+1; // point after = + } + return NULL; +#elif defined(PLATFORM_WINDOWS) + // TODO(mbelshe) - repeated calls to this function will overwrite the + // contents of the static buffer. 
+ static char envbuf[1024]; // enough to hold any envvar we care about + if (!GetEnvironmentVariableA(name, envbuf, sizeof(envbuf)-1)) + return NULL; + return envbuf; +#else // static is ok because this function should only be called before // main(), when we're single-threaded. static char envbuf[16<<10]; -#ifndef PLATFORM_WINDOWS if (*envbuf == '\0') { // haven't read the environ yet int fd = safeopen("/proc/self/environ", O_RDONLY); // The -2 below guarantees the last two bytes of the buffer will be \0\0 @@ -129,12 +151,6 @@ const char* GetenvBeforeMain(const char* name) { p = endp + 1; } return NULL; // env var never found -#else - // TODO(mbelshe) - repeated calls to this function will overwrite the - // contents of the static buffer. - if (!GetEnvironmentVariableA(name, envbuf, sizeof(envbuf)-1)) - return NULL; - return envbuf; #endif } @@ -441,6 +457,48 @@ static void ConstructFilename(const char* spec, pid_t pid, } #endif +// A templatized helper function instantiated for Mach (OS X) only. +// It can handle finding info for both 32 bits and 64 bits. +// Returns true if it successfully handled the hdr, false else. 
+#ifdef __MACH__ // Mac OS X, almost certainly +template<uint32_t kMagic, uint32_t kLCSegment, + typename MachHeader, typename SegmentCommand> +static bool NextExtMachHelper(const mach_header* hdr, + int current_image, int current_load_cmd, + uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev) { + static char kDefaultPerms[5] = "r-xp"; + if (hdr->magic != kMagic) + return false; + const char* lc = (const char *)hdr + sizeof(MachHeader); + // TODO(csilvers): make this not-quadratic (increment and hold state) + for (int j = 0; j < current_load_cmd; j++) // advance to *our* load_cmd + lc += ((const load_command *)lc)->cmdsize; + if (((const load_command *)lc)->cmd == kLCSegment) { + const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image); + const SegmentCommand* sc = (const SegmentCommand *)lc; + if (start) *start = sc->vmaddr + dlloff; + if (end) *end = sc->vmaddr + sc->vmsize + dlloff; + if (flags) *flags = kDefaultPerms; // can we do better? + if (offset) *offset = sc->fileoff; + if (inode) *inode = 0; + if (filename) + *filename = const_cast<char*>(_dyld_get_image_name(current_image)); + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; // could we use sc->filesize? 
+ if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } + + return false; +} +#endif + ProcMapsIterator::ProcMapsIterator(pid_t pid) { Init(pid, NULL, false); } @@ -456,6 +514,7 @@ ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer, void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, bool use_maps_backing) { + pid_ = pid; using_maps_backing_ = use_maps_backing; dynamic_buffer_ = NULL; if (!buffer) { @@ -691,6 +750,7 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4); COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2); COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1); + Buffer object_path; int nread = 0; // fill up buffer with text NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t))); if (nread == sizeof(prmap_t)) { @@ -700,13 +760,27 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, // two middle ints are major and minor device numbers, but I'm not sure. sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname); + if (pid_ == 0) { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/self/path/%s", mapinfo->pr_mapname), + Buffer::kBufSize); + } else { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/%d/path/%s", pid_, mapinfo->pr_mapname), + Buffer::kBufSize); + } + ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX); + CHECK_LT(len, PATH_MAX); + if (len < 0) + len = 0; + current_filename_[len] = '\0'; + if (start) *start = mapinfo->pr_vaddr; if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size; if (flags) *flags = kPerms[mapinfo->pr_mflags & 7]; if (offset) *offset = mapinfo->pr_offset; if (inode) *inode = inode_from_mapname; - // TODO(csilvers): How to map from /proc/map/object to filename? 
- if (filename) *filename = mapinfo->pr_mapname; // format is ufs.?.?.inode + if (filename) *filename = current_filename_; if (file_mapping) *file_mapping = 0; if (file_pages) *file_pages = 0; if (anon_mapping) *anon_mapping = 0; @@ -715,7 +789,6 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, return true; } #elif defined(__MACH__) - static char kDefaultPerms[5] = "r-xp"; // We return a separate entry for each segment in the DLL. (TODO(csilvers): // can we do better?) A DLL ("image") has load-commands, some of which // talk about segment boundaries. @@ -728,25 +801,22 @@ bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, // We start with the next load command (we've already looked at this one). for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) { - const char* lc = ((const char *)hdr + sizeof(struct mach_header)); - // TODO(csilvers): make this not-quadradic (increment and hold state) - for (int j = 0; j < current_load_cmd_; j++) // advance to *our* load_cmd - lc += ((const load_command *)lc)->cmdsize; - if (((const load_command *)lc)->cmd == LC_SEGMENT) { - const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image_); - const segment_command* sc = (const segment_command *)lc; - if (start) *start = sc->vmaddr + dlloff; - if (end) *end = sc->vmaddr + sc->vmsize + dlloff; - if (flags) *flags = kDefaultPerms; // can we do better? - if (offset) *offset = sc->fileoff; - if (inode) *inode = 0; - if (filename) - *filename = const_cast<char*>(_dyld_get_image_name(current_image_)); - if (file_mapping) *file_mapping = 0; - if (file_pages) *file_pages = 0; // could we use sc->filesize? 
- if (anon_mapping) *anon_mapping = 0; - if (anon_pages) *anon_pages = 0; - if (dev) *dev = 0; +#ifdef MH_MAGIC_64 + if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64, + struct mach_header_64, struct segment_command_64>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { + return true; + } +#endif + if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT, + struct mach_header, struct segment_command>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { return true; } } diff --git a/third_party/tcmalloc/chromium/src/base/sysinfo.h b/third_party/tcmalloc/chromium/src/base/sysinfo.h index b4b5c9f..0bcc1f5 100644 --- a/third_party/tcmalloc/chromium/src/base/sysinfo.h +++ b/third_party/tcmalloc/chromium/src/base/sysinfo.h @@ -209,9 +209,13 @@ class ProcMapsIterator { #elif defined(__MACH__) int current_image_; // dll's are called "images" in macos parlance int current_load_cmd_; // the segment of this dll we're examining +#elif defined(__sun__) // Solaris + int fd_; + char current_filename_[PATH_MAX]; #else int fd_; // filehandle on /proc/*/maps #endif + pid_t pid_; char flags_[10]; Buffer* dynamic_buffer_; // dynamically-allocated Buffer bool using_maps_backing_; // true if we are looking at maps_backing instead of maps. diff --git a/third_party/tcmalloc/chromium/src/config.h.in b/third_party/tcmalloc/chromium/src/config.h.in index 1b1a3d9..1ad2642 100644 --- a/third_party/tcmalloc/chromium/src/config.h.in +++ b/third_party/tcmalloc/chromium/src/config.h.in @@ -153,6 +153,9 @@ /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ +/* Define to 1 if compiler supports __environ */ +#undef HAVE___ENVIRON + /* Define to 1 if the system has the type `__int64'. 
*/ #undef HAVE___INT64 diff --git a/third_party/tcmalloc/chromium/src/debugallocation.cc b/third_party/tcmalloc/chromium/src/debugallocation.cc index dcf722d..1a9ddcb 100644 --- a/third_party/tcmalloc/chromium/src/debugallocation.cc +++ b/third_party/tcmalloc/chromium/src/debugallocation.cc @@ -152,18 +152,20 @@ extern "C" { // The do_* functions are defined in tcmalloc.cc, // which is included before this file // when TCMALLOC_FOR_DEBUGALLOCATION is defined. -#define BASE_MALLOC do_malloc -#define BASE_FREE do_free -#define BASE_MALLOPT do_mallopt -#define BASE_MALLINFO do_mallinfo +#define BASE_MALLOC_NEW(size) cpp_alloc(size, false) +#define BASE_MALLOC do_malloc_or_cpp_alloc +#define BASE_FREE do_free +#define BASE_MALLOPT do_mallopt +#define BASE_MALLINFO do_mallinfo #else // We are working on top of standard libc's malloc library -#define BASE_MALLOC __libc_malloc -#define BASE_FREE __libc_free -#define BASE_MALLOPT __libc_mallopt -#define BASE_MALLINFO __libc_mallinfo +#define BASE_MALLOC_NEW __libc_malloc +#define BASE_MALLOC __libc_malloc +#define BASE_FREE __libc_free +#define BASE_MALLOPT __libc_mallopt +#define BASE_MALLINFO __libc_mallinfo #endif @@ -524,10 +526,14 @@ class MallocBlock { } b = (MallocBlock*) (p + (num_pages - 1) * pagesize - sz); } else { - b = (MallocBlock*) BASE_MALLOC(real_malloced_size(size)); + b = (MallocBlock*) (type == kMallocType ? + BASE_MALLOC(real_malloced_size(size)) : + BASE_MALLOC_NEW(real_malloced_size(size))); } #else - b = (MallocBlock*) BASE_MALLOC(real_malloced_size(size)); + b = (MallocBlock*) (type == kMallocType ? 
+ BASE_MALLOC(real_malloced_size(size)) : + BASE_MALLOC_NEW(real_malloced_size(size))); #endif // It would be nice to output a diagnostic on allocation failure @@ -656,25 +662,24 @@ class MallocBlock { reinterpret_cast<void*>( PRINTABLE_PTHREAD(queue_entry.deleter_threadid))); - SymbolMap symbolization_table; + SymbolTable symbolization_table; const int num_symbols = queue_entry.num_deleter_pcs; // short alias name for (int i = 0; i < num_symbols; i++) { // Symbolizes the previous address of pc because pc may be in the // next function. This may happen when the function ends with // a call to a function annotated noreturn (e.g. CHECK). - uintptr_t pc = - reinterpret_cast<uintptr_t>(queue_entry.deleter_pcs[i]) - 1; - symbolization_table[pc] = ""; + char* pc = + reinterpret_cast<char*>(queue_entry.deleter_pcs[i]) - 1; + symbolization_table.Add(pc); } - int sym_buffer_len = kSymbolSize * num_symbols; - char *sym_buffer = new char[sym_buffer_len]; if (FLAGS_symbolize_stacktrace) - Symbolize(sym_buffer, sym_buffer_len, &symbolization_table); + symbolization_table.Symbolize(); for (int i = 0; i < num_symbols; i++) { - uintptr_t pc = - reinterpret_cast<uintptr_t>(queue_entry.deleter_pcs[i]) - 1; - TracePrintf(STDERR_FILENO, " @ %p %s\n", - pc, symbolization_table[pc]); + char *pc = + reinterpret_cast<char*>(queue_entry.deleter_pcs[i]) - 1; + TracePrintf(STDERR_FILENO, " @ %"PRIxPTR" %s\n", + reinterpret_cast<uintptr_t>(pc), + symbolization_table.GetSymbol(pc)); } } else { RAW_LOG(ERROR, @@ -696,6 +701,12 @@ class MallocBlock { // Find the header just before client's memory. MallocBlock *mb = reinterpret_cast<MallocBlock *>( reinterpret_cast<char *>(p) - data_offset); + // If mb->alloc_type_ is kMagicDeletedInt, we're not an ok pointer. 
+ if (mb->alloc_type_ == kMagicDeletedInt) { + RAW_LOG(FATAL, "memory allocation bug: object at %p has been already" + " deallocated; or else a word before the object has been" + " corrupted (memory stomping bug)", p); + } // If mb->offset_ is zero (common case), mb is the real header. If // mb->offset_ is non-zero, this block was allocated by memalign, and // mb->offset_ is the distance backwards to the real header from mb, diff --git a/third_party/tcmalloc/chromium/src/google/heap-checker.h b/third_party/tcmalloc/chromium/src/google/heap-checker.h index 751eb9f..c0ee8a8 100644 --- a/third_party/tcmalloc/chromium/src/google/heap-checker.h +++ b/third_party/tcmalloc/chromium/src/google/heap-checker.h @@ -51,10 +51,12 @@ #ifndef BASE_HEAP_CHECKER_H_ #define BASE_HEAP_CHECKER_H_ -#include "config.h" - #include <sys/types.h> // for size_t -#ifdef HAVE_STDINT_H +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. +#ifndef _MSC_VER #include <stdint.h> // for uintptr_t #endif #include <stdarg.h> // for va_list diff --git a/third_party/tcmalloc/chromium/src/google/malloc_extension.h b/third_party/tcmalloc/chromium/src/google/malloc_extension.h index bc53e0f..fc272c9 100644 --- a/third_party/tcmalloc/chromium/src/google/malloc_extension.h +++ b/third_party/tcmalloc/chromium/src/google/malloc_extension.h @@ -42,6 +42,13 @@ #define BASE_MALLOC_EXTENSION_H_ #include <stddef.h> +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. 
+#ifndef _MSC_VER +#include <stdint.h> +#endif #include <string> // Annoying stuff for windows -- makes sure clients can import these functions @@ -58,6 +65,10 @@ static const int kMallocHistogramSize = 64; // One day, we could support other types of writers (perhaps for C?) typedef std::string MallocExtensionWriter; +namespace base { +struct MallocRange; +} + // The default implementations of the following routines do nothing. // All implementations should be thread-safe; the current one // (TCMallocImplementation) is. @@ -99,6 +110,14 @@ class PERFTOOLS_DLL_DECL MallocExtension { // be passed to "pprof". virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer); + // Invokes func(arg, range) for every controlled memory + // range. *range is filled in with information about the range. + // + // This is a best-effort interface useful only for performance + // analysis. The implementation may not call func at all. + typedef void (RangeFunction)(void*, const base::MallocRange*); + virtual void Ranges(void* arg, RangeFunction func); + // ------------------------------------------------------------------- // Control operations for getting and setting malloc implementation // specific parameters. Some currently useful properties: @@ -127,12 +146,20 @@ class PERFTOOLS_DLL_DECL MallocExtension { // This property is not writable. // // "tcmalloc.slack_bytes" - // Number of bytes allocated from system, but not currently - // in use by malloced objects. I.e., bytes available for - // allocation without needing more bytes from system. + // Number of bytes allocated from system, but not currently in + // use by malloced objects. I.e., bytes available for + // allocation without needing more bytes from system. It is + // the sum of pageheap_free_bytes and pageheap_unmapped_bytes. + // This property is not writable. + // + // "tcmalloc.pageheap_free_bytes" + // Number of bytes in free, mapped pages in pageheap + // This property is not writable. 
+ // + // "tcmalloc.pageheap_unmapped_bytes" + // Number of bytes in free, unmapped pages in pageheap // This property is not writable. // - // TODO: Add more properties as necessary // ------------------------------------------------------------------- // Get the named "property"'s value. Returns true if the property @@ -167,12 +194,13 @@ class PERFTOOLS_DLL_DECL MallocExtension { // Most malloc implementations ignore this routine. virtual void MarkThreadBusy(); - // Try to free memory back to the operating system for reuse. Only - // use this extension if the application has recently freed a lot of - // memory, and does not anticipate using it again for a long time -- - // to get this memory back may require faulting pages back in by the - // OS, and that may be slow. (Currently only implemented in - // tcmalloc.) + // Try to release num_bytes of free memory back to the operating + // system for reuse. Use this extension with caution -- to get this + // memory back may require faulting pages back in by the OS, and + // that may be slow. (Currently only implemented in tcmalloc.) + virtual void ReleaseToSystem(size_t num_bytes); + + // Same as ReleaseToSystem() but release as much memory as possible. virtual void ReleaseFreeMemory(); // Sets the rate at which we release unused memory to the system. @@ -239,4 +267,29 @@ class PERFTOOLS_DLL_DECL MallocExtension { virtual void** ReadHeapGrowthStackTraces(); }; +namespace base { + +// Information passed per range. More fields may be added later. 
+struct MallocRange { + enum Type { + INUSE, // Application is using this range + FREE, // Range is currently free + UNMAPPED, // Backing physical memory has been returned to the OS + UNKNOWN, + // More enum values may be added in the future + }; + + uintptr_t address; // Address of range + size_t length; // Byte length of range + Type type; // Type of this range + double fraction; // Fraction of range that is being used (0 if !INUSE) + + // Perhaps add the following: + // - stack trace if this range was sampled + // - heap growth stack trace if applicable to this range + // - age when allocated (for inuse) or freed (if not in use) +}; + +} // namespace base + #endif // BASE_MALLOC_EXTENSION_H_ diff --git a/third_party/tcmalloc/chromium/src/google/malloc_extension_c.h b/third_party/tcmalloc/chromium/src/google/malloc_extension_c.h index 514305e..fcaa8cd 100644 --- a/third_party/tcmalloc/chromium/src/google/malloc_extension_c.h +++ b/third_party/tcmalloc/chromium/src/google/malloc_extension_c.h @@ -75,6 +75,7 @@ PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value); PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void); PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void); +PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(size_t num_bytes); PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void); PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size); PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(void* p); diff --git a/third_party/tcmalloc/chromium/src/google/malloc_hook.h b/third_party/tcmalloc/chromium/src/google/malloc_hook.h index f2713e9..48d92da 100644 --- a/third_party/tcmalloc/chromium/src/google/malloc_hook.h +++ b/third_party/tcmalloc/chromium/src/google/malloc_hook.h @@ -191,6 +191,12 @@ class PERFTOOLS_DLL_DECL MallocHook { int skip_count) { return 
MallocHook_GetCallerStackTrace(result, max_depth, skip_count); } + + // Unhooked versions of mmap() and munmap(). These should be used + // only by experts, since they bypass heapchecking, etc. + static void* UnhookedMMap(void *start, size_t length, int prot, int flags, + int fd, off_t offset); + static int UnhookedMUnmap(void *start, size_t length); }; #endif /* _MALLOC_HOOK_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in index cf62c70..e5c873d 100644 --- a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in +++ b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in @@ -89,6 +89,7 @@ extern "C" { #endif #ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); diff --git a/third_party/tcmalloc/chromium/src/heap-checker.cc b/third_party/tcmalloc/chromium/src/heap-checker.cc index fc8973a..82a7adb 100644 --- a/third_party/tcmalloc/chromium/src/heap-checker.cc +++ b/third_party/tcmalloc/chromium/src/heap-checker.cc @@ -47,11 +47,9 @@ #ifdef HAVE_PTHREAD #include <pthread.h> #endif -#ifdef HAVE_POLL_H -#include <poll.h> -#endif #include <sys/stat.h> #include <sys/types.h> +#include <time.h> #include <assert.h> #ifdef HAVE_LINUX_PTRACE_H @@ -1834,7 +1832,9 @@ static bool internal_init_start_has_run = false; // /*static*/ void HeapLeakChecker::InternalInitStart() { { SpinLockHolder l(&heap_checker_lock); - RAW_CHECK(!internal_init_start_has_run, "Only one call is expected"); + RAW_CHECK(!internal_init_start_has_run, + "Heap-check constructor called twice. 
Perhaps you both linked" + " in the heap checker, and also used LD_PRELOAD to load it?"); internal_init_start_has_run = true; if (FLAGS_heap_check.empty()) { @@ -2298,7 +2298,8 @@ void HeapLeakChecker_AfterDestructors() { } if (FLAGS_heap_check_after_destructors) { if (HeapLeakChecker::DoMainHeapCheck()) { - poll(0, 0, 500); + const struct timespec sleep_time = { 0, 500000000 }; // 500 ms + nanosleep(&sleep_time, NULL); // Need this hack to wait for other pthreads to exit. // Otherwise tcmalloc find errors // on a free() call from pthreads. diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.cc b/third_party/tcmalloc/chromium/src/heap-profile-table.cc index 0369d90..66e4f20 100644 --- a/third_party/tcmalloc/chromium/src/heap-profile-table.cc +++ b/third_party/tcmalloc/chromium/src/heap-profile-table.cc @@ -549,22 +549,17 @@ void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, RAW_LOG(ERROR, "The %d largest leaks:", to_report); // Print - SymbolMap symbolization_table; - int num_symbols = 0; + SymbolTable symbolization_table; for (int i = 0; i < to_report; i++) { const Entry& e = entries[i]; for (int j = 0; j < e.bucket->depth; j++) { - const void* pc = e.bucket->stack[j]; - symbolization_table[reinterpret_cast<uintptr_t>(pc)] = ""; - num_symbols++; + symbolization_table.Add(e.bucket->stack[j]); } } static const int kBufSize = 2<<10; char buffer[kBufSize]; - int sym_buffer_len = kSymbolSize * num_symbols; - char *sym_buffer = new char[sym_buffer_len]; if (should_symbolize) - Symbolize(sym_buffer, sym_buffer_len, &symbolization_table); + symbolization_table.Symbolize(); for (int i = 0; i < to_report; i++) { const Entry& e = entries[i]; base::RawPrinter printer(buffer, kBufSize); @@ -572,12 +567,11 @@ void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, e.bytes, e.count); for (int j = 0; j < e.bucket->depth; j++) { const void* pc = e.bucket->stack[j]; - printer.Printf("\t@ %p %s\n", - pc, 
symbolization_table[reinterpret_cast<uintptr_t>(pc)]); + printer.Printf("\t@ %"PRIxPTR" %s\n", + reinterpret_cast<uintptr_t>(pc), symbolization_table.GetSymbol(pc)); } RAW_LOG(ERROR, "%s", buffer); } - delete[] sym_buffer; if (to_report < n) { RAW_LOG(ERROR, "Skipping leaks numbered %d..%d", diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.h b/third_party/tcmalloc/chromium/src/heap-profile-table.h index 92d237e..5403257 100644 --- a/third_party/tcmalloc/chromium/src/heap-profile-table.h +++ b/third_party/tcmalloc/chromium/src/heap-profile-table.h @@ -326,7 +326,7 @@ class HeapProfileTable { // Map of all currently allocated objects we know about. AllocationMap* allocation_; - DISALLOW_EVIL_CONSTRUCTORS(HeapProfileTable); + DISALLOW_COPY_AND_ASSIGN(HeapProfileTable); }; class HeapProfileTable::Snapshot { diff --git a/third_party/tcmalloc/chromium/src/malloc_extension.cc b/third_party/tcmalloc/chromium/src/malloc_extension.cc index 068a693..4ce262f 100644 --- a/third_party/tcmalloc/chromium/src/malloc_extension.cc +++ b/third_party/tcmalloc/chromium/src/malloc_extension.cc @@ -143,10 +143,14 @@ void MallocExtension::MarkThreadBusy() { // Default implementation does nothing } -void MallocExtension::ReleaseFreeMemory() { +void MallocExtension::ReleaseToSystem(size_t num_bytes) { // Default implementation does nothing } +void MallocExtension::ReleaseFreeMemory() { + ReleaseToSystem(static_cast<size_t>(-1)); // SIZE_T_MAX +} + void MallocExtension::SetMemoryReleaseRate(double rate) { // Default implementation does nothing } @@ -300,6 +304,10 @@ void MallocExtension::GetHeapGrowthStacks(MallocExtensionWriter* writer) { DumpAddressMap(writer); } +void MallocExtension::Ranges(void* arg, RangeFunction func) { + // No callbacks by default +} + // These are C shims that work on the current instance. 
#define C_SHIM(fn, retval, paramlist, arglist) \ @@ -325,5 +333,6 @@ C_SHIM(SetNumericProperty, int, C_SHIM(MarkThreadIdle, void, (void), ()); C_SHIM(MarkThreadBusy, void, (void), ()); C_SHIM(ReleaseFreeMemory, void, (void), ()); +C_SHIM(ReleaseToSystem, void, (size_t num_bytes), (num_bytes)); C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size)); C_SHIM(GetAllocatedSize, size_t, (void* p), (p)); diff --git a/third_party/tcmalloc/chromium/src/malloc_hook.cc b/third_party/tcmalloc/chromium/src/malloc_hook.cc index d1ad12a..2a7f542 100644 --- a/third_party/tcmalloc/chromium/src/malloc_hook.cc +++ b/third_party/tcmalloc/chromium/src/malloc_hook.cc @@ -423,7 +423,7 @@ static inline void* do_mmap64(void *start, size_t length, return result; } -# endif +# endif // defined(__x86_64__) // We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook // calls right into mmap and mmap64, so that the stack frames in the caller's @@ -472,7 +472,7 @@ extern "C" void* mmap(void *start, size_t length, int prot, int flags, return result; } -#endif +#endif // !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) extern "C" int munmap(void* start, size_t length) __THROW { MallocHook::InvokeMunmapHook(start, length); @@ -501,4 +501,26 @@ extern "C" void* sbrk(ptrdiff_t increment) __THROW { return result; } -#endif +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + return do_mmap64(start, length, prot, flags, fd, offset); +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + return sys_munmap(start, length); +} + +#else // defined(__linux) && + // (defined(__i386__) || defined(__x86_64__) || defined(__PPC__)) + +/*static*/void* MallocHook::UnhookedMMap(void *start, size_t length, int prot, + int flags, int fd, off_t offset) { + return mmap(start, length, prot, flags, fd, offset); +} + +/*static*/int MallocHook::UnhookedMUnmap(void *start, size_t length) { + return munmap(start, 
length); +} + +#endif // defined(__linux) && + // (defined(__i386__) || defined(__x86_64__) || defined(__PPC__)) diff --git a/third_party/tcmalloc/chromium/src/memfs_malloc.cc b/third_party/tcmalloc/chromium/src/memfs_malloc.cc index bef2d3c..9df4cad 100644 --- a/third_party/tcmalloc/chromium/src/memfs_malloc.cc +++ b/third_party/tcmalloc/chromium/src/memfs_malloc.cc @@ -101,6 +101,11 @@ void HugetlbSysAllocator::DumpStats(TCMalloc_Printer* printer) { void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { + // don't go any further if we haven't opened the backing file + if (hugetlb_fd_ == -1) { + return NULL; + } + // We don't respond to allocation requests smaller than big_page_size_ unless // the caller is willing to take more than they asked for. if (actual_size == NULL && size < big_page_size_) { @@ -161,8 +166,8 @@ void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, MAP_SHARED, hugetlb_fd_, hugetlb_base_); if (result == reinterpret_cast<void*>(MAP_FAILED)) { if (!FLAGS_memfs_malloc_ignore_mmap_fail) { - TCMalloc_MESSAGE(__FILE__, __LINE__, "mmap failed: %s\n", - strerror(errno)); + TCMalloc_MESSAGE(__FILE__, __LINE__, "mmap of size %"PRIuS" failed: %s\n", + size + extra, strerror(errno)); failed_ = true; if (FLAGS_memfs_malloc_abort_on_fail) { CRASH("memfs_malloc_abort_on_fail is set\n"); diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.h b/third_party/tcmalloc/chromium/src/memory_region_map.h index 1289764..f88c7b9 100644 --- a/third_party/tcmalloc/chromium/src/memory_region_map.h +++ b/third_party/tcmalloc/chromium/src/memory_region_map.h @@ -118,7 +118,7 @@ class MemoryRegionMap { LockHolder() { Lock(); } ~LockHolder() { Unlock(); } private: - DISALLOW_EVIL_CONSTRUCTORS(LockHolder); + DISALLOW_COPY_AND_ASSIGN(LockHolder); }; // A memory region that we know about through malloc_hook-s. 
@@ -329,7 +329,7 @@ class MemoryRegionMap { // Assumes Lock() is held static void LogAllLocked(); - DISALLOW_EVIL_CONSTRUCTORS(MemoryRegionMap); + DISALLOW_COPY_AND_ASSIGN(MemoryRegionMap); }; #endif // BASE_MEMORY_REGION_MAP_H_ diff --git a/third_party/tcmalloc/chromium/src/packed-cache-inl.h b/third_party/tcmalloc/chromium/src/packed-cache-inl.h index 77d6313..9d2cfe3 100644 --- a/third_party/tcmalloc/chromium/src/packed-cache-inl.h +++ b/third_party/tcmalloc/chromium/src/packed-cache-inl.h @@ -111,6 +111,10 @@ #ifndef TCMALLOC_PACKED_CACHE_INL_H_ #define TCMALLOC_PACKED_CACHE_INL_H_ +#include "config.h" +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif #include "base/basictypes.h" // for COMPILE_ASSERT #include "internal_logging.h" diff --git a/third_party/tcmalloc/chromium/src/page_heap.cc b/third_party/tcmalloc/chromium/src/page_heap.cc index f92cfc4..7a09aca 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.cc +++ b/third_party/tcmalloc/chromium/src/page_heap.cc @@ -49,12 +49,9 @@ namespace tcmalloc { PageHeap::PageHeap() : pagemap_(MetaDataAlloc), pagemap_cache_(0), - free_pages_(0), - system_bytes_(0), - committed_bytes_(0), scavenge_counter_(0), // Start scavenging at kMaxPages list - scavenge_index_(kMaxPages-1) { + release_index_(kMaxPages) { COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); DLL_Init(&large_.normal); DLL_Init(&large_.returned); @@ -154,20 +151,20 @@ Span* PageHeap::Split(Span* span, Length n) { void PageHeap::CommitSpan(Span* span) { TCMalloc_SystemCommit(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift)); - committed_bytes_ += span->length << kPageShift; + stats_.committed_bytes += span->length << kPageShift; } void PageHeap::DecommitSpan(Span* span) { TCMalloc_SystemRelease(reinterpret_cast<void*>(span->start << kPageShift), static_cast<size_t>(span->length << kPageShift)); - committed_bytes_ -= span->length << kPageShift; + stats_.committed_bytes -= 
span->length << kPageShift; } Span* PageHeap::Carve(Span* span, Length n) { ASSERT(n > 0); ASSERT(span->location != Span::IN_USE); const int old_location = span->location; - DLL_Remove(span); + RemoveFromFreeList(span); span->location = Span::IN_USE; Event(span, 'A', n); @@ -178,18 +175,11 @@ Span* PageHeap::Carve(Span* span, Length n) { leftover->location = old_location; Event(leftover, 'S', extra); RecordSpan(leftover); - - // Place leftover span on appropriate free list - SpanList* listpair = (extra < kMaxPages) ? &free_[extra] : &large_; - Span* dst = (leftover->location == Span::ON_RETURNED_FREELIST - ? &listpair->returned : &listpair->normal); - DLL_Prepend(dst, leftover); - + PrependToFreeList(leftover); // Skip coalescing - no candidates possible span->length = n; pagemap_.set(span->start + n - 1, span); } ASSERT(Check()); - free_pages_ -= n; if (old_location == Span::ON_RETURNED_FREELIST) { // We need to recommit this address space. CommitSpan(span); @@ -205,8 +195,18 @@ void PageHeap::Delete(Span* span) { ASSERT(span->length > 0); ASSERT(GetDescriptor(span->start) == span); ASSERT(GetDescriptor(span->start + span->length - 1) == span); + const Length n = span->length; span->sizeclass = 0; span->sample = 0; + span->location = Span::ON_NORMAL_FREELIST; + Event(span, 'D', span->length); + MergeIntoFreeList(span); // Coalesces if possible + IncrementalScavenge(n); + ASSERT(Check()); +} + +void PageHeap::MergeIntoFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); // Coalesce -- we guarantee that "p" != 0, so no bounds checking // necessary. 
We do not bother resetting the stale pagemap @@ -239,12 +239,12 @@ void PageHeap::Delete(Span* span) { if (prev->location == Span::ON_RETURNED_FREELIST) { // We're about to put the merge span into the returned freelist and call // DecommitSpan() on it, which will mark the entire span including this - // one as released and decrease committed_bytes_ by the size of the + // one as released and decrease stats_.committed_bytes by the size of the // merged span. To make the math work out we temporarily increase the - // committed_bytes_ amount. - committed_bytes_ += prev->length << kPageShift; + // stats_.committed_bytes amount. + stats_.committed_bytes += prev->length << kPageShift; } - DLL_Remove(prev); + RemoveFromFreeList(prev); DeleteSpan(prev); span->start -= len; span->length += len; @@ -258,9 +258,9 @@ void PageHeap::Delete(Span* span) { const Length len = next->length; if (next->location == Span::ON_RETURNED_FREELIST) { // See the comment below 'if (prev->location ...' for explanation. - committed_bytes_ += next->length << kPageShift; + stats_.committed_bytes += next->length << kPageShift; } - DLL_Remove(next); + RemoveFromFreeList(next); DeleteSpan(next); span->length += len; pagemap_.set(span->start + span->length - 1, span); @@ -270,15 +270,29 @@ void PageHeap::Delete(Span* span) { Event(span, 'D', span->length); span->location = Span::ON_RETURNED_FREELIST; DecommitSpan(span); - if (span->length < kMaxPages) { - DLL_Prepend(&free_[span->length].returned, span); + PrependToFreeList(span); +} + +void PageHeap::PrependToFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + SpanList* list = (span->length < kMaxPages) ? 
&free_[span->length] : &large_; + if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes += (span->length << kPageShift); + DLL_Prepend(&list->normal, span); } else { - DLL_Prepend(&large_.returned, span); + stats_.unmapped_bytes += (span->length << kPageShift); + DLL_Prepend(&list->returned, span); } - free_pages_ += n; +} - IncrementalScavenge(n); - ASSERT(Check()); +void PageHeap::RemoveFromFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes -= (span->length << kPageShift); + } else { + stats_.unmapped_bytes -= (span->length << kPageShift); + } + DLL_Remove(span); } void PageHeap::IncrementalScavenge(Length n) { @@ -286,17 +300,6 @@ void PageHeap::IncrementalScavenge(Length n) { scavenge_counter_ -= n; if (scavenge_counter_ >= 0) return; // Not yet time to scavenge - // Never delay scavenging for more than the following number of - // deallocated pages. With 4K pages, this comes to 4GB of - // deallocation. - // Chrome: Changed to 64MB - static const int kMaxReleaseDelay = 1 << 14; - - // If there is nothing to release, wait for so many pages before - // scavenging again. With 4K pages, this comes to 1GB of memory. - // Chrome: Changed to 16MB - static const int kDefaultReleaseDelay = 1 << 12; - const double rate = FLAGS_tcmalloc_release_rate; if (rate <= 1e-6) { // Tiny release rate means that releasing is disabled. @@ -304,40 +307,62 @@ void PageHeap::IncrementalScavenge(Length n) { return; } - // Find index of free list to scavenge - int index = scavenge_index_ + 1; - for (int i = 0; i < kMaxPages+1; i++) { - if (index > kMaxPages) index = 0; - SpanList* slist = (index == kMaxPages) ? 
&large_ : &free_[index]; - if (!DLL_IsEmpty(&slist->normal)) { - // Release the last span on the normal portion of this list - Span* s = slist->normal.prev; - ASSERT(s->location == Span::ON_NORMAL_FREELIST); - DLL_Remove(s); - DecommitSpan(s); - s->location = Span::ON_RETURNED_FREELIST; - DLL_Prepend(&slist->returned, s); - - // Compute how long to wait until we return memory. - // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages - // after releasing one page. - const double mult = 1000.0 / rate; - double wait = mult * static_cast<double>(s->length); - if (wait > kMaxReleaseDelay) { - // Avoid overflow and bound to reasonable range - wait = kMaxReleaseDelay; - } - scavenge_counter_ = static_cast<int64_t>(wait); + Length released_pages = ReleaseAtLeastNPages(1); - scavenge_index_ = index; // Scavenge at index+1 next time - // Note: we stop scavenging after finding one. - return; + if (released_pages == 0) { + // Nothing to scavenge, delay for a while. + scavenge_counter_ = kDefaultReleaseDelay; + } else { + // Compute how long to wait until we return memory. + // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages + // after releasing one page. + const double mult = 1000.0 / rate; + double wait = mult * static_cast<double>(released_pages); + if (wait > kMaxReleaseDelay) { + // Avoid overflow and bound to reasonable range. + wait = kMaxReleaseDelay; } - index++; + scavenge_counter_ = static_cast<int64_t>(wait); } +} + +Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) { + Span* s = slist->normal.prev; + ASSERT(s->location == Span::ON_NORMAL_FREELIST); + RemoveFromFreeList(s); + const Length n = s->length; + TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), + static_cast<size_t>(s->length << kPageShift)); + s->location = Span::ON_RETURNED_FREELIST; + MergeIntoFreeList(s); // Coalesces if possible. 
+ return n; +} - // Nothing to scavenge, delay for a while - scavenge_counter_ = kDefaultReleaseDelay; +Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { + Length released_pages = 0; + Length prev_released_pages = -1; + + // Round robin through the lists of free spans, releasing the last + // span in each list. Stop after releasing at least num_pages. + while (released_pages < num_pages) { + if (released_pages == prev_released_pages) { + // Last iteration of while loop made no progress. + break; + } + prev_released_pages = released_pages; + + for (int i = 0; i < kMaxPages+1 && released_pages < num_pages; + i++, release_index_++) { + if (release_index_ > kMaxPages) release_index_ = 0; + SpanList* slist = (release_index_ == kMaxPages) ? + &large_ : &free_[release_index_]; + if (!DLL_IsEmpty(&slist->normal)) { + Length released_len = ReleaseLastNormalSpan(slist); + released_pages += released_len; + } + } + } + return released_pages; } void PageHeap::RegisterSizeClass(Span* span, size_t sc) { @@ -352,6 +377,10 @@ void PageHeap::RegisterSizeClass(Span* span, size_t sc) { } } +static double MB(uint64_t bytes) { + return bytes / 1048576.0; +} + static double PagesToMB(uint64_t pages) { return (pages << kPageShift) / 1048576.0; } @@ -364,8 +393,8 @@ void PageHeap::Dump(TCMalloc_Printer* out) { } } out->printf("------------------------------------------------\n"); - out->printf("PageHeap: %d sizes; %6.1f MB free\n", - nonempty_sizes, PagesToMB(free_pages_)); + out->printf("PageHeap: %d sizes; %6.1f MB free; %6.1f MB unmapped\n", + nonempty_sizes, MB(stats_.free_bytes), MB(stats_.unmapped_bytes)); out->printf("------------------------------------------------\n"); uint64_t total_normal = 0; uint64_t total_returned = 0; @@ -417,6 +446,37 @@ void PageHeap::Dump(TCMalloc_Printer* out) { PagesToMB(total_returned)); } +bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) { + Span* span = reinterpret_cast<Span*>(pagemap_.Next(start)); + if (span == NULL) { + 
return false; + } + r->address = span->start << kPageShift; + r->length = span->length << kPageShift; + r->fraction = 0; + switch (span->location) { + case Span::IN_USE: + r->type = base::MallocRange::INUSE; + r->fraction = 1; + if (span->sizeclass > 0) { + // Only some of the objects in this span may be in use. + const size_t osize = Static::sizemap()->class_to_size(span->sizeclass); + r->fraction = (1.0 * osize * span->refcount) / r->length; + } + break; + case Span::ON_NORMAL_FREELIST: + r->type = base::MallocRange::FREE; + break; + case Span::ON_RETURNED_FREELIST: + r->type = base::MallocRange::UNMAPPED; + break; + default: + r->type = base::MallocRange::UNKNOWN; + break; + } + return true; +} + static void RecordGrowth(size_t growth) { StackTrace* t = Static::stacktrace_allocator()->New(); t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3); @@ -442,9 +502,9 @@ bool PageHeap::GrowHeap(Length n) { ask = actual_size >> kPageShift; RecordGrowth(ask << kPageShift); - uint64_t old_system_bytes = system_bytes_; - system_bytes_ += (ask << kPageShift); - committed_bytes_ += (ask << kPageShift); + uint64_t old_system_bytes = stats_.system_bytes; + stats_.system_bytes += (ask << kPageShift); + stats_.committed_bytes += (ask << kPageShift); const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; ASSERT(p > 0); @@ -453,7 +513,7 @@ bool PageHeap::GrowHeap(Length n) { // when a program keeps allocating and freeing large blocks. if (old_system_bytes < kPageMapBigAllocationThreshold - && system_bytes_ >= kPageMapBigAllocationThreshold) { + && stats_.system_bytes >= kPageMapBigAllocationThreshold) { pagemap_.PreallocateMoreMemory(); } @@ -461,10 +521,8 @@ bool PageHeap::GrowHeap(Length n) { // Plus ensure one before and one after so coalescing code // does not need bounds-checking. if (pagemap_.Ensure(p-1, ask+2)) { - // Pretend the new area is allocated and then Delete() it to - // cause any necessary coalescing to occur. 
- // - // We do not adjust free_pages_ here since Delete() will do it for us. + // Pretend the new area is allocated and then Delete() it to cause + // any necessary coalescing to occur. Span* span = NewSpan(p, ask); RecordSpan(span); Delete(span); @@ -506,25 +564,4 @@ bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, return true; } -void PageHeap::ReleaseFreeList(Span* list, Span* returned) { - // Walk backwards through list so that when we push these - // spans on the "returned" list, we preserve the order. - while (!DLL_IsEmpty(list)) { - Span* s = list->prev; - DLL_Remove(s); - DLL_Prepend(returned, s); - ASSERT(s->location == Span::ON_NORMAL_FREELIST); - s->location = Span::ON_RETURNED_FREELIST; - DecommitSpan(s); - } -} - -void PageHeap::ReleaseFreePages() { - for (Length s = 0; s < kMaxPages; s++) { - ReleaseFreeList(&free_[s].normal, &free_[s].returned); - } - ReleaseFreeList(&large_.normal, &large_.returned); - ASSERT(Check()); -} - } // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/page_heap.h b/third_party/tcmalloc/chromium/src/page_heap.h index 85ad979..17e2fa8 100644 --- a/third_party/tcmalloc/chromium/src/page_heap.h +++ b/third_party/tcmalloc/chromium/src/page_heap.h @@ -34,11 +34,20 @@ #define TCMALLOC_PAGE_HEAP_H_ #include <config.h> +#include <google/malloc_extension.h> #include "common.h" #include "packed-cache-inl.h" #include "pagemap.h" #include "span.h" +// We need to dllexport PageHeap just for the unittest. MSVC complains +// that we don't dllexport the PageHeap members, but we don't need to +// test those, so I just suppress this warning. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4251) +#endif + // This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if // you're porting to a system where you really can't get a stacktrace. 
#ifdef NO_TCMALLOC_SAMPLES @@ -67,6 +76,7 @@ template <int BITS> class MapSelector { typedef PackedCache<BITS-kPageShift, uint64_t> CacheType; }; +// A two-level map for 32-bit machines template <> class MapSelector<32> { public: #ifdef WIN32 @@ -87,7 +97,7 @@ template <> class MapSelector<32> { // contiguous runs of pages (called a "span"). // ------------------------------------------------------------------------- -class PageHeap { +class PERFTOOLS_DLL_DECL PageHeap { public: PageHeap(); @@ -117,7 +127,8 @@ class PageHeap { // REQUIRES: span->sizeclass == 0 Span* Split(Span* span, Length n); - // Return the descriptor for the specified page. + // Return the descriptor for the specified page. Returns NULL if + // this PageID was not allocated previously. inline Span* GetDescriptor(PageID p) const { return reinterpret_cast<Span*>(pagemap_.get(p)); } @@ -125,15 +136,20 @@ class PageHeap { // Dump state to stderr void Dump(TCMalloc_Printer* out); - // Return number of bytes allocated from system - inline uint64_t SystemBytes() const { return system_bytes_; } + // If this page heap is managing a range with starting page # >= start, + // store info about the range in *r and return true. Else return false. + bool GetNextRange(PageID start, base::MallocRange* r); - inline uint64_t CommittedBytes() const { return committed_bytes_; } + // Page heap statistics + struct Stats { + Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0), committed_bytes(0) {} + uint64_t system_bytes; // Total bytes allocated from system + uint64_t free_bytes; // Total bytes on normal freelists + uint64_t unmapped_bytes; // Total bytes on returned freelists + uint64_t committed_bytes; // Bytes committed, always <= system_bytes. - // Return number of free bytes in heap - uint64_t FreeBytes() const { - return (static_cast<uint64_t>(free_pages_) << kPageShift); - } + }; + inline Stats stats() const { return stats_; } bool Check(); // Like Check() but does some more comprehensive checking. 
@@ -141,8 +157,13 @@ class PageHeap { bool CheckList(Span* list, Length min_pages, Length max_pages, int freelist); // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST - // Release all pages on the free list for reuse by the OS: - void ReleaseFreePages(); + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages); // Return 0 if we have no information, or else the correct sizeclass for p. // Reads and writes to pagemap_cache_ do not require locking. @@ -171,6 +192,17 @@ class PageHeap { // REQUIRED: kMaxPages >= kMinSystemAlloc; static const size_t kMaxPages = kMinSystemAlloc; + // Never delay scavenging for more than the following number of + // deallocated pages. With 4K pages, this comes to 4GB of + // deallocation. + // Chrome: Changed to 64MB + static const int kMaxReleaseDelay = 1 << 16; + + // If there is nothing to release, wait for so many pages before + // scavenging again. With 4K pages, this comes to 1GB of memory. + // Chrome: Changed to 16MB + static const int kDefaultReleaseDelay = 1 << 12; + // Pick the appropriate map and cache types based on pointer size typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; @@ -191,15 +223,8 @@ class PageHeap { // Array mapping from span length to a doubly linked list of free spans SpanList free_[kMaxPages]; - // Number of pages kept in free lists - uintptr_t free_pages_; - - // Bytes allocated from system - uint64_t system_bytes_; - - // Bytes committed, always <= system_bytes_. 
- uint64_t committed_bytes_; - + // Statistics on system, free, and unmapped bytes + Stats stats_; bool GrowHeap(Length n); // REQUIRES: span->length >= n @@ -222,39 +247,42 @@ class PageHeap { // span of exactly the specified length. Else, returns NULL. Span* AllocLarge(Length n); -#if defined(OS_LINUX) - // Coalesce span with neighboring spans if possible. Add the - // resulting span to the appropriate free list. - void AddToFreeList(Span* span); -#else // ! defined(OS_LINUX) + // Coalesce span with neighboring spans if possible, prepend to + // appropriate free list, and adjust stats. + void MergeIntoFreeList(Span* span); + // Commit the span. void CommitSpan(Span* span); // Decommit the span. void DecommitSpan(Span* span); -#endif // ! defined(OS_LINUX) + + // Prepends span to appropriate free list, and adjusts stats. + void PrependToFreeList(Span* span); + + // Removes span from its free list, and adjust stats. + void RemoveFromFreeList(Span* span); // Incrementally release some memory to the system. // IncrementalScavenge(n) is called whenever n pages are freed. void IncrementalScavenge(Length n); -#if defined(OS_LINUX) - // Release all pages in the specified free list for reuse by the OS - // REQURES: list must be a "normal" list (i.e., not "returned") - void ReleaseFreeList(Span* list); -#else // ! defined(OS_LINUX) - // Releases all memory held in the given list's 'normal' freelist and adds - // it to the 'released' freelist. - void ReleaseFreeList(Span* list, Span* returned); -#endif // ! defined(OS_LINUX) + // Release the last span on the normal portion of this list. + // Return the length of that span. + Length ReleaseLastNormalSpan(SpanList* slist); + // Number of pages to deallocate before doing more scavenging int64_t scavenge_counter_; - // Index of last free list we scavenged - int scavenge_index_; + // Index of last free list where we released memory to the OS. 
+ int release_index_; }; } // namespace tcmalloc +#ifdef _MSC_VER +#pragma warning(pop) +#endif + #endif // TCMALLOC_PAGE_HEAP_H_ diff --git a/third_party/tcmalloc/chromium/src/pagemap.h b/third_party/tcmalloc/chromium/src/pagemap.h index 0d78991..c8540f7 100644 --- a/third_party/tcmalloc/chromium/src/pagemap.h +++ b/third_party/tcmalloc/chromium/src/pagemap.h @@ -101,10 +101,20 @@ class TCMalloc_PageMap1 { // REQUIRES "k" is in range "[0,2^BITS-1]". // REQUIRES "k" has been ensured before. // - // Sets the value for KEY. + // Sets the value 'v' for key 'k'. void set(Number k, void* v) { array_[k] = v; } + + // Return the first non-NULL pointer found in this map for + // a page number >= k. Returns NULL if no such number is found. + void* Next(Number k) const { + while (k < (1 << BITS)) { + if (array_[k] != NULL) return array_[k]; + k++; + } + return NULL; + } }; #ifdef WIN32 @@ -289,6 +299,15 @@ class TCMalloc_PageMap1_LazyCommit { void set(Number k, void* v) { array_[k] = v; } + // Return the first non-NULL pointer found in this map for + // a page number >= k. Returns NULL if no such number is found. 
+ void* Next(Number k) const { + while (k < (1 << BITS)) { + if (array_[k] != NULL) return array_[k]; + k++; + } + return NULL; + } }; #endif // WIN32 @@ -362,6 +381,24 @@ class TCMalloc_PageMap2 { // Allocate enough to keep track of all possible pages Ensure(0, 1 << BITS); } + + void* Next(Number k) const { + while (k < (1 << BITS)) { + const Number i1 = k >> LEAF_BITS; + Leaf* leaf = root_[i1]; + if (leaf != NULL) { + // Scan forward in leaf + for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) { + if (leaf->values[i2] != NULL) { + return leaf->values[i2]; + } + } + } + // Skip to next top-level entry + k = (i1 + 1) << LEAF_BITS; + } + return NULL; + } }; // Three-level radix tree @@ -456,6 +493,29 @@ class TCMalloc_PageMap3 { void PreallocateMoreMemory() { } + + void* Next(Number k) const { + while (k < (Number(1) << BITS)) { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + if (root_->ptrs[i1] == NULL) { + // Advance to next top-level entry + k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS); + } else { + Leaf* leaf = reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2]); + if (leaf != NULL) { + for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) { + if (leaf->values[i3] != NULL) { + return leaf->values[i3]; + } + } + } + // Advance to next interior entry + k = ((k >> LEAF_BITS) + 1) << LEAF_BITS; + } + } + return NULL; + } }; #endif // TCMALLOC_PAGEMAP_H_ diff --git a/third_party/tcmalloc/chromium/src/pprof b/third_party/tcmalloc/chromium/src/pprof index 88a6041..fec0c9e 100755 --- a/third_party/tcmalloc/chromium/src/pprof +++ b/third_party/tcmalloc/chromium/src/pprof @@ -72,7 +72,7 @@ use strict; use warnings; use Getopt::Long; -my $PPROF_VERSION = "1.4"; +my $PPROF_VERSION = "1.5"; # These are the object tools we use which can come from a # user-specified location using --tools, from the PPROF_TOOLS @@ -92,6 +92,7 @@ my $GV = "gv"; my $PS2PDF = "ps2pdf"; # These are used for 
dynamic profiles my $WGET = "wget"; +my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets my $CURL = "curl"; # These are the web pages that servers need to support for dynamic profiles @@ -117,6 +118,11 @@ my $address_length = 16; # A list of paths to search for shared object files my @prefix_list = (); +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. +my $sep_symbol = '_fini'; +my $sep_address = undef; + ##### Argument parsing ##### sub usage_string { @@ -504,6 +510,20 @@ sub Init() { ConfigureObjTools($main::prog) } + # Check what flags our commandline utilities support + if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) { + my @lines = <TFILE>; + if (grep(/unrecognized/, @lines) > 0) { + # grep found 'unrecognized' token from WGET, clear WGET flags + $WGET_FLAGS = ""; + } + close(TFILE); + } + # TODO(csilvers): check all the other binaries and objtools to see + # if they are installed and what flags they support, and store that + # in a data structure here, rather than scattering these tests about. + # Then, ideally, rewrite code to use wget OR curl OR GET or ... + # Break the opt_list_prefix into the prefix_list array @prefix_list = split (',', $main::opt_lib_prefix); @@ -952,22 +972,31 @@ sub PrintSymbolizedProfile { print 'binary=', $prog, "\n"; } while (my ($pc, $name) = each(%{$symbols})) { - my $fullname = $name->[2]; - print '0x', $pc, ' ', $fullname, "\n"; + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. 
+ for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; } print '---', "\n"; + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + print '--- ', $profile_marker, "\n"; if (defined($main::collected_profile)) { # if used with remote fetch, simply dump the collected profile to output. open(SRC, "<$main::collected_profile"); while (<SRC>) { print $_; } + close(SRC); } else { # dump a cpu-format profile to standard out - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; - print '--- ', $profile_marker, "\n"; PrintProfileData($profile); } } @@ -1069,9 +1098,9 @@ sub PrintDisassembly { } # Return reference to array of tuples of the form: -# [address, filename, linenumber, instruction] +# [start_address, filename, linenumber, instruction, limit_address] # E.g., -# ["0x806c43d", "/foo/bar.cc", 131, "ret"] +# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] sub Disassemble { my $prog = shift; my $offset = shift; @@ -1086,6 +1115,7 @@ sub Disassemble { my @result = (); my $filename = ""; my $linenumber = -1; + my $last = ["", "", "", ""]; while (<OBJDUMP>) { s/\r//g; # turn windows-looking lines into unix-looking lines chop; @@ -1098,7 +1128,9 @@ sub Disassemble { # Disassembly line -- zero-extend address to full length my $addr = HexExtend($1); my $k = AddressAdd($addr, $offset); - push(@result, [$k, $filename, $linenumber, $2]); + $last->[4] = $k; # Store ending address for previous instruction + $last = [$k, $filename, $linenumber, $2, $end_addr]; + push(@result, $last); } } close(OBJDUMP); @@ -1274,8 +1306,13 @@ sub PrintSource { my $total1 = 0; # Total flat counts my $total2 = 0; # Total cumulative counts foreach my $e (@instructions) { - my $c1 = GetEntry($flat, $e->[0]); - my $c2 = GetEntry($cumulative, $e->[0]); + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = 
$e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } $running1 += $c1; $running2 += $c2; $total1 += $c1; @@ -1386,8 +1423,13 @@ sub PrintDisassembledFunction { my $flat_total = 0; my $cum_total = 0; foreach my $e (@instructions) { - my $c1 = GetEntry($flat, $e->[0]); - my $c2 = GetEntry($cumulative, $e->[0]); + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } push(@flat_count, $c1); push(@cum_count, $c2); $flat_total += $c1; @@ -1615,10 +1657,10 @@ sub PrintDot { foreach my $k (keys(%{$raw})) { # TODO: omit low %age edges $n = $raw->{$k}; - my @addrs = split(/\n/, $k); - for (my $i = 1; $i <= $#addrs; $i++) { - my $src = OutputKey($symbols, $addrs[$i]); - my $dst = OutputKey($symbols, $addrs[$i-1]); + my @translated = TranslateStack($symbols, $k); + for (my $i = 1; $i <= $#translated; $i++) { + my $src = $translated[$i]; + my $dst = $translated[$i-1]; #next if ($src eq $dst); # Avoid self-edges? if (exists($node{$src}) && exists($node{$dst})) { my $edge_label = "$src\001$dst"; @@ -1648,14 +1690,18 @@ sub PrintDot { if ($edgeweight > 100000) { $edgeweight = 100000; } $edgeweight = int($edgeweight); + my $style = sprintf("setlinewidth(%f)", $w); + if ($x[1] =~ m/\(inline\)/) { + $style .= ",dashed"; + } + # Use a slightly squashed function of the edge count as the weight - printf DOT ("N%s -> N%s [label=%s, weight=%d, " . - "style=\"setlinewidth(%f)\"];\n", + printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", $node{$x[0]}, $node{$x[1]}, Unparse($n), $edgeweight, - $w); + $style); } } @@ -1665,42 +1711,74 @@ sub PrintDot { return 1; } -# Generate the key under which a given address should be counted -# based on the user-specified output granularity. 
-sub OutputKey { +# Translate a stack of addresses into a stack of symbols +sub TranslateStack { my $symbols = shift; - my $a = shift; - - # Skip large addresses since they sometimes show up as fake entries on RH9 - if (length($a) > 8) { - if ($a gt "7fffffffffffffff") { return ''; } - } - - # Extract symbolic info for address - my $func = $a; - my $fullfunc = $a; - my $fileline = ""; - if (exists($symbols->{$a})) { - $func = $symbols->{$a}->[0]; - $fullfunc = $symbols->{$a}->[2]; - $fileline = $symbols->{$a}->[1]; - } - - if ($main::opt_disasm || $main::opt_list) { - return $a; # We want just the address for the key - } elsif ($main::opt_addresses) { - return "$a $func $fileline"; - } elsif ($main::opt_lines) { - return "$func $fileline"; - } elsif ($main::opt_functions) { - return $func; - } elsif ($main::opt_files) { - my $f = ($fileline eq '') ? $a : $fileline; - $f =~ s/:\d+$//; - return $f; - } else { - return $a; + my $k = shift; + + my @addrs = split(/\n/, $k); + my @result = (); + for (my $i = 0; $i <= $#addrs; $i++) { + my $a = $addrs[$i]; + + # Skip large addresses since they sometimes show up as fake entries on RH9 + if (length($a) > 8 && $a gt "7fffffffffffffff") { + next; + } + + if ($main::opt_disasm || $main::opt_list) { + # We want just the address for the key + push(@result, $a); + next; + } + + my $symlist = $symbols->{$a}; + if (!defined($symlist)) { + $symlist = [$a, "", $a]; + } + + # We can have a sequence of symbols for a particular entry + # (more than one symbol in the case of inlining). Callers + # come before callees in symlist, so walk backwards since + # the translated stack should contain callees before callers. + for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { + my $func = $symlist->[$j-2]; + my $fileline = $symlist->[$j-1]; + my $fullfunc = $symlist->[$j]; + if ($j > 2) { + $func = "$func (inline)"; + } + if ($main::opt_addresses) { + push(@result, "$a $func $fileline"); + } elsif ($main::opt_lines) { + if ($func eq '??' 
&& $fileline eq '??:0') { + push(@result, "$a"); + } else { + push(@result, "$func $fileline"); + } + } elsif ($main::opt_functions) { + if ($func eq '??') { + push(@result, "$a"); + } else { + push(@result, $func); + } + } elsif ($main::opt_files) { + if ($fileline eq '??:0' || $fileline eq '') { + push(@result, "$a"); + } else { + my $f = $fileline; + $f =~ s/:\d+$//; + push(@result, $f); + } + } else { + push(@result, $a); + last; # Do not print inlined info + } + } } + + # print join(",", @addrs), " => ", join(",", @result), "\n"; + return @result; } # Generate percent string for a number and a total @@ -1890,6 +1968,7 @@ sub RemoveUninterestingFrames { 'tc_newarray_nothrow', 'do_malloc', '::do_malloc', # new name -- got moved to an unnamed ns + '::do_malloc_or_cpp_alloc', 'DoSampledAllocation', 'simple_alloc::allocate', '__malloc_alloc_template::allocate', @@ -1898,7 +1977,12 @@ sub RemoveUninterestingFrames { '__builtin_vec_delete', '__builtin_vec_new', 'operator new', - 'operator new[]') { + 'operator new[]', + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { $skip{$name} = 1; $skip{"_" . 
$name} = 1; # Mach (OS X) adds a _ prefix to everything } @@ -1978,17 +2062,16 @@ sub ReduceProfile { my $result = {}; foreach my $k (keys(%{$profile})) { my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); + my @translated = TranslateStack($symbols, $k); my @path = (); my %seen = (); $seen{''} = 1; # So that empty keys are skipped - foreach my $a (@addrs) { + foreach my $e (@translated) { # To avoid double-counting due to recursion, skip a stack-trace # entry if it has already been seen - my $key = OutputKey($symbols, $a); - if (!$seen{$key}) { - $seen{$key} = 1; - push(@path, $key); + if (!$seen{$e}) { + $seen{$e} = 1; + push(@path, $e); } } my $reduced_path = join("\n", @path); @@ -1997,6 +2080,20 @@ sub ReduceProfile { return $result; } +# Does the specified symbol array match the regexp? +sub SymbolMatches { + my $sym = shift; + my $re = shift; + if (defined($sym)) { + for (my $i = 0; $i < $#{$sym}; $i += 3) { + if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { + return 1; + } + } + } + return 0; +} + # Focus only on paths involving specified regexps sub FocusProfile { my $symbols = shift; @@ -2008,10 +2105,7 @@ sub FocusProfile { my @addrs = split(/\n/, $k); foreach my $a (@addrs) { # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$focus/) || - (exists($symbols->{$a}) && - (($symbols->{$a}->[0] =~ m/$focus/) || - ($symbols->{$a}->[1] =~ m/$focus/)))) { + if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { AddEntry($result, $k, $count); last; } @@ -2032,10 +2126,7 @@ sub IgnoreProfile { my $matched = 0; foreach my $a (@addrs) { # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$ignore/) || - (exists($symbols->{$a}) && - (($symbols->{$a}->[0] =~ m/$ignore/) || - ($symbols->{$a}->[1] =~ m/$ignore/)))) { + if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { $matched = 1; last; } @@ -2195,7 +2286,7 @@ sub IsSymbolizedProfileFile { sub CheckSymbolPage { my $url = 
SymbolPageURL(); - open(SYMBOL, "$WGET -qO- '$url' |"); + open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |"); my $line = <SYMBOL>; $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines close(SYMBOL); @@ -2240,7 +2331,7 @@ sub SymbolPageURL { sub FetchProgramName() { my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); my $url = "http://$host:$port$PROGRAM_NAME_PAGE"; - my $command_line = "$WGET -qO- '$url'"; + my $command_line = "$WGET $WGET_FLAGS -qO- '$url'"; open(CMDLINE, "$command_line |") or error($command_line); my $cmdline = <CMDLINE>; $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines @@ -2346,13 +2437,21 @@ sub FetchSymbols { # /symbol, the symbols match and are retrievable from the map. my $shortpc = $pc; $shortpc =~ s/^0*//; + # Each line may have a list of names, which includes the function + # and also other functions it has inlined. They are separated + # (in PrintSymbolizedFile), by --, which is illegal in function names. + my $fullnames; if (defined($symbol_map->{$shortpc})) { - $fullname = $symbol_map->{$shortpc}; + $fullnames = $symbol_map->{$shortpc}; } else { - $fullname = "0x" . $pc; # Just use addresses + $fullnames = "0x" . 
$pc; # Just use addresses + } + my $sym = []; + $symbols->{$pc} = $sym; + foreach my $fullname (split("--", $fullnames)) { + my $name = ShortFunctionName($fullname); + push(@{$sym}, $name, "?", $fullname); } - my $name = ShortFunctionName($fullname); - $symbols->{$pc} = [$name, "?", $fullname]; } return $symbols; } @@ -2392,7 +2491,7 @@ sub FetchDynamicProfile { my $wget_timeout; if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) { if ($path =~ m/$PROFILE_PAGE/) { - $url = sprintf("http://$host:$port$PROFILE_PAGE?seconds=%d", + $url = sprintf("http://$host:$port$path?seconds=%d", $main::opt_seconds); } else { if ($profile_name =~ m/[?]/) { @@ -2427,7 +2526,7 @@ sub FetchDynamicProfile { return $real_profile; } - my $cmd = "$WGET $wget_timeout -q -O $tmp_profile '$url'"; + my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'"; if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){ print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; if ($encourage_patience) { @@ -2752,12 +2851,26 @@ sub ReadCPUProfile { # Make key out of the stack entries my @k = (); - for (my $j = $d; $j--; ) { + for (my $j = 0; $j < $d; $j++) { my $pclo = $slots->get($i++); my $pchi = $slots->get($i++); if ($pclo == -1 || $pchi == -1) { error("$fname: Unexpected EOF when reading stack of depth $d\n"); } + + # Subtract one from caller pc so we map back to call instr. + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. 
+ if ($j > 0 && !$main::use_symbolized_profile) { + if ($pclo == 0) { + $pchi--; + $pclo = 0xffffffff; + } else { + $pclo--; + } + } + my $pc = sprintf("%08x%08x", $pchi, $pclo); $pcs->{$pc} = 1; push @k, $pc; @@ -3256,7 +3369,7 @@ sub ParseLibraries { my $finish; my $offset; my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib)((\.\d+)+\w*)?)$/i) { + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*)?)$/i) { # Full line from /proc/self/maps. Example: # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so $start = HexExtend($1); @@ -3516,87 +3629,111 @@ sub MapToSymbols { my $pclist = shift; my $symbols = shift; + my $debug = 0; + # Ignore empty binaries if ($#{$pclist} < 0) { return; } - my $got_symbols = MapSymbolsWithNM($image, $offset, $pclist, $symbols); - if ($main::opt_interactive || - $main::opt_addresses || - $main::opt_lines || - $main::opt_files || - $main::opt_list || - $main::opt_callgrind || - !$got_symbols) { - GetLineNumbers($image, $offset, $pclist, $symbols); + # Figure out the addr2line command to use + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = "$addr2line -f -C -e $image"; + if (exists $obj_tool_map{"addr2line_pdb"}) { + $addr2line = $obj_tool_map{"addr2line_pdb"}; + $cmd = "$addr2line --demangle -f -C -e $image"; } -} -# The file $tmpfile_sym must already have been created before calling this. -sub GetLineNumbersViaAddr2Line { - my $addr2line_command = shift; - my $pclist = shift; - my $symbols = shift; + # If "addr2line" isn't installed on the system at all, just use + # nm to get what info we can (function names, but not line numbers). + if (system("$addr2line --help >/dev/null 2>&1") != 0) { + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + return; + } + + # "addr2line -i" can produce a variable number of lines per input + # address, with no separator that allows us to tell when data for + # the next address starts. 
So we find the address for a special + # symbol (_fini) and interleave this address between all real + # addresses passed to addr2line. The name of this special symbol + # can then be used as a separator. + $sep_address = undef; # May be filled in by MapSymbolsWithNM() + my $nm_symbols = {}; + MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); + # TODO(csilvers): only add '-i' if addr2line supports it. + if (defined($sep_address)) { + # Only add " -i" to addr2line if the binary supports it. + # addr2line --help returns 0, but not if it sees an unknown flag first. + if (system("$cmd -i --help >/dev/null 2>&1") == 0) { + $cmd .= " -i"; + } else { + $sep_address = undef; # no need for sep_address if we don't support -i + } + } + + # Make file with all PC values with intervening 'sep_address' so + # that we can reliably detect the end of inlined function list + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + if ($debug) { print("---- $image ---\n"); } + for (my $i = 0; $i <= $#{$pclist}; $i++) { + # addr2line always reads hex addresses, and does not need '0x' prefix. 
+ if ($debug) { printf("%s\n", $pclist->[$i]); } + printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); + if (defined($sep_address)) { + printf ADDRESSES ("%s\n", $sep_address); + } + } + close(ADDRESSES); + if ($debug) { + print("----\n"); + system("cat $main::tmpfile_sym"); + print("----\n"); + system("$cmd <$main::tmpfile_sym"); + print("----\n"); + } - open(SYMBOLS, "$addr2line_command <$main::tmpfile_sym |") - || error("$addr2line_command: $!\n"); - my $count = 0; + open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n"); + my $count = 0; # Index in pclist while (<SYMBOLS>) { + # Read fullfunction and filelineinfo from next pair of lines s/\r?\n$//g; my $fullfunction = $_; - $_ = <SYMBOLS>; s/\r?\n$//g; my $filelinenum = $_; - $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths - if (!$main::opt_list) { - $filelinenum =~ s|^.*/([^/]+:\d+)$|$1|; # Remove directory name + + if (defined($sep_address) && $fullfunction eq $sep_symbol) { + # Terminating marker for data for this address + $count++; + next; } + $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths + my $pcstr = $pclist->[$count]; - if (defined($symbols->{$pcstr})) { - # Override just the line-number portion. The function name portion - # is less buggy when computed using nm instead of addr2line. But - # don't override if addr2line is giving ??'s and nm didn't. (This - # may be seen mostly/entirely on cygwin's addr2line/nm.) 
- if (($filelinenum ne "??:0") || ($symbols->{$pcstr}->[1] eq "?")) { - $symbols->{$pcstr}->[1] = $filelinenum; + my $function = ShortFunctionName($fullfunction); + if ($fullfunction eq '??') { + # See if nm found a symbol + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + $function = $nms->[0]; + $fullfunction = $nms->[2]; } - } else { - my $function = ShortFunctionName($fullfunction); - $symbols->{$pcstr} = [$function, $filelinenum, $fullfunction]; } - $count++; - } - close(SYMBOLS); - return $count; -} -sub GetLineNumbers { - my $image = shift; - my $offset = shift; - my $pclist = shift; - my $symbols = shift; - - # Make file with all PC values - open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); - for (my $i = 0; $i <= $#{$pclist}; $i++) { - # addr2line always reads hex addresses, and does not need '0x' prefix. - printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); - } - close(ADDRESSES); - - # Pass to addr2line - my $addr2line = $obj_tool_map{"addr2line"}; - my @addr2line_commands = ("$addr2line -f -C -e $image"); - if (exists $obj_tool_map{"addr2line_pdb"}) { - my $addr2line_pdb = $obj_tool_map{"addr2line_pdb"}; - push(@addr2line_commands, "$addr2line_pdb --demangle -f -C -e $image"); - } - foreach my $addr2line_command (@addr2line_commands) { - if (GetLineNumbersViaAddr2Line("$addr2line_command", $pclist, $symbols)) { - last; + # Prepend to accumulated symbols for pcstr + # (so that caller comes before callee) + my $sym = $symbols->{$pcstr}; + if (!defined($sym)) { + $sym = []; + $symbols->{$pcstr} = $sym; + } + unshift(@{$sym}, $function, $filelinenum, $fullfunction); + if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if (!defined($sep_address)) { + # Inlining is off, so this entry ends immediately + $count++; } } + close(SYMBOLS); } # Use nm to map the list of referenced PCs to symbols. 
Return true iff we @@ -3646,7 +3783,7 @@ sub MapSymbolsWithNM { } return 1; } - + sub ShortFunctionName { my $function = shift; while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types @@ -3813,6 +3950,10 @@ sub GetProcedureBoundariesViaNm { next; } + if ($this_routine eq $sep_symbol) { + $sep_address = HexExtend($start_val); + } + # Tag this routine with the starting address in case the image # has multiple occurrences of this routine. We use a syntax # that resembles template paramters that are automatically diff --git a/third_party/tcmalloc/chromium/src/profile-handler.cc b/third_party/tcmalloc/chromium/src/profile-handler.cc index e658d30..cf65740 100644 --- a/third_party/tcmalloc/chromium/src/profile-handler.cc +++ b/third_party/tcmalloc/chromium/src/profile-handler.cc @@ -89,18 +89,18 @@ class ProfileHandler { // Registers a callback routine to receive profile timer ticks. The returned // token is to be used when unregistering this callback and must not be // deleted by the caller. Registration of the first callback enables the - // SIGPROF handler. + // SIGPROF handler (or SIGALRM if using ITIMER_REAL). ProfileHandlerToken* RegisterCallback(ProfileHandlerCallback callback, void* callback_arg); // Unregisters a previously registered callback. Expects the token returned // by the corresponding RegisterCallback routine. Unregistering the last - // callback disables the SIGPROF handler. + // callback disables the SIGPROF handler (or SIGALRM if using ITIMER_REAL). void UnregisterCallback(ProfileHandlerToken* token) NO_THREAD_SAFETY_ANALYSIS; // Unregisters all the callbacks, stops the timer if shared, disables the - // SIGPROF handler and clears the timer_sharing_ state. + // SIGPROF (or SIGALRM) handler and clears the timer_sharing_ state. void Reset(); // Gets the current state of profile handler. @@ -127,12 +127,15 @@ class ProfileHandler { // Initializes the ProfileHandler singleton via GoogleOnceInit. 
static void Init(); - // Counts the number of SIGPROF interrupts received. + // The number of SIGPROF (or SIGALRM for ITIMER_REAL) interrupts received. int64 interrupts_ GUARDED_BY(signal_lock_); - // SIGPROF interrupt frequency, read-only after construction. + // SIGPROF/SIGALRM interrupt frequency, read-only after construction. int32 frequency_; + // ITIMER_PROF (which uses SIGPROF), or ITIMER_REAL (which uses SIGALRM) + int timer_type_; + // Counts the number of callbacks registered. int32 callback_count_ GUARDED_BY(control_lock_); @@ -196,10 +199,10 @@ class ProfileHandler { // Disables (ignores) the timer interrupt signal. void DisableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); - // SIGPROF handler. Iterate over and call all the registered callbacks. + // SIGPROF/SIGALRM handler. Iterate over and call all the registered callbacks. static void SignalHandler(int sig, siginfo_t* sinfo, void* ucontext); - DISALLOW_EVIL_CONSTRUCTORS(ProfileHandler); + DISALLOW_COPY_AND_ASSIGN(ProfileHandler); }; ProfileHandler* ProfileHandler::instance_ = NULL; @@ -241,6 +244,9 @@ ProfileHandler::ProfileHandler() callback_count_(0), timer_sharing_(TIMERS_UNTOUCHED) { SpinLockHolder cl(&control_lock_); + + timer_type_ = (getenv("CPUPROFILE_REALTIME") ? 
ITIMER_REAL : ITIMER_PROF); + // Get frequency of interrupts (if specified) char junk; const char* fr = getenv("CPUPROFILE_FREQUENCY"); @@ -390,18 +396,18 @@ void ProfileHandler::StartTimer() { timer.it_interval.tv_sec = 0; timer.it_interval.tv_usec = 1000000 / frequency_; timer.it_value = timer.it_interval; - setitimer(ITIMER_PROF, &timer, 0); + setitimer(timer_type_, &timer, 0); } void ProfileHandler::StopTimer() { struct itimerval timer; memset(&timer, 0, sizeof timer); - setitimer(ITIMER_PROF, &timer, 0); + setitimer(timer_type_, &timer, 0); } bool ProfileHandler::IsTimerRunning() { struct itimerval current_timer; - RAW_CHECK(0 == getitimer(ITIMER_PROF, &current_timer), "getitimer"); + RAW_CHECK(0 == getitimer(timer_type_, &current_timer), "getitimer"); return (current_timer.it_value.tv_sec != 0 || current_timer.it_value.tv_usec != 0); } @@ -411,7 +417,8 @@ void ProfileHandler::EnableHandler() { sa.sa_sigaction = SignalHandler; sa.sa_flags = SA_RESTART | SA_SIGINFO; sigemptyset(&sa.sa_mask); - RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (enable)"); + const int signal_number = (timer_type_ == ITIMER_PROF ? SIGPROF : SIGALRM); + RAW_CHECK(sigaction(signal_number, &sa, NULL) == 0, "sigprof (enable)"); } void ProfileHandler::DisableHandler() { @@ -419,7 +426,8 @@ void ProfileHandler::DisableHandler() { sa.sa_handler = SIG_IGN; sa.sa_flags = SA_RESTART; sigemptyset(&sa.sa_mask); - RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (disable)"); + const int signal_number = (timer_type_ == ITIMER_PROF ?
SIGPROF : SIGALRM); + RAW_CHECK(sigaction(signal_number, &sa, NULL) == 0, "sigprof (disable)"); } void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) { @@ -447,7 +455,7 @@ class ProfileHandlerInitializer { } private: - DISALLOW_EVIL_CONSTRUCTORS(ProfileHandlerInitializer); + DISALLOW_COPY_AND_ASSIGN(ProfileHandlerInitializer); }; // ProfileHandlerInitializer singleton static ProfileHandlerInitializer profile_handler_initializer; diff --git a/third_party/tcmalloc/chromium/src/profiledata.h b/third_party/tcmalloc/chromium/src/profiledata.h index da7ea9e..3521bac 100644 --- a/third_party/tcmalloc/chromium/src/profiledata.h +++ b/third_party/tcmalloc/chromium/src/profiledata.h @@ -177,7 +177,7 @@ class ProfileData { // Write contents of eviction buffer to disk. void FlushEvicted(); - DISALLOW_EVIL_CONSTRUCTORS(ProfileData); + DISALLOW_COPY_AND_ASSIGN(ProfileData); }; #endif // BASE_PROFILEDATA_H_ diff --git a/third_party/tcmalloc/chromium/src/stacktrace_config.h b/third_party/tcmalloc/chromium/src/stacktrace_config.h index 3bd0fb3..b58ab1d 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_config.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_config.h @@ -46,17 +46,8 @@ #ifndef BASE_STACKTRACE_CONFIG_H_ #define BASE_STACKTRACE_CONFIG_H_ -// First, the i386 case. -#if defined(__i386__) && __GNUC__ >= 2 -# if !defined(NO_FRAME_POINTER) -# define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h" -# define STACKTRACE_SKIP_CONTEXT_ROUTINES 1 -# else -# define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h" -# endif - -// Now, the x86_64 case. -#elif defined(__x86_64__) && __GNUC__ >= 2 +// First, the i386 and x86_64 case. 
+#if (defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 2 # if !defined(NO_FRAME_POINTER) # define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h" # define STACKTRACE_SKIP_CONTEXT_ROUTINES 1 diff --git a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h index 5650c86..05701e7 100644 --- a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h +++ b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h @@ -49,6 +49,11 @@ #elif defined(HAVE_UCONTEXT_H) #include <ucontext.h> // for ucontext_t #elif defined(HAVE_CYGWIN_SIGNAL_H) +// cygwin/signal.h has a buglet where it uses pthread_attr_t without +// #including <pthread.h> itself. So we have to do it. +# ifdef HAVE_PTHREAD +# include <pthread.h> +# endif #include <cygwin/signal.h> typedef ucontext ucontext_t; #endif diff --git a/third_party/tcmalloc/chromium/src/symbolize.cc b/third_party/tcmalloc/chromium/src/symbolize.cc index b7cdf0e..9dd890e 100644 --- a/third_party/tcmalloc/chromium/src/symbolize.cc +++ b/third_party/tcmalloc/chromium/src/symbolize.cc @@ -65,30 +65,37 @@ DEFINE_string(symbolize_pprof, // a more-permanent copy that won't ever get destroyed. static string* g_pprof_path = new string(FLAGS_symbolize_pprof); +void SymbolTable::Add(const void* addr) { + symbolization_table_[addr] = ""; +} + +const char* SymbolTable::GetSymbol(const void* addr) { + return symbolization_table_[addr]; +} + // Updates symbolization_table with the pointers to symbol names corresponding // to its keys. The symbol names are stored in out, which is allocated and // freed by the caller of this routine. // Note that the forking/etc is not thread-safe or re-entrant. That's // ok for the purpose we need -- reporting leaks detected by heap-checker // -- but be careful if you decide to use this routine for other purposes. 
-extern bool Symbolize(char *out, int out_size, - SymbolMap *symbolization_table) { +int SymbolTable::Symbolize() { #if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H) - return false; + return 0; #elif !defined(HAVE_PROGRAM_INVOCATION_NAME) - return false; // TODO(csilvers): get argv[0] somehow + return 0; // TODO(csilvers): get argv[0] somehow #else // All this work is to do two-way communication. ugh. extern char* program_invocation_name; // gcc provides this int child_in[2]; // file descriptors int child_out[2]; // for now, we don't worry about child_err if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_in) == -1) { - return false; + return 0; } if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_out) == -1) { close(child_in[0]); close(child_in[1]); - return false; + return 0; } switch (fork()) { case -1: { // error @@ -96,7 +103,7 @@ extern bool Symbolize(char *out, int out_size, close(child_in[1]); close(child_out[0]); close(child_out[1]); - return false; + return 0; } case 0: { // child close(child_in[1]); // child uses the 0's, parent uses the 1's @@ -125,30 +132,36 @@ extern bool Symbolize(char *out, int out_size, struct pollfd pfd = { child_in[1], POLLOUT, 0 }; if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) || (pfd.revents & (POLLHUP|POLLERR))) { - return false; + return 0; } #endif DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin - char pcstr[64]; // enough for a single address - for (SymbolMap::const_iterator iter = symbolization_table->begin(); - iter != symbolization_table->end(); ++iter) { - snprintf(pcstr, sizeof(pcstr), // pprof expects format to be 0xXXXXXX - "0x%" PRIxPTR "\n", iter->first); - // TODO(glider): the number of write()s can be reduced by using - // snprintf() here. - write(child_in[1], pcstr, strlen(pcstr)); + // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each + // address to feed to pprof. 
+ const int kOutBufSize = 24 * symbolization_table_.size(); + char *pprof_buffer = new char[kOutBufSize]; + int written = 0; + for (SymbolMap::const_iterator iter = symbolization_table_.begin(); + iter != symbolization_table_.end(); ++iter) { + written += snprintf(pprof_buffer + written, kOutBufSize - written, + // pprof expects format to be 0xXXXXXX + "0x%"PRIxPTR"\n", reinterpret_cast<uintptr_t>(iter->first)); } + write(child_in[1], pprof_buffer, strlen(pprof_buffer)); close(child_in[1]); // that's all we need to write + const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size(); int total_bytes_read = 0; - memset(out, '\0', out_size); + delete[] symbol_buffer_; + symbol_buffer_ = new char[kSymbolBufferSize]; + memset(symbol_buffer_, '\0', kSymbolBufferSize); while (1) { - int bytes_read = read(child_out[1], out + total_bytes_read, - out_size - total_bytes_read); + int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read, + kSymbolBufferSize - total_bytes_read); if (bytes_read < 0) { close(child_out[1]); - return false; + return 0; } else if (bytes_read == 0) { close(child_out[1]); wait(NULL); @@ -159,25 +172,24 @@ extern bool Symbolize(char *out, int out_size, } // We have successfully read the output of pprof into out. Make sure // the last symbol is full (we can tell because it ends with a \n). - // TODO(glider): even when the last symbol is full, the list of symbols - // may be incomplete. We should check for that and return the number of - // symbols we actually get from pprof. 
- if (total_bytes_read == 0 || out[total_bytes_read - 1] != '\n') - return false; - // make the symbolization_table values point to the output vector - SymbolMap::iterator fill = symbolization_table->begin(); - char *current_name = out; + if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n') + return 0; + // make the symbolization_table_ values point to the output vector + SymbolMap::iterator fill = symbolization_table_.begin(); + int num_symbols = 0; + const char *current_name = symbol_buffer_; for (int i = 0; i < total_bytes_read; i++) { - if (out[i] == '\n') { + if (symbol_buffer_[i] == '\n') { fill->second = current_name; - out[i] = '\0'; - current_name = out + i + 1; + symbol_buffer_[i] = '\0'; + current_name = symbol_buffer_ + i + 1; fill++; + num_symbols++; } } - return true; + return num_symbols; } } - return false; // shouldn't be reachable + return 0; // shouldn't be reachable #endif } diff --git a/third_party/tcmalloc/chromium/src/symbolize.h b/third_party/tcmalloc/chromium/src/symbolize.h index 72196f6..1ab4ed6 100644 --- a/third_party/tcmalloc/chromium/src/symbolize.h +++ b/third_party/tcmalloc/chromium/src/symbolize.h @@ -33,18 +33,50 @@ #ifndef TCMALLOC_SYMBOLIZE_H_ #define TCMALLOC_SYMBOLIZE_H_ +#include "config.h" +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif #include <map> using std::map; -// An average size of memory allocated for a stack trace symbol. -static const int kSymbolSize = 1024; +// SymbolTable encapsulates the address operations necessary for stack trace +// symbolization. A common use-case is to Add() the addresses from one or +// several stack traces to a table, call Symbolize() once and use GetSymbol() +// to get the symbol names for pretty-printing the stack traces. 
+class SymbolTable { + public: + SymbolTable() + : symbol_buffer_(NULL) {} + ~SymbolTable() { + delete[] symbol_buffer_; + } -// TODO(glider): it's better to make SymbolMap a class that encapsulates the -// address operations and has the Symbolize() method. -typedef map<uintptr_t, char*> SymbolMap; + // Adds an address to the table. This may overwrite a currently known symbol + // name, so Add() should not generally be called after Symbolize(). + void Add(const void* addr); -extern bool Symbolize(char *out, int out_size, - SymbolMap *symbolization_table); + // Returns the symbol name for addr, if the given address was added before + // the last successful call to Symbolize(). Otherwise may return an empty + // c-string. + const char* GetSymbol(const void* addr); + + // Obtains the symbol names for the addresses stored in the table and returns + // the number of addresses actually symbolized. + int Symbolize(); + + private: + typedef map<const void*, const char*> SymbolMap; + + // An average size of memory allocated for a stack trace symbol. + static const int kSymbolSize = 1024; + + // Map from addresses to symbol names. + SymbolMap symbolization_table_; + + // Pointer to the buffer that stores the symbol names. + char *symbol_buffer_; +}; #endif // TCMALLOC_SYMBOLIZE_H_ diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc index 66e0ea6..355def9 100644 --- a/third_party/tcmalloc/chromium/src/tcmalloc.cc +++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc @@ -136,6 +136,7 @@ # define WIN32_DO_PATCHING 1 #endif +using std::max; using tcmalloc::PageHeap; using tcmalloc::PageHeapAllocator; using tcmalloc::SizeMap; @@ -229,23 +230,30 @@ extern "C" { ATTRIBUTE_SECTION(google_malloc); void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW ATTRIBUTE_SECTION(google_malloc); -} + // Surprisingly, compilers use a nothrow-delete internally. 
See, eg: + // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html + void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); +} // extern "C" // Override the libc functions to prefer our own instead. This comes // first so code in tcmalloc.cc can use the overridden versions. One // exception: in windows, by default, we patch our code into these // functions (via src/windows/patch_function.cc) rather than override // them. In that case, we don't want to do this overriding here. -#ifndef WIN32_DO_PATCHING +#if !defined(WIN32_DO_PATCHING) && !defined(TCMALLOC_FOR_DEBUGALLOCATION) // TODO(mbelshe): Turn off TCMalloc's symbols for libc. We do that // elsewhere. -#if 0 +#ifndef _WIN32 #if defined(__GNUC__) && !defined(__MACH__) // Potentially faster variants that use the gcc alias extension. - // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check. // FreeBSD does support aliases, but apparently not correctly. :-( + // NOTE: we make many of these symbols weak, but do so in the makefile + // (via objcopy -W) and not here. That ends up being more portable. 
# define ALIAS(x) __attribute__ ((alias (x))) void* operator new(size_t size) ALIAS("tc_new"); void operator delete(void* p) __THROW ALIAS("tc_delete"); @@ -255,6 +263,10 @@ void* operator new(size_t size, const std::nothrow_t&) __THROW ALIAS("tc_new_nothrow"); void* operator new[](size_t size, const std::nothrow_t&) __THROW ALIAS("tc_newarray_nothrow"); +void operator delete(void* size, const std::nothrow_t&) __THROW + ALIAS("tc_delete_nothrow"); +void operator delete[](void* size, const std::nothrow_t&) __THROW + ALIAS("tc_deletearray_nothrow"); extern "C" { void* malloc(size_t size) __THROW ALIAS("tc_malloc"); void free(void* ptr) __THROW ALIAS("tc_free"); @@ -271,26 +283,8 @@ extern "C" { #ifdef HAVE_STRUCT_MALLINFO struct mallinfo mallinfo(void) __THROW ALIAS("tc_mallinfo"); #endif - // Some library routines on RedHat 9 allocate memory using malloc() - // and free it using __libc_free() (or vice-versa). Since we provide - // our own implementations of malloc/free, we need to make sure that - // the __libc_XXX variants (defined as part of glibc) also point to - // the same implementations. 
-# if defined(__GLIBC__) - void* __libc_malloc(size_t size) ALIAS("tc_malloc"); - void __libc_free(void* ptr) ALIAS("tc_free"); - void* __libc_realloc(void* ptr, size_t size) ALIAS("tc_realloc"); - void* __libc_calloc(size_t n, size_t size) ALIAS("tc_calloc"); - void __libc_cfree(void* ptr) ALIAS("tc_cfree"); - void* __libc_memalign(size_t align, size_t s) ALIAS("tc_memalign"); - void* __libc_valloc(size_t size) ALIAS("tc_valloc"); - void* __libc_pvalloc(size_t size) ALIAS("tc_pvalloc"); - int __posix_memalign(void** r, size_t a, size_t s) ALIAS("tc_posix_memalign"); -# define HAVE_ALIASED___LIBC 1 -# endif // #if defined(__GLIBC__) } // extern "C" -# undef ALIAS -#else +#else // #if defined(__GNUC__) && !defined(__MACH__) // Portable wrappers void* operator new(size_t size) { return tc_new(size); } void operator delete(void* p) __THROW { tc_delete(p); } @@ -302,6 +296,12 @@ void* operator new(size_t size, const std::nothrow_t& nt) __THROW { void* operator new[](size_t size, const std::nothrow_t& nt) __THROW { return tc_newarray_nothrow(size, nt); } +void operator delete(void* ptr, const std::nothrow_t& nt) __THROW { + return tc_delete_nothrow(ptr, nt); +} +void operator delete[](void* ptr, const std::nothrow_t& nt) __THROW { + return tc_deletearray_nothrow(ptr, nt); +} extern "C" { void* malloc(size_t s) __THROW { return tc_malloc(s); } void free(void* p) __THROW { tc_free(p); } @@ -319,11 +319,27 @@ extern "C" { #ifdef HAVE_STRUCT_MALLINFO struct mallinfo mallinfo(void) __THROW { return tc_mallinfo(); } #endif -} // extern C +} // extern "C" #endif // #if defined(__GNUC__) -#ifndef HAVE_ALIASED___LIBC +// Some library routines on RedHat 9 allocate memory using malloc() +// and free it using __libc_free() (or vice-versa). Since we provide +// our own implementations of malloc/free, we need to make sure that +// the __libc_XXX variants (defined as part of glibc) also point to +// the same implementations. 
+#ifdef __GLIBC__ // only glibc defines __libc_* extern "C" { +#ifdef ALIAS + void* __libc_malloc(size_t size) ALIAS("tc_malloc"); + void __libc_free(void* ptr) ALIAS("tc_free"); + void* __libc_realloc(void* ptr, size_t size) ALIAS("tc_realloc"); + void* __libc_calloc(size_t n, size_t size) ALIAS("tc_calloc"); + void __libc_cfree(void* ptr) ALIAS("tc_cfree"); + void* __libc_memalign(size_t align, size_t s) ALIAS("tc_memalign"); + void* __libc_valloc(size_t size) ALIAS("tc_valloc"); + void* __libc_pvalloc(size_t size) ALIAS("tc_pvalloc"); + int __posix_memalign(void** r, size_t a, size_t s) ALIAS("tc_posix_memalign"); +#else // #ifdef ALIAS void* __libc_malloc(size_t size) { return malloc(size); } void __libc_free(void* ptr) { free(ptr); } void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } @@ -335,19 +351,25 @@ extern "C" { int __posix_memalign(void** r, size_t a, size_t s) { return posix_memalign(r, a, s); } +#endif // #ifdef ALIAS } // extern "C" -#endif // #ifndef HAVE_ALIASED___LIBC +#endif // ifdef __GLIBC__ -#endif // #ifdef 0 +#endif // #ifndef _WIN32 +#undef ALIAS -#endif // #ifndef WIN32_DO_PATCHING +#endif // #ifndef(WIN32_DO_PATCHING) && ndef(TCMALLOC_FOR_DEBUGALLOCATION) // ----------------------- IMPLEMENTATION ------------------------------- -// These routines are called by free(), realloc(), etc. if the pointer is -// invalid. This is a cheap (source-editing required) kind of exception -// handling for these routines. +static int tc_new_mode = 0; // See tc_set_new_mode(). + +// Routines such as free() and realloc() catch some erroneous pointers +// passed to them, and invoke the below when they do. (An erroneous pointer +// won't be caught if it's within a valid span or a stale span for which +// the pagemap cache has a non-zero sizeclass.) This is a cheap (source-editing +// required) kind of exception handling for these routines. 
namespace { void InvalidFree(void* ptr) { CRASH("Attempt to free invalid pointer: %p\n", ptr); @@ -366,13 +388,11 @@ size_t InvalidGetAllocatedSize(void* ptr) { // Extract interesting stats struct TCMallocStats { - uint64_t system_bytes; // Bytes alloced from system - uint64_t committed_bytes; // Bytes alloced and committed from system - uint64_t thread_bytes; // Bytes in thread caches - uint64_t central_bytes; // Bytes in central cache - uint64_t transfer_bytes; // Bytes in central transfer cache - uint64_t pageheap_bytes; // Bytes in page heap - uint64_t metadata_bytes; // Bytes alloced for metadata + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t transfer_bytes; // Bytes in central transfer cache + uint64_t metadata_bytes; // Bytes alloced for metadata + PageHeap::Stats pageheap; // Stats from page heap }; // Get stats into "r". Also get per-size-class counts if class_count != NULL @@ -394,14 +414,8 @@ static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { { // scope SpinLockHolder h(Static::pageheap_lock()); ThreadCache::GetThreadStats(&r->thread_bytes, class_count); - } - - { //scope - SpinLockHolder h(Static::pageheap_lock()); - r->system_bytes = Static::pageheap()->SystemBytes(); - r->committed_bytes = Static::pageheap()->CommittedBytes(); r->metadata_bytes = tcmalloc::metadata_system_bytes(); - r->pageheap_bytes = Static::pageheap()->FreeBytes(); + r->pageheap = Static::pageheap()->stats(); } } @@ -413,8 +427,9 @@ static void DumpStats(TCMalloc_Printer* out, int level) { static const double MB = 1048576.0; - const uint64_t bytes_in_use = stats.system_bytes - - stats.pageheap_bytes + const uint64_t bytes_in_use = stats.pageheap.system_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes - stats.central_bytes - stats.transfer_bytes - stats.thread_bytes; @@ -422,13 +437,15 @@ static void DumpStats(TCMalloc_Printer* out, int level) { out->printf("WASTE: %7.1f MB 
committed but not used\n" "WASTE: %7.1f MB bytes committed, %7.1f MB bytes in use\n" "WASTE: committed/used ratio of %f\n", - (stats.committed_bytes - bytes_in_use) / MB, - stats.committed_bytes / MB, + (stats.pageheap.committed_bytes - bytes_in_use) / MB, + stats.pageheap.committed_bytes / MB, bytes_in_use / MB, - stats.committed_bytes / static_cast<double>(bytes_in_use)); + stats.pageheap.committed_bytes / static_cast<double>(bytes_in_use)); if (level >= 2) { out->printf("------------------------------------------------\n"); + out->printf("Size class breakdown\n"); + out->printf("------------------------------------------------\n"); uint64_t cumulative = 0; for (int cl = 0; cl < kNumClasses; ++cl) { if (class_count[cl] > 0) { @@ -456,6 +473,7 @@ static void DumpStats(TCMalloc_Printer* out, int level) { "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes committed\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes in use by application\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in page heap\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes unmapped in page heap\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in central cache\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in transfer cache\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in thread caches\n" @@ -463,10 +481,11 @@ static void DumpStats(TCMalloc_Printer* out, int level) { "MALLOC: %12" PRIu64 " Thread heaps in use\n" "MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n" "------------------------------------------------\n", - stats.system_bytes, stats.system_bytes / MB, - stats.committed_bytes, stats.committed_bytes / MB, + stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB, + stats.pageheap.committed_bytes, stats.pageheap.committed_bytes / MB, bytes_in_use, bytes_in_use / MB, - stats.pageheap_bytes, stats.pageheap_bytes / MB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MB, stats.central_bytes, stats.central_bytes / MB, 
stats.transfer_bytes, stats.transfer_bytes / MB, stats.thread_bytes, stats.thread_bytes / MB, @@ -530,9 +549,50 @@ static void** DumpHeapGrowthStackTraces() { return result; } +static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) { + PageID page = 1; // Some code may assume that page==0 is never used + bool done = false; + while (!done) { + // Accumulate a small number of ranges in a local buffer + static const int kNumRanges = 16; + static base::MallocRange ranges[kNumRanges]; + int n = 0; + { + SpinLockHolder h(Static::pageheap_lock()); + while (n < kNumRanges) { + if (!Static::pageheap()->GetNextRange(page, &ranges[n])) { + done = true; + break; + } else { + uintptr_t limit = ranges[n].address + ranges[n].length; + page = (limit + kPageSize - 1) >> kPageShift; + n++; + } + } + } + + for (int i = 0; i < n; i++) { + (*func)(arg, &ranges[i]); + } + } +} + // TCMalloc's support for extra malloc interfaces class TCMallocImplementation : public MallocExtension { + private: + // ReleaseToSystem() might release more than the requested bytes because + // the page heap releases at the span granularity, and spans are of wildly + // different sizes. This member keeps track of the extra bytes + // released so that the app can periodically call ReleaseToSystem() to + // release memory at a constant rate. + // NOTE: Protected by Static::pageheap_lock().
+ size_t extra_bytes_released_; + public: + TCMallocImplementation() + : extra_bytes_released_(0) { + } + virtual void GetStats(char* buffer, int buffer_length) { ASSERT(buffer_length > 0); TCMalloc_Printer printer(buffer, buffer_length); @@ -562,39 +622,51 @@ class TCMallocImplementation : public MallocExtension { return DumpHeapGrowthStackTraces(); } + virtual void Ranges(void* arg, RangeFunction func) { + IterateOverRanges(arg, func); + } + virtual bool GetNumericProperty(const char* name, size_t* value) { ASSERT(name != NULL); if (strcmp(name, "generic.current_allocated_bytes") == 0) { TCMallocStats stats; ExtractStats(&stats, NULL); - *value = stats.system_bytes + *value = stats.pageheap.system_bytes - stats.thread_bytes - stats.central_bytes - stats.transfer_bytes - - stats.pageheap_bytes; + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes; return true; } if (strcmp(name, "generic.heap_size") == 0) { TCMallocStats stats; ExtractStats(&stats, NULL); - *value = stats.system_bytes; + *value = stats.pageheap.system_bytes; return true; } - if (strcmp(name, "generic.committed_bytes") == 0) { - TCMallocStats stats; - ExtractStats(&stats, NULL); - *value = stats.committed_bytes + stats.metadata_bytes; + if (strcmp(name, "tcmalloc.slack_bytes") == 0) { + // We assume that bytes in the page heap are not fragmented too + // badly, and are therefore available for allocation without + // growing the pageheap system byte count. + SpinLockHolder l(Static::pageheap_lock()); + PageHeap::Stats stats = Static::pageheap()->stats(); + *value = stats.free_bytes + stats.unmapped_bytes; return true; } - if (strcmp(name, "tcmalloc.slack_bytes") == 0) { - // We assume that bytes in the page heap are not fragmented too - // badly, and are therefore available for allocation. 
+ if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().free_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) { SpinLockHolder l(Static::pageheap_lock()); - *value = Static::pageheap()->FreeBytes(); + *value = Static::pageheap()->stats().unmapped_bytes; return true; } @@ -630,9 +702,31 @@ class TCMallocImplementation : public MallocExtension { ThreadCache::BecomeIdle(); } - virtual void ReleaseFreeMemory() { + virtual void MarkThreadBusy(); // Implemented below + + virtual void ReleaseToSystem(size_t num_bytes) { SpinLockHolder h(Static::pageheap_lock()); - Static::pageheap()->ReleaseFreePages(); + if (num_bytes <= extra_bytes_released_) { + // We released too much on a prior call, so don't release any + // more this time. + extra_bytes_released_ = extra_bytes_released_ - num_bytes; + return; + } + num_bytes = num_bytes - extra_bytes_released_; + // num_bytes might be less than one page. If we pass zero to + // ReleaseAtLeastNPages, it won't do anything, so we release a whole + // page now and let extra_bytes_released_ smooth it out over time. + Length num_pages = max<Length>(num_bytes >> kPageShift, 1); + size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages( + num_pages) << kPageShift; + if (bytes_released > num_bytes) { + extra_bytes_released_ = bytes_released - num_bytes; + } else { + // The PageHeap wasn't able to release num_bytes. Don't try to + // compensate with a big release next time. Specifically, + // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX). + extra_bytes_released_ = 0; + } } virtual void SetMemoryReleaseRate(double rate) { @@ -681,9 +775,9 @@ TCMallocGuard::TCMallocGuard() { // patch the windows VirtualAlloc, etc. 
PatchWindowsFunctions(); // defined in windows/patch_functions.cc #endif - free(malloc(1)); + tc_free(tc_malloc(1)); ThreadCache::InitTSD(); - free(malloc(1)); + tc_free(tc_malloc(1)); MallocExtension::Register(new TCMallocImplementation); } } @@ -778,6 +872,24 @@ static void ReportLargeAlloc(Length num_pages, void* result) { namespace { +inline void* cpp_alloc(size_t size, bool nothrow); +inline void* do_malloc(size_t size); + +// TODO(willchan): Investigate whether or not inlining this much is harmful to +// performance. +// This is equivalent to do_malloc() except when tc_new_mode is set to true. +// Otherwise, it will run the std::new_handler if set. +inline void* do_malloc_or_cpp_alloc(size_t size) { + return tc_new_mode ? cpp_alloc(size, true) : do_malloc(size); +} + +void* cpp_memalign(size_t align, size_t size); +void* do_memalign(size_t align, size_t size); + +inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) { + return tc_new_mode ? cpp_memalign(align, size) : do_memalign(align, size); +} + // Helper for do_malloc(). inline void* do_malloc_pages(Length num_pages) { Span *span; @@ -828,7 +940,7 @@ inline void* do_calloc(size_t n, size_t elem_size) { const size_t size = n * elem_size; if (elem_size != 0 && size / elem_size != n) return NULL; - void* result = do_malloc(size); + void* result = do_malloc_or_cpp_alloc(size); if (result != NULL) { memset(result, 0, size); } @@ -937,11 +1049,11 @@ inline void* do_realloc_with_callback( void* new_ptr = NULL; if (new_size > old_size && new_size < lower_bound_to_grow) { - new_ptr = do_malloc(lower_bound_to_grow); + new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow); } if (new_ptr == NULL) { // Either new_size is not a tiny increment, or last do_malloc failed. 
- new_ptr = do_malloc(new_size); + new_ptr = do_malloc_or_cpp_alloc(new_size); } if (new_ptr == NULL) { return NULL; @@ -1062,16 +1174,18 @@ inline struct mallinfo do_mallinfo() { // Unfortunately, the struct contains "int" field, so some of the // size values will be truncated. - info.arena = static_cast<int>(stats.system_bytes); + info.arena = static_cast<int>(stats.pageheap.system_bytes); info.fsmblks = static_cast<int>(stats.thread_bytes + stats.central_bytes + stats.transfer_bytes); - info.fordblks = static_cast<int>(stats.pageheap_bytes); - info.uordblks = static_cast<int>(stats.system_bytes + info.fordblks = static_cast<int>(stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); + info.uordblks = static_cast<int>(stats.pageheap.system_bytes - stats.thread_bytes - stats.central_bytes - stats.transfer_bytes - - stats.pageheap_bytes); + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes); return info; } @@ -1127,6 +1241,52 @@ inline void* cpp_alloc(size_t size, bool nothrow) { } } +void* cpp_memalign(size_t align, size_t size) { + for (;;) { + void* p = do_memalign(align, size); +#ifdef PREANSINEW + return p; +#else + if (p == NULL) { // allocation failed + // Get the current new handler. NB: this function is not + // thread-safe. We make a feeble stab at making it so here, but + // this lock only protects against tcmalloc interfering with + // itself, not with other libraries calling set_new_handler. + std::new_handler nh; + { + SpinLockHolder h(&set_new_handler_lock); + nh = std::set_new_handler(0); + (void) std::set_new_handler(nh); + } +#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + if (nh) { + // Since exceptions are disabled, we don't really know if new_handler + // failed. Assume it will abort if it fails. + (*nh)(); + continue; + } + return 0; +#else + // If no new_handler is established, the allocation failed. + if (!nh) + return 0; + + // Otherwise, try the new_handler. 
If it returns, retry the + // allocation. If it throws std::bad_alloc, fail the allocation. + // if it throws something else, don't interfere. + try { + (*nh)(); + } catch (const std::bad_alloc&) { + return p; + } +#endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + } else { // allocation success + return p; + } +#endif // PREANSINEW + } +} + } // end unnamed namespace // As promised, the definition of this function, declared above. @@ -1134,41 +1294,56 @@ size_t TCMallocImplementation::GetAllocatedSize(void* ptr) { return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize); } +void TCMallocImplementation::MarkThreadBusy() { + // Allocate to force the creation of a thread cache, but avoid + // invoking any hooks. + do_free(do_malloc(0)); +} + //------------------------------------------------------------------- // Exported routines //------------------------------------------------------------------- +extern "C" PERFTOOLS_DLL_DECL const char* tc_version( + int* major, int* minor, const char** patch) __THROW { + if (major) *major = TC_VERSION_MAJOR; + if (minor) *minor = TC_VERSION_MINOR; + if (patch) *patch = TC_VERSION_PATCH; + return TC_VERSION_STRING; +} + // CAVEAT: The code structure below ensures that MallocHook methods are always // called from the stack frame of the invoked allocation function. // heap-checker.cc depends on this to start a stack trace from // the call to the (de)allocation function. -static int tc_new_mode = 0; // See tc_set_new_mode(). -extern "C" void* tc_malloc(size_t size) __THROW { - void* result = (tc_new_mode ? 
cpp_alloc(size, false) : do_malloc(size)); +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW { + void* result = do_malloc_or_cpp_alloc(size); MallocHook::InvokeNewHook(result, size); return result; } -extern "C" void tc_free(void* ptr) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW { MallocHook::InvokeDeleteHook(ptr); do_free(ptr); } -extern "C" void* tc_calloc(size_t n, size_t elem_size) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n, + size_t elem_size) __THROW { void* result = do_calloc(n, elem_size); MallocHook::InvokeNewHook(result, n * elem_size); return result; } -extern "C" void tc_cfree(void* ptr) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW { MallocHook::InvokeDeleteHook(ptr); do_free(ptr); } -extern "C" void* tc_realloc(void* old_ptr, size_t new_size) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr, + size_t new_size) __THROW { if (old_ptr == NULL) { - void* result = do_malloc(new_size); + void* result = do_malloc_or_cpp_alloc(new_size); MallocHook::InvokeNewHook(result, new_size); return result; } @@ -1180,7 +1355,7 @@ extern "C" void* tc_realloc(void* old_ptr, size_t new_size) __THROW { return do_realloc(old_ptr, new_size); } -extern "C" void* tc_new(size_t size) { +extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { void* p = cpp_alloc(size, false); // We keep this next instruction out of cpp_alloc for a reason: when // it's in, and new just calls cpp_alloc, the optimizer may fold the @@ -1191,18 +1366,27 @@ extern "C" void* tc_new(size_t size) { return p; } -extern "C" void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow( + size_t size, const std::nothrow_t&) __THROW { void* p = cpp_alloc(size, true); MallocHook::InvokeNewHook(p, size); return p; } -extern "C" void tc_delete(void* p) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW { 
MallocHook::InvokeDeleteHook(p); do_free(p); } -extern "C" void* tc_newarray(size_t size) { +// Compilers define and use this (via ::operator delete(ptr, nothrow)). +// But it's really the same as normal delete, so we just do the same thing. +extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow( + void* p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free(p); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { void* p = cpp_alloc(size, false); // We keep this next instruction out of cpp_alloc for a reason: when // it's in, and new just calls cpp_alloc, the optimizer may fold the @@ -1213,32 +1397,40 @@ extern "C" void* tc_newarray(size_t size) { return p; } -extern "C" void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow( + size_t size, const std::nothrow_t&) __THROW { void* p = cpp_alloc(size, true); MallocHook::InvokeNewHook(p, size); return p; } -extern "C" void tc_deletearray(void* p) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW { MallocHook::InvokeDeleteHook(p); do_free(p); } -extern "C" void* tc_memalign(size_t align, size_t size) __THROW { - void* result = do_memalign(align, size); +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow( + void* p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free(p); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, + size_t size) __THROW { + void* result = do_memalign_or_cpp_memalign(align, size); MallocHook::InvokeNewHook(result, size); return result; } -extern "C" int tc_posix_memalign(void** result_ptr, size_t align, size_t size) - __THROW { +extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign( + void** result_ptr, size_t align, size_t size) __THROW { if (((align % sizeof(void*)) != 0) || ((align & (align - 1)) != 0) || (align == 0)) { return EINVAL; } - void* result = do_memalign(align, size); + void* result = 
do_memalign_or_cpp_memalign(align, size); MallocHook::InvokeNewHook(result, size); if (result == NULL) { return ENOMEM; @@ -1250,33 +1442,36 @@ extern "C" int tc_posix_memalign(void** result_ptr, size_t align, size_t size) static size_t pagesize = 0; -extern "C" void* tc_valloc(size_t size) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) __THROW { // Allocate page-aligned object of length >= size bytes if (pagesize == 0) pagesize = getpagesize(); - void* result = do_memalign(pagesize, size); + void* result = do_memalign_or_cpp_memalign(pagesize, size); MallocHook::InvokeNewHook(result, size); return result; } -extern "C" void* tc_pvalloc(size_t size) __THROW { +extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) __THROW { // Round up size to a multiple of pagesize if (pagesize == 0) pagesize = getpagesize(); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } size = (size + pagesize - 1) & ~(pagesize - 1); - void* result = do_memalign(pagesize, size); + void* result = do_memalign_or_cpp_memalign(pagesize, size); MallocHook::InvokeNewHook(result, size); return result; } -extern "C" void tc_malloc_stats(void) __THROW { +extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW { do_malloc_stats(); } -extern "C" int tc_mallopt(int cmd, int value) __THROW { +extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW { return do_mallopt(cmd, value); } #ifdef HAVE_STRUCT_MALLINFO -extern "C" struct mallinfo tc_mallinfo(void) __THROW { +extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW { return do_mallinfo(); } #endif @@ -1286,7 +1481,7 @@ extern "C" struct mallinfo tc_mallinfo(void) __THROW { // If flag is 1, calls to malloc will behave like calls to new, // and the std_new_handler will be invoked on failure. // Returns the previous mode. 
-extern "C" int tc_set_new_mode(int flag) __THROW { +extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW { int old_mode = tc_new_mode; tc_new_mode = flag; return old_mode; @@ -1300,13 +1495,15 @@ extern "C" int tc_set_new_mode(int flag) __THROW { // This function is an exception to the rule of calling MallocHook method // from the stack frame of the allocation function; // heap-checker handles this special case explicitly. +#ifndef TCMALLOC_FOR_DEBUGALLOCATION static void *MemalignOverride(size_t align, size_t size, const void *caller) __THROW ATTRIBUTE_SECTION(google_malloc); static void *MemalignOverride(size_t align, size_t size, const void *caller) __THROW { - void* result = do_memalign(align, size); + void* result = do_memalign_or_cpp_memalign(align, size); MallocHook::InvokeNewHook(result, size); return result; } void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; +#endif // #ifndef TCMALLOC_FOR_DEBUGALLOCATION diff --git a/third_party/tcmalloc/chromium/src/tests/atomicops_unittest.cc b/third_party/tcmalloc/chromium/src/tests/atomicops_unittest.cc index 5a620f5..3892b59 100644 --- a/third_party/tcmalloc/chromium/src/tests/atomicops_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/atomicops_unittest.cc @@ -60,50 +60,50 @@ static void TestAtomicIncrement() { s.count = 0; s.next_word = next_word_value; - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 1), 1); - CHECK_EQ(s.count, 1); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); - - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 2), 3); - CHECK_EQ(s.count, 3); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); - - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 3), 6); - CHECK_EQ(s.count, 6); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); - - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -3), 3); - 
CHECK_EQ(s.count, 3); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); - - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -2), 1); - CHECK_EQ(s.count, 1); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); - - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -1), 0); - CHECK_EQ(s.count, 0); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); - - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -1), -1); - CHECK_EQ(s.count, -1); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); - - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -4), -5); - CHECK_EQ(s.count, -5); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); - - CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 5), 0); - CHECK_EQ(s.count, 0); - CHECK_EQ(s.prev_word, prev_word_value); - CHECK_EQ(s.next_word, next_word_value); + ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, 1)); + ASSERT_EQ(1, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + + ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, 2)); + ASSERT_EQ(3, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + + ASSERT_EQ(6, base::subtle::NoBarrier_AtomicIncrement(&s.count, 3)); + ASSERT_EQ(6, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + + ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, -3)); + ASSERT_EQ(3, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + + ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -2)); + ASSERT_EQ(1, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + + ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1)); + 
ASSERT_EQ(0, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + + ASSERT_EQ(-1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1)); + ASSERT_EQ(-1, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + + ASSERT_EQ(-5, base::subtle::NoBarrier_AtomicIncrement(&s.count, -4)); + ASSERT_EQ(-5, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + + ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, 5)); + ASSERT_EQ(0, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); } @@ -114,8 +114,8 @@ template <class AtomicType> static void TestCompareAndSwap() { AtomicType value = 0; AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1); - CHECK_EQ(1, value); - CHECK_EQ(0, prev); + ASSERT_EQ(1, value); + ASSERT_EQ(0, prev); // Use test value that has non-zero bits in both halves, more for testing // 64-bit implementation on 32-bit platforms. @@ -123,13 +123,13 @@ static void TestCompareAndSwap() { (NUM_BITS(AtomicType) - 2)) + 11; value = k_test_val; prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5); - CHECK_EQ(k_test_val, value); - CHECK_EQ(k_test_val, prev); + ASSERT_EQ(k_test_val, value); + ASSERT_EQ(k_test_val, prev); value = k_test_val; prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5); - CHECK_EQ(5, value); - CHECK_EQ(k_test_val, prev); + ASSERT_EQ(5, value); + ASSERT_EQ(k_test_val, prev); } @@ -137,8 +137,8 @@ template <class AtomicType> static void TestAtomicExchange() { AtomicType value = 0; AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1); - CHECK_EQ(1, value); - CHECK_EQ(0, new_value); + ASSERT_EQ(1, value); + ASSERT_EQ(0, new_value); // Use test value that has non-zero bits in both halves, more for testing // 64-bit implementation on 32-bit platforms. 
@@ -146,13 +146,13 @@ static void TestAtomicExchange() { (NUM_BITS(AtomicType) - 2)) + 11; value = k_test_val; new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val); - CHECK_EQ(k_test_val, value); - CHECK_EQ(k_test_val, new_value); + ASSERT_EQ(k_test_val, value); + ASSERT_EQ(k_test_val, new_value); value = k_test_val; new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5); - CHECK_EQ(5, value); - CHECK_EQ(k_test_val, new_value); + ASSERT_EQ(5, value); + ASSERT_EQ(k_test_val, new_value); } @@ -163,11 +163,11 @@ static void TestAtomicIncrementBounds() { AtomicType test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2); AtomicType value = test_val - 1; AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1); - CHECK_EQ(test_val, value); - CHECK_EQ(value, new_value); + ASSERT_EQ(test_val, value); + ASSERT_EQ(value, new_value); base::subtle::NoBarrier_AtomicIncrement(&value, -1); - CHECK_EQ(test_val - 1, value); + ASSERT_EQ(test_val - 1, value); } // This is a simple sanity check that values are correct. Not testing @@ -180,19 +180,19 @@ static void TestStore() { AtomicType value; base::subtle::NoBarrier_Store(&value, kVal1); - CHECK_EQ(kVal1, value); + ASSERT_EQ(kVal1, value); base::subtle::NoBarrier_Store(&value, kVal2); - CHECK_EQ(kVal2, value); + ASSERT_EQ(kVal2, value); base::subtle::Acquire_Store(&value, kVal1); - CHECK_EQ(kVal1, value); + ASSERT_EQ(kVal1, value); base::subtle::Acquire_Store(&value, kVal2); - CHECK_EQ(kVal2, value); + ASSERT_EQ(kVal2, value); base::subtle::Release_Store(&value, kVal1); - CHECK_EQ(kVal1, value); + ASSERT_EQ(kVal1, value); base::subtle::Release_Store(&value, kVal2); - CHECK_EQ(kVal2, value); + ASSERT_EQ(kVal2, value); } // This is a simple sanity check that values are correct. 
Not testing @@ -205,19 +205,19 @@ static void TestLoad() { AtomicType value; value = kVal1; - CHECK_EQ(kVal1, base::subtle::NoBarrier_Load(&value)); + ASSERT_EQ(kVal1, base::subtle::NoBarrier_Load(&value)); value = kVal2; - CHECK_EQ(kVal2, base::subtle::NoBarrier_Load(&value)); + ASSERT_EQ(kVal2, base::subtle::NoBarrier_Load(&value)); value = kVal1; - CHECK_EQ(kVal1, base::subtle::Acquire_Load(&value)); + ASSERT_EQ(kVal1, base::subtle::Acquire_Load(&value)); value = kVal2; - CHECK_EQ(kVal2, base::subtle::Acquire_Load(&value)); + ASSERT_EQ(kVal2, base::subtle::Acquire_Load(&value)); value = kVal1; - CHECK_EQ(kVal1, base::subtle::Release_Load(&value)); + ASSERT_EQ(kVal1, base::subtle::Release_Load(&value)); value = kVal2; - CHECK_EQ(kVal2, base::subtle::Release_Load(&value)); + ASSERT_EQ(kVal2, base::subtle::Release_Load(&value)); } template <class AtomicType> diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc index 4274b7e..ca00e36 100644 --- a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc @@ -102,6 +102,28 @@ TEST(DebugAllocationTest, DeallocMismatch) { } } +TEST(DebugAllocationTest, DoubleFree) { + int* pint = new int; + delete pint; + IF_DEBUG_EXPECT_DEATH(delete pint, "has been already deallocated"); +} + +TEST(DebugAllocationTest, StompBefore) { + int* pint = new int; +#ifndef NDEBUG // don't stomp memory if we're not in a position to detect it + pint[-1] = 5; + IF_DEBUG_EXPECT_DEATH(delete pint, "a word before object"); +#endif +} + +TEST(DebugAllocationTest, StompAfter) { + int* pint = new int; +#ifndef NDEBUG // don't stomp memory if we're not in a position to detect it + pint[1] = 5; + IF_DEBUG_EXPECT_DEATH(delete pint, "a word after object"); +#endif +} + TEST(DebugAllocationTest, FreeQueueTest) { // Verify that the allocator doesn't return blocks that were recently freed. 
int* x = new int; @@ -205,6 +227,31 @@ TEST(DebugAllocationTest, GetAllocatedSizeTest) { free(a); } +TEST(DebugAllocationTest, HugeAlloc) { + const size_t kTooBig = ~static_cast<size_t>(0); + void* a = NULL; + char* b = NULL; + +#ifndef NDEBUG + + a = malloc(kTooBig); + EXPECT_EQ(NULL, a); + b = NULL; + IF_DEBUG_EXPECT_DEATH(b = new char[kTooBig], + "Unable to allocate.*new\\[\\] failed\\."); + EXPECT_EQ(NULL, b); + + // kAlsoTooBig is small enough not to get caught by debugallocation's check, + // but will still fall through to tcmalloc's check. + const size_t kAlsoTooBig = kTooBig - 1024; + + a = malloc(kAlsoTooBig); + EXPECT_EQ(NULL, a); + IF_DEBUG_EXPECT_DEATH(b = new char[kAlsoTooBig], "Unable to allocate.*new failed"); + EXPECT_EQ(NULL, b); +#endif +} + int main(int argc, char** argv) { // If you run without args, we run the non-death parts of the test. // Otherwise, argv[1] should be a number saying which death-test diff --git a/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c b/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c index aad2d4b..b6319a1 100644 --- a/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c +++ b/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c @@ -108,6 +108,7 @@ void TestMallocExtension(void) { } MallocExtension_MarkThreadIdle(); MallocExtension_MarkThreadBusy(); + MallocExtension_ReleaseToSystem(1); MallocExtension_ReleaseFreeMemory(); if (MallocExtension_GetEstimatedAllocatedSize(10) < 10) { FAIL("GetEstimatedAllocatedSize returned a bad value (too small)"); diff --git a/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc b/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc index 1f00f73..ef76766 100644 --- a/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc +++ b/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc @@ -43,32 +43,32 @@ int main(int argc, char** argv) { void* a = malloc(1000); size_t cxx_bytes_used, 
c_bytes_used; - CHECK(MallocExtension::instance()->GetNumericProperty( - "generic.current_allocated_bytes", &cxx_bytes_used)); - CHECK(MallocExtension_GetNumericProperty( - "generic.current_allocated_bytes", &c_bytes_used)); - CHECK_GT(cxx_bytes_used, 1000); - CHECK_EQ(cxx_bytes_used, c_bytes_used); + ASSERT_TRUE(MallocExtension::instance()->GetNumericProperty( + "generic.current_allocated_bytes", &cxx_bytes_used)); + ASSERT_TRUE(MallocExtension_GetNumericProperty( + "generic.current_allocated_bytes", &c_bytes_used)); + ASSERT_GT(cxx_bytes_used, 1000); + ASSERT_EQ(cxx_bytes_used, c_bytes_used); - CHECK(MallocExtension::instance()->VerifyAllMemory()); - CHECK(MallocExtension_VerifyAllMemory()); + ASSERT_TRUE(MallocExtension::instance()->VerifyAllMemory()); + ASSERT_TRUE(MallocExtension_VerifyAllMemory()); - CHECK_GE(MallocExtension::instance()->GetAllocatedSize(a), 1000); + ASSERT_GE(MallocExtension::instance()->GetAllocatedSize(a), 1000); // This is just a sanity check. If we allocated too much, tcmalloc is broken - CHECK_LE(MallocExtension::instance()->GetAllocatedSize(a), 5000); - CHECK_GE(MallocExtension::instance()->GetEstimatedAllocatedSize(1000), 1000); + ASSERT_LE(MallocExtension::instance()->GetAllocatedSize(a), 5000); + ASSERT_GE(MallocExtension::instance()->GetEstimatedAllocatedSize(1000), 1000); for (int i = 0; i < 10; ++i) { void *p = malloc(i); - CHECK_GE(MallocExtension::instance()->GetAllocatedSize(p), + ASSERT_GE(MallocExtension::instance()->GetAllocatedSize(p), MallocExtension::instance()->GetEstimatedAllocatedSize(i)); free(p); } // Check the c-shim version too. 
- CHECK_GE(MallocExtension_GetAllocatedSize(a), 1000); - CHECK_LE(MallocExtension_GetAllocatedSize(a), 5000); - CHECK_GE(MallocExtension_GetEstimatedAllocatedSize(1000), 1000); + ASSERT_GE(MallocExtension_GetAllocatedSize(a), 1000); + ASSERT_LE(MallocExtension_GetAllocatedSize(a), 5000); + ASSERT_GE(MallocExtension_GetEstimatedAllocatedSize(1000), 1000); free(a); diff --git a/third_party/tcmalloc/chromium/src/tests/pagemap_unittest.cc b/third_party/tcmalloc/chromium/src/tests/pagemap_unittest.cc index dcf6c9a..83e76e2 100644 --- a/third_party/tcmalloc/chromium/src/tests/pagemap_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/pagemap_unittest.cc @@ -113,6 +113,53 @@ void TestMap(int limit, bool limit_is_below_the_overflow_boundary) { } } +// REQUIRES: BITS==10, i.e., valid range is [0,1023]. +// Representations for different types will end up being: +// PageMap1: array[1024] +// PageMap2: array[32][32] +// PageMap3: array[16][16][4] +template <class Type> +void TestNext(const char* name) { + RAW_LOG(ERROR, "Running NextTest %s\n", name); + Type map(malloc); + char a, b, c, d, e; + + // When map is empty + CHECK(map.Next(0) == NULL); + CHECK(map.Next(5) == NULL); + CHECK(map.Next(1<<30) == NULL); + + // Add a single value + map.Ensure(40, 1); + map.set(40, &a); + CHECK(map.Next(0) == &a); + CHECK(map.Next(39) == &a); + CHECK(map.Next(40) == &a); + CHECK(map.Next(41) == NULL); + CHECK(map.Next(1<<30) == NULL); + + // Add a few values + map.Ensure(41, 1); + map.Ensure(100, 3); + map.set(41, &b); + map.set(100, &c); + map.set(101, &d); + map.set(102, &e); + CHECK(map.Next(0) == &a); + CHECK(map.Next(39) == &a); + CHECK(map.Next(40) == &a); + CHECK(map.Next(41) == &b); + CHECK(map.Next(42) == &c); + CHECK(map.Next(63) == &c); + CHECK(map.Next(64) == &c); + CHECK(map.Next(65) == &c); + CHECK(map.Next(99) == &c); + CHECK(map.Next(100) == &c); + CHECK(map.Next(101) == &d); + CHECK(map.Next(102) == &e); + CHECK(map.Next(103) == NULL); +} + int main(int argc, char** 
argv) { TestMap< TCMalloc_PageMap1<10> > (100, true); TestMap< TCMalloc_PageMap1<10> > (1 << 10, false); @@ -121,6 +168,10 @@ int main(int argc, char** argv) { TestMap< TCMalloc_PageMap3<20> > (100, true); TestMap< TCMalloc_PageMap3<20> > (1 << 20, false); + TestNext< TCMalloc_PageMap1<10> >("PageMap1"); + TestNext< TCMalloc_PageMap2<10> >("PageMap2"); + TestNext< TCMalloc_PageMap3<10> >("PageMap3"); + printf("PASS\n"); return 0; } diff --git a/third_party/tcmalloc/chromium/src/tests/profile-handler_unittest.cc b/third_party/tcmalloc/chromium/src/tests/profile-handler_unittest.cc index 4b247c7..84e035c 100644 --- a/third_party/tcmalloc/chromium/src/tests/profile-handler_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/profile-handler_unittest.cc @@ -8,8 +8,9 @@ #include "profile-handler.h" #include <assert.h> -#include <sys/time.h> #include <pthread.h> +#include <sys/time.h> +#include <time.h> #include "base/logging.h" #include "base/simple_mutex.h" @@ -46,19 +47,41 @@ class Thread { bool joinable_; }; -// Sleep interval in usecs. To ensure a SIGPROF timer interrupt under heavy -// load, this is set to a 20x of ProfileHandler timer interval (i.e 100Hz) -// TODO(nabeelmian) Under very heavy loads, the worker thread may not accumulate -// enough cpu usage to get a profile tick. -int kSleepInterval = 200000; + +// Sleep interval in nano secs. ITIMER_PROF goes off only after the specified CPU +// time is consumed. Under heavy load this process may not get scheduled in a +// timely fashion. Therefore, give enough time (20x of ProfileHandler timer +// interval 10ms (100Hz)) for this process to accumulate enough CPU time to get +// a profile tick. +int kSleepInterval = 200000000; + +// Sleep interval in nano secs. To ensure that if the timer has expired it is +// reset. +int kTimerResetInterval = 5000000; // Whether each thread has separate timers.
static bool timer_separate_ = false; +static int timer_type_ = ITIMER_PROF; +static int signal_number_ = SIGPROF; + +// Delays processing by the specified number of nano seconds. 'delay_ns' +// must be less than the number of nano seconds in a second (1000000000). +void Delay(int delay_ns) { + static const int kNumNSecInSecond = 1000000000; + EXPECT_LT(delay_ns, kNumNSecInSecond); + struct timespec delay = { 0, delay_ns }; + nanosleep(&delay, 0); +} // Checks whether the profile timer is enabled for the current thread. bool IsTimerEnabled() { itimerval current_timer; - EXPECT_EQ(0, getitimer(ITIMER_PROF, &current_timer)); + EXPECT_EQ(0, getitimer(timer_type_, &current_timer)); + if ((current_timer.it_value.tv_sec == 0) && + (current_timer.it_value.tv_usec != 0)) { + // May be the timer has expired. Sleep for a bit and check again. + Delay(kTimerResetInterval); + EXPECT_EQ(0, getitimer(timer_type_, &current_timer)); + } return (current_timer.it_value.tv_sec != 0 || current_timer.it_value.tv_usec != 0); } @@ -160,11 +183,15 @@ class ProfileHandlerTest { // Determines whether threads have separate timers. static void SetUpTestCase() { + timer_type_ = (getenv("CPUPROFILE_REALTIME") ? ITIMER_REAL : ITIMER_PROF); + signal_number_ = (getenv("CPUPROFILE_REALTIME") ? SIGALRM : SIGPROF); + timer_separate_ = threads_have_separate_timers(); + Delay(kTimerResetInterval); } - // Sets up the profile timers and SIGPROF handler in a known state. It does - // the following: + // Sets up the profile timers and SIGPROF/SIGALRM handler in a known state. + // It does the following: // 1. Unregisters all the callbacks, stops the timer (if shared) and // clears out timer_sharing state in the ProfileHandler. This clears // out any state left behind by the previous test or during module @@ -176,7 +203,7 @@ class ProfileHandlerTest { // Reset the state of ProfileHandler between each test. This unregisters // all callbacks, stops timer (if shared) and clears timer sharing state.
ProfileHandlerReset(); - EXPECT_EQ(GetCallbackCount(), 0); + EXPECT_EQ(0, GetCallbackCount()); VerifyDisabled(); // ProfileHandler requires at least two threads to be registerd to determine // whether timers are shared. @@ -213,7 +240,7 @@ class ProfileHandlerTest { busy_worker_->Start(); // Wait for worker to start up and register with the ProfileHandler. // TODO(nabeelmian) This may not work under very heavy load. - usleep(kSleepInterval); + Delay(kSleepInterval); } // Stops the worker thread. @@ -223,10 +250,10 @@ class ProfileHandlerTest { delete busy_worker_; } - // Checks whether SIGPROF signal handler is enabled. + // Checks whether SIGPROF/SIGALRM signal handler is enabled. bool IsSignalEnabled() { struct sigaction sa; - CHECK_EQ(sigaction(SIGPROF, NULL, &sa), 0); + CHECK_EQ(sigaction(signal_number_, NULL, &sa), 0); return ((sa.sa_handler == SIG_IGN) || (sa.sa_handler == SIG_DFL)) ? false : true; } @@ -257,7 +284,7 @@ class ProfileHandlerTest { uint64 interrupts_before = GetInterruptCount(); // Sleep for a bit and check that tick counter is making progress. int old_tick_count = tick_counter; - usleep(kSleepInterval); + Delay(kSleepInterval); int new_tick_count = tick_counter; EXPECT_GT(new_tick_count, old_tick_count); uint64 interrupts_after = GetInterruptCount(); @@ -268,9 +295,9 @@ class ProfileHandlerTest { void VerifyUnregistration(const int& tick_counter) { // Sleep for a bit and check that tick counter is not making progress. int old_tick_count = tick_counter; - usleep(kSleepInterval); + Delay(kSleepInterval); int new_tick_count = tick_counter; - EXPECT_EQ(new_tick_count, old_tick_count); + EXPECT_EQ(old_tick_count, new_tick_count); // If no callbacks, signal handler and shared timer should be disabled. if (GetCallbackCount() == 0) { EXPECT_FALSE(IsSignalEnabled()); @@ -282,13 +309,13 @@ class ProfileHandlerTest { } } - // Verifies that the SIGPROF interrupt handler is disabled and the timer, - // if shared, is disabled. 
Expects the worker to be running. + // Verifies that the SIGPROF/SIGALRM interrupt handler is disabled and the + // timer, if shared, is disabled. Expects the worker to be running. void VerifyDisabled() { // Check that the signal handler is disabled. EXPECT_FALSE(IsSignalEnabled()); // Check that the callback count is 0. - EXPECT_EQ(GetCallbackCount(), 0); + EXPECT_EQ(0, GetCallbackCount()); // Check that the timer is disabled if shared, enabled otherwise. if (timer_separate_) { EXPECT_TRUE(IsTimerEnabled()); @@ -297,9 +324,25 @@ class ProfileHandlerTest { } // Verify that the ProfileHandler is not accumulating profile ticks. uint64 interrupts_before = GetInterruptCount(); - usleep(kSleepInterval); + Delay(kSleepInterval); uint64 interrupts_after = GetInterruptCount(); - EXPECT_EQ(interrupts_after, interrupts_before); + EXPECT_EQ(interrupts_before, interrupts_after); + } + + // Registers a callback and waits for kTimerResetInterval for timers to get + // reset. + ProfileHandlerToken* RegisterCallback(void* callback_arg) { + ProfileHandlerToken* token = ProfileHandlerRegisterCallback( + TickCounter, callback_arg); + Delay(kTimerResetInterval); + return token; + } + + // Unregisters a callback and waits for kTimerResetInterval for timers to get + // reset. + void UnregisterCallback(ProfileHandlerToken* token) { + ProfileHandlerUnregisterCallback(token); + Delay(kTimerResetInterval); } // Busy worker thread to accumulate cpu usage. @@ -336,10 +379,9 @@ class ProfileHandlerTest { // ProfileHandlerUnregisterCallback. 
TEST_F(ProfileHandlerTest, RegisterUnregisterCallback) { int tick_count = 0; - ProfileHandlerToken* token = ProfileHandlerRegisterCallback( - TickCounter, &tick_count); + ProfileHandlerToken* token = RegisterCallback(&tick_count); VerifyRegistration(tick_count); - ProfileHandlerUnregisterCallback(token); + UnregisterCallback(token); VerifyUnregistration(tick_count); } @@ -347,31 +389,29 @@ TEST_F(ProfileHandlerTest, RegisterUnregisterCallback) { TEST_F(ProfileHandlerTest, MultipleCallbacks) { // Register first callback. int first_tick_count; - ProfileHandlerToken* token1 = ProfileHandlerRegisterCallback( - TickCounter, &first_tick_count); + ProfileHandlerToken* token1 = RegisterCallback(&first_tick_count); // Check that callback was registered correctly. VerifyRegistration(first_tick_count); - EXPECT_EQ(GetCallbackCount(), 1); + EXPECT_EQ(1, GetCallbackCount()); // Register second callback. int second_tick_count; - ProfileHandlerToken* token2 = ProfileHandlerRegisterCallback( - TickCounter, &second_tick_count); + ProfileHandlerToken* token2 = RegisterCallback(&second_tick_count); // Check that callback was registered correctly. VerifyRegistration(second_tick_count); - EXPECT_EQ(GetCallbackCount(), 2); + EXPECT_EQ(2, GetCallbackCount()); // Unregister first callback. - ProfileHandlerUnregisterCallback(token1); + UnregisterCallback(token1); VerifyUnregistration(first_tick_count); - EXPECT_EQ(GetCallbackCount(), 1); + EXPECT_EQ(1, GetCallbackCount()); // Verify that second callback is still registered. VerifyRegistration(second_tick_count); // Unregister second callback. - ProfileHandlerUnregisterCallback(token2); + UnregisterCallback(token2); VerifyUnregistration(second_tick_count); - EXPECT_EQ(GetCallbackCount(), 0); + EXPECT_EQ(0, GetCallbackCount()); // Verify that the signal handler and timers are correctly disabled. VerifyDisabled(); @@ -382,15 +422,15 @@ TEST_F(ProfileHandlerTest, Reset) { // Verify that the profile timer interrupt is disabled. 
VerifyDisabled(); int first_tick_count; - ProfileHandlerRegisterCallback(TickCounter, &first_tick_count); + RegisterCallback(&first_tick_count); VerifyRegistration(first_tick_count); - EXPECT_EQ(GetCallbackCount(), 1); + EXPECT_EQ(1, GetCallbackCount()); // Register second callback. int second_tick_count; - ProfileHandlerRegisterCallback(TickCounter, &second_tick_count); + RegisterCallback(&second_tick_count); VerifyRegistration(second_tick_count); - EXPECT_EQ(GetCallbackCount(), 2); + EXPECT_EQ(2, GetCallbackCount()); // Reset the profile handler and verify that callback were correctly // unregistered and timer/signal are disabled. @@ -409,7 +449,7 @@ TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) { // the signal handler and reset the timer sharing state in the Profile // Handler. ProfileHandlerReset(); - EXPECT_EQ(GetCallbackCount(), 0); + EXPECT_EQ(0, GetCallbackCount()); VerifyDisabled(); // Start the worker. At this time ProfileHandler doesn't know if timers are @@ -417,14 +457,14 @@ TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) { StartWorker(); // Register a callback and check that profile ticks are being delivered. int tick_count; - ProfileHandlerRegisterCallback(TickCounter, &tick_count); - EXPECT_EQ(GetCallbackCount(), 1); + RegisterCallback(&tick_count); + EXPECT_EQ(1, GetCallbackCount()); VerifyRegistration(tick_count); // Register a second thread and verify that timer and signal handler are // correctly enabled. 
RegisterThread(); - EXPECT_EQ(GetCallbackCount(), 1); + EXPECT_EQ(1, GetCallbackCount()); EXPECT_TRUE(IsTimerEnabled()); EXPECT_TRUE(IsSignalEnabled()); } diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh index 73be680..5766f2e 100644 --- a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh +++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh @@ -241,10 +241,16 @@ VerifyIdentical p12 "$PROFILER1_REALNAME" p13 "" || RegisterFailure >"$TMPDIR/p15" 2>/dev/null || RegisterFailure VerifyIdentical p14 "$PROFILER3_REALNAME" p15 "" || RegisterFailure +# Test using ITIMER_REAL instead of ITIMER_PROF. +env CPUPROFILE_REALTIME=1 "$PROFILER3" 5 2 "$TMPDIR/p16" || RegisterFailure +env CPUPROFILE_REALTIME=1 "$PROFILER3" 10 2 "$TMPDIR/p17" || RegisterFailure +VerifySimilar p16 "$PROFILER3_REALNAME" p17 "$PROFILER3_REALNAME" 2 + + # Make sure that when we have a process with a fork, the profiles don't # clobber each other -CPUPROFILE="$TMPDIR/p6" "$PROFILER1" 1 -2 || RegisterFailure -n=`ls $TMPDIR/p6* | wc -l` +CPUPROFILE="$TMPDIR/pfork" "$PROFILER1" 1 -2 || RegisterFailure +n=`ls $TMPDIR/pfork* | wc -l` if [ $n != 3 ]; then echo "FORK test FAILED: expected 3 profiles (for main + 2 children), found $n" num_failures=`expr $num_failures + 1` diff --git a/third_party/tcmalloc/chromium/src/tests/sampling_test.sh b/third_party/tcmalloc/chromium/src/tests/sampling_test.sh index 149d27b..8c96bc1 100644 --- a/third_party/tcmalloc/chromium/src/tests/sampling_test.sh +++ b/third_party/tcmalloc/chromium/src/tests/sampling_test.sh @@ -62,10 +62,10 @@ if ! cat "$SAMPLING_TEST_BINARY" >/dev/null 2>&1; then SAMPLING_TEST_BINARY="$SAMPLING_TEST_BINARY".exe fi -die() { - echo "FAILED" - echo "reason:" - echo "$@" +die() { # runs the command given as arguments, and then dies. + echo "FAILED. 
Output from $@" + echo "----" + "$@" echo "----" exit 1 } @@ -79,16 +79,16 @@ mkdir "$OUTDIR" || die "Unable to create $OUTDIR" # 50M to 99M. "$SAMPLING_TEST" "$OUTDIR/out" -echo -n "Testing heap output..." +echo "Testing heap output..." "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \ | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ - || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"` + || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" echo "OK" -echo -n "Testing growth output..." +echo "Testing growth output..." "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \ | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ - || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"` + || die "$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" echo "OK" echo "PASS" diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc index 713fbe1..25bfd6a 100644 --- a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc +++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc @@ -124,6 +124,9 @@ using std::vector; using std::string; +DECLARE_double(tcmalloc_release_rate); +DECLARE_int32(max_free_queue_size); // in debugallocation.cc + namespace testing { static const int FLAGS_numtests = 50000; @@ -747,6 +750,177 @@ static void TestHugeThreadCache() { delete[] array; } +namespace { + +struct RangeCallbackState { + uintptr_t ptr; + base::MallocRange::Type expected_type; + size_t min_size; + bool matched; +}; + +static void RangeCallback(void* arg, const base::MallocRange* r) { + RangeCallbackState* state = reinterpret_cast<RangeCallbackState*>(arg); + if (state->ptr >= r->address && + state->ptr < r->address + r->length) { + CHECK_EQ(r->type, state->expected_type); + CHECK_GE(r->length, state->min_size); + state->matched = true; + } +} + +// Check that at least one of the 
callbacks from Ranges() contains +// the specified address with the specified type, and has size +// >= min_size. +static void CheckRangeCallback(void* ptr, base::MallocRange::Type type, + size_t min_size) { + RangeCallbackState state; + state.ptr = reinterpret_cast<uintptr_t>(ptr); + state.expected_type = type; + state.min_size = min_size; + state.matched = false; + MallocExtension::instance()->Ranges(&state, RangeCallback); + CHECK(state.matched); +} + +} + +static void TestRanges() { + static const int MB = 1048576; + void* a = malloc(MB); + void* b = malloc(MB); + CheckRangeCallback(a, base::MallocRange::INUSE, MB); + CheckRangeCallback(b, base::MallocRange::INUSE, MB); + free(a); + CheckRangeCallback(a, base::MallocRange::FREE, MB); + CheckRangeCallback(b, base::MallocRange::INUSE, MB); + MallocExtension::instance()->ReleaseFreeMemory(); + CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB); + CheckRangeCallback(b, base::MallocRange::INUSE, MB); + free(b); + CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB); + CheckRangeCallback(b, base::MallocRange::FREE, MB); +} + +#ifndef DEBUGALLOCATION +static size_t GetUnmappedBytes() { + size_t bytes; + CHECK(MallocExtension::instance()->GetNumericProperty( + "tcmalloc.pageheap_unmapped_bytes", &bytes)); + return bytes; +} +#endif + +static void TestReleaseToSystem() { + // Debug allocation mode adds overhead to each allocation which + // messes up all the equality tests here. I just disable the + // teset in this mode. TODO(csilvers): get it to work for debugalloc? +#ifndef DEBUGALLOCATION + const double old_tcmalloc_release_rate = FLAGS_tcmalloc_release_rate; + FLAGS_tcmalloc_release_rate = 0; + + static const int MB = 1048576; + void* a = malloc(MB); + void* b = malloc(MB); + MallocExtension::instance()->ReleaseFreeMemory(); + size_t starting_bytes = GetUnmappedBytes(); + + // Calling ReleaseFreeMemory() a second time shouldn't do anything. 
+ MallocExtension::instance()->ReleaseFreeMemory(); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + // ReleaseToSystem shouldn't do anything either. + MallocExtension::instance()->ReleaseToSystem(MB); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + free(a); + + // The span to release should be 1MB. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + // Should do nothing since the previous call released too much. + MallocExtension::instance()->ReleaseToSystem(MB/4); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + free(b); + + // Use up the extra MB/4 bytes from 'a' and also release 'b'. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + // Should do nothing since the previous call released too much. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + // Nothing else to release. + MallocExtension::instance()->ReleaseFreeMemory(); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + a = malloc(MB); + free(a); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + // Releasing less than a page should still trigger a release. + MallocExtension::instance()->ReleaseToSystem(1); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + FLAGS_tcmalloc_release_rate = old_tcmalloc_release_rate; +#endif // #ifndef DEBUGALLOCATION +} + +bool g_no_memory = false; +std::new_handler g_old_handler = NULL; +static void OnNoMemory() { + g_no_memory = true; + std::set_new_handler(g_old_handler); +} + +static void TestSetNewMode() { + int old_mode = tc_set_new_mode(1); + + // DebugAllocation will try to catch huge allocations. We need to avoid this + // by requesting a smaller malloc block, that still can't be satisfied. 
+ const size_t kHugeRequest = kTooBig - 1024; + + g_old_handler = std::set_new_handler(&OnNoMemory); + g_no_memory = false; + void* ret = malloc(kHugeRequest); + EXPECT_EQ(NULL, ret); + EXPECT_TRUE(g_no_memory); + + g_old_handler = std::set_new_handler(&OnNoMemory); + g_no_memory = false; + ret = calloc(1, kHugeRequest); + EXPECT_EQ(NULL, ret); + EXPECT_TRUE(g_no_memory); + + g_old_handler = std::set_new_handler(&OnNoMemory); + g_no_memory = false; + ret = realloc(NULL, kHugeRequest); + EXPECT_EQ(NULL, ret); + EXPECT_TRUE(g_no_memory); + + // Not really important, but must be small enough such that kAlignment + + // kHugeRequest does not overflow. + const int kAlignment = 1 << 5; + + g_old_handler = std::set_new_handler(&OnNoMemory); + g_no_memory = false; + ret = memalign(kAlignment, kHugeRequest); + EXPECT_EQ(NULL, ret); + EXPECT_TRUE(g_no_memory); + + g_old_handler = std::set_new_handler(&OnNoMemory); + g_no_memory = false; + EXPECT_EQ(ENOMEM, + posix_memalign(&ret, kAlignment, kHugeRequest)); + EXPECT_EQ(NULL, ret); + EXPECT_TRUE(g_no_memory); + + tc_set_new_mode(old_mode); +} + static int RunAllTests(int argc, char** argv) { // Optional argv[1] is the seed AllocatorState rnd(argc > 1 ? 
atoi(argv[1]) : 100); @@ -1023,6 +1197,9 @@ static int RunAllTests(int argc, char** argv) { #endif TestHugeThreadCache(); + TestRanges(); + TestReleaseToSystem(); + TestSetNewMode(); return 0; } @@ -1032,6 +1209,10 @@ static int RunAllTests(int argc, char** argv) { using testing::RunAllTests; int main(int argc, char** argv) { +#ifdef DEBUGALLOCATION // debug allocation takes forever for huge allocs + FLAGS_max_free_queue_size = 0; // return freed blocks to tcmalloc immediately +#endif + RunAllTests(argc, argv); // Test tc_version() diff --git a/third_party/tcmalloc/chromium/src/thread_cache.cc b/third_party/tcmalloc/chromium/src/thread_cache.cc index fd44a70..64f4deb 100644 --- a/third_party/tcmalloc/chromium/src/thread_cache.cc +++ b/third_party/tcmalloc/chromium/src/thread_cache.cc @@ -299,12 +299,6 @@ int ThreadCache::GetSamplePeriod() { } void ThreadCache::InitModule() { - // There is a slight potential race here because of double-checked - // locking idiom. However, as long as the program does a small - // allocation before switching to multi-threaded mode, we will be - // fine. We increase the chances of doing such a small allocation - // by doing one in the constructor of the module_enter_exit_hook - // object declared below. SpinLockHolder h(Static::pageheap_lock()); if (!phinited) { Static::InitStaticVars(); diff --git a/third_party/tcmalloc/chromium/src/windows/config.h b/third_party/tcmalloc/chromium/src/windows/config.h index b3a6852..99de82c 100644 --- a/third_party/tcmalloc/chromium/src/windows/config.h +++ b/third_party/tcmalloc/chromium/src/windows/config.h @@ -175,6 +175,9 @@ /* define if your compiler has __attribute__ */ #undef HAVE___ATTRIBUTE__ +/* Define to 1 if compiler supports __environ */ +#undef HAVE___ENVIRON + /* Define to 1 if the system has the type `__int64'. 
*/ #define HAVE___INT64 1 diff --git a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h index 15a2e19..4b97b15 100644 --- a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h +++ b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h @@ -90,6 +90,7 @@ extern "C" { #endif #ifdef __cplusplus + PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW; PERFTOOLS_DLL_DECL void* tc_new(size_t size); PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); diff --git a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc index d0e54ff..c1ed37f 100644 --- a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc +++ b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc @@ -72,9 +72,21 @@ #error This file is intended for patching allocators - use override_functions.cc instead. #endif +// We use psapi. Non-MSVC systems will have to link this in themselves. +#ifdef _MSC_VER +#pragma comment(lib, "Psapi.lib") +#endif + +// Make sure we always use the 'old' names of the psapi functions. +#ifndef PSAPI_VERSION +#define PSAPI_VERSION 1 +#endif + #include <windows.h> #include <malloc.h> // for _msize and _expand -#include <tlhelp32.h> // for CreateToolhelp32Snapshot() +#include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc. +#include <set> +#include <map> #include <vector> #include <base/logging.h> #include "base/spinlock.h" @@ -82,10 +94,10 @@ #include "malloc_hook-inl.h" #include "preamble_patcher.h" -// MinGW doesn't seem to define this, perhaps some windowsen don't either. 
-#ifndef TH32CS_SNAPMODULE32 -#define TH32CS_SNAPMODULE32 0 -#endif +// The maximum number of modules we allow to be in one executable +const int kMaxModules = 8182; +// The maximum size of a module's basename +const int kMaxModuleNameSize = 256; // These are hard-coded, unfortunately. :-( They are also probably // compiler specific. See get_mangled_names.cc, in this directory, @@ -117,6 +129,8 @@ typedef void (*GenericFnPtr)(); using sidestep::PreamblePatcher; +struct ModuleEntryCopy; // defined below + // These functions are how we override the memory allocation // functions, just like tcmalloc.cc and malloc_hook.cc do. @@ -131,33 +145,19 @@ class LibcInfo { LibcInfo() { memset(this, 0, sizeof(*this)); // easiest way to initialize the array } - bool SameAs(const LibcInfo& that) const { - return (is_valid() && - module_base_address_ == that.module_base_address_ && - module_base_size_ == that.module_base_size_); - } - bool SameAsME32(const MODULEENTRY32& me32) const { - return (is_valid() && - module_base_address_ == me32.modBaseAddr && - module_base_size_ == me32.modBaseSize); - } + bool SameAs(const LibcInfo& that) const; + bool SameAsModuleEntry(const ModuleEntryCopy& module_entry) const; + bool patched() const { return is_valid() && module_name_[0] != '\0'; } const char* module_name() const { return is_valid() ? module_name_ : ""; } void set_is_valid(bool b) { is_valid_ = b; } - // These shouldn't have to be public, since only subclasses of - // LibcInfo need it, but they do. Maybe something to do with - // templates. Shrug. - bool is_valid() const { return is_valid_; } - GenericFnPtr windows_fn(int ifunction) const { - return windows_fn_[ifunction]; - } - // Populates all the windows_fn_[] vars based on our module info. // Returns false if windows_fn_ is all NULL's, because there's - // nothing to patch. Also populates the me32 info. - bool PopulateWindowsFn(const MODULEENTRY32& me32); + // nothing to patch. 
Also populates the rest of the module_entry + // info, such as the module's name. + bool PopulateWindowsFn(const ModuleEntryCopy& module_entry); protected: void CopyFrom(const LibcInfo& that) { @@ -207,7 +207,25 @@ class LibcInfo { const void *module_base_address_; size_t module_base_size_; - char module_name_[MAX_MODULE_NAME32 + 1]; + char module_name_[kMaxModuleNameSize]; + + public: + // These shouldn't have to be public, since only subclasses of + // LibcInfo need it, but they do. Maybe something to do with + // templates. Shrug. I hide them down here so users won't see + // them. :-) (OK, I also need to define ctrgProcAddress late.) + bool is_valid() const { return is_valid_; } + GenericFnPtr windows_fn(int ifunction) const { + return windows_fn_[ifunction]; + } + // These three are needed by ModuleEntryCopy. + static const int ctrgProcAddress = kNumFunctions; + static GenericFnPtr static_fn(int ifunction) { + return static_fn_[ifunction]; + } + static const char* const function_name(int ifunction) { + return function_name_[ifunction]; + } }; // Template trickiness: logically, a LibcInfo would include @@ -265,6 +283,43 @@ template<int> class LibcInfoWithPatchFunctions : public LibcInfo { // But they seem pretty obscure, and I'm fine not overriding them for now. }; +// This is a subset of MODDULEENTRY32, that we need for patching. +struct ModuleEntryCopy { + LPVOID modBaseAddr; + DWORD modBaseSize; + HMODULE hModule; + TCHAR szModule[kMaxModuleNameSize]; + // This is not part of MODDULEENTRY32, but is needed to avoid making + // windows syscalls while we're holding patch_all_modules_lock (see + // lock-inversion comments at patch_all_modules_lock definition, below). 
+ GenericFnPtr rgProcAddresses[LibcInfo::ctrgProcAddress]; + + ModuleEntryCopy() { + modBaseAddr = NULL; + modBaseSize = 0; + hModule = NULL; + strcpy(szModule, "<executable>"); + for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) + rgProcAddresses[i] = LibcInfo::static_fn(i); + } + ModuleEntryCopy(HANDLE hprocess, HMODULE hmodule, const MODULEINFO& mi) { + this->modBaseAddr = mi.lpBaseOfDll; + this->modBaseSize = mi.SizeOfImage; + this->hModule = hmodule; + // TODO(csilvers): we could make more efficient by calling these + // lazily (not until the vars are needed, which is often never). + // However, there's tricky business with calling windows functions + // inside the patch_all_modules_lock (see the lock inversion + // comments with the patch_all_modules_lock definition, below), so + // it's safest to do it all here, where no lock is needed. + ::GetModuleBaseNameA(hprocess, hmodule, + this->szModule, sizeof(this->szModule)); + for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) + rgProcAddresses[i] = + (GenericFnPtr)::GetProcAddress(hModule, LibcInfo::function_name(i)); + } +}; + // This class is easier because there's only one of them. class WindowsInfo { public: @@ -318,6 +373,11 @@ class WindowsInfo { // If you run out, just add a few more to the array. You'll also need // to update the switch statement in PatchOneModule(), and the list in // UnpatchWindowsFunctions(). +// main_executable and main_executable_windows are two windows into +// the same executable. One is responsible for patching the libc +// routines that live in the main executable (if any) to use tcmalloc; +// the other is responsible for patching the windows routines like +// HeapAlloc/etc to use tcmalloc. 
static LibcInfoWithPatchFunctions<0> main_executable; static LibcInfoWithPatchFunctions<1> libc1; static LibcInfoWithPatchFunctions<2> libc2; @@ -327,7 +387,7 @@ static LibcInfoWithPatchFunctions<5> libc5; static LibcInfoWithPatchFunctions<6> libc6; static LibcInfoWithPatchFunctions<7> libc7; static LibcInfoWithPatchFunctions<8> libc8; -static LibcInfo* module_libcs[] = { +static LibcInfo* g_module_libcs[] = { &libc1, &libc2, &libc3, &libc4, &libc5, &libc6, &libc7, &libc8 }; static WindowsInfo main_executable_windows; @@ -419,24 +479,28 @@ const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = { { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary }, }; -bool LibcInfo::PopulateWindowsFn(const MODULEENTRY32& me32) { - module_base_address_ = me32.modBaseAddr; - module_base_size_ = me32.modBaseSize; - strcpy(module_name_, me32.szModule); +bool LibcInfo::SameAs(const LibcInfo& that) const { + return (is_valid() && + module_base_address_ == that.module_base_address_ && + module_base_size_ == that.module_base_size_); +} + +bool LibcInfo::SameAsModuleEntry(const ModuleEntryCopy& module_entry) const { + return (is_valid() && + module_base_address_ == module_entry.modBaseAddr && + module_base_size_ == module_entry.modBaseSize); +} +bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) { // First, store the location of the function to patch before // patching it. If none of these functions are found in the module, // then this module has no libc in it, and we just return false. for (int i = 0; i < kNumFunctions; i++) { if (!function_name_[i]) // we can turn off patching by unsetting name continue; - GenericFnPtr fn = NULL; - if (me32.hModule == NULL) { // used for the main executable - // This is used only for a statically-linked-in libc. 
- fn = static_fn_[i]; - } else { - fn = (GenericFnPtr)::GetProcAddress(me32.hModule, function_name_[i]); - } + // The ::GetProcAddress calls were done in the ModuleEntryCopy + // constructor, so we don't have to make any windows calls here. + const GenericFnPtr fn = module_entry.rgProcAddresses[i]; if (fn) { windows_fn_[i] = PreamblePatcher::ResolveTarget(fn); } @@ -462,9 +526,9 @@ bool LibcInfo::PopulateWindowsFn(const MODULEENTRY32& me32) { // need to set our windows_fn to NULL, to avoid double-patching. for (int ifn = 0; ifn < kNumFunctions; ifn++) { for (int imod = 0; - imod < sizeof(module_libcs)/sizeof(*module_libcs); imod++) { - if (module_libcs[imod]->is_valid() && - this->windows_fn(ifn) == module_libcs[imod]->windows_fn(ifn)) { + imod < sizeof(g_module_libcs)/sizeof(*g_module_libcs); imod++) { + if (g_module_libcs[imod]->is_valid() && + this->windows_fn(ifn) == g_module_libcs[imod]->windows_fn(ifn)) { windows_fn_[ifn] = NULL; } } @@ -487,17 +551,33 @@ bool LibcInfo::PopulateWindowsFn(const MODULEENTRY32& me32) { // haven't needed to yet. CHECK(windows_fn_[kFree]); CHECK(windows_fn_[kRealloc]); + + // OK, we successfully patched. Let's store our member information. + module_base_address_ = module_entry.modBaseAddr; + module_base_size_ = module_entry.modBaseSize; + strcpy(module_name_, module_entry.szModule); return true; } template<int T> bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) { - CopyFrom(me_info); // copies the me32 and the windows_fn_ array + CopyFrom(me_info); // copies the module_entry and the windows_fn_ array for (int i = 0; i < kNumFunctions; i++) { - if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) + if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) { + // if origstub_fn_ is not NULL, it's left around from a previous + // patch. We need to set it to NULL for the new Patch call. 
+ // Since we've patched Unpatch() not to delete origstub_fn_ (it + // causes problems in some contexts, though obviously not this + // one), we should delete it now, before setting it to NULL. + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + delete[] (char*)(origstub_fn_[i]); + origstub_fn_[i] = NULL; // Patch() will fill this in CHECK_EQ(sidestep::SIDESTEP_SUCCESS, PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i], &origstub_fn_[i])); + } } set_is_valid(true); return true; @@ -526,6 +606,16 @@ void WindowsInfo::Patch() { for (int i = 0; i < kNumFunctions; i++) { function_info_[i].windows_fn = (GenericFnPtr) ::GetProcAddress(hkernel32, function_info_[i].name); + // If origstub_fn is not NULL, it's left around from a previous + // patch. We need to set it to NULL for the new Patch call. + // Since we've patched Unpatch() not to delete origstub_fn_ (it + // causes problems in some contexts, though obviously not this + // one), we should delete it now, before setting it to NULL. + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + delete[] (char*)(function_info_[i].origstub_fn); + function_info_[i].origstub_fn = NULL; // Patch() will fill this in CHECK_EQ(sidestep::SIDESTEP_SUCCESS, PreamblePatcher::Patch(function_info_[i].windows_fn, function_info_[i].perftools_fn, @@ -547,10 +637,10 @@ void WindowsInfo::Unpatch() { // You should hold the patch_all_modules_lock when calling this. void PatchOneModuleLocked(const LibcInfo& me_info) { // Double-check we haven't seen this module before. 
- for (int i = 0; i < sizeof(module_libcs)/sizeof(*module_libcs); i++) { - if (module_libcs[i]->SameAs(me_info)) { - fprintf(stderr, "%s:%d: FATAL ERROR: %s double-patched somehow.\n", - __FILE__, __LINE__, module_libcs[i]->module_name()); + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (g_module_libcs[i]->SameAs(me_info)) { + fprintf(stderr, "%s:%d: FATAL PERFTOOLS ERROR: %s double-patched somehow.\n", + __FILE__, __LINE__, g_module_libcs[i]->module_name()); CHECK(false); } } @@ -558,8 +648,8 @@ void PatchOneModuleLocked(const LibcInfo& me_info) { // is where we're sad that each libcX has a different type, so we // can't use an array; instead, we have to use a switch statement. // Patch() returns false if there were no libc functions in the module. - for (int i = 0; i < sizeof(module_libcs)/sizeof(*module_libcs); i++) { - if (!module_libcs[i]->is_valid()) { // found an empty spot to add! + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (!g_module_libcs[i]->is_valid()) { // found an empty spot to add! switch (i) { case 0: libc1.Patch(me_info); return; case 1: libc2.Patch(me_info); return; @@ -572,88 +662,104 @@ void PatchOneModuleLocked(const LibcInfo& me_info) { } } } - printf("ERROR: Too many modules containing libc in this executable\n"); + printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n"); } void PatchMainExecutableLocked() { if (main_executable.patched()) return; // main executable has already been patched - MODULEENTRY32 fake_me32; // we make a fake one to pass into Patch() - fake_me32.modBaseAddr = NULL; - fake_me32.modBaseSize = 0; - strcpy(fake_me32.szModule, "<executable>"); - fake_me32.hModule = NULL; - main_executable.PopulateWindowsFn(fake_me32); + ModuleEntryCopy fake_module_entry; // make a fake one to pass into Patch() + // No need to call PopulateModuleEntryProcAddresses on the main executable. 
+ main_executable.PopulateWindowsFn(fake_module_entry); main_executable.Patch(main_executable); } +// This lock is subject to a subtle and annoying lock inversion +// problem: it may interact badly with unknown internal windows locks. +// In particular, windows may be holding a lock when it calls +// LoadLibraryExW and FreeLibrary, which we've patched. We have those +// routines call PatchAllModules, which acquires this lock. If we +// make windows system calls while holding this lock, those system +// calls may need the internal windows locks that are being held in +// the call to LoadLibraryExW, resulting in deadlock. The solution is +// to be very careful not to call *any* windows routines while holding +// patch_all_modules_lock, inside PatchAllModules(). static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); // Iterates over all the modules currently loaded by the executable, // and makes sure they're all patched. For ones that aren't, we patch // them in. We also check that every module we had patched in the // past is still loaded, and update internal data structures if so. 
-void PatchAllModules() { - std::vector<LibcInfo*> modules; - bool still_loaded[sizeof(module_libcs)/sizeof(*module_libcs)] = {}; - - HANDLE hModuleSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | - TH32CS_SNAPMODULE32, - GetCurrentProcessId()); - if (hModuleSnap != INVALID_HANDLE_VALUE) { - MODULEENTRY32 me32; - me32.dwSize = sizeof(me32); - if (Module32First(hModuleSnap, &me32)) { - do { - bool module_already_loaded = false; - for (int i = 0; i < sizeof(module_libcs)/sizeof(*module_libcs); i++) { - if (module_libcs[i]->SameAsME32(me32)) { - still_loaded[i] = true; - module_already_loaded = true; - break; - } - } - if (!module_already_loaded) { - LibcInfo* libc_info = new LibcInfo; - if (libc_info->PopulateWindowsFn(me32)) - modules.push_back(libc_info); - else // means module has no libc routines - delete libc_info; - } - } while (Module32Next(hModuleSnap, &me32)); +// We return true if this PatchAllModules did any work, false else. +bool PatchAllModules() { + std::vector<ModuleEntryCopy> modules; + bool made_changes = false; + + const HANDLE hCurrentProcess = GetCurrentProcess(); + MODULEINFO mi; + DWORD cbNeeded = 0; + HMODULE hModules[kMaxModules]; // max # of modules we support in one process + if (::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), + &cbNeeded)) { + for (int i = 0; i < cbNeeded / sizeof(*hModules); ++i) { + if (i >= kMaxModules) { + printf("PERFTOOLS ERROR: Too many modules in this executable to try" + " to patch them all (if you need to, raise kMaxModules in" + " patch_functions.cc).\n"); + break; + } + if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) + modules.push_back(ModuleEntryCopy(hCurrentProcess, hModules[i], mi)); } - CloseHandle(hModuleSnap); } - // Now do the actual patching. + // Now do the actual patching and unpatching. { SpinLockHolder h(&patch_all_modules_lock); - // First, delete the modules that are no longer loaded. 
(We go first - // so we can try to open up space for the new modules we need to load.) - for (int i = 0; i < sizeof(module_libcs)/sizeof(*module_libcs); i++) { - if (!still_loaded[i]) { + for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { + if (!g_module_libcs[i]->is_valid()) + continue; + bool still_loaded = false; + for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); + it != modules.end(); ++it) { + if (g_module_libcs[i]->SameAsModuleEntry(*it)) { + // Both g_module_libcs[i] and it are still valid. Mark it by + // removing it from the vector; mark g_module_libcs[i] by + // setting a bool. + modules.erase(it); + still_loaded = true; + break; + } + } + if (!still_loaded) { + // Means g_module_libcs[i] is no longer loaded (no me32 matched). // We could call Unpatch() here, but why bother? The module // has gone away, so nobody is going to call into it anyway. - module_libcs[i]->set_is_valid(false); + g_module_libcs[i]->set_is_valid(false); + made_changes = true; } } - // Now, add in new modules that we need to load. - for (std::vector<LibcInfo*>::iterator it = modules.begin(); - it != modules.end(); ++it) { - PatchOneModuleLocked(**it); // updates num_patched_modules + // We've handled all the g_module_libcs. Now let's handle the rest + // of the module-entries: those that haven't already been loaded. + for (std::vector<ModuleEntryCopy>::const_iterator it = modules.begin(); + it != modules.end(); ++it) { + LibcInfo libc_info; + if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines + PatchOneModuleLocked(libc_info); // updates num_patched_modules + made_changes = true; + } } // Now that we've dealt with the modules (dlls), update the main // executable. We do this last because PatchMainExecutableLocked // wants to look at how other modules were patched. 
- PatchMainExecutableLocked(); - } - - for (std::vector<LibcInfo*>::iterator it = modules.begin(); - it != modules.end(); ++it) { - delete *it; + if (!main_executable.patched()) { + PatchMainExecutableLocked(); + made_changes = true; + } } + return made_changes; } @@ -662,6 +768,8 @@ void PatchAllModules() { // --------------------------------------------------------------------- // PatchWindowsFunctions() // This is the function that is exposed to the outside world. +// It should be called before the program becomes multi-threaded, +// since main_executable_windows.Patch() is not thread-safe. // --------------------------------------------------------------------- void PatchWindowsFunctions() { @@ -726,7 +834,7 @@ void UnpatchWindowsFunctions() { template<int T> void* LibcInfoWithPatchFunctions<T>::Perftools_malloc(size_t size) __THROW { - void* result = do_malloc(size); + void* result = do_malloc_or_cpp_alloc(size); MallocHook::InvokeNewHook(result, size); return result; } @@ -745,7 +853,7 @@ template<int T> void* LibcInfoWithPatchFunctions<T>::Perftools_realloc( void* old_ptr, size_t new_size) __THROW { if (old_ptr == NULL) { - void* result = do_malloc(new_size); + void* result = do_malloc_or_cpp_alloc(new_size); MallocHook::InvokeNewHook(result, new_size); return result; } @@ -852,7 +960,7 @@ template<int T> void* LibcInfoWithPatchFunctions<T>::Perftools__aligned_malloc(size_t size, size_t alignment) __THROW { - void* result = do_memalign(alignment, size); + void* result = do_memalign_or_cpp_memalign(alignment, size); MallocHook::InvokeNewHook(result, size); return result; } @@ -927,7 +1035,7 @@ HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) function_info_[kLoadLibraryExW].origstub_fn)( lpFileName, hFile, dwFlags); - PatchAllModules(); // this will patch any newly loaded libraries + PatchAllModules(); return rv; } diff --git 
a/third_party/tcmalloc/chromium/src/windows/preamble_patcher.cc b/third_party/tcmalloc/chromium/src/windows/preamble_patcher.cc index 62b6e7f..78a4763 100644 --- a/third_party/tcmalloc/chromium/src/windows/preamble_patcher.cc +++ b/third_party/tcmalloc/chromium/src/windows/preamble_patcher.cc @@ -316,7 +316,7 @@ SideStepError PreamblePatcher::Unpatch(void* target_function, // Stub is now useless so delete it. // [csilvers: Commented out for perftools because it causes big problems // when we're unpatching malloc. We just let this live on as a leak.] - //delete original_function_stub; + //delete [] reinterpret_cast<unsigned char*>(original_function_stub); // Restore the protection of the first MAX_PREAMBLE_STUB_SIZE bytes of // target to what they were before we started goofing around. |