-rw-r--r--  tools/memory_watcher/call_stack.cc     |  75
-rw-r--r--  tools/memory_watcher/call_stack.h      |  69
-rw-r--r--  tools/memory_watcher/dllmain.cc        |   3
-rw-r--r--  tools/memory_watcher/memory_hook.cc    |   9
-rw-r--r--  tools/memory_watcher/memory_hook.h     |   4
-rw-r--r--  tools/memory_watcher/memory_watcher.cc | 158
-rw-r--r--  tools/memory_watcher/memory_watcher.h  |  37
7 files changed, 248 insertions, 107 deletions
diff --git a/tools/memory_watcher/call_stack.cc b/tools/memory_watcher/call_stack.cc
index 490d3f5..460695d 100644
--- a/tools/memory_watcher/call_stack.cc
+++ b/tools/memory_watcher/call_stack.cc
@@ -33,16 +33,13 @@ typedef DWORD64 (__stdcall *t_SymLoadModule64)(HANDLE, HANDLE, PCSTR,
 typedef BOOL (__stdcall *t_SymGetModuleInfo64)(HANDLE, DWORD64,
                                                PIMAGEHLP_MODULE64);
 
-// According to http://msdn2.microsoft.com/en-us/library/ms680650(VS.85).aspx
-// "All DbgHelp functions, such as this one, are single threaded. Therefore,
-// calls from more than one thread to this function will likely result in
-// unexpected behavior or memory corruption. To avoid this, you must
-// synchromize all concurrent calls from one thread to this function."
-//
-// dbghelp_lock_ is used to serialize access across all calls to the DbgHelp
-// library. This may be overly conservative (serializing them all together),
-// but does guarantee correctness.
-static Lock dbghelp_lock_;
+// static
+Lock CallStack::dbghelp_lock_;
+// static
+bool CallStack::dbghelp_loaded_ = false;
+// static
+DWORD CallStack::active_thread_id_ = 0;
 
 static t_StackWalk64 pStackWalk64 = NULL;
 static t_SymCleanup pSymCleanup = NULL;
@@ -62,15 +59,21 @@ static t_SymLoadModule64 pSymLoadModule64 = NULL;
     if (p##name == NULL) return false; \
   } while (0)
 
-// Dynamically load the DbgHelp library and supporting routines that we
-// will use.
-static bool LoadDbgHelp() {
-  static bool loaded = false;
-  if (!loaded) {
+// This code has to be VERY careful to not induce any allocations, as memory
+// watching code may cause recursion, which may obscure the stack for the truly
+// offending issue. We use this function to break into a debugger, and it
+// is guaranteed to not do any allocations (in fact, not do anything).
+static void UltraSafeDebugBreak() {
+  _asm int(3);
+}
+
+// static
+bool CallStack::LoadDbgHelp() {
+  if (!dbghelp_loaded_) {
     AutoLock Lock(dbghelp_lock_);
 
     // Re-check if we've loaded successfully now that we have the lock.
-    if (loaded)
+    if (dbghelp_loaded_)
       return true;
 
     // Load dbghelp.dll, and obtain pointers to the exported functions that we
@@ -89,12 +92,13 @@ static bool LoadDbgHelp() {
       LOADPROC(dbghelp_module, SymGetModuleInfo64);
       LOADPROC(dbghelp_module, SymGetSearchPath);
       LOADPROC(dbghelp_module, SymLoadModule64);
-      loaded = true;
+      dbghelp_loaded_ = true;
     } else {
+      UltraSafeDebugBreak();
       return false;
     }
   }
-  return loaded;
+  return dbghelp_loaded_;
 }
 
 // Load the symbols for generating stack traces.
@@ -168,9 +172,14 @@ CallStack::CallStack() {
   frame_count_ = 0;
   hash_ = 0;
   id_ = InterlockedIncrement(&callstack_id);
+  valid_ = false;
+
+  if (!dbghelp_loaded_) {
+    UltraSafeDebugBreak();  // Initialize should have been called.
+    return;
+  }
 
-  LoadDbgHelp();
-  CHECK(GetStackTrace());
+  GetStackTrace();
 }
 
 bool CallStack::IsEqual(const CallStack &target) {
@@ -202,7 +211,17 @@ void CallStack::AddFrame(DWORD_PTR pc) {
   hash_ = hash_ ^ (pc >> 16);
 }
 
+bool CallStack::LockedRecursionDetected() const {
+  if (!active_thread_id_) return false;
+  DWORD thread_id = GetCurrentThreadId();
+  // TODO(jar): Perchance we should use atomic access to member.
+  return thread_id == active_thread_id_;
+}
+
 bool CallStack::GetStackTrace() {
+  if (LockedRecursionDetected())
+    return false;
+
   // Initialize the context record.
   CONTEXT context;
   memset(&context, 0, sizeof(context));
@@ -234,7 +253,7 @@ bool CallStack::GetStackTrace() {
   // Walk the stack.
   unsigned int count = 0;
   {
-    AutoLock lock(dbghelp_lock_);
+    AutoDbgHelpLock thread_monitoring_lock;
     while (count < kMaxTraceFrames) {
       count++;
@@ -255,11 +274,12 @@ bool CallStack::GetStackTrace() {
       // Push this frame's program counter onto the provided CallStack.
       AddFrame((DWORD_PTR)frame.AddrPC.Offset);
     }
+    valid_ = true;
   }
 
   return true;
 }
 
-void CallStack::ToString(std::string* output) {
+void CallStack::ToString(PrivateAllocatorString* output) {
   static const int kStackWalkMaxNameLen = MAX_SYM_NAME;
   HANDLE current_process = GetCurrentProcess();
@@ -272,12 +292,12 @@ void CallStack::ToString(std::string* output) {
 
   // Iterate through each frame in the call stack.
   for (int32 index = 0; index < frame_count_; index++) {
-    std::string line;
+    PrivateAllocatorString line;
 
     DWORD_PTR intruction_pointer = frame(index);
 
     SymbolCache::iterator it;
-    it = symbol_cache_->find( intruction_pointer );
+    it = symbol_cache_->find(intruction_pointer);
     if (it != symbol_cache_->end()) {
       line = it->second;
     } else {
@@ -311,14 +331,17 @@ void CallStack::ToString(std::string* output) {
             strstr(symbol->Name, "Perftools_") ||
             strstr(symbol->Name, "MemoryHook::") ) {
           // Just record a blank string.
-          (*symbol_cache_)[intruction_pointer] = std::string("");
+          (*symbol_cache_)[intruction_pointer] = "";
           continue;
         }
 
         line += " ";
         line += static_cast<char*>(Line.FileName);
         line += " (";
-        line += IntToString(Line.LineNumber);
+        // TODO(jar): get something like this template to work :-/
+        // line += IntToCustomString<PrivateAllocatorString>(Line.LineNumber);
+        // ...and then delete this line, which uses std::string.
+        line += IntToString(Line.LineNumber).c_str();
         line += "): ";
         line += symbol->Name;
         line += "\n";
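For readers unfamiliar with the shape of LoadDbgHelp above: it is double-checked locking -- an unlocked fast-path read of dbghelp_loaded_, then the lock, then a re-check before doing the one-time work. A minimal standalone sketch of the same pattern (illustrative names only; std::mutex stands in for the file's Lock/AutoLock types, and on the pre-C++11 toolchain this code targeted, the unlocked first read is a deliberate, assumed-benign race):

    #include <mutex>

    static std::mutex g_load_lock;
    static bool g_loaded = false;   // Stands in for CallStack::dbghelp_loaded_.

    static bool LoadOnce() {
      if (!g_loaded) {              // Fast path: skip the lock once loaded.
        std::lock_guard<std::mutex> hold(g_load_lock);
        if (g_loaded)               // Re-check now that we hold the lock.
          return true;
        // ... one-time LoadLibrary/GetProcAddress work would go here ...
        g_loaded = true;            // Publish only after setup succeeds.
      }
      return g_loaded;
    }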
diff --git a/tools/memory_watcher/call_stack.h b/tools/memory_watcher/call_stack.h
index 5af7ed6..2c026bc 100644
--- a/tools/memory_watcher/call_stack.h
+++ b/tools/memory_watcher/call_stack.h
@@ -51,8 +51,14 @@ class CallStack {
   // every frame in each is identical to the corresponding frame in the other.
   bool IsEqual(const CallStack &target);
 
+  typedef std::basic_string<char, std::char_traits<char>,
+                            PrivateHookAllocator<char> > PrivateAllocatorString;
+
   // Convert the callstack to a string stored in output.
-  void CallStack::ToString(std::string* output);
+  void CallStack::ToString(PrivateAllocatorString* output);
+
+  // Returns true if a stack trace was actually captured for this instance.
+  bool Valid() const { return valid_; }
 
  private:
   // The maximum number of frames to trace.
@@ -68,14 +74,67 @@ class CallStack {
   // Functions for manipulating the frame list.
   void ClearFrames();
 
+  // Dynamically load the DbgHelp library and supporting routines that we
+  // will use.
+  static bool LoadDbgHelp();
+
+  static void LockDbgHelp() {
+    dbghelp_lock_.Acquire();
+    active_thread_id_ = GetCurrentThreadId();
+  }
+
+  static void UnlockDbgHelp() {
+    active_thread_id_ = 0;
+    dbghelp_lock_.Release();
+  }
+
+  class AutoDbgHelpLock {
+   public:
+    AutoDbgHelpLock() {
+      CallStack::LockDbgHelp();
+    }
+    ~AutoDbgHelpLock() {
+      CallStack::UnlockDbgHelp();
+    }
+  };
+
+  // Check to see if this thread is already processing a stack.
+  bool LockedRecursionDetected() const;
+
+  // According to http://msdn2.microsoft.com/en-us/library/ms680650(VS.85).aspx
+  // "All DbgHelp functions, such as this one, are single threaded. Therefore,
+  // calls from more than one thread to this function will likely result in
+  // unexpected behavior or memory corruption. To avoid this, you must
+  // synchronize all concurrent calls from one thread to this function."
+  //
+  // dbghelp_lock_ is used to serialize access across all calls to the DbgHelp
+  // library. This may be overly conservative (serializing them all together),
+  // but does guarantee correctness.
+  static Lock dbghelp_lock_;
+
+  // Record the fact that dbghelp has been loaded.
+  // Changes to this variable are protected by dbghelp_lock_.
+  // It will only change once... from false to true.
+  static bool dbghelp_loaded_;
+
+  // To prevent infinite recursion due to unexpected side effects in libraries,
+  // we track the thread_id of the thread currently holding the dbghelp_lock_.
+  // We avoid re-acquiring said lock and return a !valid_ instance when we
+  // detect recursion.
+  static DWORD active_thread_id_;
+
   int frame_count_;  // Current size (in frames)
   DWORD_PTR frames_[kMaxTraceFrames];
   int32 hash_;
   int32 id_;
 
+  // Indicates whether this is a valid stack.
+  // This is false if recursion precluded a real stack generation.
+  bool valid_;
+
   // Cache ProgramCounter -> Symbol lookups.
   // This cache is not thread safe.
-  typedef std::map<int32, std::string, std::less<int32>,
+  typedef std::map<int32, PrivateAllocatorString, std::less<int32>,
                    PrivateHookAllocator<int32> > SymbolCache;
   static SymbolCache* symbol_cache_;
 
@@ -88,14 +147,18 @@ class CallStack {
 // free instances.
 class AllocationStack : public CallStack {
  public:
-  AllocationStack() : next_(NULL), CallStack() {}
+  explicit AllocationStack(int32 size)
+      : next_(NULL), size_(size), CallStack() {}
 
   // We maintain a freelist of the AllocationStacks.
   void* operator new(size_t s);
   void operator delete(void*p);
 
+  int32 size() const { return size_; }
+
 private:
   AllocationStack* next_;  // Pointer used when on the freelist.
+  int32 size_;             // Size of block allocated.
 
   static AllocationStack* freelist_;
   static Lock freelist_lock_;
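The interesting piece of call_stack.h is the pairing of the DbgHelp lock with active_thread_id_: because DbgHelp calls can themselves allocate, a hooked allocation can re-enter the stack walker on the same thread, and LockedRecursionDetected lets that re-entry bail out with an invalid stack instead of deadlocking on the already-held lock. Reduced to a standalone sketch (hypothetical names; the real code uses its own Lock type and must avoid all allocation here):

    #include <windows.h>

    static CRITICAL_SECTION g_lock;   // Call InitializeCriticalSection(&g_lock) at startup.
    static DWORD g_owner_thread = 0;  // Stands in for active_thread_id_.

    // True when the calling thread already holds the guarded region, i.e. a
    // hooked allocation re-entered the stack walker on the same thread.
    static bool GuardedRecursionDetected() {
      return g_owner_thread != 0 && g_owner_thread == GetCurrentThreadId();
    }

    class ScopedGuard {  // Mirrors AutoDbgHelpLock above.
     public:
      ScopedGuard() {
        EnterCriticalSection(&g_lock);
        g_owner_thread = GetCurrentThreadId();
      }
      ~ScopedGuard() {
        g_owner_thread = 0;  // Clear before releasing, as UnlockDbgHelp does.
        LeaveCriticalSection(&g_lock);
      }
    };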
diff --git a/tools/memory_watcher/dllmain.cc b/tools/memory_watcher/dllmain.cc
index a7d69f4..f60ce8b 100644
--- a/tools/memory_watcher/dllmain.cc
+++ b/tools/memory_watcher/dllmain.cc
@@ -60,9 +60,8 @@ DWORD WINAPI ThreadMain(LPVOID) {
       case WAIT_OBJECT_0:
         if (g_memory_watcher) {
           g_memory_watcher->DumpLeaks();
-          // After dumping, we teardown.
-          ExitProcess(0);
         }
+        stopping = true;
         break;
       case WAIT_OBJECT_0 + 1:
         stopping = true;
diff --git a/tools/memory_watcher/memory_hook.cc b/tools/memory_watcher/memory_hook.cc
index 9c8bc03..2340d84 100644
--- a/tools/memory_watcher/memory_hook.cc
+++ b/tools/memory_watcher/memory_hook.cc
@@ -217,6 +217,7 @@ static LPVOID WINAPI Perftools_HeapReAlloc(HANDLE hHeap, DWORD dwFlags,
   // block via Perftools_HeapAlloc.
 
   LPVOID rv = Perftools_HeapAlloc(hHeap, dwFlags, dwBytes);
+  DCHECK_EQ((HEAP_REALLOC_IN_PLACE_ONLY & dwFlags), 0);
 
   // If there was an old buffer, now copy the data to the new buffer.
   if (lpMem != 0) {
@@ -237,8 +238,10 @@ static LPVOID WINAPI Perftools_VirtualAllocEx(HANDLE process, LPVOID address,
   if (address != NULL) {
     MEMORY_BASIC_INFORMATION info;
     CHECK(VirtualQuery(address, &info, sizeof(info)));
-    if (info.State & MEM_COMMIT)
+    if (info.State & MEM_COMMIT) {
       already_committed = true;
+      CHECK(size >= info.RegionSize);
+    }
   }
   bool reserving = (address == NULL) || (type & MEM_RESERVE);
   bool committing = !already_committed && (type & MEM_COMMIT);
@@ -355,6 +358,7 @@ static HGLOBAL WINAPI Perftools_GlobalFree(HGLOBAL hMem) {
 
 static HGLOBAL WINAPI Perftools_GlobalReAlloc(HGLOBAL hMem, SIZE_T dwBytes,
                                               UINT uFlags) {
+  // TODO(jar): [The following looks like a copy/paste typo from LocalRealloc.]
   // GlobalDiscard is a macro which calls LocalReAlloc with size 0.
   if (dwBytes == 0) {
     return patch_GlobalReAlloc()(hMem, dwBytes, uFlags);
@@ -515,6 +519,9 @@ bool MemoryHook::RegisterWatcher(MemoryObserver* watcher) {
 bool MemoryHook::UnregisterWatcher(MemoryObserver* watcher) {
   DCHECK(hooked_);
   DCHECK(global_hook_->watcher_ == watcher);
+  // TODO(jar): Changing watcher_ here is very racy. Other threads may (without
+  // a lock) be testing, and then calling through, this value. We probably
+  // can't remove this until we are single threaded.
   global_hook_->watcher_ = NULL;
 
   // For now, since there are no more watchers, unhook memory.
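The new DCHECK_EQ in Perftools_HeapReAlloc makes an implicit assumption explicit: the hook emulates HeapReAlloc as allocate-copy-free so every live block flows through the tracked Perftools_HeapAlloc path, and that emulation necessarily moves the block, which is wrong for a caller passing HEAP_REALLOC_IN_PLACE_ONLY. A stripped-down sketch of such an emulation (hypothetical helper, not the hook's actual code):

    #include <windows.h>
    #include <string.h>

    // Grow a heap block by moving it, as an alloc/copy/free shim must.
    static LPVOID ReAllocByMove(HANDLE heap, DWORD flags, LPVOID old_mem,
                                SIZE_T new_size) {
      // An in-place-only caller expects the address to be preserved, which
      // this strategy cannot honor -- hence the DCHECK in the hook above.
      if (flags & HEAP_REALLOC_IN_PLACE_ONLY)
        return NULL;
      LPVOID new_mem = HeapAlloc(heap, flags, new_size);
      if (new_mem != NULL && old_mem != NULL) {
        SIZE_T old_size = HeapSize(heap, 0, old_mem);
        memcpy(new_mem, old_mem, old_size < new_size ? old_size : new_size);
        HeapFree(heap, 0, old_mem);
      }
      return new_mem;
    }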
diff --git a/tools/memory_watcher/memory_hook.h b/tools/memory_watcher/memory_hook.h
index 1980b37..06a50ef 100644
--- a/tools/memory_watcher/memory_hook.h
+++ b/tools/memory_watcher/memory_hook.h
@@ -35,7 +35,7 @@ class PrivateHookAllocator {
 
   // Allocate memory for STL.
   pointer allocate(size_type n, const void * = 0) {
-      return reinterpret_cast<T*>(MemoryHook::Alloc(n * sizeof(T)));
+    return reinterpret_cast<T*>(MemoryHook::Alloc(n * sizeof(T)));
   }
 
   // Deallocate memory for STL.
@@ -65,6 +65,8 @@ class PrivateHookAllocator {
 // the MemoryObserver interface.
 class MemoryObserver {
  public:
+  virtual ~MemoryObserver() {}
+
   // Track a pointer. Will capture the current StackTrace.
   virtual void OnTrack(HANDLE heap, int32 id, int32 size) = 0;
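PrivateHookAllocator is why the watcher's bookkeeping containers (the maps and strings above) never touch the hooked heap: any allocation they made through the normal allocator would re-enter OnTrack. A minimal C++03-style allocator has roughly this shape (a sketch only -- malloc/free stand in for MemoryHook::Alloc/Free, and the remaining members a conforming allocator needs, such as construct/destroy and comparison operators, are omitted):

    #include <cstdlib>

    template <class T>
    class PrivateAllocatorSketch {
     public:
      typedef T value_type;
      typedef T* pointer;
      typedef size_t size_type;

      PrivateAllocatorSketch() {}
      template <class U>
      PrivateAllocatorSketch(const PrivateAllocatorSketch<U>&) {}

      // Containers instantiated as map<..., PrivateHookAllocator<int32> >
      // rebind to their internal node types through this member.
      template <class U>
      struct rebind { typedef PrivateAllocatorSketch<U> other; };

      pointer allocate(size_type n, const void* = 0) {
        // The real code calls MemoryHook::Alloc here.
        return static_cast<pointer>(std::malloc(n * sizeof(T)));
      }
      void deallocate(pointer p, size_type) {
        std::free(p);  // MemoryHook::Free in the real code.
      }
    };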
diff --git a/tools/memory_watcher/memory_watcher.cc b/tools/memory_watcher/memory_watcher.cc
index f987983..0bcab53 100644
--- a/tools/memory_watcher/memory_watcher.cc
+++ b/tools/memory_watcher/memory_watcher.cc
@@ -25,13 +25,11 @@ static StatsCounter mem_in_use_frees("MemoryInUse.Frees");
 MemoryWatcher::MemoryWatcher()
     : file_(NULL),
       hooked_(false),
-      in_track_(false),
-      block_map_size_(0) {
+      active_thread_id_(0) {
   MemoryHook::Initialize();
   CallStack::Initialize();
 
   block_map_ = new CallStackMap();
-  stack_map_ = new CallStackIdMap();
 
   // Register last - only after we're ready for notifications!
   Hook();
@@ -87,41 +85,56 @@ void MemoryWatcher::CloseLogFile() {
   }
 }
 
+bool MemoryWatcher::LockedRecursionDetected() const {
+  if (!active_thread_id_) return false;
+  DWORD thread_id = GetCurrentThreadId();
+  // TODO(jar): Perchance we should use atomic access to member.
+  return thread_id == active_thread_id_;
+}
+
 void MemoryWatcher::OnTrack(HANDLE heap, int32 id, int32 size) {
   // Don't track zeroes. It's a waste of time.
   if (size == 0)
     return;
 
+  if (LockedRecursionDetected())
+    return;
+
   // AllocationStack overrides new/delete to not allocate
   // from the main heap.
-  AllocationStack* stack = new AllocationStack();
+  AllocationStack* stack = new AllocationStack(size);
+  if (!stack->Valid()) return;  // Recursion blocked generation of stack.
+
   {
     AutoLock lock(block_map_lock_);
 
     // Ideally, we'd like to verify that the block being added
     // here is not already in our list of tracked blocks. However,
     // the lookup in our hash table is expensive and slows us too
-    // much. Uncomment this line if you think you need it.
-    //DCHECK(block_map_->find(id) == block_map_->end());
-
-    (*block_map_)[id] = stack;
-
-    CallStackIdMap::iterator it = stack_map_->find(stack->hash());
-    if (it != stack_map_->end()) {
-      it->second.size += size;
-      it->second.count++;
-    } else {
-      StackTrack tracker;
-      tracker.count = 1;
-      tracker.size = size;
-      tracker.stack = stack;
-      (*stack_map_)[stack->hash()] = tracker;
+    // much.
+    CallStackMap::iterator block_it = block_map_->find(id);
+    if (block_it != block_map_->end()) {
+#if 0  // Don't do this until stack->ToString() uses ONLY our heap.
+      active_thread_id_ = GetCurrentThreadId();
+      PrivateAllocatorString output;
+      block_it->second->ToString(&output);
+      // LOG(INFO) << "First Stack size " << stack->size() << "was\n" << output;
+      stack->ToString(&output);
+      // LOG(INFO) << "Second Stack size " << stack->size() << "was\n" << output;
+#endif  // 0
+
+      // TODO(jar): We should delete one stack, and keep the other, perhaps
+      // based on size.
+      // For now, just delete the first, and keep the second?
+      delete block_it->second;
     }
+    // TODO(jar): Perchance we should use atomic access to member.
+    active_thread_id_ = 0;  // Note: Only do this AFTER exiting above scope!
 
-    block_map_size_ += size;
+    (*block_map_)[id] = stack;
   }
 
-  mem_in_use.Set(block_map_size_);
+  mem_in_use.Add(size);
   mem_in_use_blocks.Increment();
   mem_in_use_allocs.Increment();
 }
@@ -133,46 +146,31 @@ void MemoryWatcher::OnUntrack(HANDLE heap, int32 id, int32 size) {
   if (size == 0)
     return;
 
+  if (LockedRecursionDetected())
+    return;
+
   {
     AutoLock lock(block_map_lock_);
+    active_thread_id_ = GetCurrentThreadId();
 
     // First, find the block in our block_map.
     CallStackMap::iterator it = block_map_->find(id);
     if (it != block_map_->end()) {
       AllocationStack* stack = it->second;
-      CallStackIdMap::iterator id_it = stack_map_->find(stack->hash());
-      DCHECK(id_it != stack_map_->end());
-      id_it->second.size -= size;
-      id_it->second.count--;
-      DCHECK_GE(id_it->second.count, 0);
-
-      // If there are no more callstacks with this stack, then we
-      // have cleaned up all instances, and can safely delete the
-      // StackTracker in the stack_map.
-      bool safe_to_delete = true;
-      if (id_it->second.count != 0) {
-        // See if our |StackTracker| is also using |stack|.
-        if (id_it->second.stack == stack)
-          safe_to_delete = false;  // We're still using |stack|.
-      } else {
-        // See if we skipped deleting our |StackTracker|'s |stack| earlier.
-        if (id_it->second.stack != stack)
-          delete id_it->second.stack;  // We skipped it earlier.
-        stack_map_->erase(id_it);  // Discard our StackTracker.
-      }
-
-      block_map_size_ -= size;
+      DCHECK(stack->size() == size);
       block_map_->erase(id);
-      if (safe_to_delete)
-        delete stack;
+      delete stack;
     } else {
       // Untracked item. This happens a fair amount, and it is
       // normal. A lot of time elapses during process startup
       // before the allocation routines are hooked.
+      size = 0;  // Ignore size in tallies.
     }
+    // TODO(jar): Perchance we should use atomic access to member.
+    active_thread_id_ = 0;
   }
 
-  mem_in_use.Set(block_map_size_);
+  mem_in_use.Add(-size);
   mem_in_use_blocks.Decrement();
   mem_in_use_frees.Increment();
 }
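With stack_map_ gone from the hot path, the counters switch from recomputed absolutes (mem_in_use.Set(block_map_size_)) to deltas: Add(size) on track, Add(-size) on untrack, with size zeroed for blocks that were never tracked so the tally stays balanced. The invariant in miniature (plain containers for illustration; the real code must of course avoid them inside a hook):

    #include <map>

    static long g_mem_in_use = 0;           // Stands in for the StatsCounter.
    static std::map<void*, long> g_blocks;  // Allocation id -> tracked size.

    static void Track(void* id, long size) {
      g_blocks[id] = size;
      g_mem_in_use += size;                 // mem_in_use.Add(size);
    }

    static void Untrack(void* id, long size) {
      std::map<void*, long>::iterator it = g_blocks.find(id);
      if (it != g_blocks.end())
        g_blocks.erase(it);
      else
        size = 0;  // Never tracked (allocated before hooking): no effect.
      g_mem_in_use += -size;                // mem_in_use.Add(-size);
    }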
@@ -184,28 +182,72 @@ void MemoryWatcher::SetLogName(char* log_name) {
   log_name_ = log_name;
 }
 
+// Help sort lists of stacks based on allocation cost.
+// Note: Sort based on allocation count is interesting too!
+static bool CompareCallStackIdItems(MemoryWatcher::StackTrack* left,
+                                    MemoryWatcher::StackTrack* right) {
+  return left->size > right->size;
+}
+
+
 void MemoryWatcher::DumpLeaks() {
   // We can only dump the leaks once. We'll cleanup the hooks here.
-  DCHECK(hooked_);
+  if (!hooked_)
+    return;
   Unhook();
 
   AutoLock lock(block_map_lock_);
+  active_thread_id_ = GetCurrentThreadId();
 
   OpenLogFile();
 
-  // Dump the stack map.
-  CallStackIdMap::iterator it = stack_map_->begin();
-  while (it != stack_map_->end()) {
-    fwprintf(file_, L"%d bytes, %d items (0x%x)\n",
-             it->second.size, it->second.count, it->first);
-    CallStack* stack = it->second.stack;
-    std::string output;
+  // Aggregate contributions from each allocated block on per-stack basis.
+  CallStackIdMap stack_map;
+  for (CallStackMap::iterator block_it = block_map_->begin();
+       block_it != block_map_->end(); ++block_it) {
+    AllocationStack* stack = block_it->second;
+    int32 stack_hash = stack->hash();
+    int32 alloc_block_size = stack->size();
+    CallStackIdMap::iterator it = stack_map.find(stack_hash);
+    if (it == stack_map.end()) {
+      StackTrack tracker;
+      tracker.count = 1;
+      tracker.size = alloc_block_size;
+      tracker.stack = stack;  // Temporary pointer into block_map_.
+      stack_map[stack_hash] = tracker;
+    } else {
+      it->second.count++;
+      it->second.size += alloc_block_size;
+    }
+  }
+  // Don't release lock yet, as block_map_ is still pointed into.
+
+  // Put references to StackTracks into array for sorting.
+  std::vector<StackTrack*, PrivateHookAllocator<int32> >
+      stack_tracks(stack_map.size());
+  CallStackIdMap::iterator it = stack_map.begin();
+  for (size_t i = 0; i < stack_tracks.size(); ++i) {
+    stack_tracks[i] = &(it->second);
+    ++it;
+  }
+  sort(stack_tracks.begin(), stack_tracks.end(), CompareCallStackIdItems);
+
+  int32 total_bytes = 0;
+  int32 total_blocks = 0;
+  for (size_t i = 0; i < stack_tracks.size(); ++i) {
+    StackTrack* stack_track = stack_tracks[i];
+    fwprintf(file_, L"%d bytes, %d allocs, #%d\n",
+             stack_track->size, stack_track->count, i);
+    total_bytes += stack_track->size;
+    total_blocks += stack_track->count;
+
+    CallStack* stack = stack_track->stack;
+    PrivateAllocatorString output;
     stack->ToString(&output);
     fprintf(file_, "%s", output.c_str());
-    it++;
   }
-  fprintf(file_, "Total Leaks: %d\n", block_map_->size());
-  fprintf(file_, "Total Stacks: %d\n", stack_map_->size());
-  fprintf(file_, "Total Bytes: %d\n", block_map_size_);
+  fprintf(file_, "Total Leaks: %d\n", total_blocks);
+  fprintf(file_, "Total Stacks: %d\n", stack_tracks.size());
+  fprintf(file_, "Total Bytes: %d\n", total_bytes);
   CloseLogFile();
 }
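DumpLeaks now rebuilds the per-stack aggregation only at dump time: one pass over block_map_ buckets live blocks by stack hash, then the buckets are sorted by total bytes so the most expensive stacks print first. The same aggregate-then-sort flow with standard containers (illustrative types; the real code keeps everything on the private allocator and under block_map_lock_):

    #include <algorithm>
    #include <map>
    #include <vector>

    struct Bucket { int count; long size; };  // Mirrors StackTrack.

    static bool BySizeDesc(const Bucket* a, const Bucket* b) {
      return a->size > b->size;  // Largest total allocation cost first.
    }

    // blocks: stack-hash -> block size, one entry per live allocation.
    static void RankStacks(const std::multimap<int, long>& blocks,
                           std::map<int, Bucket>* agg,
                           std::vector<Bucket*>* order) {
      for (std::multimap<int, long>::const_iterator it = blocks.begin();
           it != blocks.end(); ++it) {
        Bucket& b = (*agg)[it->first];  // Value-initialized (zeroed) on first touch.
        b.count++;
        b.size += it->second;
      }
      for (std::map<int, Bucket>::iterator it = agg->begin();
           it != agg->end(); ++it)
        order->push_back(&it->second);
      std::sort(order->begin(), order->end(), BySizeDesc);
      // (*order)[0] now names the stack with the most outstanding bytes.
    }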
diff --git a/tools/memory_watcher/memory_watcher.h b/tools/memory_watcher/memory_watcher.h
index c382cb1..9609ce2 100644
--- a/tools/memory_watcher/memory_watcher.h
+++ b/tools/memory_watcher/memory_watcher.h
@@ -23,6 +23,19 @@ class AllocationStack;
 // allocations and frees.
 class MemoryWatcher : MemoryObserver {
  public:
+  struct StackTrack {
+    CallStack* stack;
+    int count;
+    int size;
+  };
+
+  typedef std::map<int32, AllocationStack*, std::less<int32>,
+                   PrivateHookAllocator<int32> > CallStackMap;
+  typedef std::map<int32, StackTrack, std::less<int32>,
+                   PrivateHookAllocator<int32> > CallStackIdMap;
+  typedef std::basic_string<char, std::char_traits<char>,
+                            PrivateHookAllocator<char> > PrivateAllocatorString;
+
   MemoryWatcher();
   virtual ~MemoryWatcher();
 
@@ -49,32 +62,24 @@ class MemoryWatcher : MemoryObserver {
   // Unhooks our memory hooks.
   void Unhook();
 
+  // Check to see if this thread is already processing a block, and should not
+  // recurse.
+  bool LockedRecursionDetected() const;
+
   // This is for logging.
   FILE* file_;
 
-  struct StackTrack {
-    CallStack* stack;
-    int count;
-    int size;
-  };
-
   bool hooked_;  // True when this class has the memory_hooks hooked.
 
-  bool in_track_;
+  // Either 0, or else the threadID for a thread that is actively working on
+  // a stack track. Used to avoid recursive tracking.
+  DWORD active_thread_id_;
+
   Lock block_map_lock_;
-  typedef std::map<int32, AllocationStack*, std::less<int32>,
-                   PrivateHookAllocator<int32>> CallStackMap;
-  typedef std::map<int32, StackTrack, std::less<int32>,
-                   PrivateHookAllocator<int32>> CallStackIdMap;
 
   // The block_map provides quick lookups based on the allocation
   // pointer. This is important for having fast round trips through
   // malloc/free.
   CallStackMap *block_map_;
 
-  // The stack_map keeps track of the known CallStacks based on the
-  // hash of the CallStack. This is so that we can quickly aggregate
-  // like-CallStacks together.
-  CallStackIdMap *stack_map_;
-  int32 block_map_size_;
 
   // The file name for that log.
   std::string file_name_;