163 files changed, 50530 insertions, 0 deletions
diff --git a/third_party/tcmalloc/chromium/src/addressmap-inl.h b/third_party/tcmalloc/chromium/src/addressmap-inl.h new file mode 100644 index 0000000..b122f17 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/addressmap-inl.h @@ -0,0 +1,421 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// A fast map from addresses to values. Assumes that addresses are +// clustered. The main use is intended to be for heap-profiling. +// May be too memory-hungry for other uses. +// +// We use a user-defined allocator/de-allocator so that we can use +// this data structure during heap-profiling. +// +// IMPLEMENTATION DETAIL: +// +// Some default definitions/parameters: +// * Block -- aligned 128-byte region of the address space +// * Cluster -- aligned 1-MB region of the address space +// * Block-ID -- block-number within a cluster +// * Cluster-ID -- Starting address of cluster divided by cluster size +// +// We use a three-level map to represent the state: +// 1. A hash-table maps from a cluster-ID to the data for that cluster. +// 2. For each non-empty cluster we keep an array indexed by +// block-ID tht points to the first entry in the linked-list +// for the block. +// 3. At the bottom, we keep a singly-linked list of all +// entries in a block (for non-empty blocks). +// +// hash table +// +-------------+ +// | id->cluster |---> ... +// | ... | +// | id->cluster |---> Cluster +// +-------------+ +-------+ Data for one block +// | nil | +------------------------------------+ +// | ----+---|->[addr/value]-->[addr/value]-->... | +// | nil | +------------------------------------+ +// | ----+--> ... +// | nil | +// | ... | +// +-------+ +// +// Note that we require zero-bytes of overhead for completely empty +// clusters. The minimum space requirement for a cluster is the size +// of the hash-table entry plus a pointer value for each block in +// the cluster. Empty blocks impose no extra space requirement. +// +// The cost of a lookup is: +// a. 
A hash-table lookup to find the cluster +// b. An array access in the cluster structure +// c. A traversal over the linked-list for a block + +#ifndef BASE_ADDRESSMAP_INL_H_ +#define BASE_ADDRESSMAP_INL_H_ + +#include "config.h" +#include <stddef.h> +#include <string.h> +#if defined HAVE_STDINT_H +#include <stdint.h> // to get uint16_t (ISO naming madness) +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place uint16_t might be defined +#else +#include <sys/types.h> // our last best hope +#endif + +// This class is thread-unsafe -- that is, instances of this class can +// not be accessed concurrently by multiple threads -- because the +// callback function for Iterate() may mutate contained values. If the +// callback functions you pass do not mutate their Value* argument, +// AddressMap can be treated as thread-compatible -- that is, it's +// safe for multiple threads to call "const" methods on this class, +// but not safe for one thread to call const methods on this class +// while another thread is calling non-const methods on the class. +template <class Value> +class AddressMap { + public: + typedef void* (*Allocator)(size_t size); + typedef void (*DeAllocator)(void* ptr); + typedef const void* Key; + + // Create an AddressMap that uses the specified allocator/deallocator. + // The allocator/deallocator should behave like malloc/free. + // For instance, the allocator does not need to return initialized memory. + AddressMap(Allocator alloc, DeAllocator dealloc); + ~AddressMap(); + + // If the map contains an entry for "key", return it. Else return NULL. + inline const Value* Find(Key key) const; + inline Value* FindMutable(Key key); + + // Insert <key,value> into the map. Any old value associated + // with key is forgotten. + void Insert(Key key, Value value); + + // Remove any entry for key in the map. If an entry was found + // and removed, stores the associated value in "*removed_value" + // and returns true. Else returns false. + bool FindAndRemove(Key key, Value* removed_value); + + // Similar to Find but we assume that keys are addresses of non-overlapping + // memory ranges whose sizes are given by size_func. + // If the map contains a range into which "key" points + // (at its start or inside of it, but not at the end), + // return the address of the associated value + // and store its key in "*res_key". + // Else return NULL. + // max_size specifies largest range size possibly in existence now. + typedef size_t (*ValueSizeFunc)(const Value& v); + const Value* FindInside(ValueSizeFunc size_func, size_t max_size, + Key key, Key* res_key); + + // Iterate over the address map calling 'callback' + // for all stored key-value pairs and passing 'arg' to it. + // We don't use full Closure/Callback machinery not to add + // unnecessary dependencies to this class with low-level uses. + template<class Type> + inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const; + + private: + typedef uintptr_t Number; + + // The implementation assumes that addresses inserted into the map + // will be clustered. We take advantage of this fact by splitting + // up the address-space into blocks and using a linked-list entry + // for each block. + + // Size of each block. There is one linked-list for each block, so + // do not make the block-size too big. Oterwise, a lot of time + // will be spent traversing linked lists. 
+ static const int kBlockBits = 7; + static const int kBlockSize = 1 << kBlockBits; + + // Entry kept in per-block linked-list + struct Entry { + Entry* next; + Key key; + Value value; + }; + + // We further group a sequence of consecutive blocks into a cluster. + // The data for a cluster is represented as a dense array of + // linked-lists, one list per contained block. + static const int kClusterBits = 13; + static const Number kClusterSize = 1 << (kBlockBits + kClusterBits); + static const int kClusterBlocks = 1 << kClusterBits; + + // We use a simple chaining hash-table to represent the clusters. + struct Cluster { + Cluster* next; // Next cluster in hash table chain + Number id; // Cluster ID + Entry* blocks[kClusterBlocks]; // Per-block linked-lists + }; + + // Number of hash-table entries. With the block-size/cluster-size + // defined above, each cluster covers 1 MB, so an 4K entry + // hash-table will give an average hash-chain length of 1 for 4GB of + // in-use memory. + static const int kHashBits = 12; + static const int kHashSize = 1 << 12; + + // Number of entry objects allocated at a time + static const int ALLOC_COUNT = 64; + + Cluster** hashtable_; // The hash-table + Entry* free_; // Free list of unused Entry objects + + // Multiplicative hash function: + // The value "kHashMultiplier" is the bottom 32 bits of + // int((sqrt(5)-1)/2 * 2^32) + // This is a good multiplier as suggested in CLR, Knuth. The hash + // value is taken to be the top "k" bits of the bottom 32 bits + // of the muliplied value. + static const uint32_t kHashMultiplier = 2654435769u; + static int HashInt(Number x) { + // Multiply by a constant and take the top bits of the result. + const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier; + return static_cast<int>(m >> (32 - kHashBits)); + } + + // Find cluster object for specified address. If not found + // and "create" is true, create the object. If not found + // and "create" is false, return NULL. + // + // This method is bitwise-const if create is false. + Cluster* FindCluster(Number address, bool create) { + // Look in hashtable + const Number cluster_id = address >> (kBlockBits + kClusterBits); + const int h = HashInt(cluster_id); + for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) { + if (c->id == cluster_id) { + return c; + } + } + + // Create cluster if necessary + if (create) { + Cluster* c = New<Cluster>(1); + c->id = cluster_id; + c->next = hashtable_[h]; + hashtable_[h] = c; + return c; + } + return NULL; + } + + // Return the block ID for an address within its cluster + static int BlockID(Number address) { + return (address >> kBlockBits) & (kClusterBlocks - 1); + } + + //-------------------------------------------------------------- + // Memory management -- we keep all objects we allocate linked + // together in a singly linked list so we can get rid of them + // when we are all done. Furthermore, we allow the client to + // pass in custom memory allocator/deallocator routines. + //-------------------------------------------------------------- + struct Object { + Object* next; + // The real data starts here + }; + + Allocator alloc_; // The allocator + DeAllocator dealloc_; // The deallocator + Object* allocated_; // List of allocated objects + + // Allocates a zeroed array of T with length "num". Also inserts + // the allocated block into a linked list so it can be deallocated + // when we are all done. 
+ template <class T> T* New(int num) { + void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T)); + memset(ptr, 0, sizeof(Object) + num*sizeof(T)); + Object* obj = reinterpret_cast<Object*>(ptr); + obj->next = allocated_; + allocated_ = obj; + return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1); + } +}; + +// More implementation details follow: + +template <class Value> +AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc) + : free_(NULL), + alloc_(alloc), + dealloc_(dealloc), + allocated_(NULL) { + hashtable_ = New<Cluster*>(kHashSize); +} + +template <class Value> +AddressMap<Value>::~AddressMap() { + // De-allocate all of the objects we allocated + for (Object* obj = allocated_; obj != NULL; /**/) { + Object* next = obj->next; + (*dealloc_)(obj); + obj = next; + } +} + +template <class Value> +inline const Value* AddressMap<Value>::Find(Key key) const { + return const_cast<AddressMap*>(this)->FindMutable(key); +} + +template <class Value> +inline Value* AddressMap<Value>::FindMutable(Key key) { + const Number num = reinterpret_cast<Number>(key); + const Cluster* const c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) { + if (e->key == key) { + return &e->value; + } + } + } + return NULL; +} + +template <class Value> +void AddressMap<Value>::Insert(Key key, Value value) { + const Number num = reinterpret_cast<Number>(key); + Cluster* const c = FindCluster(num, true/*create*/); + + // Look in linked-list for this block + const int block = BlockID(num); + for (Entry* e = c->blocks[block]; e != NULL; e = e->next) { + if (e->key == key) { + e->value = value; + return; + } + } + + // Create entry + if (free_ == NULL) { + // Allocate a new batch of entries and add to free-list + Entry* array = New<Entry>(ALLOC_COUNT); + for (int i = 0; i < ALLOC_COUNT-1; i++) { + array[i].next = &array[i+1]; + } + array[ALLOC_COUNT-1].next = free_; + free_ = &array[0]; + } + Entry* e = free_; + free_ = e->next; + e->key = key; + e->value = value; + e->next = c->blocks[block]; + c->blocks[block] = e; +} + +template <class Value> +bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) { + const Number num = reinterpret_cast<Number>(key); + Cluster* const c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) { + Entry* e = *p; + if (e->key == key) { + *removed_value = e->value; + *p = e->next; // Remove e from linked-list + e->next = free_; // Add e to free-list + free_ = e; + return true; + } + } + } + return false; +} + +template <class Value> +const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func, + size_t max_size, + Key key, + Key* res_key) { + const Number key_num = reinterpret_cast<Number>(key); + Number num = key_num; // we'll move this to move back through the clusters + while (1) { + const Cluster* c = FindCluster(num, false/*do not create*/); + if (c != NULL) { + while (1) { + const int block = BlockID(num); + bool had_smaller_key = false; + for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) { + const Number e_num = reinterpret_cast<Number>(e->key); + if (e_num <= key_num) { + if (e_num == key_num || // to handle 0-sized ranges + key_num < e_num + (*size_func)(e->value)) { + *res_key = e->key; + return &e->value; + } + had_smaller_key = true; + } + } + if (had_smaller_key) return NULL; // got a range before 'key' + // and it did not contain 'key' + if (block == 0) break; + 
// try address-wise previous block + num |= kBlockSize - 1; // start at the last addr of prev block + num -= kBlockSize; + if (key_num - num > max_size) return NULL; + } + } + if (num < kClusterSize) return NULL; // first cluster + // go to address-wise previous cluster to try + num |= kClusterSize - 1; // start at the last block of previous cluster + num -= kClusterSize; + if (key_num - num > max_size) return NULL; + // Having max_size to limit the search is crucial: else + // we have to traverse a lot of empty clusters (or blocks). + // We can avoid needing max_size if we put clusters into + // a search tree, but performance suffers considerably + // if we use this approach by using stl::set. + } +} + +template <class Value> +template <class Type> +inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type), + Type arg) const { + // We could optimize this by traversing only non-empty clusters and/or blocks + // but it does not speed up heap-checker noticeably. + for (int h = 0; h < kHashSize; ++h) { + for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) { + for (int b = 0; b < kClusterBlocks; ++b) { + for (Entry* e = c->blocks[b]; e != NULL; e = e->next) { + callback(e->key, &e->value, arg); + } + } + } + } +} + +#endif // BASE_ADDRESSMAP_INL_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-linuxppc.h b/third_party/tcmalloc/chromium/src/base/atomicops-internals-linuxppc.h new file mode 100644 index 0000000..7e49560 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-linuxppc.h @@ -0,0 +1,416 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + */ + +// Implementation of atomic operations for ppc-linux. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". 
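// A minimal usage sketch (illustrative only, not taken from this file) of the
// acquire/release pattern these operations are meant to support, assuming a
// producer publishes an ordinary payload guarded by an Atomic32 flag:
//
//   static int data;                 // ordinary, non-atomic payload
//   static Atomic32 ready = 0;       // publication flag
//
//   // Producer thread:
//   data = 42;
//   base::subtle::Release_Store(&ready, 1);    // orders the data write before the flag
//
//   // Consumer thread:
//   if (base::subtle::Acquire_Load(&ready)) {  // orders the flag read before later reads
//     int v = data;                            // guaranteed to observe 42
//   }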
+ +#ifndef BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ +#define BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ + +typedef int32_t Atomic32; + +#ifdef __PPC64__ +#define BASE_HAS_ATOMIC64 1 +#endif + +namespace base { +namespace subtle { + +static inline void _sync(void) { + __asm__ __volatile__("sync": : : "memory"); +} + +static inline void _lwsync(void) { + // gcc defines __NO_LWSYNC__ when appropriate; see + // http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01238.html +#ifdef __NO_LWSYNC__ + __asm__ __volatile__("msync": : : "memory"); +#else + __asm__ __volatile__("lwsync": : : "memory"); +#endif +} + +static inline void _isync(void) { + __asm__ __volatile__("isync": : : "memory"); +} + +static inline Atomic32 OSAtomicAdd32(Atomic32 amount, Atomic32 *value) { + Atomic32 t; + __asm__ __volatile__( +"1: lwarx %0,0,%3\n\ + add %0,%2,%0\n\ + stwcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (*value) + : "r" (amount), "r" (value) + : "cc"); + return t; +} + +static inline Atomic32 OSAtomicAdd32Barrier(Atomic32 amount, Atomic32 *value) { + Atomic32 t; + _lwsync(); + t = OSAtomicAdd32(amount, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in OSAtomicAdd32) has a + // conditional branch with a data dependency on the update. + // Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline bool OSAtomicCompareAndSwap32(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + Atomic32 prev; + __asm__ __volatile__( +"1: lwarx %0,0,%2\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n\ + stwcx. %4,0,%2\n\ + bne- 1b\n\ +2:" + : "=&r" (prev), "+m" (*value) + : "r" (value), "r" (old_value), "r" (new_value) + : "cc"); + return prev == old_value; +} + +static inline Atomic32 OSAtomicCompareAndSwap32Acquire(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + Atomic32 t; + t = OSAtomicCompareAndSwap32(old_value, new_value, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in + // OSAtomicCompareAndSwap32) has a conditional branch with a data + // dependency on the update. Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline Atomic32 OSAtomicCompareAndSwap32Release(Atomic32 old_value, + Atomic32 new_value, + Atomic32 *value) { + _lwsync(); + return OSAtomicCompareAndSwap32(old_value, new_value, value); +} + +typedef int64_t Atomic64; + +inline void MemoryBarrier() { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); +} + +// 32-bit Versions. 
+ +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr)); +} + +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32Barrier(increment, const_cast<Atomic32*>(ptr)); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Acquire(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Release(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +#ifdef __PPC64__ + +// 64-bit Versions. + +static inline Atomic64 OSAtomicAdd64(Atomic64 amount, Atomic64 *value) { + Atomic64 t; + __asm__ __volatile__( +"1: ldarx %0,0,%3\n\ + add %0,%2,%0\n\ + stdcx. %0,0,%3 \n\ + bne- 1b" + : "=&r" (t), "+m" (*value) + : "r" (amount), "r" (value) + : "cc"); + return t; +} + +static inline Atomic64 OSAtomicAdd64Barrier(Atomic64 amount, Atomic64 *value) { + Atomic64 t; + _lwsync(); + t = OSAtomicAdd64(amount, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in OSAtomicAdd64) has a + // conditional branch with a data dependency on the update. + // Otherwise, we'd have to use sync. + _isync(); + return t; +} + +static inline bool OSAtomicCompareAndSwap64(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + Atomic64 prev; + __asm__ __volatile__( +"1: ldarx %0,0,%2\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b\n\ +2:" + : "=&r" (prev), "+m" (*value) + : "r" (value), "r" (old_value), "r" (new_value) + : "cc"); + return prev == old_value; +} + +static inline Atomic64 OSAtomicCompareAndSwap64Acquire(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + Atomic64 t; + t = OSAtomicCompareAndSwap64(old_value, new_value, value); + // This is based on the code snippet in the architecture manual (Vol + // 2, Appendix B). It's a little tricky: correctness depends on the + // fact that the code right before this (in + // OSAtomicCompareAndSwap64) has a conditional branch with a data + // dependency on the update. Otherwise, we'd have to use sync. 
+ _isync(); + return t; +} + +static inline Atomic64 OSAtomicCompareAndSwap64Release(Atomic64 old_value, + Atomic64 new_value, + Atomic64 *value) { + _lwsync(); + return OSAtomicCompareAndSwap64(old_value, new_value, value); +} + + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr, + Atomic64 increment) { + return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr)); +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64 *ptr, + Atomic64 increment) { + return OSAtomicAdd64Barrier(increment, const_cast<Atomic64*>(ptr)); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Acquire(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Release(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +#endif + +inline void NoBarrier_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); +} + +inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { + _lwsync(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32 *ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { + Atomic32 value = *ptr; + _lwsync(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); + return *ptr; +} + +#ifdef __PPC64__ + +// 64-bit Versions. + +inline void NoBarrier_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. 
+ _sync(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + _lwsync(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64 *ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = *ptr; + _lwsync(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + // This can't be _lwsync(); we need to order the immediately + // preceding stores against any load that may follow, but lwsync + // doesn't guarantee that. + _sync(); + return *ptr; +} + +#endif + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_LINUXPPC_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-macosx.h b/third_party/tcmalloc/chromium/src/base/atomicops-internals-macosx.h new file mode 100644 index 0000000..430b9ee --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-macosx.h @@ -0,0 +1,359 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// Implementation of atomic operations for Mac OS X. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". + +#ifndef BASE_ATOMICOPS_INTERNALS_MACOSX_H_ +#define BASE_ATOMICOPS_INTERNALS_MACOSX_H_ + +typedef int32_t Atomic32; + +// MacOS uses long for intptr_t, AtomicWord and Atomic32 are always different +// on the Mac, even when they are the same size. Similarly, on __ppc64__, +// AtomicWord and Atomic64 are always different. Thus, we need explicit +// casting. 
+#ifdef __LP64__ +#define AtomicWordCastType base::subtle::Atomic64 +#else +#define AtomicWordCastType Atomic32 +#endif + +#if defined(__LP64__) || defined(__i386__) +#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* +#endif + +#include <libkern/OSAtomic.h> + +namespace base { +namespace subtle { + +#if !defined(__LP64__) && defined(__ppc__) + +// The Mac 64-bit OSAtomic implementations are not available for 32-bit PowerPC, +// while the underlying assembly instructions are available only some +// implementations of PowerPC. + +// The following inline functions will fail with the error message at compile +// time ONLY IF they are called. So it is safe to use this header if user +// code only calls AtomicWord and Atomic32 operations. +// +// NOTE(vchen): Implementation notes to implement the atomic ops below may +// be found in "PowerPC Virtual Environment Architecture, Book II, +// Version 2.02", January 28, 2005, Appendix B, page 46. Unfortunately, +// extra care must be taken to ensure data are properly 8-byte aligned, and +// that data are returned correctly according to Mac OS X ABI specs. + +inline int64_t OSAtomicCompareAndSwap64( + int64_t oldValue, int64_t newValue, int64_t *theValue) { + __asm__ __volatile__( + "_OSAtomicCompareAndSwap64_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +inline int64_t OSAtomicAdd64(int64_t theAmount, int64_t *theValue) { + __asm__ __volatile__( + "_OSAtomicAdd64_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +inline int64_t OSAtomicCompareAndSwap64Barrier( + int64_t oldValue, int64_t newValue, int64_t *theValue) { + int64_t prev = OSAtomicCompareAndSwap64(oldValue, newValue, theValue); + OSMemoryBarrier(); + return prev; +} + +inline int64_t OSAtomicAdd64Barrier( + int64_t theAmount, int64_t *theValue) { + int64_t new_val = OSAtomicAdd64(theAmount, theValue); + OSMemoryBarrier(); + return new_val; +} +#endif + +typedef int64_t Atomic64; + +inline void MemoryBarrier() { + OSMemoryBarrier(); +} + +// 32-bit Versions. 
+ +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32(old_value, new_value, + const_cast<Atomic32*>(ptr))); + return old_value; +} + +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr)); +} + +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32Barrier(increment, const_cast<Atomic32*>(ptr)); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev_value; + do { + if (OSAtomicCompareAndSwap32Barrier(old_value, new_value, + const_cast<Atomic32*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32 *ptr, + Atomic32 old_value, + Atomic32 new_value) { + return Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32 *ptr, Atomic32 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic32 *ptr, Atomic32 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32 *ptr) { + Atomic32 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32 *ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit version + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64(old_value, new_value, + const_cast<Atomic64*>(ptr))); + return old_value; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr, + Atomic64 increment) { + return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr)); +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64 *ptr, + Atomic64 increment) { + return OSAtomicAdd64Barrier(increment, const_cast<Atomic64*>(ptr)); +} + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev_value; + do { + if (OSAtomicCompareAndSwap64Barrier(old_value, new_value, + const_cast<Atomic64*>(ptr))) { + return old_value; + } + prev_value = *ptr; + } while (prev_value == old_value); + return prev_value; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64 *ptr, + Atomic64 old_value, + Atomic64 new_value) { + // The lib kern interface does not distinguish between + // Acquire and Release memory barriers; they are 
equivalent. + return Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +#ifdef __LP64__ + +// 64-bit implementation on 64-bit platform + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + *ptr = value; + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + MemoryBarrier(); + *ptr = value; +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = *ptr; + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + MemoryBarrier(); + return *ptr; +} + +#else + +// 64-bit implementation on 32-bit platform + +#if defined(__ppc__) + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__( + "_NoBarrier_Store_not_supported_for_32_bit_ppc\n\t"); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + __asm__ __volatile__( + "_NoBarrier_Load_not_supported_for_32_bit_ppc\n\t"); + return 0; +} + +#elif defined(__i386__) + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Reset FP registers + : "=m" (*ptr) + : "m" (value) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + Atomic64 value; + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Reset FP registers + : "=m" (value) + : "m" (*ptr) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + + return value; +} +#endif + + +inline void Acquire_Store(volatile Atomic64 *ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64 *ptr, Atomic64 value) { + MemoryBarrier(); + NoBarrier_Store(ptr, value); +} + +inline Atomic64 Acquire_Load(volatile const Atomic64 *ptr) { + Atomic64 value = NoBarrier_Load(ptr); + MemoryBarrier(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64 *ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} +#endif // __LP64__ + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_MACOSX_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86-msvc.h b/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86-msvc.h new file mode 100644 index 0000000..d50894c --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86-msvc.h @@ -0,0 +1,414 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +// Implementation of atomic operations for x86. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". + +#ifndef BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ +#define BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ + +#include <stdio.h> +#include <stdlib.h> +#include "base/basictypes.h" // For COMPILE_ASSERT + +typedef int32 Atomic32; + +#if defined(_WIN64) +#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* +#endif + +namespace base { +namespace subtle { + +typedef int64 Atomic64; + +// 32-bit low-level operations on any platform + +// MinGW has a bug in the header files where it doesn't indicate the +// first argument is volatile -- they're not up to date. See +// http://readlist.com/lists/lists.sourceforge.net/mingw-users/0/3861.html +// We have to const_cast away the volatile to avoid compiler warnings. 
+// TODO(csilvers): remove this once MinGW has updated MinGW/include/winbase.h +#ifdef __MINGW32__ +inline LONG InterlockedCompareExchange(volatile LONG* ptr, + LONG newval, LONG oldval) { + return ::InterlockedCompareExchange(const_cast<LONG*>(ptr), newval, oldval); +} +inline LONG InterlockedExchange(volatile LONG* ptr, LONG newval) { + return ::InterlockedExchange(const_cast<LONG*>(ptr), newval); +} +inline LONG InterlockedExchangeAdd(volatile LONG* ptr, LONG increment) { + return ::InterlockedExchangeAdd(const_cast<LONG*>(ptr), increment); +} +#endif // ifdef __MINGW32__ + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + LONG result = InterlockedCompareExchange( + reinterpret_cast<volatile LONG*>(ptr), + static_cast<LONG>(new_value), + static_cast<LONG>(old_value)); + return static_cast<Atomic32>(result); +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + LONG result = InterlockedExchange( + reinterpret_cast<volatile LONG*>(ptr), + static_cast<LONG>(new_value)); + return static_cast<Atomic32>(result); +} + +inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + return InterlockedExchangeAdd( + reinterpret_cast<volatile LONG*>(ptr), + static_cast<LONG>(increment)) + increment; +} + +inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + return Barrier_AtomicIncrement(ptr, increment); +} + +} // namespace base::subtle +} // namespace base + + +// In msvc8/vs2005, winnt.h already contains a definition for +// MemoryBarrier in the global namespace. Add it there for earlier +// versions and forward to it from within the namespace. +#if !(defined(_MSC_VER) && _MSC_VER >= 1400) +inline void MemoryBarrier() { + Atomic32 value = 0; + base::subtle::NoBarrier_AtomicExchange(&value, 0); + // actually acts as a barrier in thisd implementation +} +#endif + +namespace base { +namespace subtle { + +inline void MemoryBarrier() { + ::MemoryBarrier(); +} + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + NoBarrier_AtomicExchange(ptr, value); + // acts as a barrier in this implementation +} + +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + *ptr = value; // works w/o barrier for current Intel chips as of June 2005 + // See comments in Atomic64 version of Release_Store() below. +} + +inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { + return *ptr; +} + +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + Atomic32 value = *ptr; + return value; +} + +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + MemoryBarrier(); + return *ptr; +} + +// 64-bit operations + +#if defined(_WIN64) || defined(__MINGW64__) + +// 64-bit low-level operations on 64-bit platform. + +COMPILE_ASSERT(sizeof(Atomic64) == sizeof(PVOID), atomic_word_is_atomic); + +// Like for the __MINGW32__ case above, this works around a header +// error in mingw, where it's missing 'volatile'. 
+#ifdef __MINGW64__ +inline PVOID InterlockedCompareExchangePointer(volatile PVOID* ptr, + PVOID newval, PVOID oldval) { + return ::InterlockedCompareExchangePointer(const_cast<PVOID*>(ptr), + newval, oldval); +} +inline PVOID InterlockedExchangePointer(volatile PVOID* ptr, PVOID newval) { + return ::InterlockedExchangePointer(const_cast<PVOID*>(ptr), newval); +} +inline LONGLONG InterlockedExchangeAdd64(volatile LONGLONG* ptr, + LONGLONG increment) { + return ::InterlockedExchangeAdd64(const_cast<LONGLONG*>(ptr), increment); +} +#endif // ifdef __MINGW64__ + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + PVOID result = InterlockedCompareExchangePointer( + reinterpret_cast<volatile PVOID*>(ptr), + reinterpret_cast<PVOID>(new_value), reinterpret_cast<PVOID>(old_value)); + return reinterpret_cast<Atomic64>(result); +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + PVOID result = InterlockedExchangePointer( + reinterpret_cast<volatile PVOID*>(ptr), + reinterpret_cast<PVOID>(new_value)); + return reinterpret_cast<Atomic64>(result); +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + return InterlockedExchangeAdd64( + reinterpret_cast<volatile LONGLONG*>(ptr), + static_cast<LONGLONG>(increment)) + increment; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + return Barrier_AtomicIncrement(ptr, increment); +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_AtomicExchange(ptr, value); + // acts as a barrier in this implementation +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + *ptr = value; // works w/o barrier for current Intel chips as of June 2005 + + // When new chips come out, check: + // IA-32 Intel Architecture Software Developer's Manual, Volume 3: + // System Programming Guide, Chatper 7: Multiple-processor management, + // Section 7.2, Memory Ordering. + // Last seen at: + // http://developer.intel.com/design/pentium4/manuals/index_new.htm +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + return *ptr; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = *ptr; + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return *ptr; +} + +#else // defined(_WIN64) || defined(__MINGW64__) + +// 64-bit low-level operations on 32-bit platform + +// TBD(vchen): The GNU assembly below must be converted to MSVC inline +// assembly. 
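// One possible direction for that port (a sketch only, assuming the MSVC
// <intrin.h> intrinsics would be acceptable here in place of inline assembly):
// _InterlockedCompareExchange64 maps to a locked cmpxchg8b on 32-bit x86 and
// returns the previous value, which matches the contract of
// NoBarrier_CompareAndSwap below.
//
//   #include <intrin.h>
//   inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
//                                            Atomic64 old_value,
//                                            Atomic64 new_value) {
//     // Returns the value *ptr held before the operation.
//     return _InterlockedCompareExchange64(ptr, new_value, old_value);
//   }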
+ +inline void NotImplementedFatalError(const char *function_name) { + fprintf(stderr, "64-bit %s() not implemented on this platform\n", + function_name); + abort(); +} + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { +#if 0 // Not implemented + Atomic64 prev; + __asm__ __volatile__("movl (%3), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%3), %%ecx\n\t" // ecx:ebx + "lock; cmpxchg8b %1\n\t" // If edx:eax (old_value) same + : "=A" (prev) // as contents of ptr: + : "m" (*ptr), // ecx:ebx => ptr + "0" (old_value), // else: + "r" (&new_value) // old *ptr => edx:eax + : "memory", "%ebx", "%ecx"); + return prev; +#else + NotImplementedFatalError("NoBarrier_CompareAndSwap"); + return 0; +#endif +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { +#if 0 // Not implemented + __asm__ __volatile__( + "movl (%2), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%2), %%ecx\n\t" // ecx:ebx + "0:\n\t" + "movl %1, %%eax\n\t" // Read contents of ptr into + "movl 4%1, %%edx\n\t" // edx:eax + "lock; cmpxchg8b %1\n\t" // Attempt cmpxchg; if *ptr + "jnz 0b\n\t" // is no longer edx:eax, loop + : "=A" (new_value) + : "m" (*ptr), + "r" (&new_value) + : "memory", "%ebx", "%ecx"); + return new_value; // Now it's the previous value. +#else + NotImplementedFatalError("NoBarrier_AtomicExchange"); + return 0; +#endif +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { +#if 0 // Not implemented + Atomic64 temp = increment; + __asm__ __volatile__( + "0:\n\t" + "movl (%3), %%ebx\n\t" // Move 64-bit increment into + "movl 4(%3), %%ecx\n\t" // ecx:ebx + "movl (%2), %%eax\n\t" // Read contents of ptr into + "movl 4(%2), %%edx\n\t" // edx:eax + "add %%eax, %%ebx\n\t" // sum => ecx:ebx + "adc %%edx, %%ecx\n\t" // edx:eax still has old *ptr + "lock; cmpxchg8b (%2)\n\t"// Attempt cmpxchg; if *ptr + "jnz 0b\n\t" // is no longer edx:eax, loop + : "=A"(temp), "+m"(*ptr) + : "D" (ptr), "S" (&increment) + : "memory", "%ebx", "%ecx"); + // temp now contains the previous value of *ptr + return temp + increment; +#else + NotImplementedFatalError("NoBarrier_AtomicIncrement"); + return 0; +#endif +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { +#if 0 // Not implemented + Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment); + if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { + __asm__ __volatile__("lfence" : : : "memory"); + } + return new_val; +#else + NotImplementedFatalError("Barrier_AtomicIncrement"); + return 0; +#endif +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { +#if 0 // Not implemented + __asm { + mov mm0, value; // Use mmx reg for 64-bit atomic moves + mov ptr, mm0; + emms; // Empty mmx state to enable FP registers + } +#else + NotImplementedFatalError("NoBarrier_Store"); +#endif +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_AtomicExchange(ptr, value); + // acts as a barrier in this implementation +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { +#if 0 // Not implemented + Atomic64 value; + __asm { + mov mm0, ptr; // Use mmx reg for 64-bit atomic moves + mov value, mm0; + emms; // Empty mmx state to enable FP registers + } + return value; +#else + NotImplementedFatalError("NoBarrier_Store"); + return 0; +#endif +} + 
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +#endif // defined(_WIN64) || defined(__MINGW64__) + + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, old_value, new_value); +} + +} // namespace base::subtle +} // namespace base + +#endif // BASE_ATOMICOPS_INTERNALS_X86_MSVC_H_ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86.cc b/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86.cc new file mode 100644 index 0000000..4f75d47 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86.cc @@ -0,0 +1,125 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This module gets enough CPU information to optimize the + * atomicops module on x86. + */ + +#include "base/atomicops.h" +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include <string.h> + +// This file only makes sense with atomicops-internals-x86.h -- it +// depends on structs that are defined in that file. If atomicops.h +// doesn't sub-include that file, then we aren't needed, and shouldn't +// try to do anything. +#ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ + +// Inline cpuid instruction. In PIC compilations, %ebx contains the address +// of the global offset table. To avoid breaking such executables, this code +// must preserve that register's value across cpuid instructions. 
+#if defined(__i386__) +#define cpuid(a, b, c, d, inp) \ + asm ("mov %%ebx, %%edi\n" \ + "cpuid\n" \ + "xchg %%edi, %%ebx\n" \ + : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp)) +#elif defined (__x86_64__) +#define cpuid(a, b, c, d, inp) \ + asm ("mov %%rbx, %%rdi\n" \ + "cpuid\n" \ + "xchg %%rdi, %%rbx\n" \ + : "=a" (a), "=D" (b), "=c" (c), "=d" (d) : "a" (inp)) +#endif + +#if defined(cpuid) // initialize the struct only on x86 + +// Set the flags so that code will run correctly and conservatively +// until InitGoogle() is called. +struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures = { + false, // bug can't exist before process spawns multiple threads + false, // no SSE2 + false, // no cmpxchg16b +}; + +// Initialize the AtomicOps_Internalx86CPUFeatures struct. +static void AtomicOps_Internalx86CPUFeaturesInit() { + uint32 eax; + uint32 ebx; + uint32 ecx; + uint32 edx; + + // Get vendor string (issue CPUID with eax = 0) + cpuid(eax, ebx, ecx, edx, 0); + char vendor[13]; + memcpy(vendor, &ebx, 4); + memcpy(vendor + 4, &edx, 4); + memcpy(vendor + 8, &ecx, 4); + vendor[12] = 0; + + // get feature flags in ecx/edx, and family/model in eax + cpuid(eax, ebx, ecx, edx, 1); + + int family = (eax >> 8) & 0xf; // family and model fields + int model = (eax >> 4) & 0xf; + if (family == 0xf) { // use extended family and model fields + family += (eax >> 20) & 0xff; + model += ((eax >> 16) & 0xf) << 4; + } + + // Opteron Rev E has a bug in which on very rare occasions a locked + // instruction doesn't act as a read-acquire barrier if followed by a + // non-locked read-modify-write instruction. Rev F has this bug in + // pre-release versions, but not in versions released to customers, + // so we test only for Rev E, which is family 15, model 32..63 inclusive. + if (strcmp(vendor, "AuthenticAMD") == 0 && // AMD + family == 15 && + 32 <= model && model <= 63) { + AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = true; + } else { + AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug = false; + } + + // edx bit 26 is SSE2 which we use to tell use whether we can use mfence + AtomicOps_Internalx86CPUFeatures.has_sse2 = ((edx >> 26) & 1); + + // ecx bit 13 indicates whether the cmpxchg16b instruction is supported + AtomicOps_Internalx86CPUFeatures.has_cmpxchg16b = ((ecx >> 13) & 1); +} + +REGISTER_MODULE_INITIALIZER(atomicops_x86, { + AtomicOps_Internalx86CPUFeaturesInit(); +}); + +#endif + +#endif /* ifdef BASE_ATOMICOPS_INTERNALS_X86_H_ */ diff --git a/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86.h b/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86.h new file mode 100644 index 0000000..c34aa5c --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/atomicops-internals-x86.h @@ -0,0 +1,428 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +// Implementation of atomic operations for x86. This file should not +// be included directly. Clients should instead include +// "base/atomicops.h". + +#ifndef BASE_ATOMICOPS_INTERNALS_X86_H_ +#define BASE_ATOMICOPS_INTERNALS_X86_H_ + +typedef int32_t Atomic32; +#define BASE_HAS_ATOMIC64 1 // Use only in tests and base/atomic* + + +// NOTE(vchen): x86 does not need to define AtomicWordCastType, because it +// already matches Atomic32 or Atomic64, depending on the platform. + + +// This struct is not part of the public API of this module; clients may not +// use it. +// Features of this x86. Values may not be correct before main() is run, +// but are set conservatively. +struct AtomicOps_x86CPUFeatureStruct { + bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence + // after acquire compare-and-swap. + bool has_sse2; // Processor has SSE2. + bool has_cmpxchg16b; // Processor supports cmpxchg16b instruction. +}; +extern struct AtomicOps_x86CPUFeatureStruct AtomicOps_Internalx86CPUFeatures; + + +#define ATOMICOPS_COMPILER_BARRIER() __asm__ __volatile__("" : : : "memory") + + +namespace base { +namespace subtle { + +typedef int64_t Atomic64; + +// 32-bit low-level operations on any platform. + +inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + Atomic32 prev; + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a" (prev) + : "q" (new_value), "m" (*ptr), "0" (old_value) + : "memory"); + return prev; +} + +inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg. + : "=r" (new_value) + : "m" (*ptr), "0" (new_value) + : "memory"); + return new_value; // Now it's the previous value. 
+}
+
+inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+                                          Atomic32 increment) {
+  Atomic32 temp = increment;
+  __asm__ __volatile__("lock; xaddl %0,%1"
+                       : "+r" (temp), "+m" (*ptr)
+                       : : "memory");
+  // temp now holds the old value of *ptr
+  return temp + increment;
+}
+
+inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+                                        Atomic32 increment) {
+  Atomic32 temp = increment;
+  __asm__ __volatile__("lock; xaddl %0,%1"
+                       : "+r" (temp), "+m" (*ptr)
+                       : : "memory");
+  // temp now holds the old value of *ptr
+  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+    __asm__ __volatile__("lfence" : : : "memory");
+  }
+  return temp + increment;
+}
+
+inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+    __asm__ __volatile__("lfence" : : : "memory");
+  }
+  return x;
+}
+
+inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
+                                       Atomic32 old_value,
+                                       Atomic32 new_value) {
+  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
+}
+
+inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+}
+
+#if defined(__x86_64__)
+
+// 64-bit implementations of memory barrier can be simpler, because
+// "mfence" is guaranteed to exist.
+inline void MemoryBarrier() {
+  __asm__ __volatile__("mfence" : : : "memory");
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+#else
+
+inline void MemoryBarrier() {
+  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
+    __asm__ __volatile__("mfence" : : : "memory");
+  } else { // mfence is faster but not present on PIII
+    Atomic32 x = 0;
+    NoBarrier_AtomicExchange(&x, 0);  // acts as a barrier on PIII
+  }
+}
+
+inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+  if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
+    *ptr = value;
+    __asm__ __volatile__("mfence" : : : "memory");
+  } else {
+    NoBarrier_AtomicExchange(ptr, value);
+                          // acts as a barrier on PIII
+  }
+}
+#endif
+
+inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+  *ptr = value; // An x86 store acts as a release barrier.
+  // See comments in Atomic64 version of Release_Store(), below.
+}
+
+inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
+  return *ptr;
+}
+
+inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
+  Atomic32 value = *ptr; // An x86 load acts as an acquire barrier.
+  // See comments in Atomic64 version of Release_Store(), below.
+  ATOMICOPS_COMPILER_BARRIER();
+  return value;
+}
+
+inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#if defined(__x86_64__)
+
+// 64-bit low-level operations on 64-bit platform.
+
+inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
+                                         Atomic64 old_value,
+                                         Atomic64 new_value) {
+  Atomic64 prev;
+  __asm__ __volatile__("lock; cmpxchgq %1,%2"
+                       : "=a" (prev)
+                       : "q" (new_value), "m" (*ptr), "0" (old_value)
+                       : "memory");
+  return prev;
+}
+
+inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
+                                         Atomic64 new_value) {
+  __asm__ __volatile__("xchgq %1,%0"  // The lock prefix is implicit for xchg.
+                       : "=r" (new_value)
+                       : "m" (*ptr), "0" (new_value)
+                       : "memory");
+  return new_value;  // Now it's the previous value.
+}
+
+inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+                                          Atomic64 increment) {
+  Atomic64 temp = increment;
+  __asm__ __volatile__("lock; xaddq %0,%1"
+                       : "+r" (temp), "+m" (*ptr)
+                       : : "memory");
+  // temp now contains the previous value of *ptr
+  return temp + increment;
+}
+
+inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
+                                        Atomic64 increment) {
+  Atomic64 temp = increment;
+  __asm__ __volatile__("lock; xaddq %0,%1"
+                       : "+r" (temp), "+m" (*ptr)
+                       : : "memory");
+  // temp now contains the previous value of *ptr
+  if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+    __asm__ __volatile__("lfence" : : : "memory");
+  }
+  return temp + increment;
+}
+
+inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+}
+
+inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+  *ptr = value;
+  MemoryBarrier();
+}
+
+inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
+  ATOMICOPS_COMPILER_BARRIER();
+
+  *ptr = value; // An x86 store acts as a release barrier
+                // for current AMD/Intel chips as of Jan 2008.
+                // See also Acquire_Load(), below.
+
+  // When new chips come out, check:
+  //  IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+  //  System Programming Guide, Chapter 7: Multiple-processor management,
+  //  Section 7.2, Memory Ordering.
+  // Last seen at:
+  //   http://developer.intel.com/design/pentium4/manuals/index_new.htm
+  //
+  // x86 stores/loads fail to act as barriers for a few instructions (clflush
+  // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
+  // not generated by the compiler, and are rare.  Users of these instructions
+  // need to know about cache behaviour in any case since all of these involve
+  // either flushing cache lines or non-temporal cache hints.
+}
+
+inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+  return *ptr;
+}
+
+inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+  Atomic64 value = *ptr; // An x86 load acts as an acquire barrier,
+                         // for current AMD/Intel chips as of Jan 2008.
+                         // See also Release_Store(), above.
+  ATOMICOPS_COMPILER_BARRIER();
+  return value;
+}
+
+inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
+  MemoryBarrier();
+  return *ptr;
+}
+
+#else // defined(__x86_64__)
+
+// 64-bit low-level operations on 32-bit platform.
+
+#if !((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+// For compilers older than gcc 4.1, we use inline asm.
+//
+// Potential pitfalls:
+//
+// 1. %ebx points to Global offset table (GOT) with -fPIC.
+//    We need to preserve this register.
+// 2. When explicit registers are used in inline asm, the
+//    compiler may not be aware of it and might try to reuse
+//    the same register for another argument which has constraints
+//    that allow it ("r" for example).
+ +inline Atomic64 __sync_val_compare_and_swap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 prev; + __asm__ __volatile__("push %%ebx\n\t" + "movl (%3), %%ebx\n\t" // Move 64-bit new_value into + "movl 4(%3), %%ecx\n\t" // ecx:ebx + "lock; cmpxchg8b (%1)\n\t"// If edx:eax (old_value) same + "pop %%ebx\n\t" + : "=A" (prev) // as contents of ptr: + : "D" (ptr), // ecx:ebx => ptr + "0" (old_value), // else: + "S" (&new_value) // old *ptr => edx:eax + : "memory", "%ecx"); + return prev; +} +#endif // Compiler < gcc-4.1 + +inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_val, + Atomic64 new_val) { + return __sync_val_compare_and_swap(ptr, old_val, new_val); +} + +inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_val) { + Atomic64 old_val; + + do { + old_val = *ptr; + } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); + + return old_val; +} + +inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + Atomic64 old_val, new_val; + + do { + old_val = *ptr; + new_val = old_val + increment; + } while (__sync_val_compare_and_swap(ptr, old_val, new_val) != old_val); + + return old_val + increment; +} + +inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + Atomic64 new_val = NoBarrier_AtomicIncrement(ptr, increment); + if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { + __asm__ __volatile__("lfence" : : : "memory"); + } + return new_val; +} + +inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) { + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Empty mmx state/Reset FP regs + : "=m" (*ptr) + : "m" (value) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); +} + +inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) { + NoBarrier_Store(ptr, value); + MemoryBarrier(); +} + +inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) { + ATOMICOPS_COMPILER_BARRIER(); + NoBarrier_Store(ptr, value); +} + +inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) { + Atomic64 value; + __asm__ __volatile__("movq %1, %%mm0\n\t" // Use mmx reg for 64-bit atomic + "movq %%mm0, %0\n\t" // moves (ptr could be read-only) + "emms\n\t" // Empty mmx state/Reset FP regs + : "=m" (value) + : "m" (*ptr) + : // mark the FP stack and mmx registers as clobbered + "st", "st(1)", "st(2)", "st(3)", "st(4)", + "st(5)", "st(6)", "st(7)", "mm0", "mm1", + "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"); + return value; +} + +inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) { + Atomic64 value = NoBarrier_Load(ptr); + ATOMICOPS_COMPILER_BARRIER(); + return value; +} + +inline Atomic64 Release_Load(volatile const Atomic64* ptr) { + MemoryBarrier(); + return NoBarrier_Load(ptr); +} + +#endif // defined(__x86_64__) + +inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value); + if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { + __asm__ __volatile__("lfence" : : : "memory"); + } + return x; +} + +inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { + return NoBarrier_CompareAndSwap(ptr, 
                                        old_value, new_value);
+}
+
+}  // namespace base::subtle
+}  // namespace base
+
+#undef ATOMICOPS_COMPILER_BARRIER
+
+#endif  // BASE_ATOMICOPS_INTERNALS_X86_H_
diff --git a/third_party/tcmalloc/chromium/src/base/atomicops.h b/third_party/tcmalloc/chromium/src/base/atomicops.h
new file mode 100644
index 0000000..0f3d3ef
--- /dev/null
+++ b/third_party/tcmalloc/chromium/src/base/atomicops.h
@@ -0,0 +1,387 @@
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Sanjay Ghemawat
+ */
+
+// For atomic operations on statistics counters, see atomic_stats_counter.h.
+// For atomic operations on sequence numbers, see atomic_sequence_num.h.
+// For atomic operations on reference counts, see atomic_refcount.h.
+
+// Some fast atomic operations -- typically with machine-dependent
+// implementations.  This file may need editing as Google code is
+// ported to different architectures.
+
+// The routines exported by this module are subtle.  If you use them, even if
+// you get the code right, it will depend on careful reasoning about atomicity
+// and memory ordering; it will be less readable, and harder to maintain.  If
+// you plan to use these routines, you should have a good reason, such as solid
+// evidence that performance would otherwise suffer, or there being no
+// alternative.  You should assume only properties explicitly guaranteed by the
+// specifications in this file.  You are almost certainly _not_ writing code
+// just for the x86; if you assume x86 semantics, x86 hardware bugs and
+// implementations on other architectures will cause your code to break.  If you
+// do not know what you are doing, avoid these routines, and use a Mutex.
+//
+// It is incorrect to make direct assignments to/from an atomic variable.
+// You should use one of the Load or Store routines.  The NoBarrier
+// versions are provided when no barriers are needed:
+//   NoBarrier_Store()
+//   NoBarrier_Load()
+// Although there is currently no compiler enforcement, you are encouraged
+// to use these.
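+// For illustration only (a sketch, with a hypothetical variable name), the
+// intended pattern is:
+//   base::subtle::Atomic32 counter_;                       // shared variable
+//   base::subtle::NoBarrier_Store(&counter_, 1);           // not: counter_ = 1;
+//   Atomic32 v = base::subtle::NoBarrier_Load(&counter_);  // not: v = counter_;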
Moreover, if you choose to use base::subtle::Atomic64 type, +// you MUST use one of the Load or Store routines to get correct behavior +// on 32-bit platforms. +// +// The intent is eventually to put all of these routines in namespace +// base::subtle + +#ifndef THREAD_ATOMICOPS_H_ +#define THREAD_ATOMICOPS_H_ + +#include <config.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif + +// ------------------------------------------------------------------------ +// Include the platform specific implementations of the types +// and operations listed below. Implementations are to provide Atomic32 +// and Atomic64 operations. If there is a mismatch between intptr_t and +// the Atomic32 or Atomic64 types for a platform, the platform-specific header +// should define the macro, AtomicWordCastType in a clause similar to the +// following: +// #if ...pointers are 64 bits... +// # define AtomicWordCastType base::subtle::Atomic64 +// #else +// # define AtomicWordCastType Atomic32 +// #endif +// TODO(csilvers): figure out ARCH_PIII/ARCH_K8 (perhaps via ./configure?) +// ------------------------------------------------------------------------ + +// TODO(csilvers): match piii, not just __i386. Also, match k8 +#if defined(__MACH__) && defined(__APPLE__) +#include "base/atomicops-internals-macosx.h" +#elif defined(_MSC_VER) && defined(_M_IX86) +#include "base/atomicops-internals-x86-msvc.h" +#elif defined(__MINGW32__) && defined(__i386__) +#include "base/atomicops-internals-x86-msvc.h" +#elif defined(__GNUC__) && (defined(__i386) || defined(ARCH_K8)) +#include "base/atomicops-internals-x86.h" +#elif defined(__linux__) && defined(__PPC__) +#include "base/atomicops-internals-linuxppc.h" +#else +// Assume x86 for now. If you need to support a new architecture and +// don't know how to implement atomic ops, you can probably get away +// with using pthreads, since atomicops is only used by spinlock.h/cc +//#error You need to implement atomic operations for this architecture +#include "base/atomicops-internals-x86.h" +#endif + +// Signed type that can hold a pointer and supports the atomic ops below, as +// well as atomic loads and stores. Instances must be naturally-aligned. +typedef intptr_t AtomicWord; + +#ifdef AtomicWordCastType +// ------------------------------------------------------------------------ +// This section is needed only when explicit type casting is required to +// cast AtomicWord to one of the basic atomic types (Atomic64 or Atomic32). +// It also serves to document the AtomicWord interface. +// ------------------------------------------------------------------------ + +namespace base { +namespace subtle { + +// Atomically execute: +// result = *ptr; +// if (*ptr == old_value) +// *ptr = new_value; +// return result; +// +// I.e., replace "*ptr" with "new_value" if "*ptr" used to be "old_value". +// Always return the old value of "*ptr" +// +// This routine implies no memory barriers. +inline AtomicWord NoBarrier_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return NoBarrier_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +// Atomically store new_value into *ptr, returning the previous value held in +// *ptr. This routine implies no memory barriers. 
+inline AtomicWord NoBarrier_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return NoBarrier_AtomicExchange( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value); +} + +// Atomically increment *ptr by "increment". Returns the new value of +// *ptr with the increment applied. This routine implies no memory +// barriers. +inline AtomicWord NoBarrier_AtomicIncrement(volatile AtomicWord* ptr, + AtomicWord increment) { + return NoBarrier_AtomicIncrement( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), increment); +} + +inline AtomicWord Barrier_AtomicIncrement(volatile AtomicWord* ptr, + AtomicWord increment) { + return Barrier_AtomicIncrement( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), increment); +} + +// ------------------------------------------------------------------------ +// These following lower-level operations are typically useful only to people +// implementing higher-level synchronization operations like spinlocks, +// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or +// a store with appropriate memory-ordering instructions. "Acquire" operations +// ensure that no later memory access can be reordered ahead of the operation. +// "Release" operations ensure that no previous memory access can be reordered +// after the operation. "Barrier" operations have both "Acquire" and "Release" +// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory +// access. +// ------------------------------------------------------------------------ +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Acquire_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Release_CompareAndSwap( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), + old_value, new_value); +} + +inline void NoBarrier_Store(volatile AtomicWord *ptr, AtomicWord value) { + NoBarrier_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Acquire_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Release_Store( + reinterpret_cast<volatile AtomicWordCastType*>(ptr), value); +} + +inline AtomicWord NoBarrier_Load(volatile const AtomicWord *ptr) { + return NoBarrier_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { + return base::subtle::Acquire_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +inline AtomicWord Release_Load(volatile const AtomicWord* ptr) { + return base::subtle::Release_Load( + reinterpret_cast<volatile const AtomicWordCastType*>(ptr)); +} + +} // namespace base::subtle +} // namespace base +#endif // AtomicWordCastType + +// ------------------------------------------------------------------------ +// Commented out type definitions and method declarations for documentation +// of the interface provided by this module. +// ------------------------------------------------------------------------ + +#if 0 + +// Signed 32-bit type that supports the atomic ops below, as well as atomic +// loads and stores. 
Instances must be naturally aligned. This type differs +// from AtomicWord in 64-bit binaries where AtomicWord is 64-bits. +typedef int32_t Atomic32; + +// Corresponding operations on Atomic32 +namespace base { +namespace subtle { + +// Signed 64-bit type that supports the atomic ops below, as well as atomic +// loads and stores. Instances must be naturally aligned. This type differs +// from AtomicWord in 32-bit binaries where AtomicWord is 32-bits. +typedef int64_t Atomic64; + +Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment); +Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment); +Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value); +void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value); +void Acquire_Store(volatile Atomic32* ptr, Atomic32 value); +void Release_Store(volatile Atomic32* ptr, Atomic32 value); +Atomic32 NoBarrier_Load(volatile const Atomic32* ptr); +Atomic32 Acquire_Load(volatile const Atomic32* ptr); +Atomic32 Release_Load(volatile const Atomic32* ptr); + +// Corresponding operations on Atomic64 +Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); +Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment); +Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment); + +Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value); +void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value); +void Acquire_Store(volatile Atomic64* ptr, Atomic64 value); +void Release_Store(volatile Atomic64* ptr, Atomic64 value); +Atomic64 NoBarrier_Load(volatile const Atomic64* ptr); +Atomic64 Acquire_Load(volatile const Atomic64* ptr); +Atomic64 Release_Load(volatile const Atomic64* ptr); +} // namespace base::subtle +} // namespace base + +void MemoryBarrier(); + +#endif // 0 + + +// ------------------------------------------------------------------------ +// The following are to be deprecated when all uses have been changed to +// use the base::subtle namespace. 
+// ------------------------------------------------------------------------ + +#ifdef AtomicWordCastType +// AtomicWord versions to be deprecated +inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} + +inline AtomicWord Release_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} + +inline void Acquire_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Acquire_Store(ptr, value); +} + +inline void Release_Store(volatile AtomicWord* ptr, AtomicWord value) { + return base::subtle::Release_Store(ptr, value); +} + +inline AtomicWord Acquire_Load(volatile const AtomicWord* ptr) { + return base::subtle::Acquire_Load(ptr); +} + +inline AtomicWord Release_Load(volatile const AtomicWord* ptr) { + return base::subtle::Release_Load(ptr); +} +#endif // AtomicWordCastType + +// 32-bit Acquire/Release operations to be deprecated. + +inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} +inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr, + Atomic32 old_value, + Atomic32 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} +inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { + base::subtle::Acquire_Store(ptr, value); +} +inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { + return base::subtle::Release_Store(ptr, value); +} +inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) { + return base::subtle::Acquire_Load(ptr); +} +inline Atomic32 Release_Load(volatile const Atomic32* ptr) { + return base::subtle::Release_Load(ptr); +} + +#ifdef BASE_HAS_ATOMIC64 + +// 64-bit Acquire/Release operations to be deprecated. + +inline base::subtle::Atomic64 Acquire_CompareAndSwap( + volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { + return base::subtle::Acquire_CompareAndSwap(ptr, old_value, new_value); +} +inline base::subtle::Atomic64 Release_CompareAndSwap( + volatile base::subtle::Atomic64* ptr, + base::subtle::Atomic64 old_value, base::subtle::Atomic64 new_value) { + return base::subtle::Release_CompareAndSwap(ptr, old_value, new_value); +} +inline void Acquire_Store( + volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + base::subtle::Acquire_Store(ptr, value); +} +inline void Release_Store( + volatile base::subtle::Atomic64* ptr, base::subtle::Atomic64 value) { + return base::subtle::Release_Store(ptr, value); +} +inline base::subtle::Atomic64 Acquire_Load( + volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Acquire_Load(ptr); +} +inline base::subtle::Atomic64 Release_Load( + volatile const base::subtle::Atomic64* ptr) { + return base::subtle::Release_Load(ptr); +} + +#endif // BASE_HAS_ATOMIC64 + +#endif // THREAD_ATOMICOPS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/basictypes.h b/third_party/tcmalloc/chromium/src/base/basictypes.h new file mode 100644 index 0000000..9991413 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/basictypes.h @@ -0,0 +1,327 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef _BASICTYPES_H_ +#define _BASICTYPES_H_ + +#include <config.h> +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // gets us PRId64, etc +#endif + +// To use this in an autoconf setting, make sure you run the following +// autoconf macros: +// AC_HEADER_STDC /* for stdint_h and inttypes_h */ +// AC_CHECK_TYPES([__int64]) /* defined in some windows platforms */ + +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // uint16_t might be here; PRId64 too. +#endif +#ifdef HAVE_STDINT_H +#include <stdint.h> // to get uint16_t (ISO naming madness) +#endif +#include <sys/types.h> // our last best hope for uint16_t + +// Standard typedefs +// All Google code is compiled with -funsigned-char to make "char" +// unsigned. Google code therefore doesn't need a "uchar" type. +// TODO(csilvers): how do we make sure unsigned-char works on non-gcc systems? +typedef signed char schar; +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; + +// NOTE: unsigned types are DANGEROUS in loops and other arithmetical +// places. Use the signed types unless your variable represents a bit +// pattern (eg a hash value) or you really need the extra bit. Do NOT +// use 'unsigned' to express "this value should always be positive"; +// use assertions for this. 
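+// (An illustrative sketch of the danger, not from the original sources:
+//    for (uint32 i = n - 1; i >= 0; --i) { ... }
+//  never terminates, because "i >= 0" is always true for an unsigned type and
+//  decrementing past zero silently wraps around to a huge value.)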
+ +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; + +const uint16 kuint16max = ( (uint16) 0xFFFF); +const uint32 kuint32max = ( (uint32) 0xFFFFFFFF); +const uint64 kuint64max = ( (((uint64) kuint32max) << 32) | kuint32max ); + +const int8 kint8max = ( ( int8) 0x7F); +const int16 kint16max = ( ( int16) 0x7FFF); +const int32 kint32max = ( ( int32) 0x7FFFFFFF); +const int64 kint64max = ( ((( int64) kint32max) << 32) | kuint32max ); + +const int8 kint8min = ( ( int8) 0x80); +const int16 kint16min = ( ( int16) 0x8000); +const int32 kint32min = ( ( int32) 0x80000000); +const int64 kint64min = ( ((( int64) kint32min) << 32) | 0 ); + +// Define the "portable" printf and scanf macros, if they're not +// already there (via the inttypes.h we #included above, hopefully). +// Mostly it's old systems that don't support inttypes.h, so we assume +// they're 32 bit. +#ifndef PRIx64 +#define PRIx64 "llx" +#endif +#ifndef SCNx64 +#define SCNx64 "llx" +#endif +#ifndef PRId64 +#define PRId64 "lld" +#endif +#ifndef SCNd64 +#define SCNd64 "lld" +#endif +#ifndef PRIu64 +#define PRIu64 "llu" +#endif +#ifndef PRIxPTR +#define PRIxPTR "lx" +#endif + +// Also allow for printing of a pthread_t. +#define GPRIuPTHREAD "lu" +#define GPRIxPTHREAD "lx" +#if defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__APPLE__) +#define PRINTABLE_PTHREAD(pthreadt) reinterpret_cast<uintptr_t>(pthreadt) +#else +#define PRINTABLE_PTHREAD(pthreadt) pthreadt +#endif + +// A macro to disallow the evil copy constructor and operator= functions +// This should be used in the private: declarations for a class +#define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +// An alternate name that leaves out the moral judgment... :-) +#define DISALLOW_COPY_AND_ASSIGN(TypeName) DISALLOW_EVIL_CONSTRUCTORS(TypeName) + +// The COMPILE_ASSERT macro can be used to verify that a compile time +// expression is true. For example, you could use it to verify the +// size of a static array: +// +// COMPILE_ASSERT(sizeof(num_content_type_names) == sizeof(int), +// content_type_names_incorrect_size); +// +// or to make sure a struct is smaller than a certain size: +// +// COMPILE_ASSERT(sizeof(foo) < 128, foo_too_large); +// +// The second argument to the macro is the name of the variable. If +// the expression is false, most compilers will issue a warning/error +// containing the name of the variable. +// +// Implementation details of COMPILE_ASSERT: +// +// - COMPILE_ASSERT works by defining an array type that has -1 +// elements (and thus is invalid) when the expression is false. +// +// - The simpler definition +// +// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1] +// +// does not work, as gcc supports variable-length arrays whose sizes +// are determined at run-time (this is gcc's extension and not part +// of the C++ standard). As a result, gcc fails to reject the +// following code with the simple definition: +// +// int foo; +// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is +// // not a compile-time constant. +// +// - By using the type CompileAssert<(bool(expr))>, we ensures that +// expr is a compile-time constant. (Template arguments must be +// determined at compile-time.) +// +// - The outter parentheses in CompileAssert<(bool(expr))> are necessary +// to work around a bug in gcc 3.4.4 and 4.0.1. 
If we had written +// +// CompileAssert<bool(expr)> +// +// instead, these compilers will refuse to compile +// +// COMPILE_ASSERT(5 > 0, some_message); +// +// (They seem to think the ">" in "5 > 0" marks the end of the +// template argument list.) +// +// - The array size is (bool(expr) ? 1 : -1), instead of simply +// +// ((expr) ? 1 : -1). +// +// This is to avoid running into a bug in MS VC 7.1, which +// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. + +template <bool> +struct CompileAssert { +}; + +#define COMPILE_ASSERT(expr, msg) \ + typedef CompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1] + +#define arraysize(a) (sizeof(a) / sizeof(*(a))) + +#define OFFSETOF_MEMBER(strct, field) \ + (reinterpret_cast<char*>(&reinterpret_cast<strct*>(16)->field) - \ + reinterpret_cast<char*>(16)) + +#ifdef HAVE___ATTRIBUTE__ +# define ATTRIBUTE_WEAK __attribute__((weak)) +# define ATTRIBUTE_NOINLINE __attribute__((noinline)) +#else +# define ATTRIBUTE_WEAK +# define ATTRIBUTE_NOINLINE +#endif + +// Section attributes are supported for both ELF and Mach-O, but in +// very different ways. Here's the API we provide: +// 1) ATTRIBUTE_SECTION: put this with the declaration of all functions +// you want to be in the same linker section +// 2) DEFINE_ATTRIBUTE_SECTION_VARS: must be called once per unique +// name. You want to make sure this is executed before any +// DECLARE_ATTRIBUTE_SECTION_VARS; the easiest way is to put them +// in the same .cc file. Put this call at the global level. +// 3) INIT_ATTRIBUTE_SECTION_VARS: you can scatter calls to this in +// multiple places to help ensure execution before any +// DECLARE_ATTRIBUTE_SECTION_VARS. You must have at least one +// DEFINE, but you can have many INITs. Put each in its own scope. +// 4) DECLARE_ATTRIBUTE_SECTION_VARS: must be called before using +// ATTRIBUTE_SECTION_START or ATTRIBUTE_SECTION_STOP on a name. +// Put this call at the global level. +// 5) ATTRIBUTE_SECTION_START/ATTRIBUTE_SECTION_STOP: call this to say +// where in memory a given section is. All functions declared with +// ATTRIBUTE_SECTION are guaranteed to be between START and STOP. + +#if defined(HAVE___ATTRIBUTE__) && defined(__ELF__) +# define ATTRIBUTE_SECTION(name) __attribute__ ((section (#name))) + + // Weak section declaration to be used as a global declaration + // for ATTRIBUTE_SECTION_START|STOP(name) to compile and link + // even without functions with ATTRIBUTE_SECTION(name). +# define DECLARE_ATTRIBUTE_SECTION_VARS(name) \ + extern char __start_##name[] ATTRIBUTE_WEAK; \ + extern char __stop_##name[] ATTRIBUTE_WEAK +# define INIT_ATTRIBUTE_SECTION_VARS(name) // no-op for ELF +# define DEFINE_ATTRIBUTE_SECTION_VARS(name) // no-op for ELF + + // Return void* pointers to start/end of a section of code with functions + // having ATTRIBUTE_SECTION(name), or 0 if no such function exists. + // One must DECLARE_ATTRIBUTE_SECTION(name) for this to compile and link. 
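+  // A minimal usage sketch (the section name "my_fns" and the function Foo
+  // are hypothetical), following the numbered steps above:
+  //   void Foo() ATTRIBUTE_SECTION(my_fns);            // 1: tag each function
+  //   DEFINE_ATTRIBUTE_SECTION_VARS(my_fns);           // 2: once, global scope
+  //   DECLARE_ATTRIBUTE_SECTION_VARS(my_fns);          // 4: before START/STOP
+  //   void* start = ATTRIBUTE_SECTION_START(my_fns);   // 5
+  //   void* stop  = ATTRIBUTE_SECTION_STOP(my_fns);    // 5
+  // (Step 3, INIT_ATTRIBUTE_SECTION_VARS, is a no-op for ELF.)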
+# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name)) +# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name)) +# define HAVE_ATTRIBUTE_SECTION_START 1 + +#elif defined(HAVE___ATTRIBUTE__) && defined(__MACH__) +# define ATTRIBUTE_SECTION(name) __attribute__ ((section ("__TEXT, " #name))) + +#include <mach-o/getsect.h> +#include <mach-o/dyld.h> +class AssignAttributeStartEnd { + public: + AssignAttributeStartEnd(const char* name, char** pstart, char** pend) { + // Find out what dynamic library name is defined in + if (_dyld_present()) { + for (int i = _dyld_image_count() - 1; i >= 0; --i) { + const mach_header* hdr = _dyld_get_image_header(i); +#ifdef MH_MAGIC_64 + if (hdr->magic == MH_MAGIC_64) { + uint64_t len; + *pstart = getsectdatafromheader_64((mach_header_64*)hdr, + "__TEXT", name, &len); + if (*pstart) { // NULL if not defined in this dynamic library + *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc + *pend = *pstart + len; + return; + } + } +#endif + if (hdr->magic == MH_MAGIC) { + uint32_t len; + *pstart = getsectdatafromheader(hdr, "__TEXT", name, &len); + if (*pstart) { // NULL if not defined in this dynamic library + *pstart += _dyld_get_image_vmaddr_slide(i); // correct for reloc + *pend = *pstart + len; + return; + } + } + } + } + // If we get here, not defined in a dll at all. See if defined statically. + unsigned long len; // don't ask me why this type isn't uint32_t too... + *pstart = getsectdata("__TEXT", name, &len); + *pend = *pstart + len; + } +}; + +#define DECLARE_ATTRIBUTE_SECTION_VARS(name) \ + extern char* __start_##name; \ + extern char* __stop_##name + +#define INIT_ATTRIBUTE_SECTION_VARS(name) \ + DECLARE_ATTRIBUTE_SECTION_VARS(name); \ + static const AssignAttributeStartEnd __assign_##name( \ + #name, &__start_##name, &__stop_##name) + +#define DEFINE_ATTRIBUTE_SECTION_VARS(name) \ + char* __start_##name, *__stop_##name; \ + INIT_ATTRIBUTE_SECTION_VARS(name) + +# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(__start_##name)) +# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(__stop_##name)) +# define HAVE_ATTRIBUTE_SECTION_START 1 + +#else // not HAVE___ATTRIBUTE__ && __ELF__, nor HAVE___ATTRIBUTE__ && __MACH__ +# define ATTRIBUTE_SECTION(name) +# define DECLARE_ATTRIBUTE_SECTION_VARS(name) +# define INIT_ATTRIBUTE_SECTION_VARS(name) +# define DEFINE_ATTRIBUTE_SECTION_VARS(name) +# define ATTRIBUTE_SECTION_START(name) (reinterpret_cast<void*>(0)) +# define ATTRIBUTE_SECTION_STOP(name) (reinterpret_cast<void*>(0)) + +#endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__ + +// The following enum should be used only as a constructor argument to indicate +// that the variable has static storage class, and that the constructor should +// do nothing to its state. It indicates to the reader that it is legal to +// declare a static nistance of the class, provided the constructor is given +// the base::LINKER_INITIALIZED argument. Normally, it is unsafe to declare a +// static variable that has a constructor or a destructor because invocation +// order is undefined. 
However, IF the type can be initialized by filling with +// zeroes (which the loader does for static variables), AND the destructor also +// does nothing to the storage, then a constructor declared as +// explicit MyClass(base::LinkerInitialized x) {} +// and invoked as +// static MyClass my_variable_name(base::LINKER_INITIALIZED); +namespace base { +enum LinkerInitialized { LINKER_INITIALIZED }; +} + +#endif // _BASICTYPES_H_ diff --git a/third_party/tcmalloc/chromium/src/base/commandlineflags.h b/third_party/tcmalloc/chromium/src/base/commandlineflags.h new file mode 100644 index 0000000..54bf94fa --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/commandlineflags.h @@ -0,0 +1,131 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file is a compatibility layer that defines Google's version of +// command line flags that are used for configuration. +// +// We put flags into their own namespace. It is purposefully +// named in an opaque way that people should have trouble typing +// directly. The idea is that DEFINE puts the flag in the weird +// namespace, and DECLARE imports the flag from there into the +// current namespace. The net result is to force people to use +// DECLARE to get access to a flag, rather than saying +// extern bool FLAGS_logtostderr; +// or some such instead. We want this so we can put extra +// functionality (like sanity-checking) in DECLARE if we want, +// and make sure it is picked up everywhere. +// +// We also put the type of the variable in the namespace, so that +// people can't DECLARE_int32 something that they DEFINE_bool'd +// elsewhere. 
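+// A usage sketch (the flag and environment-variable names here are purely
+// illustrative, using the EnvToBool helper defined below):
+//
+//   // in the .cc file that owns the flag:
+//   DEFINE_bool(logtostderr, EnvToBool("EXAMPLE_LOGTOSTDERR", false),
+//               "Send log messages to stderr rather than to log files");
+//
+//   // in any other file that reads it:
+//   DECLARE_bool(logtostderr);
+//   ... if (FLAGS_logtostderr) { ... }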
+#ifndef BASE_COMMANDLINEFLAGS_H_ +#define BASE_COMMANDLINEFLAGS_H_ + +#include <config.h> +#include <string> +#include <string.h> // for memchr +#include <stdlib.h> // for getenv +#include "base/basictypes.h" + +#define DECLARE_VARIABLE(type, name) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ + extern PERFTOOLS_DLL_DECL type FLAGS_##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name + +#define DEFINE_VARIABLE(type, name, value, meaning) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ + PERFTOOLS_DLL_DECL type FLAGS_##name(value); \ + char FLAGS_no##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name + +// bool specialization +#define DECLARE_bool(name) \ + DECLARE_VARIABLE(bool, name) +#define DEFINE_bool(name, value, meaning) \ + DEFINE_VARIABLE(bool, name, value, meaning) + +// int32 specialization +#define DECLARE_int32(name) \ + DECLARE_VARIABLE(int32, name) +#define DEFINE_int32(name, value, meaning) \ + DEFINE_VARIABLE(int32, name, value, meaning) + +// int64 specialization +#define DECLARE_int64(name) \ + DECLARE_VARIABLE(int64, name) +#define DEFINE_int64(name, value, meaning) \ + DEFINE_VARIABLE(int64, name, value, meaning) + +#define DECLARE_uint64(name) \ + DECLARE_VARIABLE(uint64, name) +#define DEFINE_uint64(name, value, meaning) \ + DEFINE_VARIABLE(uint64, name, value, meaning) + +// double specialization +#define DECLARE_double(name) \ + DECLARE_VARIABLE(double, name) +#define DEFINE_double(name, value, meaning) \ + DEFINE_VARIABLE(double, name, value, meaning) + +// Special case for string, because we have to specify the namespace +// std::string, which doesn't play nicely with our FLAG__namespace hackery. +#define DECLARE_string(name) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \ + extern std::string FLAGS_##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name +#define DEFINE_string(name, value, meaning) \ + namespace FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead { \ + std::string FLAGS_##name(value); \ + char FLAGS_no##name; \ + } \ + using FLAG__namespace_do_not_use_directly_use_DECLARE_string_instead::FLAGS_##name + +// These macros (could be functions, but I don't want to bother with a .cc +// file), make it easier to initialize flags from the environment. + +#define EnvToString(envname, dflt) \ + (!getenv(envname) ? (dflt) : getenv(envname)) + +#define EnvToBool(envname, dflt) \ + (!getenv(envname) ? (dflt) : memchr("tTyY1\0", getenv(envname)[0], 6) != NULL) + +#define EnvToInt(envname, dflt) \ + (!getenv(envname) ? (dflt) : strtol(getenv(envname), NULL, 10)) + +#define EnvToInt64(envname, dflt) \ + (!getenv(envname) ? (dflt) : strtoll(getenv(envname), NULL, 10)) + +#define EnvToDouble(envname, dflt) \ + (!getenv(envname) ? (dflt) : strtod(getenv(envname), NULL)) + +#endif // BASE_COMMANDLINEFLAGS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/cycleclock.h b/third_party/tcmalloc/chromium/src/base/cycleclock.h new file mode 100644 index 0000000..8af664ed --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/cycleclock.h @@ -0,0 +1,110 @@ +// Copyright (c) 2004, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// ---------------------------------------------------------------------- +// CycleClock +// A CycleClock tells you the current time in Cycles. The "time" +// is actually time since power-on. This is like time() but doesn't +// involve a system call and is much more precise. +// +// NOTE: Not all cpu/platform/kernel combinations guarantee that this +// clock increments at a constant rate or is synchronized across all logical +// cpus in a system. +// +// Also, in some out of order CPU implementations, the CycleClock is not +// serializing. So if you're trying to count at cycles granularity, your +// data might be inaccurate due to out of order instruction execution. +// ---------------------------------------------------------------------- + +#ifndef GOOGLE_BASE_CYCLECLOCK_H_ +#define GOOGLE_BASE_CYCLECLOCK_H_ + +#include "base/basictypes.h" // make sure we get the def for int64 +#if defined(__MACH__) && defined(__APPLE__) +#include <mach/mach_time.h> +#endif + +// NOTE: only i386 and x86_64 have been well tested. +// PPC, sparc, alpha, and ia64 are based on +// http://peter.kuscsik.com/wordpress/?p=14 +// with modifications by m3b. cf +// https://setisvn.ssl.berkeley.edu/svn/lib/fftw-3.0.1/kernel/cycle.h +struct CycleClock { + // This should return the number of cycles since power-on. Thread-safe. + static inline int64 Now() { +#if defined(__MACH__) && defined(__APPLE__) + // this goes at the top because we need ALL Macs, regardless + // of architecture, to return the number of "mach time units" + // that have passes since startup. See sysinfo.cc where + // InitializeSystemInfo() sets the supposed cpu clock frequency of macs + // to the number of mach time units per second, not actual + // CPU clock frequency (which can change in the face of CPU + // frequency scaling). also note that when the Mac sleeps, + // this counter pauses; it does not continue counting, nor resets + // to zero. 
+ return mach_absolute_time(); +#elif defined(__i386__) + int64 ret; + __asm__ volatile ("rdtsc" + : "=A" (ret) ); + return ret; +#elif defined(__x86_64__) || defined(__amd64__) + uint64 low, high; + __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high)); + return (high << 32) | low; +#elif defined(__powerpc__) || defined(__ppc__) + // This returns a time-base, which is not always precisely a cycle-count. + int64 tbl, tbu0, tbu1; + asm("mftbu %0" : "=r" (tbu0)); + asm("mftb %0" : "=r" (tbl )); + asm("mftbu %0" : "=r" (tbu1)); + tbl &= -static_cast<int64>(tbu0 == tbu1); + // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage) + return (tbu1 << 32) | tbl; +#elif defined(__sparc__) + int64 tick; + asm(".byte 0x83, 0x41, 0x00, 0x00"); + asm("mov %%g1, %0" : "=r" (tick)); + return tick; +#elif defined(__ia64__) + int64 itc; + asm("mov %0 = ar.itc" : "=r" (itc)); + return itc; +#elif defined(_MSC_VER) && defined(_M_IX86) + _asm rdtsc +#else + // We could define __alpha here as well, but it only has a 32-bit + // timer (good for like 4 seconds), which isn't very useful. +#error You need to define CycleTimer for your O/S and CPU +#endif + } +}; + + +#endif // GOOGLE_BASE_CYCLECLOCK_H_ diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc new file mode 100644 index 0000000..c8bbcd7 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.cc @@ -0,0 +1,110 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Kostya Serebryany + */ + +#include <config.h> +#include <stdlib.h> +#include <string.h> + +#include "base/dynamic_annotations.h" +#include "base/sysinfo.h" + +// Each function is empty and called (via a macro) only in debug mode. +// The arguments are captured by dynamic tools at runtime. 
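+// For instance, in debug builds ANNOTATE_CONDVAR_SIGNAL(cv) (defined in
+// dynamic_annotations.h) expands to a call of the form
+//   AnnotateCondVarSignal(__FILE__, __LINE__, cv);
+// which a tool such as Helgrind can intercept; when NDEBUG is defined the
+// annotation macros expand to nothing.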
+ +extern "C" void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock){} +extern "C" void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock){} +extern "C" void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w){} +extern "C" void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w){} +extern "C" void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock){} +extern "C" void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv){} +extern "C" void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv){} +extern "C" void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size){} +extern "C" void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq){} +extern "C" void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq){} +extern "C" void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq){} +extern "C" void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq){} +extern "C" void AnnotateNewMemory(const char *file, int line, + const volatile void *mem, + long size){} +extern "C" void AnnotateExpectRace(const char *file, int line, + const volatile void *mem, + const char *description){} +extern "C" void AnnotateBenignRace(const char *file, int line, + const volatile void *mem, + const char *description){} +extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu){} +extern "C" void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg){} +extern "C" void AnnotateThreadName(const char *file, int line, + const char *name){} +extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line){} +extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line){} +extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line){} +extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line){} +extern "C" void AnnotateNoOp(const char *file, int line, + const volatile void *arg){} + +static int GetRunningOnValgrind() { + const char *running_on_valgrind_str = GetenvBeforeMain("RUNNING_ON_VALGRIND"); + if (running_on_valgrind_str) { + return strcmp(running_on_valgrind_str, "0") != 0; + } + return 0; +} + +// When running under valgrind, this function will be intercepted +// and a non-zero value will be returned. +// Some valgrind-based tools (e.g. callgrind) do not intercept functions, +// so we also read environment variable. +extern "C" int RunningOnValgrind() { + static int running_on_valgrind = GetRunningOnValgrind(); + return running_on_valgrind; +} diff --git a/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h new file mode 100644 index 0000000..a2a268f --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/dynamic_annotations.h @@ -0,0 +1,475 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Kostya Serebryany + */ + +// This file defines dynamic annotations for use with dynamic analysis +// tool such as valgrind, PIN, etc. +// +// Dynamic annotation is a source code annotation that affects +// the generated code (that is, the annotation is not a comment). +// Each such annotation is attached to a particular +// instruction and/or to a particular object (address) in the program. +// +// The annotations that should be used by users are macros in all upper-case +// (e.g., ANNOTATE_NEW_MEMORY). +// +// Actual implementation of these macros may differ depending on the +// dynamic analysis tool being used. +// +// This file supports the following dynamic analysis tools: +// - None (NDEBUG is defined). +// Macros are defined empty. +// - Helgrind (NDEBUG is not defined). +// Macros are defined as calls to non-inlinable empty functions +// that are intercepted by helgrind. +// +#ifndef BASE_DYNAMIC_ANNOTATIONS_H_ +#define BASE_DYNAMIC_ANNOTATIONS_H_ + +#include "base/thread_annotations.h" + +// All the annotation macros are in effect only in debug mode. +#ifndef NDEBUG + + // ------------------------------------------------------------- + // Annotations useful when implementing condition variables such as CondVar, + // using conditional critical sections (Await/LockWhen) and when constructing + // user-defined synchronization mechanisms. + // + // The annotations ANNOTATE_HAPPENS_BEFORE() and ANNOTATE_HAPPENS_AFTER() can + // be used to define happens-before arcs in user-defined synchronization + // mechanisms: the race detector will infer an arc from the former to the + // latter when they share the same argument pointer. 
+ // + // Example 1 (reference counting): + // + // void Unref() { + // ANNOTATE_HAPPENS_BEFORE(&refcount_); + // if (AtomicDecrementByOne(&refcount_) == 0) { + // ANNOTATE_HAPPENS_AFTER(&refcount_); + // delete this; + // } + // } + // + // Example 2 (message queue): + // + // void MyQueue::Put(Type *e) { + // MutexLock lock(&mu_); + // ANNOTATE_HAPPENS_BEFORE(e); + // PutElementIntoMyQueue(e); + // } + // + // Type *MyQueue::Get() { + // MutexLock lock(&mu_); + // Type *e = GetElementFromMyQueue(); + // ANNOTATE_HAPPENS_AFTER(e); + // return e; + // } + // + // Note: when possible, please use the existing reference counting and message + // queue implementations instead of inventing new ones. + + // Report that wait on the condition variable at address "cv" has succeeded + // and the lock at address "lock" is held. + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) + + // Report that wait on the condition variable at "cv" has succeeded. Variant + // w/o lock. + #define ANNOTATE_CONDVAR_WAIT(cv) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) + + // Report that we are about to signal on the condition variable at address + // "cv". + #define ANNOTATE_CONDVAR_SIGNAL(cv) \ + AnnotateCondVarSignal(__FILE__, __LINE__, cv) + + // Report that we are about to signal_all on the condition variable at "cv". + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ + AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) + + // Annotations for user-defined synchronization mechanisms. + #define ANNOTATE_HAPPENS_BEFORE(obj) ANNOTATE_CONDVAR_SIGNAL(obj) + #define ANNOTATE_HAPPENS_AFTER(obj) ANNOTATE_CONDVAR_WAIT(obj) + + // Report that the bytes in the range [pointer, pointer+size) are about + // to be published safely. The race checker will create a happens-before + // arc from the call ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + // subsequent accesses to this memory. + #define ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ + AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) + + // Report that the bytes in the range [pointer, pointer+size) are not shared + // between threads any more and can be safely used by the current thread w/o + // synchronization. The race checker will create a happens-before arc from + // all previous accesses to this memory to this call. + // + // This annotation could be applied to complex objects, such as STL + // containers, with one condition: the accesses to the object itself + // and its internal data should not be separated with any synchronization. + // + // Example that works: + // + // map<int, int> the_map; + // void Thread1() { + // MutexLock lock(&mu); + // // Ok: accesses to the_map and its internal data is not separated by + // // synchronization. + // the_map[1]++; + // } + // void Thread2() { + // { + // MutexLock lock(&mu); + // ... + // // because of some reason we know that the_map will not be used by + // // other threads any more + // ANNOTATE_UNPUBLISH_MEMORY_RANGE(&the_map, sizeof(the_map)); + // } + // the_map->DoSomething(); + // } + // + // Example that does not work (due to the way happens-before arcs are + // represented in some race detectors): + // + // void Thread1() { + // MutexLock lock(&mu); + // int *guts_of_the_map = &(*the_map)[1]; + // // we have some synchronization between access to 'c' and its guts. + // // This will make ANNOTATE_UNPUBLISH_MEMORY_RANGE in Thread2 useless. + // some_other_lock_or_other_synchronization_utility.Lock(); + // (*guts_of_the_map)++; + // ... 
+ // } + // + // void Thread1() { // same as above... + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size) \ + AnnotateUnpublishMemoryRange(__FILE__, __LINE__, pointer, size) + + // This annotation should be used to annotate thread-safe swapping of + // containers. Required only when using hybrid (i.e. not pure happens-before) + // detectors. + // + // This annotation has the same limitation as ANNOTATE_UNPUBLISH_MEMORY_RANGE + // (see above). + // + // Example: + // map<int, int> the_map; + // void Thread1() { + // MutexLock lock(&mu); + // the_map[1]++; + // } + // void Thread2() { + // map<int,int> tmp; + // { + // MutexLock lock(&mu); + // the_map.swap(tmp); + // ANNOTATE_SWAP_MEMORY_RANGE(&the_map, sizeof(the_map)); + // } + // tmp->DoSomething(); + // } + #define ANNOTATE_SWAP_MEMORY_RANGE(pointer, size) \ + do { \ + ANNOTATE_UNPUBLISH_MEMORY_RANGE(pointer, size); \ + ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size); \ + } while (0) + + // Instruct the tool to create a happens-before arc between mu->Unlock() and + // mu->Lock(). This annotation may slow down the race detector and hide real + // races. Normally it is used only when it would be difficult to annotate each + // of the mutex's critical sections individually using the annotations above. + // This annotation makes sense only for hybrid race detectors. For pure + // happens-before detectors this is a no-op. For more details see + // http://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + // Deprecated. Use ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX. + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + // ------------------------------------------------------------- + // Annotations useful when defining memory allocators, or when memory that + // was protected in one way starts to be protected in another. + + // Report that a new memory at "address" of size "size" has been allocated. + // This might be used when the memory has been retrieved from a free list and + // is about to be reused, or when a the locking discipline for a variable + // changes. + #define ANNOTATE_NEW_MEMORY(address, size) \ + AnnotateNewMemory(__FILE__, __LINE__, address, size) + + // ------------------------------------------------------------- + // Annotations useful when defining FIFO queues that transfer data between + // threads. + + // Report that the producer-consumer queue (such as ProducerConsumerQueue) at + // address "pcq" has been created. The ANNOTATE_PCQ_* annotations + // should be used only for FIFO queues. For non-FIFO queues use + // ANNOTATE_HAPPENS_BEFORE (for put) and ANNOTATE_HAPPENS_AFTER (for get). + #define ANNOTATE_PCQ_CREATE(pcq) \ + AnnotatePCQCreate(__FILE__, __LINE__, pcq) + + // Report that the queue at address "pcq" is about to be destroyed. + #define ANNOTATE_PCQ_DESTROY(pcq) \ + AnnotatePCQDestroy(__FILE__, __LINE__, pcq) + + // Report that we are about to put an element into a FIFO queue at address + // "pcq". + #define ANNOTATE_PCQ_PUT(pcq) \ + AnnotatePCQPut(__FILE__, __LINE__, pcq) + + // Report that we've just got an element from a FIFO queue at address "pcq". + #define ANNOTATE_PCQ_GET(pcq) \ + AnnotatePCQGet(__FILE__, __LINE__, pcq) + + // ------------------------------------------------------------- + // Annotations that suppress errors. 
It is usually better to express the + // program's synchronization using the other annotations, but these can + // be used when all else fails. + + // Report that we may have a benign race on at "address". + // Insert at the point where "address" has been allocated, preferably close + // to the point where the race happens. + // See also ANNOTATE_BENIGN_RACE_STATIC. + #define ANNOTATE_BENIGN_RACE(address, description) \ + AnnotateBenignRace(__FILE__, __LINE__, address, description) + + // Request the analysis tool to ignore all reads in the current thread + // until ANNOTATE_IGNORE_READS_END is called. + // Useful to ignore intentional racey reads, while still checking + // other reads and all writes. + // See also ANNOTATE_UNPROTECTED_READ. + #define ANNOTATE_IGNORE_READS_BEGIN() \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + + // Stop ignoring reads. + #define ANNOTATE_IGNORE_READS_END() \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + + // Similar to ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. + #define ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + // Stop ignoring writes. + #define ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + + // Start ignoring all memory accesses (reads and writes). + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do {\ + ANNOTATE_IGNORE_READS_BEGIN();\ + ANNOTATE_IGNORE_WRITES_BEGIN();\ + }while(0)\ + + // Stop ignoring all memory accesses. + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do {\ + ANNOTATE_IGNORE_WRITES_END();\ + ANNOTATE_IGNORE_READS_END();\ + }while(0)\ + + // ------------------------------------------------------------- + // Annotations useful for debugging. + + // Request to trace every access to "address". + #define ANNOTATE_TRACE_MEMORY(address) \ + AnnotateTraceMemory(__FILE__, __LINE__, address) + + // Report the current thread name to a race detector. + #define ANNOTATE_THREAD_NAME(name) \ + AnnotateThreadName(__FILE__, __LINE__, name) + + // ------------------------------------------------------------- + // Annotations useful when implementing locks. They are not + // normally needed by modules that merely use locks. + // The "lock" argument is a pointer to the lock object. + + // Report that a lock has been created at address "lock". + #define ANNOTATE_RWLOCK_CREATE(lock) \ + AnnotateRWLockCreate(__FILE__, __LINE__, lock) + + // Report that the lock at address "lock" is about to be destroyed. + #define ANNOTATE_RWLOCK_DESTROY(lock) \ + AnnotateRWLockDestroy(__FILE__, __LINE__, lock) + + // Report that the lock at address "lock" has been acquired. + // is_w=1 for writer lock, is_w=0 for reader lock. + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ + AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) + + // Report that the lock at address "lock" is about to be released. + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ + AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) + + // ------------------------------------------------------------- + // Annotations useful for testing race detectors. + + // Report that we expect a race on the variable at "address". + // Use only in unit tests for a race detector. + #define ANNOTATE_EXPECT_RACE(address, description) \ + AnnotateExpectRace(__FILE__, __LINE__, address, description) + + // A no-op. Insert where you like to test the interceptors. 
+ #define ANNOTATE_NO_OP(arg) \ + AnnotateNoOp(__FILE__, __LINE__, arg) + +#else // NDEBUG is defined + + #define ANNOTATE_RWLOCK_CREATE(lock) // empty + #define ANNOTATE_RWLOCK_DESTROY(lock) // empty + #define ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) // empty + #define ANNOTATE_RWLOCK_RELEASED(lock, is_w) // empty + #define ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) // empty + #define ANNOTATE_CONDVAR_WAIT(cv) // empty + #define ANNOTATE_CONDVAR_SIGNAL(cv) // empty + #define ANNOTATE_CONDVAR_SIGNAL_ALL(cv) // empty + #define ANNOTATE_HAPPENS_BEFORE(obj) // empty + #define ANNOTATE_HAPPENS_AFTER(obj) // empty + #define ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_SWAP_MEMORY_RANGE(address, size) // empty + #define ANNOTATE_PCQ_CREATE(pcq) // empty + #define ANNOTATE_PCQ_DESTROY(pcq) // empty + #define ANNOTATE_PCQ_PUT(pcq) // empty + #define ANNOTATE_PCQ_GET(pcq) // empty + #define ANNOTATE_NEW_MEMORY(address, size) // empty + #define ANNOTATE_EXPECT_RACE(address, description) // empty + #define ANNOTATE_BENIGN_RACE(address, description) // empty + #define ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) // empty + #define ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) // empty + #define ANNOTATE_TRACE_MEMORY(arg) // empty + #define ANNOTATE_THREAD_NAME(name) // empty + #define ANNOTATE_IGNORE_READS_BEGIN() // empty + #define ANNOTATE_IGNORE_READS_END() // empty + #define ANNOTATE_IGNORE_WRITES_BEGIN() // empty + #define ANNOTATE_IGNORE_WRITES_END() // empty + #define ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() // empty + #define ANNOTATE_IGNORE_READS_AND_WRITES_END() // empty + #define ANNOTATE_NO_OP(arg) // empty + +#endif // NDEBUG + +// Use the macros above rather than using these functions directly. 
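+// For example (illustrative only; "racy_counter" is a hypothetical variable),
+// prefer
+//
+//   ANNOTATE_BENIGN_RACE(&racy_counter, "intentionally racy stats counter");
+//
+// which supplies __FILE__ and __LINE__ automatically, over calling
+// AnnotateBenignRace() below by hand.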
+extern "C" void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock); +extern "C" void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock); +extern "C" void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w); +extern "C" void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w); +extern "C" void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock); +extern "C" void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv); +extern "C" void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv); +extern "C" void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +extern "C" void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +extern "C" void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq); +extern "C" void AnnotateNewMemory(const char *file, int line, + const volatile void *address, + long size); +extern "C" void AnnotateExpectRace(const char *file, int line, + const volatile void *address, + const char *description); +extern "C" void AnnotateBenignRace(const char *file, int line, + const volatile void *address, + const char *description); +extern "C" void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu); +extern "C" void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg); +extern "C" void AnnotateThreadName(const char *file, int line, + const char *name); +extern "C" void AnnotateIgnoreReadsBegin(const char *file, int line); +extern "C" void AnnotateIgnoreReadsEnd(const char *file, int line); +extern "C" void AnnotateIgnoreWritesBegin(const char *file, int line); +extern "C" void AnnotateIgnoreWritesEnd(const char *file, int line); +extern "C" void AnnotateNoOp(const char *file, int line, + const volatile void *arg); + +#ifndef NDEBUG + + // ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads. + // + // Instead of doing + // ANNOTATE_IGNORE_READS_BEGIN(); + // ... = x; + // ANNOTATE_IGNORE_READS_END(); + // one can use + // ... = ANNOTATE_UNPROTECTED_READ(x); + template <class T> + inline T ANNOTATE_UNPROTECTED_READ(const volatile T &x) + NO_THREAD_SAFETY_ANALYSIS { + ANNOTATE_IGNORE_READS_BEGIN(); + T res = x; + ANNOTATE_IGNORE_READS_END(); + return res; + } + + // Apply ANNOTATE_BENIGN_RACE to a static variable. + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \ + namespace { \ + class static_var ## _annotator { \ + public: \ + static_var ## _annotator() { \ + ANNOTATE_BENIGN_RACE(&static_var, \ + # static_var ": " description); \ + } \ + }; \ + static static_var ## _annotator the ## static_var ## _annotator;\ + } +#else // !NDEBUG + + #define ANNOTATE_UNPROTECTED_READ(x) (x) + #define ANNOTATE_BENIGN_RACE_STATIC(static_var, description) // empty + +#endif // !NDEBUG + +// Return non-zero value if running under valgrind. 
+extern "C" int RunningOnValgrind(); + + +#endif // BASE_DYNAMIC_ANNOTATIONS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/elfcore.h b/third_party/tcmalloc/chromium/src/base/elfcore.h new file mode 100644 index 0000000..34a96de --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/elfcore.h @@ -0,0 +1,385 @@ +/* Copyright (c) 2005-2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke, Carl Crous + */ + +#ifndef _ELFCORE_H +#define _ELFCORE_H +#ifdef __cplusplus +extern "C" { +#endif + +/* We currently only support x86-32, x86-64, ARM, and MIPS on Linux. + * Porting to other related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + defined(__mips__)) && defined(__linux) + +#include <stdarg.h> +#include <stdint.h> +#include <sys/types.h> +#include <config.h> + + +/* Define the DUMPER symbol to make sure that there is exactly one + * core dumper built into the library. + */ +#define DUMPER "ELF" + +/* By the time that we get a chance to read CPU registers in the + * calling thread, they are already in a not particularly useful + * state. Besides, there will be multiple frames on the stack that are + * just making the core file confusing. To fix this problem, we take a + * snapshot of the frame pointer, stack pointer, and instruction + * pointer at an earlier time, and then insert these values into the + * core file. 
+ */ + +#if defined(__i386__) || defined(__x86_64__) + typedef struct i386_regs { /* Normal (non-FPU) CPU registers */ + #ifdef __x86_64__ + #define BP rbp + #define SP rsp + #define IP rip + uint64_t r15,r14,r13,r12,rbp,rbx,r11,r10; + uint64_t r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax; + uint64_t rip,cs,eflags; + uint64_t rsp,ss; + uint64_t fs_base, gs_base; + uint64_t ds,es,fs,gs; + #else + #define BP ebp + #define SP esp + #define IP eip + uint32_t ebx, ecx, edx, esi, edi, ebp, eax; + uint16_t ds, __ds, es, __es; + uint16_t fs, __fs, gs, __gs; + uint32_t orig_eax, eip; + uint16_t cs, __cs; + uint32_t eflags, esp; + uint16_t ss, __ss; + #endif + } i386_regs; +#elif defined(__ARM_ARCH_3__) + typedef struct arm_regs { /* General purpose registers */ + #define BP uregs[11] /* Frame pointer */ + #define SP uregs[13] /* Stack pointer */ + #define IP uregs[15] /* Program counter */ + #define LR uregs[14] /* Link register */ + long uregs[18]; + } arm_regs; +#elif defined(__mips__) + typedef struct mips_regs { + unsigned long pad[6]; /* Unused padding to match kernel structures */ + unsigned long uregs[32]; /* General purpose registers. */ + unsigned long hi; /* Used for multiplication and division. */ + unsigned long lo; + unsigned long cp0_epc; /* Program counter. */ + unsigned long cp0_badvaddr; + unsigned long cp0_status; + unsigned long cp0_cause; + unsigned long unused; + } mips_regs; +#endif + +#if defined(__i386__) && defined(__GNUC__) + /* On x86 we provide an optimized version of the FRAME() macro, if the + * compiler supports a GCC-style asm() directive. This results in somewhat + * more accurate values for CPU registers. + */ + typedef struct Frame { + struct i386_regs uregs; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile ( \ + "push %%ebp\n" \ + "push %%ebx\n" \ + "mov %%ebx,0(%%eax)\n" \ + "mov %%ecx,4(%%eax)\n" \ + "mov %%edx,8(%%eax)\n" \ + "mov %%esi,12(%%eax)\n" \ + "mov %%edi,16(%%eax)\n" \ + "mov %%ebp,20(%%eax)\n" \ + "mov %%eax,24(%%eax)\n" \ + "mov %%ds,%%ebx\n" \ + "mov %%ebx,28(%%eax)\n" \ + "mov %%es,%%ebx\n" \ + "mov %%ebx,32(%%eax)\n" \ + "mov %%fs,%%ebx\n" \ + "mov %%ebx,36(%%eax)\n" \ + "mov %%gs,%%ebx\n" \ + "mov %%ebx, 40(%%eax)\n" \ + "call 0f\n" \ + "0:pop %%ebx\n" \ + "add $1f-0b,%%ebx\n" \ + "mov %%ebx,48(%%eax)\n" \ + "mov %%cs,%%ebx\n" \ + "mov %%ebx,52(%%eax)\n" \ + "pushf\n" \ + "pop %%ebx\n" \ + "mov %%ebx,56(%%eax)\n" \ + "mov %%esp,%%ebx\n" \ + "add $8,%%ebx\n" \ + "mov %%ebx,60(%%eax)\n" \ + "mov %%ss,%%ebx\n" \ + "mov %%ebx,64(%%eax)\n" \ + "pop %%ebx\n" \ + "pop %%ebp\n" \ + "1:" \ + : : "a" (&f) : "memory"); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + (r) = (f).uregs; \ + } while (0) +#elif defined(__x86_64__) && defined(__GNUC__) + /* The FRAME and SET_FRAME macros for x86_64. 
*/ + typedef struct Frame { + struct i386_regs uregs; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile ( \ + "push %%rbp\n" \ + "push %%rbx\n" \ + "mov %%r15,0(%%rax)\n" \ + "mov %%r14,8(%%rax)\n" \ + "mov %%r13,16(%%rax)\n" \ + "mov %%r12,24(%%rax)\n" \ + "mov %%rbp,32(%%rax)\n" \ + "mov %%rbx,40(%%rax)\n" \ + "mov %%r11,48(%%rax)\n" \ + "mov %%r10,56(%%rax)\n" \ + "mov %%r9,64(%%rax)\n" \ + "mov %%r8,72(%%rax)\n" \ + "mov %%rax,80(%%rax)\n" \ + "mov %%rcx,88(%%rax)\n" \ + "mov %%rdx,96(%%rax)\n" \ + "mov %%rsi,104(%%rax)\n" \ + "mov %%rdi,112(%%rax)\n" \ + "mov %%ds,%%rbx\n" \ + "mov %%rbx,184(%%rax)\n" \ + "mov %%es,%%rbx\n" \ + "mov %%rbx,192(%%rax)\n" \ + "mov %%fs,%%rbx\n" \ + "mov %%rbx,200(%%rax)\n" \ + "mov %%gs,%%rbx\n" \ + "mov %%rbx,208(%%rax)\n" \ + "call 0f\n" \ + "0:pop %%rbx\n" \ + "add $1f-0b,%%rbx\n" \ + "mov %%rbx,128(%%rax)\n" \ + "mov %%cs,%%rbx\n" \ + "mov %%rbx,136(%%rax)\n" \ + "pushf\n" \ + "pop %%rbx\n" \ + "mov %%rbx,144(%%rax)\n" \ + "mov %%rsp,%%rbx\n" \ + "add $16,%%ebx\n" \ + "mov %%rbx,152(%%rax)\n" \ + "mov %%ss,%%rbx\n" \ + "mov %%rbx,160(%%rax)\n" \ + "pop %%rbx\n" \ + "pop %%rbp\n" \ + "1:" \ + : : "a" (&f) : "memory"); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + (f).uregs.fs_base = (r).fs_base; \ + (f).uregs.gs_base = (r).gs_base; \ + (r) = (f).uregs; \ + } while (0) +#elif defined(__ARM_ARCH_3__) && defined(__GNUC__) + /* ARM calling conventions are a little more tricky. A little assembly + * helps in obtaining an accurate snapshot of all registers. + */ + typedef struct Frame { + struct arm_regs arm; + int errno_; + pid_t tid; + } Frame; + #define FRAME(f) Frame f; \ + do { \ + long cpsr; \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + __asm__ volatile( \ + "stmia %0, {r0-r15}\n" /* All integer regs */\ + : : "r"(&f.arm) : "memory"); \ + f.arm.uregs[16] = 0; \ + __asm__ volatile( \ + "mrs %0, cpsr\n" /* Condition code reg */\ + : "=r"(cpsr)); \ + f.arm.uregs[17] = cpsr; \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + /* Don't override the FPU status register. */\ + /* Use the value obtained from ptrace(). This*/\ + /* works, because our code does not perform */\ + /* any FPU operations, itself. 
*/\ + long fps = (f).arm.uregs[16]; \ + errno = (f).errno_; \ + (r) = (f).arm; \ + (r).uregs[16] = fps; \ + } while (0) +#elif defined(__mips__) && defined(__GNUC__) + typedef struct Frame { + struct mips_regs mips_regs; + int errno_; + pid_t tid; + } Frame; + #define MIPSREG(n) ({ register unsigned long r __asm__("$"#n); r; }) + #define FRAME(f) Frame f = { 0 }; \ + do { \ + unsigned long hi, lo; \ + register unsigned long pc __asm__("$31"); \ + f.mips_regs.uregs[ 0] = MIPSREG( 0); \ + f.mips_regs.uregs[ 1] = MIPSREG( 1); \ + f.mips_regs.uregs[ 2] = MIPSREG( 2); \ + f.mips_regs.uregs[ 3] = MIPSREG( 3); \ + f.mips_regs.uregs[ 4] = MIPSREG( 4); \ + f.mips_regs.uregs[ 5] = MIPSREG( 5); \ + f.mips_regs.uregs[ 6] = MIPSREG( 6); \ + f.mips_regs.uregs[ 7] = MIPSREG( 7); \ + f.mips_regs.uregs[ 8] = MIPSREG( 8); \ + f.mips_regs.uregs[ 9] = MIPSREG( 9); \ + f.mips_regs.uregs[10] = MIPSREG(10); \ + f.mips_regs.uregs[11] = MIPSREG(11); \ + f.mips_regs.uregs[12] = MIPSREG(12); \ + f.mips_regs.uregs[13] = MIPSREG(13); \ + f.mips_regs.uregs[14] = MIPSREG(14); \ + f.mips_regs.uregs[15] = MIPSREG(15); \ + f.mips_regs.uregs[16] = MIPSREG(16); \ + f.mips_regs.uregs[17] = MIPSREG(17); \ + f.mips_regs.uregs[18] = MIPSREG(18); \ + f.mips_regs.uregs[19] = MIPSREG(19); \ + f.mips_regs.uregs[20] = MIPSREG(20); \ + f.mips_regs.uregs[21] = MIPSREG(21); \ + f.mips_regs.uregs[22] = MIPSREG(22); \ + f.mips_regs.uregs[23] = MIPSREG(23); \ + f.mips_regs.uregs[24] = MIPSREG(24); \ + f.mips_regs.uregs[25] = MIPSREG(25); \ + f.mips_regs.uregs[26] = MIPSREG(26); \ + f.mips_regs.uregs[27] = MIPSREG(27); \ + f.mips_regs.uregs[28] = MIPSREG(28); \ + f.mips_regs.uregs[29] = MIPSREG(29); \ + f.mips_regs.uregs[30] = MIPSREG(30); \ + f.mips_regs.uregs[31] = MIPSREG(31); \ + __asm__ volatile ("mfhi %0" : "=r"(hi)); \ + __asm__ volatile ("mflo %0" : "=r"(lo)); \ + __asm__ volatile ("jal 1f; 1:nop" : "=r"(pc)); \ + f.mips_regs.hi = hi; \ + f.mips_regs.lo = lo; \ + f.mips_regs.cp0_epc = pc; \ + f.errno_ = errno; \ + f.tid = sys_gettid(); \ + } while (0) + #define SET_FRAME(f,r) \ + do { \ + errno = (f).errno_; \ + memcpy((r).uregs, (f).mips_regs.uregs, \ + 32*sizeof(unsigned long)); \ + (r).hi = (f).mips_regs.hi; \ + (r).lo = (f).mips_regs.lo; \ + (r).cp0_epc = (f).mips_regs.cp0_epc; \ + } while (0) +#else + /* If we do not have a hand-optimized assembly version of the FRAME() + * macro, we cannot reliably unroll the stack. So, we show a few additional + * stack frames for the coredumper. + */ + typedef struct Frame { + pid_t tid; + } Frame; + #define FRAME(f) Frame f; do { f.tid = sys_gettid(); } while (0) + #define SET_FRAME(f,r) do { } while (0) +#endif + + +/* Internal function for generating a core file. This API can change without + * notice and is only supposed to be used internally by the core dumper. + * + * This function works for both single- and multi-threaded core + * dumps. If called as + * + * FRAME(frame); + * InternalGetCoreDump(&frame, 0, NULL, ap); + * + * it creates a core file that only contains information about the + * calling thread. + * + * Optionally, the caller can provide information about other threads + * by passing their process ids in "thread_pids". The process id of + * the caller should not be included in this array. All of the threads + * must have been attached to with ptrace(), prior to calling this + * function. They will be detached when "InternalGetCoreDump()" returns. + * + * This function either returns a file handle that can be read for obtaining + * a core dump, or "-1" in case of an error. 
In the latter case, "errno" + * will be set appropriately. + * + * While "InternalGetCoreDump()" is not technically async signal safe, you + * might be tempted to invoke it from a signal handler. The code goes to + * great lengths to make a best effort that this will actually work. But in + * any case, you must make sure that you preserve the value of "errno" + * yourself. It is guaranteed to be clobbered otherwise. + * + * Also, "InternalGetCoreDump" is not strictly speaking re-entrant. Again, + * it makes a best effort to behave reasonably when called in a multi- + * threaded environment, but it is ultimately the caller's responsibility + * to provide locking. + */ +int InternalGetCoreDump(void *frame, int num_threads, pid_t *thread_pids, + va_list ap + /* const struct CoreDumpParameters *params, + const char *file_name, + const char *PATH + */); + +#endif + +#ifdef __cplusplus +} +#endif +#endif /* _ELFCORE_H */ diff --git a/third_party/tcmalloc/chromium/src/base/googleinit.h b/third_party/tcmalloc/chromium/src/base/googleinit.h new file mode 100644 index 0000000..62ad84c --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/googleinit.h @@ -0,0 +1,51 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Jacob Hoffman-Andrews + +#ifndef _GOOGLEINIT_H +#define _GOOGLEINIT_H + +class GoogleInitializer { + public: + typedef void (*void_function)(void); + GoogleInitializer(const char* name, void_function f) { + f(); + } +}; + +#define REGISTER_MODULE_INITIALIZER(name, body) \ + namespace { \ + static void google_init_module_##name () { body; } \ + GoogleInitializer google_initializer_module_##name(#name, \ + google_init_module_##name); \ + } + +#endif /* _GOOGLEINIT_H */ diff --git a/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h b/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h new file mode 100644 index 0000000..512805b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/linux_syscall_support.h @@ -0,0 +1,3020 @@ +/* Copyright (c) 2005-2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +/* This file includes Linux-specific support functions common to the + * coredumper and the thread lister; primarily, this is a collection + * of direct system calls, and a couple of symbols missing from + * standard header files. + * There are a few options that the including file can set to control + * the behavior of this file: + * + * SYS_CPLUSPLUS: + * The entire header file will normally be wrapped in 'extern "C" { }", + * making it suitable for compilation as both C and C++ source. If you + * do not want to do this, you can set the SYS_CPLUSPLUS macro to inhibit + * the wrapping. N.B. doing so will suppress inclusion of all prerequisite + * system header files, too. It is the caller's responsibility to provide + * the necessary definitions. + * + * SYS_ERRNO: + * All system calls will update "errno" unless overriden by setting the + * SYS_ERRNO macro prior to including this file. SYS_ERRNO should be + * an l-value. + * + * SYS_INLINE: + * New symbols will be defined "static inline", unless overridden by + * the SYS_INLINE macro. 
+ * + * SYS_LINUX_SYSCALL_SUPPORT_H + * This macro is used to avoid multiple inclusions of this header file. + * If you need to include this file more than once, make sure to + * unset SYS_LINUX_SYSCALL_SUPPORT_H before each inclusion. + * + * SYS_PREFIX: + * New system calls will have a prefix of "sys_" unless overridden by + * the SYS_PREFIX macro. Valid values for this macro are [0..9] which + * results in prefixes "sys[0..9]_". It is also possible to set this + * macro to -1, which avoids all prefixes. + * + * This file defines a few internal symbols that all start with "LSS_". + * Do not access these symbols from outside this file. They are not part + * of the supported API. + */ +#ifndef SYS_LINUX_SYSCALL_SUPPORT_H +#define SYS_LINUX_SYSCALL_SUPPORT_H + +/* We currently only support x86-32, x86-64, ARM, MIPS, and PPC on Linux. + * Porting to other related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + defined(__mips__) || defined(__PPC__)) && defined(__linux) + +#ifndef SYS_CPLUSPLUS +#ifdef __cplusplus +/* Some system header files in older versions of gcc neglect to properly + * handle being included from C++. As it appears to be harmless to have + * multiple nested 'extern "C"' blocks, just add another one here. + */ +extern "C" { +#endif + +#include <errno.h> +#include <signal.h> +#include <stdarg.h> +#include <string.h> +#include <sys/ptrace.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> +#include <linux/unistd.h> +#include <endian.h> + +#ifdef __mips__ +/* Include definitions of the ABI currently in use. */ +#include <sgidefs.h> +#endif + +#endif + +/* As glibc often provides subtly incompatible data structures (and implicit + * wrapper functions that convert them), we provide our own kernel data + * structures for use by the system calls. + * These structures have been developed by using Linux 2.6.23 headers for + * reference. Note though, we do not care about exact API compatibility + * with the kernel, and in fact the kernel often does not have a single + * API that works across architectures. Instead, we try to mimic the glibc + * API where reasonable, and only guarantee ABI compatibility with the + * kernel headers. + * Most notably, here are a few changes that were made to the structures + * defined by kernel headers: + * + * - we only define structures, but not symbolic names for kernel data + * types. For the latter, we directly use the native C datatype + * (i.e. "unsigned" instead of "mode_t"). + * - in a few cases, it is possible to define identical structures for + * both 32bit (e.g. i386) and 64bit (e.g. x86-64) platforms by + * standardizing on the 64bit version of the data types. In particular, + * this means that we use "unsigned" where the 32bit headers say + * "unsigned long". + * - overall, we try to minimize the number of cases where we need to + * conditionally define different structures. + * - the "struct kernel_sigaction" class of structures have been + * modified to more closely mimic glibc's API by introducing an + * anonymous union for the function pointer. + * - a small number of field names had to have an underscore appended to + * them, because glibc defines a global macro by the same name. 
+ */ + +/* include/linux/dirent.h */ +struct kernel_dirent64 { + unsigned long long d_ino; + long long d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[256]; +}; + +/* include/linux/dirent.h */ +struct kernel_dirent { + long d_ino; + long d_off; + unsigned short d_reclen; + char d_name[256]; +}; + +/* include/linux/uio.h */ +struct kernel_iovec { + void *iov_base; + unsigned long iov_len; +}; + +/* include/linux/socket.h */ +struct kernel_msghdr { + void *msg_name; + int msg_namelen; + struct kernel_iovec*msg_iov; + unsigned long msg_iovlen; + void *msg_control; + unsigned long msg_controllen; + unsigned msg_flags; +}; + +/* include/asm-generic/poll.h */ +struct kernel_pollfd { + int fd; + short events; + short revents; +}; + +/* include/linux/resource.h */ +struct kernel_rlimit { + unsigned long rlim_cur; + unsigned long rlim_max; +}; + +/* include/linux/time.h */ +struct kernel_timespec { + long tv_sec; + long tv_nsec; +}; + +/* include/linux/time.h */ +struct kernel_timeval { + long tv_sec; + long tv_usec; +}; + +/* include/linux/resource.h */ +struct kernel_rusage { + struct kernel_timeval ru_utime; + struct kernel_timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +struct siginfo; +#if defined(__i386__) || defined(__ARM_ARCH_3__) || defined(__PPC__) + +/* include/asm-{arm,i386,mips,ppc}/signal.h */ +struct kernel_old_sigaction { + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + unsigned long sa_mask; + unsigned long sa_flags; + void (*sa_restorer)(void); +} __attribute__((packed,aligned(4))); +#elif (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define kernel_old_sigaction kernel_sigaction +#endif + +/* Some kernel functions (e.g. sigaction() in 2.6.23) require that the + * exactly match the size of the signal set, even though the API was + * intended to be extensible. We define our own KERNEL_NSIG to deal with + * this. + * Please note that glibc provides signals [1.._NSIG-1], whereas the + * kernel (and this header) provides the range [1..KERNEL_NSIG]. The + * actual number of signals is obviously the same, but the constants + * differ by one. 
+ */ +#ifdef __mips__ +#define KERNEL_NSIG 128 +#else +#define KERNEL_NSIG 64 +#endif + +/* include/asm-{arm,i386,mips,x86_64}/signal.h */ +struct kernel_sigset_t { + unsigned long sig[(KERNEL_NSIG + 8*sizeof(unsigned long) - 1)/ + (8*sizeof(unsigned long))]; +}; + +/* include/asm-{arm,i386,mips,x86_64,ppc}/signal.h */ +struct kernel_sigaction { +#ifdef __mips__ + unsigned long sa_flags; + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + struct kernel_sigset_t sa_mask; +#else + union { + void (*sa_handler_)(int); + void (*sa_sigaction_)(int, struct siginfo *, void *); + }; + unsigned long sa_flags; + void (*sa_restorer)(void); + struct kernel_sigset_t sa_mask; +#endif +}; + +/* include/linux/socket.h */ +struct kernel_sockaddr { + unsigned short sa_family; + char sa_data[14]; +}; + +/* include/asm-{arm,i386,mips,ppc}/stat.h */ +#ifdef __mips__ +#if _MIPS_SIM == _MIPS_SIM_ABI64 +struct kernel_stat { +#else +struct kernel_stat64 { +#endif + unsigned st_dev; + unsigned __pad0[3]; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + unsigned __pad1[3]; + long long st_size; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned st_blksize; + unsigned __pad2; + unsigned long long st_blocks; +}; +#elif defined __PPC__ +struct kernel_stat64 { + unsigned long long st_dev; + unsigned long long st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned short int __pad2; + long long st_size; + long st_blksize; + long long st_blocks; + long st_atime_; + unsigned long st_atime_nsec_; + long st_mtime_; + unsigned long st_mtime_nsec_; + long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#else +struct kernel_stat64 { + unsigned long long st_dev; + unsigned char __pad0[4]; + unsigned __st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned long long st_rdev; + unsigned char __pad3[4]; + long long st_size; + unsigned st_blksize; + unsigned long long st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned long long st_ino; +}; +#endif + +/* include/asm-{arm,i386,mips,x86_64,ppc}/stat.h */ +#if defined(__i386__) || defined(__ARM_ARCH_3__) +struct kernel_stat { + /* The kernel headers suggest that st_dev and st_rdev should be 32bit + * quantities encoding 12bit major and 20bit minor numbers in an interleaved + * format. In reality, we do not see useful data in the top bits. So, + * we'll leave the padding in here, until we find a better solution. 
+ */ + unsigned short st_dev; + short pad1; + unsigned st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + unsigned short st_rdev; + short pad2; + unsigned st_size; + unsigned st_blksize; + unsigned st_blocks; + unsigned st_atime_; + unsigned st_atime_nsec_; + unsigned st_mtime_; + unsigned st_mtime_nsec_; + unsigned st_ctime_; + unsigned st_ctime_nsec_; + unsigned __unused4; + unsigned __unused5; +}; +#elif defined(__x86_64__) +struct kernel_stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + unsigned __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + long st_blocks; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + long __unused[3]; +}; +#elif defined(__PPC__) +struct kernel_stat { + unsigned st_dev; + unsigned long st_ino; // ino_t + unsigned long st_mode; // mode_t + unsigned short st_nlink; // nlink_t + unsigned st_uid; // uid_t + unsigned st_gid; // gid_t + unsigned st_rdev; + long st_size; // off_t + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime_; + unsigned long st_atime_nsec_; + unsigned long st_mtime_; + unsigned long st_mtime_nsec_; + unsigned long st_ctime_; + unsigned long st_ctime_nsec_; + unsigned long __unused4; + unsigned long __unused5; +}; +#elif (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) +struct kernel_stat { + unsigned st_dev; + int st_pad1[3]; + unsigned st_ino; + unsigned st_mode; + unsigned st_nlink; + unsigned st_uid; + unsigned st_gid; + unsigned st_rdev; + int st_pad2[2]; + long st_size; + int st_pad3; + long st_atime_; + long st_atime_nsec_; + long st_mtime_; + long st_mtime_nsec_; + long st_ctime_; + long st_ctime_nsec_; + int st_blksize; + int st_blocks; + int st_pad4[14]; +}; +#endif + +/* include/asm-{arm,i386,mips,x86_64,ppc}/statfs.h */ +#ifdef __mips__ +#if _MIPS_SIM != _MIPS_SIM_ABI64 +struct kernel_statfs64 { + unsigned long f_type; + unsigned long f_bsize; + unsigned long f_frsize; + unsigned long __pad; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_files; + unsigned long long f_ffree; + unsigned long long f_bavail; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_spare[6]; +}; +#endif +#elif !defined(__x86_64__) +struct kernel_statfs64 { + unsigned long f_type; + unsigned long f_bsize; + unsigned long long f_blocks; + unsigned long long f_bfree; + unsigned long long f_bavail; + unsigned long long f_files; + unsigned long long f_ffree; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_frsize; + unsigned long f_spare[5]; +}; +#endif + +/* include/asm-{arm,i386,mips,x86_64,ppc,generic}/statfs.h */ +#ifdef __mips__ +struct kernel_statfs { + long f_type; + long f_bsize; + long f_frsize; + long f_blocks; + long f_bfree; + long f_files; + long f_ffree; + long f_bavail; + struct { int val[2]; } f_fsid; + long f_namelen; + long f_spare[6]; +}; +#else +struct kernel_statfs { + /* x86_64 actually defines all these fields as signed, whereas all other */ + /* platforms define them as unsigned. Leaving them at unsigned should not */ + /* cause any problems. 
*/ + unsigned long f_type; + unsigned long f_bsize; + unsigned long f_blocks; + unsigned long f_bfree; + unsigned long f_bavail; + unsigned long f_files; + unsigned long f_ffree; + struct { int val[2]; } f_fsid; + unsigned long f_namelen; + unsigned long f_frsize; + unsigned long f_spare[5]; +}; +#endif + + +/* Definitions missing from the standard header files */ +#ifndef O_DIRECTORY +#if defined(__ARM_ARCH_3__) +#define O_DIRECTORY 0040000 +#else +#define O_DIRECTORY 0200000 +#endif +#endif +#ifndef NT_PRXFPREG +#define NT_PRXFPREG 0x46e62b7f +#endif +#ifndef PTRACE_GETFPXREGS +#define PTRACE_GETFPXREGS ((enum __ptrace_request)18) +#endif +#ifndef PR_GET_DUMPABLE +#define PR_GET_DUMPABLE 3 +#endif +#ifndef PR_SET_DUMPABLE +#define PR_SET_DUMPABLE 4 +#endif +#ifndef AT_FDCWD +#define AT_FDCWD (-100) +#endif +#ifndef AT_SYMLINK_NOFOLLOW +#define AT_SYMLINK_NOFOLLOW 0x100 +#endif +#ifndef AT_REMOVEDIR +#define AT_REMOVEDIR 0x200 +#endif +#ifndef MREMAP_FIXED +#define MREMAP_FIXED 2 +#endif +#ifndef SA_RESTORER +#define SA_RESTORER 0x04000000 +#endif +#ifndef CPUCLOCK_PROF +#define CPUCLOCK_PROF 0 +#endif +#ifndef CPUCLOCK_VIRT +#define CPUCLOCK_VIRT 1 +#endif +#ifndef CPUCLOCK_SCHED +#define CPUCLOCK_SCHED 2 +#endif +#ifndef CPUCLOCK_PERTHREAD_MASK +#define CPUCLOCK_PERTHREAD_MASK 4 +#endif +#ifndef MAKE_PROCESS_CPUCLOCK +#define MAKE_PROCESS_CPUCLOCK(pid, clock) \ + ((~(int)(pid) << 3) | (int)(clock)) +#endif +#ifndef MAKE_THREAD_CPUCLOCK +#define MAKE_THREAD_CPUCLOCK(tid, clock) \ + ((~(int)(tid) << 3) | (int)((clock) | CPUCLOCK_PERTHREAD_MASK)) +#endif + +#if defined(__i386__) +#ifndef __NR_setresuid +#define __NR_setresuid 164 +#define __NR_setresgid 170 +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigsuspend 179 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 180 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 181 +#endif +#ifndef __NR_ugetrlimit +#define __NR_ugetrlimit 191 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_setresuid32 +#define __NR_setresuid32 208 +#define __NR_setresgid32 210 +#endif +#ifndef __NR_setfsuid32 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 220 +#endif +#ifndef __NR_gettid +#define __NR_gettid 224 +#endif +#ifndef __NR_readahead +#define __NR_readahead 225 +#endif +#ifndef __NR_setxattr +#define __NR_setxattr 226 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 227 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 229 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 230 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 232 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 233 +#endif +#ifndef __NR_futex +#define __NR_futex 240 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 241 +#define __NR_sched_getaffinity 242 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 258 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 265 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 266 +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 268 +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 269 +#endif +#ifndef __NR_fadvise64_64 +#define __NR_fadvise64_64 272 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 289 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 290 +#endif +#ifndef 
__NR_openat +#define __NR_openat 295 +#endif +#ifndef __NR_fstatat64 +#define __NR_fstatat64 300 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 301 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 317 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 318 +#endif +#ifndef __NR_fallocate +#define __NR_fallocate 324 +#endif +/* End of i386 definitions */ +#elif defined(__ARM_ARCH_3__) +#ifndef __NR_setresuid +#define __NR_setresuid (__NR_SYSCALL_BASE + 164) +#define __NR_setresgid (__NR_SYSCALL_BASE + 170) +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_SYSCALL_BASE + 174) +#define __NR_rt_sigprocmask (__NR_SYSCALL_BASE + 175) +#define __NR_rt_sigpending (__NR_SYSCALL_BASE + 176) +#define __NR_rt_sigsuspend (__NR_SYSCALL_BASE + 179) +#endif +#ifndef __NR_pread64 +#define __NR_pread64 (__NR_SYSCALL_BASE + 180) +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 (__NR_SYSCALL_BASE + 181) +#endif +#ifndef __NR_ugetrlimit +#define __NR_ugetrlimit (__NR_SYSCALL_BASE + 191) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_SYSCALL_BASE + 195) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_SYSCALL_BASE + 197) +#endif +#ifndef __NR_setresuid32 +#define __NR_setresuid32 (__NR_SYSCALL_BASE + 208) +#define __NR_setresgid32 (__NR_SYSCALL_BASE + 210) +#endif +#ifndef __NR_setfsuid32 +#define __NR_setfsuid32 (__NR_SYSCALL_BASE + 215) +#define __NR_setfsgid32 (__NR_SYSCALL_BASE + 216) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_SYSCALL_BASE + 217) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_SYSCALL_BASE + 224) +#endif +#ifndef __NR_readahead +#define __NR_readahead (__NR_SYSCALL_BASE + 225) +#endif +#ifndef __NR_setxattr +#define __NR_setxattr (__NR_SYSCALL_BASE + 226) +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr (__NR_SYSCALL_BASE + 227) +#endif +#ifndef __NR_getxattr +#define __NR_getxattr (__NR_SYSCALL_BASE + 229) +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr (__NR_SYSCALL_BASE + 230) +#endif +#ifndef __NR_listxattr +#define __NR_listxattr (__NR_SYSCALL_BASE + 232) +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr (__NR_SYSCALL_BASE + 233) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_SYSCALL_BASE + 240) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_SYSCALL_BASE + 241) +#define __NR_sched_getaffinity (__NR_SYSCALL_BASE + 242) +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address (__NR_SYSCALL_BASE + 256) +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime (__NR_SYSCALL_BASE + 263) +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres (__NR_SYSCALL_BASE + 264) +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 (__NR_SYSCALL_BASE + 266) +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 (__NR_SYSCALL_BASE + 267) +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set (__NR_SYSCALL_BASE + 314) +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get (__NR_SYSCALL_BASE + 315) +#endif +#ifndef __NR_move_pages +#define __NR_move_pages (__NR_SYSCALL_BASE + 344) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_SYSCALL_BASE + 345) +#endif +/* End of ARM 3 definitions */ +#elif defined(__x86_64__) +#ifndef __NR_pread64 +#define __NR_pread64 17 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 18 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 117 +#define __NR_setresgid 119 +#endif +#ifndef __NR_gettid +#define __NR_gettid 186 +#endif +#ifndef __NR_readahead +#define __NR_readahead 187 +#endif 
+#ifndef __NR_setxattr +#define __NR_setxattr 188 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 189 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 191 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 192 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 194 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 195 +#endif +#ifndef __NR_futex +#define __NR_futex 202 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 203 +#define __NR_sched_getaffinity 204 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 217 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 218 +#endif +#ifndef __NR_fadvise64 +#define __NR_fadvise64 221 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 228 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 229 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 251 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 252 +#endif +#ifndef __NR_openat +#define __NR_openat 257 +#endif +#ifndef __NR_newfstatat +#define __NR_newfstatat 262 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 263 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 279 +#endif +#ifndef __NR_fallocate +#define __NR_fallocate 285 +#endif +/* End of x86-64 definitions */ +#elif defined(__mips__) +#if _MIPS_SIM == _MIPS_SIM_ABI32 +#ifndef __NR_setresuid +#define __NR_setresuid (__NR_Linux + 185) +#define __NR_setresgid (__NR_Linux + 190) +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction (__NR_Linux + 194) +#define __NR_rt_sigprocmask (__NR_Linux + 195) +#define __NR_rt_sigpending (__NR_Linux + 196) +#define __NR_rt_sigsuspend (__NR_Linux + 199) +#endif +#ifndef __NR_pread64 +#define __NR_pread64 (__NR_Linux + 200) +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 (__NR_Linux + 201) +#endif +#ifndef __NR_stat64 +#define __NR_stat64 (__NR_Linux + 213) +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 (__NR_Linux + 215) +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 (__NR_Linux + 219) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 222) +#endif +#ifndef __NR_readahead +#define __NR_readahead (__NR_Linux + 223) +#endif +#ifndef __NR_setxattr +#define __NR_setxattr (__NR_Linux + 224) +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr (__NR_Linux + 225) +#endif +#ifndef __NR_getxattr +#define __NR_getxattr (__NR_Linux + 227) +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr (__NR_Linux + 228) +#endif +#ifndef __NR_listxattr +#define __NR_listxattr (__NR_Linux + 230) +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr (__NR_Linux + 231) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 238) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_Linux + 239) +#define __NR_sched_getaffinity (__NR_Linux + 240) +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address (__NR_Linux + 252) +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 (__NR_Linux + 255) +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 (__NR_Linux + 256) +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime (__NR_Linux + 263) +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres (__NR_Linux + 264) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 288) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 293) +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat (__NR_Linux + 294) +#endif +#ifndef __NR_move_pages +#define __NR_move_pages (__NR_Linux + 308) +#endif 
+#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 312) +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set (__NR_Linux + 314) +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get (__NR_Linux + 315) +#endif +/* End of MIPS (old 32bit API) definitions */ +#elif _MIPS_SIM == _MIPS_SIM_ABI64 +#ifndef __NR_pread64 +#define __NR_pread64 (__NR_Linux + 16) +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 (__NR_Linux + 17) +#endif +#ifndef __NR_setresuid +#define __NR_setresuid (__NR_Linux + 115) +#define __NR_setresgid (__NR_Linux + 117) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_readahead +#define __NR_readahead (__NR_Linux + 179) +#endif +#ifndef __NR_setxattr +#define __NR_setxattr (__NR_Linux + 180) +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr (__NR_Linux + 181) +#endif +#ifndef __NR_getxattr +#define __NR_getxattr (__NR_Linux + 183) +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr (__NR_Linux + 184) +#endif +#ifndef __NR_listxattr +#define __NR_listxattr (__NR_Linux + 186) +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr (__NR_Linux + 187) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_Linux + 195) +#define __NR_sched_getaffinity (__NR_Linux + 196) +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address (__NR_Linux + 212) +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime (__NR_Linux + 222) +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres (__NR_Linux + 223) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 247) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 252) +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat (__NR_Linux + 253) +#endif +#ifndef __NR_move_pages +#define __NR_move_pages (__NR_Linux + 267) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 271) +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set (__NR_Linux + 273) +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get (__NR_Linux + 274) +#endif +/* End of MIPS (64bit API) definitions */ +#else +#ifndef __NR_setresuid +#define __NR_setresuid (__NR_Linux + 115) +#define __NR_setresgid (__NR_Linux + 117) +#endif +#ifndef __NR_gettid +#define __NR_gettid (__NR_Linux + 178) +#endif +#ifndef __NR_readahead +#define __NR_readahead (__NR_Linux + 179) +#endif +#ifndef __NR_setxattr +#define __NR_setxattr (__NR_Linux + 180) +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr (__NR_Linux + 181) +#endif +#ifndef __NR_getxattr +#define __NR_getxattr (__NR_Linux + 183) +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr (__NR_Linux + 184) +#endif +#ifndef __NR_listxattr +#define __NR_listxattr (__NR_Linux + 186) +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr (__NR_Linux + 187) +#endif +#ifndef __NR_futex +#define __NR_futex (__NR_Linux + 194) +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity (__NR_Linux + 195) +#define __NR_sched_getaffinity (__NR_Linux + 196) +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address (__NR_Linux + 213) +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 (__NR_Linux + 217) +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 (__NR_Linux + 218) +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime (__NR_Linux + 226) +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres (__NR_Linux + 227) +#endif +#ifndef __NR_openat +#define __NR_openat (__NR_Linux + 
251) +#endif +#ifndef __NR_fstatat +#define __NR_fstatat (__NR_Linux + 256) +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat (__NR_Linux + 257) +#endif +#ifndef __NR_move_pages +#define __NR_move_pages (__NR_Linux + 271) +#endif +#ifndef __NR_getcpu +#define __NR_getcpu (__NR_Linux + 275) +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set (__NR_Linux + 277) +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get (__NR_Linux + 278) +#endif +/* End of MIPS (new 32bit API) definitions */ +#endif +/* End of MIPS definitions */ +#elif defined(__PPC__) +#ifndef __NR_setfsuid +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#endif +#ifndef __NR_setresuid +#define __NR_setresuid 164 +#define __NR_setresgid 169 +#endif +#ifndef __NR_rt_sigaction +#define __NR_rt_sigaction 173 +#define __NR_rt_sigprocmask 174 +#define __NR_rt_sigpending 175 +#define __NR_rt_sigsuspend 178 +#endif +#ifndef __NR_pread64 +#define __NR_pread64 179 +#endif +#ifndef __NR_pwrite64 +#define __NR_pwrite64 180 +#endif +#ifndef __NR_ugetrlimit +#define __NR_ugetrlimit 190 +#endif +#ifndef __NR_readahead +#define __NR_readahead 191 +#endif +#ifndef __NR_stat64 +#define __NR_stat64 195 +#endif +#ifndef __NR_fstat64 +#define __NR_fstat64 197 +#endif +#ifndef __NR_getdents64 +#define __NR_getdents64 202 +#endif +#ifndef __NR_gettid +#define __NR_gettid 207 +#endif +#ifndef __NR_setxattr +#define __NR_setxattr 209 +#endif +#ifndef __NR_lsetxattr +#define __NR_lsetxattr 210 +#endif +#ifndef __NR_getxattr +#define __NR_getxattr 212 +#endif +#ifndef __NR_lgetxattr +#define __NR_lgetxattr 213 +#endif +#ifndef __NR_listxattr +#define __NR_listxattr 215 +#endif +#ifndef __NR_llistxattr +#define __NR_llistxattr 216 +#endif +#ifndef __NR_futex +#define __NR_futex 221 +#endif +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 222 +#define __NR_sched_getaffinity 223 +#endif +#ifndef __NR_set_tid_address +#define __NR_set_tid_address 232 +#endif +#ifndef __NR_clock_gettime +#define __NR_clock_gettime 246 +#endif +#ifndef __NR_clock_getres +#define __NR_clock_getres 247 +#endif +#ifndef __NR_statfs64 +#define __NR_statfs64 252 +#endif +#ifndef __NR_fstatfs64 +#define __NR_fstatfs64 253 +#endif +#ifndef __NR_fadvise64_64 +#define __NR_fadvise64_64 254 +#endif +#ifndef __NR_ioprio_set +#define __NR_ioprio_set 273 +#endif +#ifndef __NR_ioprio_get +#define __NR_ioprio_get 274 +#endif +#ifndef __NR_openat +#define __NR_openat 286 +#endif +#ifndef __NR_fstatat64 +#define __NR_fstatat64 291 +#endif +#ifndef __NR_unlinkat +#define __NR_unlinkat 292 +#endif +#ifndef __NR_move_pages +#define __NR_move_pages 301 +#endif +#ifndef __NR_getcpu +#define __NR_getcpu 302 +#endif +/* End of powerpc defininitions */ +#endif + + +/* After forking, we must make sure to only call system calls. */ +#if __BOUNDED_POINTERS__ + #error "Need to port invocations of syscalls for bounded ptrs" +#else + /* The core dumper and the thread lister get executed after threads + * have been suspended. As a consequence, we cannot call any functions + * that acquire locks. Unfortunately, libc wraps most system calls + * (e.g. in order to implement pthread_atfork, and to make calls + * cancellable), which means we cannot call these functions. Instead, + * we have to call syscall() directly. + */ + #undef LSS_ERRNO + #ifdef SYS_ERRNO + /* Allow the including file to override the location of errno. This can + * be useful when using clone() with the CLONE_VM option. 
+ */ + #define LSS_ERRNO SYS_ERRNO + #else + #define LSS_ERRNO errno + #endif + + #undef LSS_INLINE + #ifdef SYS_INLINE + #define LSS_INLINE SYS_INLINE + #else + #define LSS_INLINE static inline + #endif + + /* Allow the including file to override the prefix used for all new + * system calls. By default, it will be set to "sys_". + */ + #undef LSS_NAME + #ifndef SYS_PREFIX + #define LSS_NAME(name) sys_##name + #elif SYS_PREFIX < 0 + #define LSS_NAME(name) name + #elif SYS_PREFIX == 0 + #define LSS_NAME(name) sys0_##name + #elif SYS_PREFIX == 1 + #define LSS_NAME(name) sys1_##name + #elif SYS_PREFIX == 2 + #define LSS_NAME(name) sys2_##name + #elif SYS_PREFIX == 3 + #define LSS_NAME(name) sys3_##name + #elif SYS_PREFIX == 4 + #define LSS_NAME(name) sys4_##name + #elif SYS_PREFIX == 5 + #define LSS_NAME(name) sys5_##name + #elif SYS_PREFIX == 6 + #define LSS_NAME(name) sys6_##name + #elif SYS_PREFIX == 7 + #define LSS_NAME(name) sys7_##name + #elif SYS_PREFIX == 8 + #define LSS_NAME(name) sys8_##name + #elif SYS_PREFIX == 9 + #define LSS_NAME(name) sys9_##name + #endif + + #undef LSS_RETURN + #if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__)) + /* Failing system calls return a negative result in the range of + * -1..-4095. These are "errno" values with the sign inverted. + */ + #define LSS_RETURN(type, res) \ + do { \ + if ((unsigned long)(res) >= (unsigned long)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__mips__) + /* On MIPS, failing system calls return -1, and set errno in a + * separate CPU register. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #elif defined(__PPC__) + /* On PPC, failing system calls return -1, and set errno in a + * separate CPU register. See linux/unistd.h. + */ + #define LSS_RETURN(type, res, err) \ + do { \ + if (err & 0x10000000 ) { \ + LSS_ERRNO = (res); \ + res = -1; \ + } \ + return (type) (res); \ + } while (0) + #endif + #if defined(__i386__) + /* In PIC mode (e.g. when building shared libraries), gcc for i386 + * reserves ebx. Unfortunately, most distribution ship with implementations + * of _syscallX() which clobber ebx. + * Also, most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all of the _syscallX() macros. + */ + #undef LSS_BODY + #define LSS_BODY(type,args...) 
\ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + "movl %2,%%ebx\n" \ + "int $0x80\n" \ + "pop %%ebx" \ + args \ + : "memory"); \ + LSS_RETURN(type,__res) + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)(void) { \ + long __res; \ + __asm__ volatile("int $0x80" \ + : "=a" (__res) \ + : "0" (__NR_##name) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1))); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1,type2 arg2) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name),"ri" ((long)(arg1)), "c" ((long)(arg2))); \ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1,type2 arg2,type3 arg3) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3))); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(type, \ + : "=a" (__res) \ + : "0" (__NR_##name), "ri" ((long)(arg1)), "c" ((long)(arg2)), \ + "d" ((long)(arg3)),"S" ((long)(arg4))); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + long __res; \ + __asm__ __volatile__("push %%ebx\n" \ + "movl %2,%%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx" \ + : "=a" (__res) \ + : "i" (__NR_##name), "ri" ((long)(arg1)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + long __res; \ + struct { long __a1; long __a6; } __s = { (long)arg1, (long) arg6 }; \ + __asm__ __volatile__("push %%ebp\n" \ + "push %%ebx\n" \ + "movl 4(%2),%%ebp\n" \ + "movl 0(%2), %%ebx\n" \ + "movl %1,%%eax\n" \ + "int $0x80\n" \ + "pop %%ebx\n" \ + "pop %%ebp" \ + : "=a" (__res) \ + : "i" (__NR_##name), "0" ((long)(&__s)), \ + "c" ((long)(arg2)), "d" ((long)(arg3)), \ + "S" ((long)(arg4)), "D" ((long)(arg5)) \ + : "memory"); \ + LSS_RETURN(type,__res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "movl %3,%%ecx\n" + "jecxz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "movl %4,%%ecx\n" + "jecxz 1f\n" + + /* Set up alignment of the child stack: + * child_stack = (child_stack & ~0xF) - 20; + */ + "andl $-16,%%ecx\n" + "subl $20,%%ecx\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "movl %6,%%eax\n" + "movl %%eax,4(%%ecx)\n" + "movl %3,%%eax\n" + "movl %%eax,(%%ecx)\n" + + /* %eax = syscall(%eax = __NR_clone, + * %ebx = flags, + * %ecx = child_stack, + * %edx = parent_tidptr, + * %esi = newtls, + * %edi = child_tidptr) + * Also, make sure that %ebx gets preserved as it is + * used in PIC mode. 
+ */ + "movl %8,%%esi\n" + "movl %7,%%edx\n" + "movl %5,%%eax\n" + "movl %9,%%edi\n" + "pushl %%ebx\n" + "movl %%eax,%%ebx\n" + "movl %2,%%eax\n" + "int $0x80\n" + + /* In the parent: restore %ebx + * In the child: move "fn" into %ebx + */ + "popl %%ebx\n" + + /* if (%eax != 0) + * return %eax; + */ + "test %%eax,%%eax\n" + "jnz 1f\n" + + /* In the child, now. Terminate frame pointer chain. + */ + "movl $0,%%ebp\n" + + /* Call "fn". "arg" is already on the stack. + */ + "call *%%ebx\n" + + /* Call _exit(%ebx). Unfortunately older versions + * of gcc restrict the number of arguments that can + * be passed to asm(). So, we need to hard-code the + * system call number. + */ + "movl %%eax,%%ebx\n" + "movl $1,%%eax\n" + "int $0x80\n" + + /* Return to parent. + */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), + "m"(fn), "m"(child_stack), "m"(flags), "m"(arg), + "m"(parent_tidptr), "m"(newtls), "m"(child_tidptr) + : "memory", "ecx", "edx", "esi", "edi"); + LSS_RETURN(int, __res); + } + + #define __NR__fadvise64_64 __NR_fadvise64_64 + LSS_INLINE _syscall6(int, _fadvise64_64, int, fd, + unsigned, offset_lo, unsigned, offset_hi, + unsigned, len_lo, unsigned, len_hi, + int, advice) + + LSS_INLINE int LSS_NAME(fadvise64)(int fd, loff_t offset, + loff_t len, int advice) { + return LSS_NAME(_fadvise64_64)(fd, + (unsigned)offset, (unsigned)(offset >>32), + (unsigned)len, (unsigned)(len >> 32), + advice); + } + + #define __NR__fallocate __NR_fallocate + LSS_INLINE _syscall6(int, _fallocate, int, fd, + int, mode, + unsigned, offset_lo, unsigned, offset_hi, + unsigned, len_lo, unsigned, len_hi) + + LSS_INLINE int LSS_NAME(fallocate)(int fd, int mode, + loff_t offset, loff_t len) { + union { loff_t off; unsigned w[2]; } o = { offset }, l = { len }; + return LSS_NAME(_fallocate)(fd, mode, o.w[0], o.w[1], l.w[0], l.w[1]); + } + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; + } + LSS_INLINE void (*LSS_NAME(restore)(void))(void) { + /* On i386, the kernel does not know how to return from a signal + * handler. Instead, it relies on user space to provide a + * restorer function that calls the {rt_,}sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:pop %%eax\n" + "movl %1,%%eax\n" + "int $0x80\n" + "2:popl %0\n" + "addl $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_sigreturn)); + return res; + } + #elif defined(__x86_64__) + /* There are no known problems with any of the _syscallX() macros + * currently shipping for x86_64, but we still need to be able to define + * our own version so that we can override the location of the errno + * location (e.g. when using the clone() system call with the CLONE_VM + * option). + */ + #undef LSS_BODY + #define LSS_BODY(type,name, ...) 
\ + long __res; \ + __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \ + ##__VA_ARGS__ : "r11", "rcx", "memory"); \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)() { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(type, name, "D" ((long)(arg1))); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \ + "d" ((long)(arg3))); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + long __res; \ + __asm__ __volatile__("movq %5,%%r10; syscall" : \ + "=a" (__res) : "0" (__NR_##name), \ + "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ + "g" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \ + LSS_RETURN(type, __res); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + long __res; \ + __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \ + "=a" (__res) : "0" (__NR_##name), \ + "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ + "g" ((long)(arg4)), "g" ((long)(arg5)) : \ + "r8", "r10", "r11", "rcx", "memory"); \ + LSS_RETURN(type, __res); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + long __res; \ + __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \ + "syscall" : \ + "=a" (__res) : "0" (__NR_##name), \ + "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ + "g" ((long)(arg4)), "g" ((long)(arg5)), "g" ((long)(arg6)) : \ + "r8", "r9", "r10", "r11", "rcx", "memory"); \ + LSS_RETURN(type, __res); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + { + register void *__tls __asm__("r8") = newtls; + register int *__ctid __asm__("r10") = child_tidptr; + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; + */ + "testq %4,%4\n" + "jz 1f\n" + + /* if (child_stack == NULL) + * return -EINVAL; + */ + "testq %5,%5\n" + "jz 1f\n" + + /* childstack -= 2*sizeof(void *); + */ + "subq $16,%5\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "movq %7,8(%5)\n" + "movq %4,0(%5)\n" + + /* %rax = syscall(%rax = __NR_clone, + * %rdi = flags, + * %rsi = child_stack, + * %rdx = parent_tidptr, + * %r8 = new_tls, + * %r10 = child_tidptr) + */ + "movq %2,%%rax\n" + "syscall\n" + + /* if (%rax != 0) + * return; + */ + "testq %%rax,%%rax\n" + "jnz 1f\n" + + /* In the child. Terminate frame pointer chain. + */ + "xorq %%rbp,%%rbp\n" + + /* Call "fn(arg)". + */ + "popq %%rax\n" + "popq %%rdi\n" + "call *%%rax\n" + + /* Call _exit(%ebx). + */ + "movq %%rax,%%rdi\n" + "movq %3,%%rax\n" + "syscall\n" + + /* Return to parent. 
+ */ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), + "r"(fn), "S"(child_stack), "D"(flags), "r"(arg), + "d"(parent_tidptr), "r"(__tls), "r"(__ctid) + : "memory", "r11", "rcx"); + } + LSS_RETURN(int, __res); + } + LSS_INLINE _syscall4(int, fadvise64, int, fd, loff_t, offset, loff_t, len, + int, advice) + + LSS_INLINE void (*LSS_NAME(restore_rt)(void))(void) { + /* On x86-64, the kernel does not know how to return from + * a signal handler. Instead, it relies on user space to provide a + * restorer function that calls the rt_sigreturn() system call. + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. + */ + void (*res)(void); + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movq %1,%%rax\n" + "syscall\n" + "2:popq %0\n" + "addq $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); + return res; + } + #elif defined(__ARM_ARCH_3__) + /* Most definitions of _syscallX() neglect to mark "memory" as being + * clobbered. This causes problems with compilers, that do a better job + * at optimizing across __asm__ calls. + * So, we just have to redefine all fo the _syscallX() macros. + */ + #undef LSS_REG + #define LSS_REG(r,a) register long __r##r __asm__("r"#r) = (long)a + #undef LSS_BODY + #define LSS_BODY(type,name,args...) \ + register long __res_r0 __asm__("r0"); \ + long __res; \ + __asm__ __volatile__ (__syscall(name) \ + : "=r"(__res_r0) : args : "lr", "memory"); \ + __res = __res_r0; \ + LSS_RETURN(type, __res) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)() { \ + LSS_BODY(type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_REG(0, arg1); LSS_BODY(type, name, "r"(__r0)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3)); \ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(0, arg1); LSS_REG(1, arg2); LSS_REG(2, arg3); \ + LSS_REG(3, arg4); LSS_REG(4, arg5); LSS_REG(5, arg6); \ + LSS_BODY(type, name, "r"(__r0), "r"(__r1), "r"(__r2), "r"(__r3), \ + "r"(__r4), "r"(__r5)); \ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void 
*child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __res; + { + register int __flags __asm__("r0") = flags; + register void *__stack __asm__("r1") = child_stack; + register void *__ptid __asm__("r2") = parent_tidptr; + register void *__tls __asm__("r3") = newtls; + register int *__ctid __asm__("r4") = child_tidptr; + __asm__ __volatile__(/* if (fn == NULL || child_stack == NULL) + * return -EINVAL; + */ + "cmp %2,#0\n" + "cmpne %3,#0\n" + "moveq %0,%1\n" + "beq 1f\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. + */ + "str %5,[%3,#-4]!\n" + "str %2,[%3,#-4]!\n" + + /* %r0 = syscall(%r0 = flags, + * %r1 = child_stack, + * %r2 = parent_tidptr, + * %r3 = newtls, + * %r4 = child_tidptr) + */ + __syscall(clone)"\n" + + /* if (%r0 != 0) + * return %r0; + */ + "movs %0,r0\n" + "bne 1f\n" + + /* In the child, now. Call "fn(arg)". + */ + "ldr r0,[sp, #4]\n" + "mov lr,pc\n" + "ldr pc,[sp]\n" + + /* Call _exit(%r0). + */ + __syscall(exit)"\n" + "1:\n" + : "=r" (__res) + : "i"(-EINVAL), + "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__tls), "r"(__ctid) + : "lr", "memory"); + } + LSS_RETURN(int, __res); + } + #elif defined(__mips__) + #undef LSS_REG + #define LSS_REG(r,a) register unsigned long __r##r __asm__("$"#r) = \ + (unsigned long)(a) + #undef LSS_BODY + #define LSS_BODY(type,name,r7,...) \ + register unsigned long __v0 __asm__("$2") = __NR_##name; \ + __asm__ __volatile__ ("syscall\n" \ + : "=&r"(__v0), r7 (__r7) \ + : "0"(__v0), ##__VA_ARGS__ \ + : "$8", "$9", "$10", "$11", "$12", \ + "$13", "$14", "$15", "$24", "memory"); \ + LSS_RETURN(type, __v0, __r7) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)() { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_BODY(type, name, "=r"); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_BODY(type, name, "=r", "r"(__r4)); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5)); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + register unsigned long __r7 __asm__("$7"); \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_BODY(type, name, "=r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6)); \ + } + #undef _syscall5 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". 
+ */ + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2"); \ + __asm__ __volatile__ (".set noreorder\n" \ + "lw $2, %6\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r" (__r7) \ + : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ + "r"(__r6), "m" ((unsigned long)arg5) \ + : "$8", "$9", "$10", "$11", "$12", \ + "$13", "$14", "$15", "$24", "memory"); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8)); \ + } + #endif + #undef _syscall6 + #if _MIPS_SIM == _MIPS_SIM_ABI32 + /* The old 32bit MIPS system call API passes the fifth and sixth argument + * on the stack, whereas the new APIs use registers "r8" and "r9". + */ + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); \ + register unsigned long __v0 __asm__("$2"); \ + __asm__ __volatile__ (".set noreorder\n" \ + "lw $2, %6\n" \ + "lw $8, %7\n" \ + "subu $29, 32\n" \ + "sw $2, 16($29)\n" \ + "sw $8, 20($29)\n" \ + "li $2, %2\n" \ + "syscall\n" \ + "addiu $29, 32\n" \ + ".set reorder\n" \ + : "=&r"(__v0), "+r" (__r7) \ + : "i" (__NR_##name), "r"(__r4), "r"(__r5), \ + "r"(__r6), "r" ((unsigned long)arg5), \ + "r" ((unsigned long)arg6) \ + : "$8", "$9", "$10", "$11", "$12", \ + "$13", "$14", "$15", "$24", "memory"); \ + LSS_RETURN(type, __v0, __r7); \ + } + #else + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5,type6 arg6) { \ + LSS_REG(4, arg1); LSS_REG(5, arg2); LSS_REG(6, arg3); \ + LSS_REG(7, arg4); LSS_REG(8, arg5); LSS_REG(9, arg6); \ + LSS_BODY(type, name, "+r", "r"(__r4), "r"(__r5), "r"(__r6), \ + "r"(__r8), "r"(__r9)); \ + } + #endif + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + register unsigned long __v0 __asm__("$2"); + register unsigned long __r7 __asm__("$7") = (unsigned long)newtls; + { + register int __flags __asm__("$4") = flags; + register void *__stack __asm__("$5") = child_stack; + register void *__ptid __asm__("$6") = parent_tidptr; + register int *__ctid __asm__("$8") = child_tidptr; + __asm__ __volatile__( + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu $29,24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub $29,16\n" + #else + "dsubu $29,16\n" + #endif + + /* if (fn == NULL || child_stack == NULL) + * return -EINVAL; + */ + "li %0,%2\n" + "beqz %5,1f\n" + "beqz %6,1f\n" + + /* Push "arg" and "fn" onto the stack that will be + * used by the child. 
+ */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "subu %6,32\n" + "sw %5,0(%6)\n" + "sw %8,4(%6)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "sub %6,32\n" + "sw %5,0(%6)\n" + "sw %8,8(%6)\n" + #else + "dsubu %6,32\n" + "sd %5,0(%6)\n" + "sd %8,8(%6)\n" + #endif + + /* $7 = syscall($4 = flags, + * $5 = child_stack, + * $6 = parent_tidptr, + * $7 = newtls, + * $8 = child_tidptr) + */ + "li $2,%3\n" + "syscall\n" + + /* if ($7 != 0) + * return $2; + */ + "bnez $7,1f\n" + "bnez $2,1f\n" + + /* In the child, now. Call "fn(arg)". + */ + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "lw $25,0($29)\n" + "lw $4,4($29)\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "lw $25,0($29)\n" + "lw $4,8($29)\n" + #else + "ld $25,0($29)\n" + "ld $4,8($29)\n" + #endif + "jalr $25\n" + + /* Call _exit($2) + */ + "move $4,$2\n" + "li $2,%4\n" + "syscall\n" + + "1:\n" + #if _MIPS_SIM == _MIPS_SIM_ABI32 && _MIPS_SZPTR == 32 + "addu $29, 24\n" + #elif _MIPS_SIM == _MIPS_SIM_NABI32 + "add $29, 16\n" + #else + "daddu $29,16\n" + #endif + : "=&r" (__v0), "=r" (__r7) + : "i"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), + "r"(fn), "r"(__stack), "r"(__flags), "r"(arg), + "r"(__ptid), "r"(__r7), "r"(__ctid) + : "$9", "$10", "$11", "$12", "$13", "$14", "$15", + "$24", "memory"); + } + LSS_RETURN(int, __v0, __r7); + } + #elif defined (__PPC__) + #undef LSS_LOADARGS_0 + #define LSS_LOADARGS_0(name, dummy...) \ + __sc_0 = __NR_##name + #undef LSS_LOADARGS_1 + #define LSS_LOADARGS_1(name, arg1) \ + LSS_LOADARGS_0(name); \ + __sc_3 = (unsigned long) (arg1) + #undef LSS_LOADARGS_2 + #define LSS_LOADARGS_2(name, arg1, arg2) \ + LSS_LOADARGS_1(name, arg1); \ + __sc_4 = (unsigned long) (arg2) + #undef LSS_LOADARGS_3 + #define LSS_LOADARGS_3(name, arg1, arg2, arg3) \ + LSS_LOADARGS_2(name, arg1, arg2); \ + __sc_5 = (unsigned long) (arg3) + #undef LSS_LOADARGS_4 + #define LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4) \ + LSS_LOADARGS_3(name, arg1, arg2, arg3); \ + __sc_6 = (unsigned long) (arg4) + #undef LSS_LOADARGS_5 + #define LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5) \ + LSS_LOADARGS_4(name, arg1, arg2, arg3, arg4); \ + __sc_7 = (unsigned long) (arg5) + #undef LSS_LOADARGS_6 + #define LSS_LOADARGS_6(name, arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_LOADARGS_5(name, arg1, arg2, arg3, arg4, arg5); \ + __sc_8 = (unsigned long) (arg6) + #undef LSS_ASMINPUT_0 + #define LSS_ASMINPUT_0 "0" (__sc_0) + #undef LSS_ASMINPUT_1 + #define LSS_ASMINPUT_1 LSS_ASMINPUT_0, "1" (__sc_3) + #undef LSS_ASMINPUT_2 + #define LSS_ASMINPUT_2 LSS_ASMINPUT_1, "2" (__sc_4) + #undef LSS_ASMINPUT_3 + #define LSS_ASMINPUT_3 LSS_ASMINPUT_2, "3" (__sc_5) + #undef LSS_ASMINPUT_4 + #define LSS_ASMINPUT_4 LSS_ASMINPUT_3, "4" (__sc_6) + #undef LSS_ASMINPUT_5 + #define LSS_ASMINPUT_5 LSS_ASMINPUT_4, "5" (__sc_7) + #undef LSS_ASMINPUT_6 + #define LSS_ASMINPUT_6 LSS_ASMINPUT_5, "6" (__sc_8) + #undef LSS_BODY + #define LSS_BODY(nr, type, name, args...) 
\ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0"); \ + register unsigned long __sc_3 __asm__ ("r3"); \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + register unsigned long __sc_8 __asm__ ("r8"); \ + \ + LSS_LOADARGS_##nr(name, args); \ + __asm__ __volatile__ \ + ("sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory", \ + "r9", "r10", "r11", "r12"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + #undef _syscall0 + #define _syscall0(type, name) \ + type LSS_NAME(name)(void) { \ + LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type, name, type1, arg1) \ + type LSS_NAME(name)(type1 arg1) { \ + LSS_BODY(1, type, name, arg1); \ + } + #undef _syscall2 + #define _syscall2(type, name, type1, arg1, type2, arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ + LSS_BODY(2, type, name, arg1, arg2); \ + } + #undef _syscall3 + #define _syscall3(type, name, type1, arg1, type2, arg2, type3, arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ + LSS_BODY(3, type, name, arg1, arg2, arg3); \ + } + #undef _syscall4 + #define _syscall4(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ + LSS_BODY(4, type, name, arg1, arg2, arg3, arg4); \ + } + #undef _syscall5 + #define _syscall5(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ + LSS_BODY(5, type, name, arg1, arg2, arg3, arg4, arg5); \ + } + #undef _syscall6 + #define _syscall6(type, name, type1, arg1, type2, arg2, type3, arg3, \ + type4, arg4, type5, arg5, type6, arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ + LSS_BODY(6, type, name, arg1, arg2, arg3, arg4, arg5, arg6); \ + } + /* clone function adapted from glibc 2.3.6 clone.S */ + /* TODO(csilvers): consider wrapping some args up in a struct, like we + * do for i386's _syscall6, so we can compile successfully on gcc 2.95 + */ + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { + long __ret, __err; + { + register int (*__fn)(void *) __asm__ ("r8") = fn; + register void *__cstack __asm__ ("r4") = child_stack; + register int __flags __asm__ ("r3") = flags; + register void * __arg __asm__ ("r9") = arg; + register int * __ptidptr __asm__ ("r5") = parent_tidptr; + register void * __newtls __asm__ ("r6") = newtls; + register int * __ctidptr __asm__ ("r7") = child_tidptr; + __asm__ __volatile__( + /* check for fn == NULL + * and child_stack == NULL + */ + "cmpwi cr0, %6, 0\n\t" + "cmpwi cr1, %7, 0\n\t" + "cror cr0*4+eq, cr1*4+eq, cr0*4+eq\n\t" + "beq- cr0, 1f\n\t" + + /* set up stack frame for child */ + "clrrwi %7, %7, 4\n\t" + "li 0, 0\n\t" + "stwu 0, -16(%7)\n\t" + + /* fn, arg, child_stack are saved across the syscall: r28-30 */ + "mr 28, %6\n\t" + "mr 29, %7\n\t" + "mr 27, %9\n\t" + + /* syscall */ + "li 0, %4\n\t" + /* flags already in r3 + * child_stack already in r4 + * ptidptr already in r5 + * newtls 
already in r6 + * ctidptr already in r7 + */ + "sc\n\t" + + /* Test if syscall was successful */ + "cmpwi cr1, 3, 0\n\t" + "crandc cr1*4+eq, cr1*4+eq, cr0*4+so\n\t" + "bne- cr1, 1f\n\t" + + /* Do the function call */ + "mtctr 28\n\t" + "mr 3, 27\n\t" + "bctrl\n\t" + + /* Call _exit(r3) */ + "li 0, %5\n\t" + "sc\n\t" + + /* Return to parent */ + "1:\n" + "mfcr %1\n\t" + "mr %0, 3\n\t" + : "=r" (__ret), "=r" (__err) + : "0" (-1), "1" (EINVAL), + "i" (__NR_clone), "i" (__NR_exit), + "r" (__fn), "r" (__cstack), "r" (__flags), + "r" (__arg), "r" (__ptidptr), "r" (__newtls), + "r" (__ctidptr) + : "cr0", "cr1", "memory", "ctr", + "r0", "r29", "r27", "r28"); + } + LSS_RETURN(int, __ret, __err); + } + #endif + #define __NR__exit __NR_exit + #define __NR__gettid __NR_gettid + #define __NR__mremap __NR_mremap + LSS_INLINE _syscall1(int, chdir, const char *,p) + LSS_INLINE _syscall1(int, close, int, f) + LSS_INLINE _syscall2(int, clock_getres, int, c, + struct kernel_timespec*, t) + LSS_INLINE _syscall2(int, clock_gettime, int, c, + struct kernel_timespec*, t) + LSS_INLINE _syscall1(int, dup, int, f) + LSS_INLINE _syscall2(int, dup2, int, s, + int, d) + LSS_INLINE _syscall3(int, execve, const char*, f, + const char*const*,a,const char*const*, e) + LSS_INLINE _syscall1(int, _exit, int, e) + LSS_INLINE _syscall3(int, fcntl, int, f, + int, c, long, a) + LSS_INLINE _syscall0(pid_t, fork) + LSS_INLINE _syscall2(int, fstat, int, f, + struct kernel_stat*, b) + LSS_INLINE _syscall2(int, fstatfs, int, f, + struct kernel_statfs*, b) + LSS_INLINE _syscall4(int, futex, int*, a, + int, o, int, v, + struct kernel_timespec*, t) + LSS_INLINE _syscall3(int, getdents, int, f, + struct kernel_dirent*, d, int, c) + LSS_INLINE _syscall3(int, getdents64, int, f, + struct kernel_dirent64*, d, int, c) + LSS_INLINE _syscall0(gid_t, getegid) + LSS_INLINE _syscall0(uid_t, geteuid) + LSS_INLINE _syscall0(pid_t, getpgrp) + LSS_INLINE _syscall0(pid_t, getpid) + LSS_INLINE _syscall0(pid_t, getppid) + LSS_INLINE _syscall2(int, getpriority, int, a, + int, b) + LSS_INLINE _syscall2(int, getrlimit, int, r, + struct kernel_rlimit*, l) + LSS_INLINE _syscall1(pid_t, getsid, pid_t, p) + LSS_INLINE _syscall0(pid_t, _gettid) + LSS_INLINE _syscall5(int, setxattr, const char *,p, + const char *, n, const void *,v, + size_t, s, int, f) + LSS_INLINE _syscall5(int, lsetxattr, const char *,p, + const char *, n, const void *,v, + size_t, s, int, f) + LSS_INLINE _syscall4(ssize_t, getxattr, const char *,p, + const char *, n, void *, v, size_t, s) + LSS_INLINE _syscall4(ssize_t, lgetxattr, const char *,p, + const char *, n, void *, v, size_t, s) + LSS_INLINE _syscall3(ssize_t, listxattr, const char *,p, + char *, l, size_t, s) + LSS_INLINE _syscall3(ssize_t, llistxattr, const char *,p, + char *, l, size_t, s) + LSS_INLINE _syscall2(int, ioprio_get, int, which, + int, who) + LSS_INLINE _syscall3(int, ioprio_set, int, which, + int, who, int, ioprio) + LSS_INLINE _syscall2(int, kill, pid_t, p, + int, s) + LSS_INLINE _syscall3(off_t, lseek, int, f, + off_t, o, int, w) + LSS_INLINE _syscall2(int, munmap, void*, s, + size_t, l) + LSS_INLINE _syscall6(long, move_pages, pid_t, p, + unsigned long, n, void **,g, int *, d, + int *, s, int, f) + LSS_INLINE _syscall5(void*, _mremap, void*, o, + size_t, os, size_t, ns, + unsigned long, f, void *, a) + LSS_INLINE _syscall3(int, open, const char*, p, + int, f, int, m) + LSS_INLINE _syscall3(int, poll, struct kernel_pollfd*, u, + unsigned int, n, int, t) + LSS_INLINE _syscall2(int, prctl, int, o, + long, a) + 
LSS_INLINE _syscall4(long, ptrace, int, r, + pid_t, p, void *, a, void *, d) + LSS_INLINE _syscall3(ssize_t, read, int, f, + void *, b, size_t, c) + LSS_INLINE _syscall3(int, readlink, const char*, p, + char*, b, size_t, s) + LSS_INLINE _syscall4(int, rt_sigaction, int, s, + const struct kernel_sigaction*, a, + struct kernel_sigaction*, o, size_t, c) + LSS_INLINE _syscall2(int, rt_sigpending, struct kernel_sigset_t *, s, + size_t, c) + LSS_INLINE _syscall4(int, rt_sigprocmask, int, h, + const struct kernel_sigset_t*, s, + struct kernel_sigset_t*, o, size_t, c); + LSS_INLINE _syscall2(int, rt_sigsuspend, + const struct kernel_sigset_t*, s, size_t, c); + LSS_INLINE _syscall3(int, sched_getaffinity,pid_t, p, + unsigned int, l, unsigned long *, m) + LSS_INLINE _syscall3(int, sched_setaffinity,pid_t, p, + unsigned int, l, unsigned long *, m) + LSS_INLINE _syscall0(int, sched_yield) + LSS_INLINE _syscall1(long, set_tid_address, int *, t) + LSS_INLINE _syscall1(int, setfsgid, gid_t, g) + LSS_INLINE _syscall1(int, setfsuid, uid_t, u) + LSS_INLINE _syscall1(int, setuid, uid_t, u) + LSS_INLINE _syscall1(int, setgid, gid_t, g) + LSS_INLINE _syscall2(int, setpgid, pid_t, p, + pid_t, g) + LSS_INLINE _syscall3(int, setpriority, int, a, + int, b, int, p) + LSS_INLINE _syscall3(int, setresgid, gid_t, r, + gid_t, e, gid_t, s) + LSS_INLINE _syscall3(int, setresuid, uid_t, r, + uid_t, e, uid_t, s) + LSS_INLINE _syscall2(int, setrlimit, int, r, + const struct kernel_rlimit*, l) + LSS_INLINE _syscall0(pid_t, setsid) + LSS_INLINE _syscall2(int, sigaltstack, const stack_t*, s, + const stack_t*, o) + LSS_INLINE _syscall2(int, stat, const char*, f, + struct kernel_stat*, b) + LSS_INLINE _syscall2(int, statfs, const char*, f, + struct kernel_statfs*, b) + LSS_INLINE _syscall3(ssize_t, write, int, f, + const void *, b, size_t, c) + LSS_INLINE _syscall3(ssize_t, writev, int, f, + const struct kernel_iovec*, v, size_t, c) + #if defined(__NR_getcpu) + LSS_INLINE _syscall3(long, getcpu, unsigned *, cpu, + unsigned *, node, void *, unused); + #endif + #if defined(__x86_64__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) + LSS_INLINE _syscall3(int, recvmsg, int, s, + struct kernel_msghdr*, m, int, f) + LSS_INLINE _syscall3(int, sendmsg, int, s, + const struct kernel_msghdr*, m, int, f) + LSS_INLINE _syscall6(int, sendto, int, s, + const void*, m, size_t, l, + int, f, + const struct kernel_sockaddr*, a, int, t) + LSS_INLINE _syscall2(int, shutdown, int, s, + int, h) + LSS_INLINE _syscall3(int, socket, int, d, + int, t, int, p) + LSS_INLINE _syscall4(int, socketpair, int, d, + int, t, int, p, int*, s) + #endif + #if defined(__x86_64__) + LSS_INLINE _syscall4(int, fallocate, int, fd, int, mode, + loff_t, offset, loff_t, len) + LSS_INLINE _syscall6(void*, mmap, void*, s, + size_t, l, int, p, + int, f, int, d, + __off64_t, o) + LSS_INLINE _syscall4(int, newfstatat, int, d, + const char *, p, + struct kernel_stat*, b, int, f) + + LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { + return LSS_NAME(setfsgid)(gid); + } + + LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) { + return LSS_NAME(setfsuid)(uid); + } + + LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) { + return LSS_NAME(setresgid)(rgid, egid, sgid); + } + + LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) { + return LSS_NAME(setresuid)(ruid, euid, suid); + } + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + /* On x86_64, the kernel 
requires us to always set our own + * SA_RESTORER in order to be able to return from a signal handler. + * This function must have a "magic" signature that the "gdb" + * (and maybe the kernel?) can recognize. + */ + if (act != NULL && !(act->sa_flags & SA_RESTORER)) { + struct kernel_sigaction a = *act; + a.sa_flags |= SA_RESTORER; + a.sa_restorer = LSS_NAME(restore_rt)(); + return LSS_NAME(rt_sigaction)(signum, &a, oldact, + (KERNEL_NSIG+7)/8); + } else { + return LSS_NAME(rt_sigaction)(signum, act, oldact, + (KERNEL_NSIG+7)/8); + } + } + + LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { + return LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + return LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + } + + LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { + return LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); + } + #endif + #if defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI32) + LSS_INLINE _syscall4(pid_t, wait4, pid_t, p, + int*, s, int, o, + struct kernel_rusage*, r) + + LSS_INLINE pid_t LSS_NAME(waitpid)(pid_t pid, int *status, int options){ + return LSS_NAME(wait4)(pid, status, options, 0); + } + #endif + #if defined(__i386__) || defined(__x86_64__) + LSS_INLINE _syscall4(int, openat, int, d, const char *, p, int, f, int, m) + LSS_INLINE _syscall3(int, unlinkat, int, d, const char *, p, int, f) + #endif + #if defined(__i386__) || defined(__ARM_ARCH_3__) + #define __NR__setfsgid32 __NR_setfsgid32 + #define __NR__setfsuid32 __NR_setfsuid32 + #define __NR__setresgid32 __NR_setresgid32 + #define __NR__setresuid32 __NR_setresuid32 + LSS_INLINE _syscall2(int, ugetrlimit, int, r, + struct kernel_rlimit*, l) + LSS_INLINE _syscall1(int, _setfsgid32, gid_t, f) + LSS_INLINE _syscall1(int, _setfsuid32, uid_t, f) + LSS_INLINE _syscall3(int, _setresgid32, gid_t, r, + gid_t, e, gid_t, s) + LSS_INLINE _syscall3(int, _setresuid32, uid_t, r, + uid_t, e, uid_t, s) + + LSS_INLINE int LSS_NAME(setfsgid32)(gid_t gid) { + int rc; + if ((rc = LSS_NAME(_setfsgid32)(gid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((unsigned int)gid & ~0xFFFFu) { + rc = EINVAL; + } else { + rc = LSS_NAME(setfsgid)(gid); + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(setfsuid32)(uid_t uid) { + int rc; + if ((rc = LSS_NAME(_setfsuid32)(uid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((unsigned int)uid & ~0xFFFFu) { + rc = EINVAL; + } else { + rc = LSS_NAME(setfsuid)(uid); + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(setresgid32)(gid_t rgid, gid_t egid, gid_t sgid) { + int rc; + if ((rc = LSS_NAME(_setresgid32)(rgid, egid, sgid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((unsigned int)rgid & ~0xFFFFu || + (unsigned int)egid & ~0xFFFFu || + (unsigned int)sgid & ~0xFFFFu) { + rc = EINVAL; + } else { + rc = LSS_NAME(setresgid)(rgid, egid, sgid); + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(setresuid32)(uid_t ruid, uid_t euid, uid_t suid) { + int rc; + if ((rc = LSS_NAME(_setresuid32)(ruid, euid, suid)) < 0 && + LSS_ERRNO == ENOSYS) { + if ((unsigned int)ruid & ~0xFFFFu || + (unsigned int)euid & ~0xFFFFu || + (unsigned int)suid & ~0xFFFFu) { + rc = EINVAL; + } else { + rc = LSS_NAME(setresuid)(ruid, euid, suid); + } + } + return rc; + } + #endif + LSS_INLINE int LSS_NAME(sigemptyset)(struct kernel_sigset_t *set) { + memset(&set->sig, 0, sizeof(set->sig)); + return 0; + } + + LSS_INLINE 
int LSS_NAME(sigfillset)(struct kernel_sigset_t *set) { + memset(&set->sig, -1, sizeof(set->sig)); + return 0; + } + + LSS_INLINE int LSS_NAME(sigaddset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + |= 1UL << ((signum - 1) % (8*sizeof(set->sig[0]))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigdelset)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] + &= ~(1UL << ((signum - 1) % (8*sizeof(set->sig[0])))); + return 0; + } + } + + LSS_INLINE int LSS_NAME(sigismember)(struct kernel_sigset_t *set, + int signum) { + if (signum < 1 || signum > (int)(8*sizeof(set->sig))) { + LSS_ERRNO = EINVAL; + return -1; + } else { + return !!(set->sig[(signum - 1)/(8*sizeof(set->sig[0]))] & + (1UL << ((signum - 1) % (8*sizeof(set->sig[0]))))); + } + } + #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) || defined(__PPC__) + #define __NR__sigaction __NR_sigaction + #define __NR__sigpending __NR_sigpending + #define __NR__sigprocmask __NR_sigprocmask + #define __NR__sigsuspend __NR_sigsuspend + #define __NR__socketcall __NR_socketcall + LSS_INLINE _syscall2(int, fstat64, int, f, + struct kernel_stat64 *, b) + LSS_INLINE _syscall5(int, _llseek, uint, fd, ulong, hi, ulong, lo, + loff_t *, res, uint, wh) + LSS_INLINE _syscall1(void*, mmap, void*, a) +#ifndef __PPC64__ + LSS_INLINE _syscall6(void*, mmap2, void*, s, + size_t, l, int, p, + int, f, int, d, + __off64_t, o) +#endif + LSS_INLINE _syscall3(int, _sigaction, int, s, + const struct kernel_old_sigaction*, a, + struct kernel_old_sigaction*, o) + LSS_INLINE _syscall1(int, _sigpending, unsigned long*, s) + LSS_INLINE _syscall3(int, _sigprocmask, int, h, + const unsigned long*, s, + unsigned long*, o) + #ifdef __PPC__ + LSS_INLINE _syscall1(int, _sigsuspend, unsigned long, s) + #else + LSS_INLINE _syscall3(int, _sigsuspend, const void*, a, + int, b, + unsigned long, s) + #endif + LSS_INLINE _syscall2(int, stat64, const char *, p, + struct kernel_stat64 *, b) + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, + struct kernel_sigaction *oldact) { + int old_errno = LSS_ERRNO; + int rc; + struct kernel_sigaction a; + if (act != NULL) { + a = *act; + #ifdef __i386__ + /* On i386, the kernel requires us to always set our own + * SA_RESTORER when using realtime signals. Otherwise, it does not + * know how to return from a signal handler. This function must have + * a "magic" signature that the "gdb" (and maybe the kernel?) can + * recognize. + * Apparently, a SA_RESTORER is implicitly set by the kernel, when + * using non-realtime signals. + * + * TODO: Test whether ARM needs a restorer + */ + if (!(a.sa_flags & SA_RESTORER)) { + a.sa_flags |= SA_RESTORER; + a.sa_restorer = (a.sa_flags & SA_SIGINFO) + ? LSS_NAME(restore_rt)() : LSS_NAME(restore)(); + } + #endif + } + rc = LSS_NAME(rt_sigaction)(signum, act ? 
&a : act, oldact, + (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + struct kernel_old_sigaction oa, ooa, *ptr_a = &oa, *ptr_oa = &ooa; + if (!act) { + ptr_a = NULL; + } else { + oa.sa_handler_ = act->sa_handler_; + memcpy(&oa.sa_mask, &act->sa_mask, sizeof(oa.sa_mask)); + #ifndef __mips__ + oa.sa_restorer = act->sa_restorer; + #endif + oa.sa_flags = act->sa_flags; + } + if (!oldact) { + ptr_oa = NULL; + } + LSS_ERRNO = old_errno; + rc = LSS_NAME(_sigaction)(signum, ptr_a, ptr_oa); + if (rc == 0 && oldact) { + if (act) { + memcpy(oldact, act, sizeof(*act)); + } else { + memset(oldact, 0, sizeof(*oldact)); + } + oldact->sa_handler_ = ptr_oa->sa_handler_; + oldact->sa_flags = ptr_oa->sa_flags; + memcpy(&oldact->sa_mask, &ptr_oa->sa_mask, sizeof(ptr_oa->sa_mask)); + #ifndef __mips__ + oldact->sa_restorer = ptr_oa->sa_restorer; + #endif + } + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigpending)(struct kernel_sigset_t *set) { + int old_errno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigpending)(set, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = old_errno; + LSS_NAME(sigemptyset)(set); + rc = LSS_NAME(_sigpending)(&set->sig[0]); + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigprocmask)(int how, + const struct kernel_sigset_t *set, + struct kernel_sigset_t *oldset) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigprocmask)(how, set, oldset, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = olderrno; + if (oldset) { + LSS_NAME(sigemptyset)(oldset); + } + rc = LSS_NAME(_sigprocmask)(how, + set ? &set->sig[0] : NULL, + oldset ? &oldset->sig[0] : NULL); + } + return rc; + } + + LSS_INLINE int LSS_NAME(sigsuspend)(const struct kernel_sigset_t *set) { + int olderrno = LSS_ERRNO; + int rc = LSS_NAME(rt_sigsuspend)(set, (KERNEL_NSIG+7)/8); + if (rc < 0 && LSS_ERRNO == ENOSYS) { + LSS_ERRNO = olderrno; + rc = LSS_NAME(_sigsuspend)( + #ifndef __PPC__ + set, 0, + #endif + set->sig[0]); + } + return rc; + } + #endif + #if defined(__PPC__) + #undef LSS_SC_LOADARGS_0 + #define LSS_SC_LOADARGS_0(dummy...) + #undef LSS_SC_LOADARGS_1 + #define LSS_SC_LOADARGS_1(arg1) \ + __sc_4 = (unsigned long) (arg1) + #undef LSS_SC_LOADARGS_2 + #define LSS_SC_LOADARGS_2(arg1, arg2) \ + LSS_SC_LOADARGS_1(arg1); \ + __sc_5 = (unsigned long) (arg2) + #undef LSS_SC_LOADARGS_3 + #define LSS_SC_LOADARGS_3(arg1, arg2, arg3) \ + LSS_SC_LOADARGS_2(arg1, arg2); \ + __sc_6 = (unsigned long) (arg3) + #undef LSS_SC_LOADARGS_4 + #define LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4) \ + LSS_SC_LOADARGS_3(arg1, arg2, arg3); \ + __sc_7 = (unsigned long) (arg4) + #undef LSS_SC_LOADARGS_5 + #define LSS_SC_LOADARGS_5(arg1, arg2, arg3, arg4, arg5) \ + LSS_SC_LOADARGS_4(arg1, arg2, arg3, arg4); \ + __sc_8 = (unsigned long) (arg5) + #undef LSS_SC_BODY + #define LSS_SC_BODY(nr, type, opt, args...) 
\ + long __sc_ret, __sc_err; \ + { \ + register unsigned long __sc_0 __asm__ ("r0") = __NR_socketcall; \ + register unsigned long __sc_3 __asm__ ("r3") = opt; \ + register unsigned long __sc_4 __asm__ ("r4"); \ + register unsigned long __sc_5 __asm__ ("r5"); \ + register unsigned long __sc_6 __asm__ ("r6"); \ + register unsigned long __sc_7 __asm__ ("r7"); \ + register unsigned long __sc_8 __asm__ ("r8"); \ + LSS_SC_LOADARGS_##nr(args); \ + __asm__ __volatile__ \ + ("stwu 1, -48(1)\n\t" \ + "stw 4, 20(1)\n\t" \ + "stw 5, 24(1)\n\t" \ + "stw 6, 28(1)\n\t" \ + "stw 7, 32(1)\n\t" \ + "stw 8, 36(1)\n\t" \ + "addi 4, 1, 20\n\t" \ + "sc\n\t" \ + "mfcr %0" \ + : "=&r" (__sc_0), \ + "=&r" (__sc_3), "=&r" (__sc_4), \ + "=&r" (__sc_5), "=&r" (__sc_6), \ + "=&r" (__sc_7), "=&r" (__sc_8) \ + : LSS_ASMINPUT_##nr \ + : "cr0", "ctr", "memory"); \ + __sc_ret = __sc_3; \ + __sc_err = __sc_0; \ + } \ + LSS_RETURN(type, __sc_ret, __sc_err) + + LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, + int flags){ + LSS_SC_BODY(3, ssize_t, 17, s, msg, flags); + } + + LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, + const struct kernel_msghdr *msg, + int flags) { + LSS_SC_BODY(3, ssize_t, 16, s, msg, flags); + } + + // TODO(csilvers): why is this ifdef'ed out? +#if 0 + LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, + int flags, + const struct kernel_sockaddr *to, + unsigned int tolen) { + LSS_BODY(6, ssize_t, 11, s, buf, len, flags, to, tolen); + } +#endif + + LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { + LSS_SC_BODY(2, int, 13, s, how); + } + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + LSS_SC_BODY(3, int, 1, domain, type, protocol); + } + + LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, + int sv[2]) { + LSS_SC_BODY(4, int, 8, d, type, protocol, sv); + } + #endif + #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + #define __NR__socketcall __NR_socketcall + LSS_INLINE _syscall2(int, _socketcall, int, c, + va_list, a) + + LSS_INLINE int LSS_NAME(socketcall)(int op, ...) 
{ + int rc; + va_list ap; + va_start(ap, op); + rc = LSS_NAME(_socketcall)(op, ap); + va_end(ap); + return rc; + } + + LSS_INLINE ssize_t LSS_NAME(recvmsg)(int s,struct kernel_msghdr *msg, + int flags){ + return (ssize_t)LSS_NAME(socketcall)(17, s, msg, flags); + } + + LSS_INLINE ssize_t LSS_NAME(sendmsg)(int s, + const struct kernel_msghdr *msg, + int flags) { + return (ssize_t)LSS_NAME(socketcall)(16, s, msg, flags); + } + + LSS_INLINE ssize_t LSS_NAME(sendto)(int s, const void *buf, size_t len, + int flags, + const struct kernel_sockaddr *to, + unsigned int tolen) { + return (ssize_t)LSS_NAME(socketcall)(11, s, buf, len, flags, to, tolen); + } + + LSS_INLINE int LSS_NAME(shutdown)(int s, int how) { + return LSS_NAME(socketcall)(13, s, how); + } + + LSS_INLINE int LSS_NAME(socket)(int domain, int type, int protocol) { + return LSS_NAME(socketcall)(1, domain, type, protocol); + } + + LSS_INLINE int LSS_NAME(socketpair)(int d, int type, int protocol, + int sv[2]) { + return LSS_NAME(socketcall)(8, d, type, protocol, sv); + } + #endif + #if defined(__i386__) || defined(__PPC__) + LSS_INLINE _syscall4(int, fstatat64, int, d, + const char *, p, + struct kernel_stat64 *, b, int, f) + #endif + #if defined(__i386__) || defined(__PPC__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI32) + LSS_INLINE _syscall3(pid_t, waitpid, pid_t, p, + int*, s, int, o) + #endif + #if defined(__mips__) + /* sys_pipe() on MIPS has non-standard calling conventions, as it returns + * both file handles through CPU registers. + */ + LSS_INLINE int LSS_NAME(pipe)(int *p) { + register unsigned long __v0 __asm__("$2") = __NR_pipe; + register unsigned long __v1 __asm__("$3"); + register unsigned long __r7 __asm__("$7"); + __asm__ __volatile__ ("syscall\n" + : "=&r"(__v0), "=&r"(__v1), "+r" (__r7) + : "0"(__v0) + : "$8", "$9", "$10", "$11", "$12", + "$13", "$14", "$15", "$24", "memory"); + if (__r7) { + LSS_ERRNO = __v0; + return -1; + } else { + p[0] = __v0; + p[1] = __v1; + return 0; + } + } + #else + LSS_INLINE _syscall1(int, pipe, int *, p) + #endif + /* TODO(csilvers): see if ppc can/should support this as well */ + #if defined(__i386__) || defined(__ARM_ARCH_3__) || \ + (defined(__mips__) && _MIPS_SIM != _MIPS_SIM_ABI64) + #define __NR__statfs64 __NR_statfs64 + #define __NR__fstatfs64 __NR_fstatfs64 + LSS_INLINE _syscall3(int, _statfs64, const char*, p, + size_t, s,struct kernel_statfs64*, b) + LSS_INLINE _syscall3(int, _fstatfs64, int, f, + size_t, s,struct kernel_statfs64*, b) + LSS_INLINE int LSS_NAME(statfs64)(const char *p, + struct kernel_statfs64 *b) { + return LSS_NAME(_statfs64)(p, sizeof(*b), b); + } + LSS_INLINE int LSS_NAME(fstatfs64)(int f,struct kernel_statfs64 *b) { + return LSS_NAME(_fstatfs64)(f, sizeof(*b), b); + } + #endif + + LSS_INLINE int LSS_NAME(execv)(const char *path, const char *const argv[]) { + extern char **environ; + return LSS_NAME(execve)(path, argv, (const char *const *)environ); + } + + LSS_INLINE pid_t LSS_NAME(gettid)() { + pid_t tid = LSS_NAME(_gettid)(); + if (tid != -1) { + return tid; + } + return LSS_NAME(getpid)(); + } + + LSS_INLINE void *LSS_NAME(mremap)(void *old_address, size_t old_size, + size_t new_size, int flags, ...) 
{ + va_list ap; + void *new_address, *rc; + va_start(ap, flags); + new_address = va_arg(ap, void *); + rc = LSS_NAME(_mremap)(old_address, old_size, new_size, + flags, new_address); + va_end(ap); + return rc; + } + + LSS_INLINE int LSS_NAME(ptrace_detach)(pid_t pid) { + /* PTRACE_DETACH can sometimes forget to wake up the tracee and it + * then sends job control signals to the real parent, rather than to + * the tracer. We reduce the risk of this happening by starting a + * whole new time slice, and then quickly sending a SIGCONT signal + * right after detaching from the tracee. + */ + int rc, err; + LSS_NAME(sched_yield)(); + rc = LSS_NAME(ptrace)(PTRACE_DETACH, pid, (void *)0, (void *)0); + err = LSS_ERRNO; + LSS_NAME(kill)(pid, SIGCONT); + LSS_ERRNO = err; + return rc; + } + + LSS_INLINE int LSS_NAME(raise)(int sig) { + return LSS_NAME(kill)(LSS_NAME(getpid)(), sig); + } + + LSS_INLINE int LSS_NAME(setpgrp)() { + return LSS_NAME(setpgid)(0, 0); + } + + LSS_INLINE int LSS_NAME(sysconf)(int name) { + extern int __getpagesize(void); + switch (name) { + case _SC_OPEN_MAX: { + struct kernel_rlimit limit; + return LSS_NAME(getrlimit)(RLIMIT_NOFILE, &limit) < 0 + ? 8192 : limit.rlim_cur; + } + case _SC_PAGESIZE: + return __getpagesize(); + default: + errno = ENOSYS; + return -1; + } + } + #if defined(__x86_64__) || \ + (defined(__mips__) && _MIPS_SIM == _MIPS_SIM_ABI64) + LSS_INLINE _syscall4(ssize_t, pread64, int, f, + void *, b, size_t, c, + loff_t, o) + LSS_INLINE _syscall4(ssize_t, pwrite64, int, f, + const void *, b, size_t, c, + loff_t, o) + LSS_INLINE _syscall3(int, readahead, int, f, + loff_t, o, unsigned, c) + #else + #define __NR__pread64 __NR_pread64 + #define __NR__pwrite64 __NR_pwrite64 + #define __NR__readahead __NR_readahead + LSS_INLINE _syscall5(ssize_t, _pread64, int, f, + void *, b, size_t, c, unsigned, o1, + unsigned, o2) + LSS_INLINE _syscall5(ssize_t, _pwrite64, int, f, + const void *, b, size_t, c, unsigned, o1, + long, o2) + LSS_INLINE _syscall4(int, _readahead, int, f, + unsigned, o1, unsigned, o2, size_t, c); + /* We force 64bit-wide parameters onto the stack, then access each + * 32-bit component individually. This guarantees that we build the + * correct parameters independent of the native byte-order of the + * underlying architecture. + */ + LSS_INLINE ssize_t LSS_NAME(pread64)(int fd, void *buf, size_t count, + loff_t off) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_pread64)(fd, buf, count, o.arg[0], o.arg[1]); + } + LSS_INLINE ssize_t LSS_NAME(pwrite64)(int fd, const void *buf, + size_t count, loff_t off) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_pwrite64)(fd, buf, count, o.arg[0], o.arg[1]); + } + LSS_INLINE int LSS_NAME(readahead)(int fd, loff_t off, int len) { + union { loff_t off; unsigned arg[2]; } o = { off }; + return LSS_NAME(_readahead)(fd, o.arg[0], o.arg[1], len); + } + #endif +#endif + +#if defined(__cplusplus) && !defined(SYS_CPLUSPLUS) +} +#endif + +#endif +#endif diff --git a/third_party/tcmalloc/chromium/src/base/linuxthreads.cc b/third_party/tcmalloc/chromium/src/base/linuxthreads.cc new file mode 100644 index 0000000..19da400 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/linuxthreads.cc @@ -0,0 +1,665 @@ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +#include "base/linuxthreads.h" + +#ifdef THREADS +#ifdef __cplusplus +extern "C" { +#endif + +#include <sched.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <sys/wait.h> + +#include "base/linux_syscall_support.h" +#include "base/thread_lister.h" + +#ifndef CLONE_UNTRACED +#define CLONE_UNTRACED 0x00800000 +#endif + + +/* Synchronous signals that should not be blocked while in the lister thread. + */ +static const int sync_signals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS, + SIGXCPU, SIGXFSZ }; + +/* itoa() is not a standard function, and we cannot safely call printf() + * after suspending threads. So, we just implement our own copy. A + * recursive approach is the easiest here. + */ +static char *local_itoa(char *buf, int i) { + if (i < 0) { + *buf++ = '-'; + return local_itoa(buf, -i); + } else { + if (i >= 10) + buf = local_itoa(buf, i/10); + *buf++ = (i%10) + '0'; + *buf = '\000'; + return buf; + } +} + + +/* Wrapper around clone() that runs "fn" on the same stack as the + * caller! Unlike fork(), the cloned thread shares the same address space. + * The caller must be careful to use only minimal amounts of stack until + * the cloned thread has returned. + * There is a good chance that the cloned thread and the caller will share + * the same copy of errno! + */ +#ifdef __GNUC__ +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 +/* Try to force this function into a separate stack frame, and make sure + * that arguments are passed on the stack. + */ +static int local_clone (int (*fn)(void *), void *arg, ...) + __attribute__ ((noinline)); +#endif +#endif + +static int local_clone (int (*fn)(void *), void *arg, ...) { + /* Leave 4kB of gap between the callers stack and the new clone. This + * should be more than sufficient for the caller to call waitpid() until + * the cloned thread terminates. 
+ * + * It is important that we set the CLONE_UNTRACED flag, because newer + * versions of "gdb" otherwise attempt to attach to our thread, and will + * attempt to reap its status codes. This subsequently results in the + * caller hanging indefinitely in waitpid(), waiting for a change in + * status that will never happen. By setting the CLONE_UNTRACED flag, we + * prevent "gdb" from stealing events, but we still expect the thread + * lister to fail, because it cannot PTRACE_ATTACH to the process that + * is being debugged. This is OK and the error code will be reported + * correctly. + */ + return sys_clone(fn, (char *)&arg - 4096, + CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_UNTRACED, arg, 0, 0, 0); +} + + +/* Local substitute for the atoi() function, which is not necessarily safe + * to call once threads are suspended (depending on whether libc looks up + * locale information, when executing atoi()). + */ +static int local_atoi(const char *s) { + int n = 0; + int neg = *s == '-'; + if (neg) + s++; + while (*s >= '0' && *s <= '9') + n = 10*n + (*s++ - '0'); + return neg ? -n : n; +} + + +/* Re-runs fn until it doesn't cause EINTR + */ +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + + +/* Wrap a class around system calls, in order to give us access to + * a private copy of errno. This only works in C++, but it has the + * advantage of not needing nested functions, which are a non-standard + * language extension. + */ +#ifdef __cplusplus +namespace { + class SysCalls { + public: + #define SYS_CPLUSPLUS + #define SYS_ERRNO my_errno + #define SYS_INLINE inline + #define SYS_PREFIX -1 + #undef SYS_LINUX_SYSCALL_SUPPORT_H + #include "linux_syscall_support.h" + SysCalls() : my_errno(0) { } + int my_errno; + }; +} +#define ERRNO sys.my_errno +#else +#define ERRNO my_errno +#endif + + +/* Wrapper for open() which is guaranteed to never return EINTR. + */ +static int c_open(const char *fname, int flags, int mode) { + ssize_t rc; + NO_INTR(rc = sys_open(fname, flags, mode)); + return rc; +} + + +/* abort() is not safely reentrant, and changes it's behavior each time + * it is called. This means, if the main application ever called abort() + * we cannot safely call it again. This would happen if we were called + * from a SIGABRT signal handler in the main application. So, document + * that calling SIGABRT from the thread lister makes it not signal safe + * (and vice-versa). + * Also, since we share address space with the main application, we + * cannot call abort() from the callback and expect the main application + * to behave correctly afterwards. In fact, the only thing we can do, is + * to terminate the main application with extreme prejudice (aka + * PTRACE_KILL). + * We set up our own SIGABRT handler to do this. + * In order to find the main application from the signal handler, we + * need to store information about it in global variables. This is + * safe, because the main application should be suspended at this + * time. If the callback ever called ResumeAllProcessThreads(), then + * we are running a higher risk, though. So, try to avoid calling + * abort() after calling ResumeAllProcessThreads. + */ +static volatile int *sig_pids, sig_num_threads, sig_proc, sig_marker; + + +/* Signal handler to help us recover from dying while we are attached to + * other threads. 
+ */ +static void SignalHandler(int signum, siginfo_t *si, void *data) { + if (sig_pids != NULL) { + if (signum == SIGABRT) { + while (sig_num_threads-- > 0) { + /* Not sure if sched_yield is really necessary here, but it does not */ + /* hurt, and it might be necessary for the same reasons that we have */ + /* to do so in sys_ptrace_detach(). */ + sys_sched_yield(); + sys_ptrace(PTRACE_KILL, sig_pids[sig_num_threads], 0, 0); + } + } else if (sig_num_threads > 0) { + ResumeAllProcessThreads(sig_num_threads, (int *)sig_pids); + } + } + sig_pids = NULL; + if (sig_marker >= 0) + NO_INTR(sys_close(sig_marker)); + sig_marker = -1; + if (sig_proc >= 0) + NO_INTR(sys_close(sig_proc)); + sig_proc = -1; + + sys__exit(signum == SIGABRT ? 1 : 2); +} + + +/* Try to dirty the stack, and hope that the compiler is not smart enough + * to optimize this function away. Or worse, the compiler could inline the + * function and permanently allocate the data on the stack. + */ +static void DirtyStack(size_t amount) { + char buf[amount]; + memset(buf, 0, amount); + sys_read(-1, buf, amount); +} + + +/* Data structure for passing arguments to the lister thread. + */ +#define ALT_STACKSIZE (MINSIGSTKSZ + 4096) + +struct ListerParams { + int result, err; + char *altstack_mem; + ListAllProcessThreadsCallBack callback; + void *parameter; + va_list ap; +}; + + +static void ListerThread(struct ListerParams *args) { + int found_parent = 0; + pid_t clone_pid = sys_gettid(), ppid = sys_getppid(); + char proc_self_task[80], marker_name[48], *marker_path; + const char *proc_paths[3]; + const char *const *proc_path = proc_paths; + int proc = -1, marker = -1, num_threads = 0; + int max_threads = 0, sig; + struct kernel_stat marker_sb, proc_sb; + stack_t altstack; + + /* Create "marker" that we can use to detect threads sharing the same + * address space and the same file handles. By setting the FD_CLOEXEC flag + * we minimize the risk of misidentifying child processes as threads; + * and since there is still a race condition, we will filter those out + * later, anyway. + */ + if ((marker = sys_socket(PF_LOCAL, SOCK_DGRAM, 0)) < 0 || + sys_fcntl(marker, F_SETFD, FD_CLOEXEC) < 0) { + failure: + args->result = -1; + args->err = errno; + if (marker >= 0) + NO_INTR(sys_close(marker)); + sig_marker = marker = -1; + if (proc >= 0) + NO_INTR(sys_close(proc)); + sig_proc = proc = -1; + sys__exit(1); + } + + /* Compute search paths for finding thread directories in /proc */ + local_itoa(strrchr(strcpy(proc_self_task, "/proc/"), '\000'), ppid); + strcpy(marker_name, proc_self_task); + marker_path = marker_name + strlen(marker_name); + strcat(proc_self_task, "/task/"); + proc_paths[0] = proc_self_task; /* /proc/$$/task/ */ + proc_paths[1] = "/proc/"; /* /proc/ */ + proc_paths[2] = NULL; + + /* Compute path for marker socket in /proc */ + local_itoa(strcpy(marker_path, "/fd/") + 4, marker); + if (sys_stat(marker_name, &marker_sb) < 0) { + goto failure; + } + + /* Catch signals on an alternate pre-allocated stack. This way, we can + * safely execute the signal handler even if we ran out of memory. + */ + memset(&altstack, 0, sizeof(altstack)); + altstack.ss_sp = args->altstack_mem; + altstack.ss_flags = 0; + altstack.ss_size = ALT_STACKSIZE; + sys_sigaltstack(&altstack, (const stack_t *)NULL); + + /* Some kernels forget to wake up traced processes, when the + * tracer dies. So, intercept synchronous signals and make sure + * that we wake up our tracees before dying. 
It is the caller's + * responsibility to ensure that asynchronous signals do not + * interfere with this function. + */ + sig_marker = marker; + sig_proc = -1; + for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { + struct kernel_sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction_ = SignalHandler; + sys_sigfillset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK|SA_SIGINFO|SA_RESETHAND; + sys_sigaction(sync_signals[sig], &sa, (struct kernel_sigaction *)NULL); + } + + /* Read process directories in /proc/... */ + for (;;) { + /* Some kernels know about threads, and hide them in "/proc" + * (although they are still there, if you know the process + * id). Threads are moved into a separate "task" directory. We + * check there first, and then fall back on the older naming + * convention if necessary. + */ + if ((sig_proc = proc = c_open(*proc_path, O_RDONLY|O_DIRECTORY, 0)) < 0) { + if (*++proc_path != NULL) + continue; + goto failure; + } + if (sys_fstat(proc, &proc_sb) < 0) + goto failure; + + /* Since we are suspending threads, we cannot call any libc + * functions that might acquire locks. Most notably, we cannot + * call malloc(). So, we have to allocate memory on the stack, + * instead. Since we do not know how much memory we need, we + * make a best guess. And if we guessed incorrectly we retry on + * a second iteration (by jumping to "detach_threads"). + * + * Unless the number of threads is increasing very rapidly, we + * should never need to do so, though, as our guestimate is very + * conservative. + */ + if (max_threads < proc_sb.st_nlink + 100) + max_threads = proc_sb.st_nlink + 100; + + /* scope */ { + pid_t pids[max_threads]; + int added_entries = 0; + sig_num_threads = num_threads; + sig_pids = pids; + for (;;) { + struct kernel_dirent *entry; + char buf[4096]; + ssize_t nbytes = sys_getdents(proc, (struct kernel_dirent *)buf, + sizeof(buf)); + if (nbytes < 0) + goto failure; + else if (nbytes == 0) { + if (added_entries) { + /* Need to keep iterating over "/proc" in multiple + * passes until we no longer find any more threads. This + * algorithm eventually completes, when all threads have + * been suspended. + */ + added_entries = 0; + sys_lseek(proc, 0, SEEK_SET); + continue; + } + break; + } + for (entry = (struct kernel_dirent *)buf; + entry < (struct kernel_dirent *)&buf[nbytes]; + entry = (struct kernel_dirent *)((char *)entry+entry->d_reclen)) { + if (entry->d_ino != 0) { + const char *ptr = entry->d_name; + pid_t pid; + + /* Some kernels hide threads by preceding the pid with a '.' */ + if (*ptr == '.') + ptr++; + + /* If the directory is not numeric, it cannot be a + * process/thread + */ + if (*ptr < '0' || *ptr > '9') + continue; + pid = local_atoi(ptr); + + /* Attach (and suspend) all threads */ + if (pid && pid != clone_pid) { + struct kernel_stat tmp_sb; + char fname[entry->d_reclen + 48]; + strcat(strcat(strcpy(fname, "/proc/"), + entry->d_name), marker_path); + + /* Check if the marker is identical to the one we created */ + if (sys_stat(fname, &tmp_sb) >= 0 && + marker_sb.st_ino == tmp_sb.st_ino) { + long i, j; + + /* Found one of our threads, make sure it is no duplicate */ + for (i = 0; i < num_threads; i++) { + /* Linear search is slow, but should not matter much for + * the typically small number of threads. 
+ */ + if (pids[i] == pid) { + /* Found a duplicate; most likely on second pass */ + goto next_entry; + } + } + + /* Check whether data structure needs growing */ + if (num_threads >= max_threads) { + /* Back to square one, this time with more memory */ + NO_INTR(sys_close(proc)); + goto detach_threads; + } + + /* Attaching to thread suspends it */ + pids[num_threads++] = pid; + sig_num_threads = num_threads; + if (sys_ptrace(PTRACE_ATTACH, pid, (void *)0, + (void *)0) < 0) { + /* If operation failed, ignore thread. Maybe it + * just died? There might also be a race + * condition with a concurrent core dumper or + * with a debugger. In that case, we will just + * make a best effort, rather than failing + * entirely. + */ + num_threads--; + sig_num_threads = num_threads; + goto next_entry; + } + while (sys_waitpid(pid, (int *)0, __WALL) < 0) { + if (errno != EINTR) { + sys_ptrace_detach(pid); + num_threads--; + sig_num_threads = num_threads; + goto next_entry; + } + } + + if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j || + sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i != j) { + /* Address spaces are distinct, even though both + * processes show the "marker". This is probably + * a forked child process rather than a thread. + */ + sys_ptrace_detach(pid); + num_threads--; + sig_num_threads = num_threads; + } else { + found_parent |= pid == ppid; + added_entries++; + } + } + } + } + next_entry:; + } + } + NO_INTR(sys_close(proc)); + sig_proc = proc = -1; + + /* If we failed to find any threads, try looking somewhere else in + * /proc. Maybe, threads are reported differently on this system. + */ + if (num_threads > 1 || !*++proc_path) { + NO_INTR(sys_close(marker)); + sig_marker = marker = -1; + + /* If we never found the parent process, something is very wrong. + * Most likely, we are running in debugger. Any attempt to operate + * on the threads would be very incomplete. Let's just report an + * error to the caller. + */ + if (!found_parent) { + ResumeAllProcessThreads(num_threads, pids); + sys__exit(3); + } + + /* Now we are ready to call the callback, + * which takes care of resuming the threads for us. + */ + args->result = args->callback(args->parameter, num_threads, + pids, args->ap); + args->err = errno; + + /* Callback should have resumed threads, but better safe than sorry */ + if (ResumeAllProcessThreads(num_threads, pids)) { + /* Callback forgot to resume at least one thread, report error */ + args->err = EINVAL; + args->result = -1; + } + + sys__exit(0); + } + detach_threads: + /* Resume all threads prior to retrying the operation */ + ResumeAllProcessThreads(num_threads, pids); + sig_pids = NULL; + num_threads = 0; + sig_num_threads = num_threads; + max_threads += 100; + } + } +} + + +/* This function gets the list of all linux threads of the current process + * passes them to the 'callback' along with the 'parameter' pointer; at the + * call back call time all the threads are paused via + * PTRACE_ATTACH. + * The callback is executed from a separate thread which shares only the + * address space, the filesystem, and the filehandles with the caller. Most + * notably, it does not share the same pid and ppid; and if it terminates, + * the rest of the application is still there. 'callback' is supposed to do + * or arrange for ResumeAllProcessThreads. This happens automatically, if + * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous + * signals are blocked. 
If the 'callback' decides to unblock them, it must + * ensure that they cannot terminate the application, or that + * ResumeAllProcessThreads will get called. + * It is an error for the 'callback' to make any library calls that could + * acquire locks. Most notably, this means that most system calls have to + * avoid going through libc. Also, this means that it is not legal to call + * exit() or abort(). + * We return -1 on error and the return value of 'callback' on success. + */ +int ListAllProcessThreads(void *parameter, + ListAllProcessThreadsCallBack callback, ...) { + char altstack_mem[ALT_STACKSIZE]; + struct ListerParams args; + pid_t clone_pid; + int dumpable = 1, sig; + struct kernel_sigset_t sig_blocked, sig_old; + + va_start(args.ap, callback); + + /* If we are short on virtual memory, initializing the alternate stack + * might trigger a SIGSEGV. Let's do this early, before it could get us + * into more trouble (i.e. before signal handlers try to use the alternate + * stack, and before we attach to other threads). + */ + memset(altstack_mem, 0, sizeof(altstack_mem)); + + /* Some of our cleanup functions could conceivable use more stack space. + * Try to touch the stack right now. This could be defeated by the compiler + * being too smart for it's own good, so try really hard. + */ + DirtyStack(32768); + + /* Make this process "dumpable". This is necessary in order to ptrace() + * after having called setuid(). + */ + dumpable = sys_prctl(PR_GET_DUMPABLE, 0); + if (!dumpable) + sys_prctl(PR_SET_DUMPABLE, 1); + + /* Fill in argument block for dumper thread */ + args.result = -1; + args.err = 0; + args.altstack_mem = altstack_mem; + args.parameter = parameter; + args.callback = callback; + + /* Before cloning the thread lister, block all asynchronous signals, as we */ + /* are not prepared to handle them. */ + sys_sigfillset(&sig_blocked); + for (sig = 0; sig < sizeof(sync_signals)/sizeof(*sync_signals); sig++) { + sys_sigdelset(&sig_blocked, sync_signals[sig]); + } + if (sys_sigprocmask(SIG_BLOCK, &sig_blocked, &sig_old)) { + args.err = errno; + args.result = -1; + goto failed; + } + + /* scope */ { + /* After cloning, both the parent and the child share the same instance + * of errno. We must make sure that at least one of these processes + * (in our case, the parent) uses modified syscall macros that update + * a local copy of errno, instead. + */ + #ifdef __cplusplus + #define sys0_sigprocmask sys.sigprocmask + #define sys0_waitpid sys.waitpid + SysCalls sys; + #else + int my_errno; + #define SYS_ERRNO my_errno + #define SYS_INLINE inline + #define SYS_PREFIX 0 + #undef SYS_LINUX_SYSCALL_SUPPORT_H + #include "linux_syscall_support.h" + #endif + + int clone_errno; + clone_pid = local_clone((int (*)(void *))ListerThread, &args); + clone_errno = errno; + + sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old); + + if (clone_pid >= 0) { + int status, rc; + while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 && + ERRNO == EINTR) { + /* Keep waiting */ + } + if (rc < 0) { + args.err = ERRNO; + args.result = -1; + } else if (WIFEXITED(status)) { + switch (WEXITSTATUS(status)) { + case 0: break; /* Normal process termination */ + case 2: args.err = EFAULT; /* Some fault (e.g. 
SIGSEGV) detected */ + args.result = -1; + break; + case 3: args.err = EPERM; /* Process is already being traced */ + args.result = -1; + break; + default:args.err = ECHILD; /* Child died unexpectedly */ + args.result = -1; + break; + } + } else if (!WIFEXITED(status)) { + args.err = EFAULT; /* Terminated due to an unhandled signal*/ + args.result = -1; + } + } else { + args.result = -1; + args.err = clone_errno; + } + } + + /* Restore the "dumpable" state of the process */ +failed: + if (!dumpable) + sys_prctl(PR_SET_DUMPABLE, dumpable); + + va_end(args.ap); + + errno = args.err; + return args.result; +} + +/* This function resumes the list of all linux threads that + * ListAllProcessThreads pauses before giving to its callback. + * The function returns non-zero if at least one thread was + * suspended and has now been resumed. + */ +int ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { + int detached_at_least_one = 0; + while (num_threads-- > 0) { + detached_at_least_one |= sys_ptrace_detach(thread_pids[num_threads]) >= 0; + } + return detached_at_least_one; +} + +#ifdef __cplusplus +} +#endif +#endif diff --git a/third_party/tcmalloc/chromium/src/base/linuxthreads.h b/third_party/tcmalloc/chromium/src/base/linuxthreads.h new file mode 100644 index 0000000..5c318fe --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/linuxthreads.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +#ifndef _LINUXTHREADS_H +#define _LINUXTHREADS_H + +/* Include thread_lister.h to get the interface that we implement for linux. + */ + +/* We currently only support x86-32 and x86-64 on Linux. Porting to other + * related platforms should not be difficult. + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(__ARM_ARCH_3__) || \ + defined(__mips__) || defined(__PPC__)) && defined(__linux) + +/* Define the THREADS symbol to make sure that there is exactly one core dumper + * built into the library. 
+ */ +#define THREADS "Linux /proc" + +#endif + +#endif /* _LINUXTHREADS_H */ diff --git a/third_party/tcmalloc/chromium/src/base/logging.cc b/third_party/tcmalloc/chromium/src/base/logging.cc new file mode 100644 index 0000000..4b97858 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/logging.cc @@ -0,0 +1,107 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file just provides storage for FLAGS_verbose. + +#include <config.h> +#include "base/logging.h" +#include "base/commandlineflags.h" + +DEFINE_int32(verbose, EnvToInt("PERFTOOLS_VERBOSE", 0), + "Set to numbers >0 for more verbose output, or <0 for less. " + "--verbose == -4 means we log fatal errors only."); + + +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +// While windows does have a POSIX-compatible API +// (_open/_write/_close), it acquires memory. Using this lower-level +// windows API is the closest we can get to being "raw". +RawFD RawOpenForWriting(const char* filename) { + // CreateFile allocates memory if file_name isn't absolute, so if + // that ever becomes a problem then we ought to compute the absolute + // path on its behalf (perhaps the ntdll/kernel function isn't aware + // of the working directory?) 
+ RawFD fd = CreateFileA(filename, GENERIC_WRITE, 0, NULL, + CREATE_ALWAYS, 0, NULL); + if (fd != kIllegalRawFD && GetLastError() == ERROR_ALREADY_EXISTS) + SetEndOfFile(fd); // truncate the existing file + return fd; +} + +void RawWrite(RawFD handle, const char* buf, size_t len) { + while (len > 0) { + DWORD wrote; + BOOL ok = WriteFile(handle, buf, len, &wrote, NULL); + // We do not use an asynchronous file handle, so ok==false means an error + if (!ok) break; + buf += wrote; + len -= wrote; + } +} + +void RawClose(RawFD handle) { + CloseHandle(handle); +} + +#else // _WIN32 || __CYGWIN__ || __CYGWIN32__ + +#ifdef HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +// Re-run fn until it doesn't cause EINTR. +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +RawFD RawOpenForWriting(const char* filename) { + return open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0664); +} + +void RawWrite(RawFD fd, const char* buf, size_t len) { + while (len > 0) { + ssize_t r; + NO_INTR(r = write(fd, buf, len)); + if (r <= 0) break; + buf += r; + len -= r; + } +} + +void RawClose(RawFD fd) { + NO_INTR(close(fd)); +} + +#endif // _WIN32 || __CYGWIN__ || __CYGWIN32__ diff --git a/third_party/tcmalloc/chromium/src/base/logging.h b/third_party/tcmalloc/chromium/src/base/logging.h new file mode 100644 index 0000000..3b1dacf --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/logging.h @@ -0,0 +1,236 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// This file contains #include information about logging-related stuff. +// Pretty much everybody needs to #include this file so that they can +// log various happenings. 
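The raw file I/O helpers implemented above in logging.cc (RawOpenForWriting, RawWrite, RawClose) are exposed to callers through this header. A minimal usage sketch follows; it is not part of the original sources, and the wrapper function name and file path are made up for illustration. It assumes only the declarations provided by this header.

  #include "base/logging.h"

  // Best-effort dump of a short message using the allocation-free helpers.
  // RawOpenForWriting() returns kIllegalRawFD on failure; RawWrite() retries
  // internally until the whole buffer is written or an error occurs.
  static void DumpNote(const char* path) {   // 'path' is hypothetical
    RawFD fd = RawOpenForWriting(path);
    if (fd == kIllegalRawFD)
      return;                                // logging is best effort: give up silently
    static const char kMsg[] = "heap profiler: dumping state\n";
    RawWrite(fd, kMsg, sizeof(kMsg) - 1);
    RawClose(fd);
  }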
+// +#ifndef _LOGGING_H_ +#define _LOGGING_H_ + +#include <config.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#include <string.h> // for strlen(), strcmp() +#include <assert.h> +#include <errno.h> // for errno +#include "base/commandlineflags.h" + +// On some systems (like freebsd), we can't call write() at all in a +// global constructor, perhaps because errno hasn't been set up. +// Calling the write syscall is safer (it doesn't set errno), so we +// prefer that. Note we don't care about errno for logging: we just +// do logging on a best-effort basis. +#ifdef HAVE_SYS_SYSCALL_H +#include <sys/syscall.h> +#define WRITE_TO_STDERR(buf, len) syscall(SYS_write, STDERR_FILENO, buf, len) +#else +#define WRITE_TO_STDERR(buf, len) write(STDERR_FILENO, buf, len) +#endif + + +// We log all messages at this log-level and below. +// INFO == -1, WARNING == -2, ERROR == -3, FATAL == -4 +DECLARE_int32(verbose); + +// CHECK dies with a fatal error if condition is not true. It is *not* +// controlled by NDEBUG, so the check will be executed regardless of +// compilation mode. Therefore, it is safe to do things like: +// CHECK(fp->Write(x) == 4) +// Note we use write instead of printf/puts to avoid the risk we'll +// call malloc(). +#define CHECK(condition) \ + do { \ + if (!(condition)) { \ + WRITE_TO_STDERR("Check failed: " #condition "\n", \ + sizeof("Check failed: " #condition "\n")-1); \ + exit(1); \ + } \ + } while (0) + +// This takes a message to print. The name is historical. +#define RAW_CHECK(condition, message) \ + do { \ + if (!(condition)) { \ + WRITE_TO_STDERR("Check failed: " #condition ": " message "\n", \ + sizeof("Check failed: " #condition ": " message "\n")-1);\ + exit(1); \ + } \ + } while (0) + +// This is like RAW_CHECK, but only in debug-mode +#ifdef NDEBUG +enum { DEBUG_MODE = 0 }; +#define RAW_DCHECK(condition, message) +#else +enum { DEBUG_MODE = 1 }; +#define RAW_DCHECK(condition, message) RAW_CHECK(condition, message) +#endif + +// This prints errno as well. Note we use write instead of printf/puts to +// avoid the risk we'll call malloc(). +#define PCHECK(condition) \ + do { \ + if (!(condition)) { \ + const int err_no = errno; \ + WRITE_TO_STDERR("Check failed: " #condition ": ", \ + sizeof("Check failed: " #condition ": ")-1); \ + WRITE_TO_STDERR(strerror(err_no), strlen(strerror(err_no))); \ + WRITE_TO_STDERR("\n", sizeof("\n")-1); \ + exit(1); \ + } \ + } while (0) + +// Helper macro for binary operators; prints the two values on error +// Don't use this macro directly in your code, use CHECK_EQ et al below + +// WARNING: These don't compile correctly if one of the arguments is a pointer +// and the other is NULL. To work around this, simply static_cast NULL to the +// type of the desired pointer. + +// TODO(jandrews): Also print the values in case of failure. Requires some +// sort of type-sensitive ToString() function. +#define CHECK_OP(op, val1, val2) \ + do { \ + if (!((val1) op (val2))) { \ + fprintf(stderr, "Check failed: %s %s %s\n", #val1, #op, #val2); \ + exit(1); \ + } \ + } while (0) + +#define CHECK_EQ(val1, val2) CHECK_OP(==, val1, val2) +#define CHECK_NE(val1, val2) CHECK_OP(!=, val1, val2) +#define CHECK_LE(val1, val2) CHECK_OP(<=, val1, val2) +#define CHECK_LT(val1, val2) CHECK_OP(< , val1, val2) +#define CHECK_GE(val1, val2) CHECK_OP(>=, val1, val2) +#define CHECK_GT(val1, val2) CHECK_OP(> , val1, val2) + +// A synonym for CHECK_* that is used in some unittests. 
+#define EXPECT_EQ(val1, val2) CHECK_EQ(val1, val2) +#define EXPECT_NE(val1, val2) CHECK_NE(val1, val2) +#define EXPECT_LE(val1, val2) CHECK_LE(val1, val2) +#define EXPECT_LT(val1, val2) CHECK_LT(val1, val2) +#define EXPECT_GE(val1, val2) CHECK_GE(val1, val2) +#define EXPECT_GT(val1, val2) CHECK_GT(val1, val2) +// As are these variants. +#define EXPECT_TRUE(cond) CHECK(cond) +#define EXPECT_FALSE(cond) CHECK(!(cond)) +#define EXPECT_STREQ(a, b) CHECK(strcmp(a, b) == 0) + +// Used for (libc) functions that return -1 and set errno +#define CHECK_ERR(invocation) PCHECK((invocation) != -1) + +// A few more checks that only happen in debug mode +#ifdef NDEBUG +#define DCHECK_EQ(val1, val2) +#define DCHECK_NE(val1, val2) +#define DCHECK_LE(val1, val2) +#define DCHECK_LT(val1, val2) +#define DCHECK_GE(val1, val2) +#define DCHECK_GT(val1, val2) +#else +#define DCHECK_EQ(val1, val2) CHECK_EQ(val1, val2) +#define DCHECK_NE(val1, val2) CHECK_NE(val1, val2) +#define DCHECK_LE(val1, val2) CHECK_LE(val1, val2) +#define DCHECK_LT(val1, val2) CHECK_LT(val1, val2) +#define DCHECK_GE(val1, val2) CHECK_GE(val1, val2) +#define DCHECK_GT(val1, val2) CHECK_GT(val1, val2) +#endif + + +#ifdef ERROR +#undef ERROR // may conflict with ERROR macro on windows +#endif +enum LogSeverity {INFO = -1, WARNING = -2, ERROR = -3, FATAL = -4}; + +// NOTE: we add a newline to the end of the output if it's not there already +inline void LogPrintf(int severity, const char* pat, va_list ap) { + // We write directly to the stderr file descriptor and avoid FILE + // buffering because that may invoke malloc() + char buf[600]; + vsnprintf(buf, sizeof(buf)-1, pat, ap); + if (buf[0] != '\0' && buf[strlen(buf)-1] != '\n') { + assert(strlen(buf)+1 < sizeof(buf)); + strcat(buf, "\n"); + } + WRITE_TO_STDERR(buf, strlen(buf)); + if ((severity) == FATAL) + abort(); // LOG(FATAL) indicates a big problem, so don't run atexit() calls +} + +// Note that since the order of global constructors is unspecified, +// global code that calls RAW_LOG may execute before FLAGS_verbose is set. +// Such code will run with verbosity == 0 no matter what. +#define VLOG_IS_ON(severity) (FLAGS_verbose >= severity) + +// In a better world, we'd use __VA_ARGS__, but VC++ 7 doesn't support it. +#define LOG_PRINTF(severity, pat) do { \ + if (VLOG_IS_ON(severity)) { \ + va_list ap; \ + va_start(ap, pat); \ + LogPrintf(severity, pat, ap); \ + va_end(ap); \ + } \ +} while (0) + +// RAW_LOG is the main function; some synonyms are used in unittests. +inline void RAW_LOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void RAW_VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void LOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void VLOG(int lvl, const char* pat, ...) { LOG_PRINTF(lvl, pat); } +inline void LOG_IF(int lvl, bool cond, const char* pat, ...) { + if (cond) LOG_PRINTF(lvl, pat); +} + +// This isn't technically logging, but it's also IO and also is an +// attempt to be "raw" -- that is, to not use any higher-level libc +// routines that might allocate memory or (ideally) try to allocate +// locks. We use an opaque file handle (not necessarily an int) +// to allow even more low-level stuff in the future. +// Like other "raw" routines, these functions are best effort, and +// thus don't return error codes (except RawOpenForWriting()). 
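Before the raw file-handle declarations that follow, a minimal sketch of how the check and logging macros above combine in practice; it is not part of the original header, and the helper function, path, and messages are hypothetical.

  #include <fcntl.h>
  #include "base/logging.h"

  static int OpenConfig(const char* path) {   // hypothetical helper
    RAW_VLOG(1, "opening %s", path);          // printed only when verbosity >= 1
    int fd = open(path, O_RDONLY);
    PCHECK(fd != -1);                         // on failure, prints strerror(errno) and exits
    CHECK_GE(fd, 0);                          // the _OP variants print the failed expression
    RAW_LOG(INFO, "opened fd %d", fd);        // INFO == -1, so shown at the default verbosity
    return fd;
  }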
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) +#include <windows.h> +typedef HANDLE RawFD; +const RawFD kIllegalRawFD = INVALID_HANDLE_VALUE; +#else +typedef int RawFD; +const RawFD kIllegalRawFD = -1; // what open returns if it fails +#endif // defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +RawFD RawOpenForWriting(const char* filename); // uses default permissions +void RawWrite(RawFD fd, const char* buf, size_t len); +void RawClose(RawFD fd); + +#endif // _LOGGING_H_ diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc new file mode 100644 index 0000000..900ae4e --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.cc @@ -0,0 +1,458 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// A low-level allocator that can be used by other low-level +// modules without introducing dependency cycles. +// This allocator is slow and wasteful of memory; +// it should not be used when performance is key. + +#include "base/low_level_alloc.h" +#include "base/dynamic_annotations.h" +#include "base/spinlock.h" +#include "base/logging.h" +#include "malloc_hook-inl.h" +#include <google/malloc_hook.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <new> // for placement-new + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +// A first-fit allocator with amortized logarithmic free() time. + +// --------------------------------------------------------------------------- +static const int kMaxLevel = 30; + +namespace { + // This struct describes one allocated block, or one free block. + struct AllocList { + struct Header { + intptr_t size; // size of entire region, including this field. Must be + // first. 
Valid in both allocated and unallocated blocks + intptr_t magic; // kMagicAllocated or kMagicUnallocated xor this + LowLevelAlloc::Arena *arena; // pointer to parent arena + void *dummy_for_alignment; // aligns regions to 0 mod 2*sizeof(void*) + } header; + + // Next two fields: in unallocated blocks: freelist skiplist data + // in allocated blocks: overlaps with client data + int levels; // levels in skiplist used + AllocList *next[kMaxLevel]; // actually has levels elements. + // The AllocList node may not have room for + // all kMaxLevel entries. See max_fit in + // LLA_SkiplistLevels() + }; +} + +// --------------------------------------------------------------------------- +// A trivial skiplist implementation. This is used to keep the freelist +// in address order while taking only logarithmic time per insert and delete. + +// An integer approximation of log2(size/base) +// Requires size >= base. +static int IntLog2(size_t size, size_t base) { + int result = 0; + for (size_t i = size; i > base; i >>= 1) { // i == floor(size/2**result) + result++; + } + // floor(size / 2**result) <= base < floor(size / 2**(result-1)) + // => log2(size/(base+1)) <= result < 1+log2(size/base) + // => result ~= log2(size/base) + return result; +} + +// Return a random integer n: p(n)=1/(2**n) if 1 <= n; p(n)=0 if n < 1. +static int Random() { + static int32 r = 1; // no locking---it's not critical + ANNOTATE_BENIGN_RACE(&r, "benign race, not critical."); + int result = 1; + while ((((r = r*1103515245 + 12345) >> 30) & 1) == 0) { + result++; + } + return result; +} + +// Return a number of skiplist levels for a node of size bytes, where +// base is the minimum node size. Compute level=log2(size / base)+n +// where n is 1 if random is false and otherwise a random number generated with +// the standard distribution for a skiplist: See Random() above. +// Bigger nodes tend to have more skiplist levels due to the log2(size / base) +// term, so first-fit searches touch fewer nodes. "level" is clipped so +// level<kMaxLevel and next[level-1] will fit in the node. +// 0 < LLA_SkiplistLevels(x,y,false) <= LLA_SkiplistLevels(x,y,true) < kMaxLevel +static int LLA_SkiplistLevels(size_t size, size_t base, bool random) { + // max_fit is the maximum number of levels that will fit in a node for the + // given size. We can't return more than max_fit, no matter what the + // random number generator says. + int max_fit = (size-OFFSETOF_MEMBER(AllocList, next)) / sizeof (AllocList *); + int level = IntLog2(size, base) + (random? Random() : 1); + if (level > max_fit) level = max_fit; + if (level > kMaxLevel-1) level = kMaxLevel - 1; + RAW_CHECK(level >= 1, "block not big enough for even one level"); + return level; +} + +// Return "atleast", the first element of AllocList *head s.t. *atleast >= *e. +// For 0 <= i < head->levels, set prev[i] to "no_greater", where no_greater +// points to the last element at level i in the AllocList less than *e, or is +// head if no such element exists. +static AllocList *LLA_SkiplistSearch(AllocList *head, + AllocList *e, AllocList **prev) { + AllocList *p = head; + for (int level = head->levels - 1; level >= 0; level--) { + for (AllocList *n; (n = p->next[level]) != 0 && n < e; p = n) { + } + prev[level] = p; + } + return (head->levels == 0) ? 0 : prev[0]->next[0]; +} + +// Insert element *e into AllocList *head. Set prev[] as LLA_SkiplistSearch. 
+// Requires that e->levels be previously set by the caller (using +// LLA_SkiplistLevels()) +static void LLA_SkiplistInsert(AllocList *head, AllocList *e, + AllocList **prev) { + LLA_SkiplistSearch(head, e, prev); + for (; head->levels < e->levels; head->levels++) { // extend prev pointers + prev[head->levels] = head; // to all *e's levels + } + for (int i = 0; i != e->levels; i++) { // add element to list + e->next[i] = prev[i]->next[i]; + prev[i]->next[i] = e; + } +} + +// Remove element *e from AllocList *head. Set prev[] as LLA_SkiplistSearch(). +// Requires that e->levels be previous set by the caller (using +// LLA_SkiplistLevels()) +static void LLA_SkiplistDelete(AllocList *head, AllocList *e, + AllocList **prev) { + AllocList *found = LLA_SkiplistSearch(head, e, prev); + RAW_CHECK(e == found, "element not in freelist"); + for (int i = 0; i != e->levels && prev[i]->next[i] == e; i++) { + prev[i]->next[i] = e->next[i]; + } + while (head->levels > 0 && head->next[head->levels - 1] == 0) { + head->levels--; // reduce head->levels if level unused + } +} + +// --------------------------------------------------------------------------- +// Arena implementation + +struct LowLevelAlloc::Arena { + Arena() : mu(SpinLock::LINKER_INITIALIZED) {} // does nothing; for static init + explicit Arena(int) : pagesize(0) {} // set pagesize to zero explicitly + // for non-static init + + SpinLock mu; // protects freelist, allocation_count, + // pagesize, roundup, min_size + AllocList freelist; // head of free list; sorted by addr (under mu) + int32 allocation_count; // count of allocated blocks (under mu) + int32 flags; // flags passed to NewArena (ro after init) + size_t pagesize; // ==getpagesize() (init under mu, then ro) + size_t roundup; // lowest power of 2 >= max(16,sizeof (AllocList)) + // (init under mu, then ro) + size_t min_size; // smallest allocation block size + // (init under mu, then ro) +}; + +// The default arena, which is used when 0 is passed instead of an Arena +// pointer. +static struct LowLevelAlloc::Arena default_arena; + +// A non-malloc-hooked arena: used only to allocate metadata for arenas that +// do not want malloc hook reporting, so that for them there's no malloc hook +// reporting even during arena creation. +static struct LowLevelAlloc::Arena unhooked_arena; + +// magic numbers to identify allocated and unallocated blocks +static const intptr_t kMagicAllocated = 0x4c833e95; +static const intptr_t kMagicUnallocated = ~kMagicAllocated; + +// create an appropriate magic number for an object at "ptr" +// "magic" should be kMagicAllocated or kMagicUnallocated +inline static intptr_t Magic(intptr_t magic, AllocList::Header *ptr) { + return magic ^ reinterpret_cast<intptr_t>(ptr); +} + +// Initialize the fields of an Arena +static void ArenaInit(LowLevelAlloc::Arena *arena) { + if (arena->pagesize == 0) { + arena->pagesize = getpagesize(); + // Round up block sizes to a power of two close to the header size. + arena->roundup = 16; + while (arena->roundup < sizeof (arena->freelist.header)) { + arena->roundup += arena->roundup; + } + // Don't allocate blocks less than twice the roundup size to avoid tiny + // free blocks. 
+ arena->min_size = 2 * arena->roundup; + arena->freelist.header.size = 0; + arena->freelist.header.magic = + Magic(kMagicUnallocated, &arena->freelist.header); + arena->freelist.header.arena = arena; + arena->freelist.levels = 0; + memset(arena->freelist.next, 0, sizeof (arena->freelist.next)); + arena->allocation_count = 0; + if (arena == &default_arena) { + // Default arena should be hooked, e.g. for heap-checker to trace + // pointer chains through objects in the default arena. + arena->flags = LowLevelAlloc::kCallMallocHook; + } else { + arena->flags = 0; // other arenas' flags may be overridden by client, + // but unhooked_arena will have 0 in 'flags'. + } + } +} + +// L < meta_data_arena->mu +LowLevelAlloc::Arena *LowLevelAlloc::NewArena(int32 flags, + Arena *meta_data_arena) { + RAW_CHECK(meta_data_arena != 0, "must pass a valid arena"); + if (meta_data_arena == &default_arena && + (flags & LowLevelAlloc::kCallMallocHook) == 0) { + meta_data_arena = &unhooked_arena; + } + // Arena(0) uses the constructor for non-static contexts + Arena *result = + new (AllocWithArena(sizeof (*result), meta_data_arena)) Arena(0); + ArenaInit(result); + result->flags = flags; + return result; +} + +// L < arena->mu, L < arena->arena->mu +bool LowLevelAlloc::DeleteArena(Arena *arena) { + RAW_CHECK(arena != 0 && arena != &default_arena && arena != &unhooked_arena, + "may not delete default arena"); + arena->mu.Lock(); + bool empty = (arena->allocation_count == 0); + arena->mu.Unlock(); + if (empty) { + while (arena->freelist.next[0] != 0) { + AllocList *region = arena->freelist.next[0]; + size_t size = region->header.size; + arena->freelist.next[0] = region->next[0]; + RAW_CHECK(region->header.magic == + Magic(kMagicUnallocated, ®ion->header), + "bad magic number in DeleteArena()"); + RAW_CHECK(region->header.arena == arena, + "bad arena pointer in DeleteArena()"); + RAW_CHECK(size % arena->pagesize == 0, + "empty arena has non-page-aligned block size"); + RAW_CHECK(reinterpret_cast<intptr_t>(region) % arena->pagesize == 0, + "empty arena has non-page-aligned block"); + RAW_CHECK(munmap(region, size) == 0, + "LowLevelAlloc::DeleteArena: munmap failed address"); + } + Free(arena); + } + return empty; +} + +// --------------------------------------------------------------------------- + +// Return value rounded up to next multiple of align. +// align must be a power of two. +static intptr_t RoundUp(intptr_t addr, intptr_t align) { + return (addr + align - 1) & ~(align - 1); +} + +// Equivalent to "return prev->next[i]" but with sanity checking +// that the freelist is in the correct order, that it +// consists of regions marked "unallocated", and that no two regions +// are adjacent in memory (they should have been coalesced). +// L < arena->mu +static AllocList *Next(int i, AllocList *prev, LowLevelAlloc::Arena *arena) { + RAW_CHECK(i < prev->levels, "too few levels in Next()"); + AllocList *next = prev->next[i]; + if (next != 0) { + RAW_CHECK(next->header.magic == Magic(kMagicUnallocated, &next->header), + "bad magic number in Next()"); + RAW_CHECK(next->header.arena == arena, + "bad arena pointer in Next()"); + if (prev != &arena->freelist) { + RAW_CHECK(prev < next, "unordered freelist"); + RAW_CHECK(reinterpret_cast<char *>(prev) + prev->header.size < + reinterpret_cast<char *>(next), "malformed freelist"); + } + } + return next; +} + +// Coalesce list item "a" with its successor if they are adjacent. 
+static void Coalesce(AllocList *a) { + AllocList *n = a->next[0]; + if (n != 0 && reinterpret_cast<char *>(a) + a->header.size == + reinterpret_cast<char *>(n)) { + LowLevelAlloc::Arena *arena = a->header.arena; + a->header.size += n->header.size; + n->header.magic = 0; + n->header.arena = 0; + AllocList *prev[kMaxLevel]; + LLA_SkiplistDelete(&arena->freelist, n, prev); + LLA_SkiplistDelete(&arena->freelist, a, prev); + a->levels = LLA_SkiplistLevels(a->header.size, arena->min_size, true); + LLA_SkiplistInsert(&arena->freelist, a, prev); + } +} + +// Adds block at location "v" to the free list +// L >= arena->mu +static void AddToFreelist(void *v, LowLevelAlloc::Arena *arena) { + AllocList *f = reinterpret_cast<AllocList *>( + reinterpret_cast<char *>(v) - sizeof (f->header)); + RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header), + "bad magic number in AddToFreelist()"); + RAW_CHECK(f->header.arena == arena, + "bad arena pointer in AddToFreelist()"); + f->levels = LLA_SkiplistLevels(f->header.size, arena->min_size, true); + AllocList *prev[kMaxLevel]; + LLA_SkiplistInsert(&arena->freelist, f, prev); + f->header.magic = Magic(kMagicUnallocated, &f->header); + Coalesce(f); // maybe coalesce with successor + Coalesce(prev[0]); // maybe coalesce with predecessor +} + +// Frees storage allocated by LowLevelAlloc::Alloc(). +// L < arena->mu +void LowLevelAlloc::Free(void *v) { + if (v != 0) { + AllocList *f = reinterpret_cast<AllocList *>( + reinterpret_cast<char *>(v) - sizeof (f->header)); + RAW_CHECK(f->header.magic == Magic(kMagicAllocated, &f->header), + "bad magic number in Free()"); + LowLevelAlloc::Arena *arena = f->header.arena; + if ((arena->flags & kCallMallocHook) != 0) { + MallocHook::InvokeDeleteHook(v); + } + arena->mu.Lock(); + AddToFreelist(v, arena); + RAW_CHECK(arena->allocation_count > 0, "nothing in arena to free"); + arena->allocation_count--; + arena->mu.Unlock(); + } +} + +// allocates and returns a block of size bytes, to be freed with Free() +// L < arena->mu +void *DoAllocWithArena(size_t request, LowLevelAlloc::Arena *arena) { + void *result = 0; + if (request != 0) { + AllocList *s; // will point to region that satisfies request + arena->mu.Lock(); + ArenaInit(arena); + // round up with header + size_t req_rnd = RoundUp(request + sizeof (s->header), arena->roundup); + for (;;) { // loop until we find a suitable region + // find the minimum levels that a block of this size must have + int i = LLA_SkiplistLevels(req_rnd, arena->min_size, false) - 1; + if (i < arena->freelist.levels) { // potential blocks exist + AllocList *before = &arena->freelist; // predecessor of s + while ((s = Next(i, before, arena)) != 0 && s->header.size < req_rnd) { + before = s; + } + if (s != 0) { // we found a region + break; + } + } + // we unlock before mmap() both because mmap() may call a callback hook, + // and because it may be slow. + arena->mu.Unlock(); + // mmap generous 64K chunks to decrease + // the chances/impact of fragmentation: + size_t new_pages_size = RoundUp(req_rnd, arena->pagesize * 16); + void *new_pages = mmap(0, new_pages_size, + PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + RAW_CHECK(new_pages != MAP_FAILED, "mmap error"); + arena->mu.Lock(); + s = reinterpret_cast<AllocList *>(new_pages); + s->header.size = new_pages_size; + // Pretend the block is allocated; call AddToFreelist() to free it. 
+ s->header.magic = Magic(kMagicAllocated, &s->header); + s->header.arena = arena; + AddToFreelist(&s->levels, arena); // insert new region into free list + } + AllocList *prev[kMaxLevel]; + LLA_SkiplistDelete(&arena->freelist, s, prev); // remove from free list + // s points to the first free region that's big enough + if (req_rnd + arena->min_size <= s->header.size) { // big enough to split + AllocList *n = reinterpret_cast<AllocList *> + (req_rnd + reinterpret_cast<char *>(s)); + n->header.size = s->header.size - req_rnd; + n->header.magic = Magic(kMagicAllocated, &n->header); + n->header.arena = arena; + s->header.size = req_rnd; + AddToFreelist(&n->levels, arena); + } + s->header.magic = Magic(kMagicAllocated, &s->header); + RAW_CHECK(s->header.arena == arena, ""); + arena->allocation_count++; + arena->mu.Unlock(); + result = &s->levels; + } + ANNOTATE_NEW_MEMORY(result, request); + return result; +} + +void *LowLevelAlloc::Alloc(size_t request) { + void *result = DoAllocWithArena(request, &default_arena); + if ((default_arena.flags & kCallMallocHook) != 0) { + // this call must be directly in the user-called allocator function + // for MallocHook::GetCallerStackTrace to work properly + MallocHook::InvokeNewHook(result, request); + } + return result; +} + +void *LowLevelAlloc::AllocWithArena(size_t request, Arena *arena) { + RAW_CHECK(arena != 0, "must pass a valid arena"); + void *result = DoAllocWithArena(request, arena); + if ((arena->flags & kCallMallocHook) != 0) { + // this call must be directly in the user-called allocator function + // for MallocHook::GetCallerStackTrace to work properly + MallocHook::InvokeNewHook(result, request); + } + return result; +} + +LowLevelAlloc::Arena *LowLevelAlloc::DefaultArena() { + return &default_arena; +} diff --git a/third_party/tcmalloc/chromium/src/base/low_level_alloc.h b/third_party/tcmalloc/chromium/src/base/low_level_alloc.h new file mode 100644 index 0000000..0df1298 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/low_level_alloc.h @@ -0,0 +1,100 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if !defined(_BASE_LOW_LEVEL_ALLOC_H_) +#define _BASE_LOW_LEVEL_ALLOC_H_ + +// A simple thread-safe memory allocator that does not depend on +// mutexes or thread-specific data. It is intended to be used +// sparingly, and only when malloc() would introduce an unwanted +// dependency, such as inside the heap-checker. + +#include <config.h> +#include <stddef.h> // for size_t +#include "base/basictypes.h" + +class LowLevelAlloc { + public: + struct Arena; // an arena from which memory may be allocated + + // Returns a pointer to a block of at least "request" bytes + // that have been newly allocated from the specific arena. + // for Alloc() call the DefaultArena() is used. + // Returns 0 if passed request==0. + // Does not return 0 under other circumstances; it crashes if memory + // is not available. + static void *Alloc(size_t request) + ATTRIBUTE_SECTION(malloc_hook); + static void *AllocWithArena(size_t request, Arena *arena) + ATTRIBUTE_SECTION(malloc_hook); + + // Deallocates a region of memory that was previously allocated with + // Alloc(). Does nothing if passed 0. "s" must be either 0, + // or must have been returned from a call to Alloc() and not yet passed to + // Free() since that call to Alloc(). The space is returned to the arena + // from which it was allocated. + static void Free(void *s) ATTRIBUTE_SECTION(malloc_hook); + + // ATTRIBUTE_SECTION(malloc_hook) for Alloc* and Free + // are to put all callers of MallocHook::Invoke* in this module + // into special section, + // so that MallocHook::GetCallerStackTrace can function accurately. + + // Create a new arena. + // The root metadata for the new arena is allocated in the + // meta_data_arena; the DefaultArena() can be passed for meta_data_arena. + // These values may be ored into flags: + enum { + // Calls to Alloc() and Free() will be reported + // via the MallocHook interface. + // The DefaultArena() has this flag on. + // NewArena(flags, DefaultArena()) w/o this bit in 'flags', + // on the other hand, will not cause any MallocHook + // calls even during the NewArena call itself + // (it will in fact use meta_data_arena different from DefaultArena()). + kCallMallocHook = 0x0001 + }; + static Arena *NewArena(int32 flags, Arena *meta_data_arena); + + // Destroys an arena allocated by NewArena and returns true, + // provided no allocated blocks remain in the arena. + // If allocated blocks remain in the arena, does nothing and + // returns false. + // It is illegal to attempt to destroy the DefaultArena(). + static bool DeleteArena(Arena *arena); + + // The default arena that always exists. + static Arena *DefaultArena(); + + private: + LowLevelAlloc(); // no instances +}; + +#endif diff --git a/third_party/tcmalloc/chromium/src/base/simple_mutex.h b/third_party/tcmalloc/chromium/src/base/simple_mutex.h new file mode 100644 index 0000000..a97172a --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/simple_mutex.h @@ -0,0 +1,331 @@ +// Copyright (c) 2007, Google Inc. 
+// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Craig Silverstein. +// +// A simple mutex wrapper, supporting locks and read-write locks. +// You should assume the locks are *not* re-entrant. +// +// To use: you should define the following macros in your configure.ac: +// ACX_PTHREAD +// AC_RWLOCK +// The latter is defined in ../autoconf. +// +// This class is meant to be internal-only and should be wrapped by an +// internal namespace. Before you use this module, please give the +// name of your internal namespace for this module. Or, if you want +// to expose it, you'll want to move it to the Google namespace. We +// cannot put this class in global namespace because there can be some +// problems when we have multiple versions of Mutex in each shared object. +// +// NOTE: TryLock() is broken for NO_THREADS mode, at least in NDEBUG +// mode. +// +// CYGWIN NOTE: Cygwin support for rwlock seems to be buggy: +// http://www.cygwin.com/ml/cygwin/2008-12/msg00017.html +// Because of that, we might as well use windows locks for +// cygwin. They seem to be more reliable than the cygwin pthreads layer. +// +// TRICKY IMPLEMENTATION NOTE: +// This class is designed to be safe to use during +// dynamic-initialization -- that is, by global constructors that are +// run before main() starts. The issue in this case is that +// dynamic-initialization happens in an unpredictable order, and it +// could be that someone else's dynamic initializer could call a +// function that tries to acquire this mutex -- but that all happens +// before this mutex's constructor has run. (This can happen even if +// the mutex and the function that uses the mutex are in the same .cc +// file.) Basically, because Mutex does non-trivial work in its +// constructor, it's not, in the naive implementation, safe to use +// before dynamic initialization has run on it. +// +// The solution used here is to pair the actual mutex primitive with a +// bool that is set to true when the mutex is dynamically initialized. +// (Before that it's false.) 
Then we modify all mutex routines to +// look at the bool, and not try to lock/unlock until the bool makes +// it to true (which happens after the Mutex constructor has run.) +// +// This works because before main() starts -- particularly, during +// dynamic initialization -- there are no threads, so a) it's ok that +// the mutex operations are a no-op, since we don't need locking then +// anyway; and b) we can be quite confident our bool won't change +// state between a call to Lock() and a call to Unlock() (that would +// require a global constructor in one translation unit to call Lock() +// and another global constructor in another translation unit to call +// Unlock() later, which is pretty perverse). +// +// That said, it's tricky, and can conceivably fail; it's safest to +// avoid trying to acquire a mutex in a global constructor, if you +// can. One way it can fail is that a really smart compiler might +// initialize the bool to true at static-initialization time (too +// early) rather than at dynamic-initialization time. To discourage +// that, we set is_safe_ to true in code (not the constructor +// colon-initializer) and set it to true via a function that always +// evaluates to true, but that the compiler can't know always +// evaluates to true. This should be good enough. +// +// A related issue is code that could try to access the mutex +// after it's been destroyed in the global destructors (because +// the Mutex global destructor runs before some other global +// destructor, that tries to acquire the mutex). The way we +// deal with this is by taking a constructor arg that global +// mutexes should pass in, that causes the destructor to do no +// work. We still depend on the compiler not doing anything +// weird to a Mutex's memory after it is destroyed, but for a +// static global variable, that's pretty safe. + +#ifndef GOOGLE_MUTEX_H_ +#define GOOGLE_MUTEX_H_ + +#include <config.h> + +#if defined(NO_THREADS) + typedef int MutexType; // to keep a lock-count +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN // We only need minimal includes +# endif + // We need Windows NT or later for TryEnterCriticalSection(). If you + // don't need that functionality, you can remove these _WIN32_WINNT + // lines, and change TryLock() to assert(0) or something. +# ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0400 +# endif +# include <windows.h> + typedef CRITICAL_SECTION MutexType; +#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) + // Needed for pthread_rwlock_*. If it causes problems, you could take it + // out, but then you'd have to unset HAVE_RWLOCK (at least on linux -- it + // *does* cause problems for FreeBSD, or MacOSX, but isn't needed + // for locking there.) +# ifdef __linux__ +# define _XOPEN_SOURCE 500 // may be needed to get the rwlock calls +# endif +# include <pthread.h> + typedef pthread_rwlock_t MutexType; +#elif defined(HAVE_PTHREAD) +# include <pthread.h> + typedef pthread_mutex_t MutexType; +#else +# error Need to implement mutex.h for your architecture, or #define NO_THREADS +#endif + +#include <assert.h> +#include <stdlib.h> // for abort() + +#define MUTEX_NAMESPACE perftools_mutex_namespace + +namespace MUTEX_NAMESPACE { + +class Mutex { + public: + // This is used for the single-arg constructor + enum LinkerInitialized { LINKER_INITIALIZED }; + + // Create a Mutex that is not held by anybody. This constructor is + // typically used for Mutexes allocated on the heap or the stack. 
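+  // For example (illustrative only):
+  //   Mutex mu;
+  //   MutexLock l(&mu);   // acquires in the constructor, releases on scope exit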
+ inline Mutex(); + // This constructor should be used for global, static Mutex objects. + // It inhibits work being done by the destructor, which makes it + // safer for code that tries to acqiure this mutex in their global + // destructor. + inline Mutex(LinkerInitialized); + + // Destructor + inline ~Mutex(); + + inline void Lock(); // Block if needed until free then acquire exclusively + inline void Unlock(); // Release a lock acquired via Lock() + inline bool TryLock(); // If free, Lock() and return true, else return false + // Note that on systems that don't support read-write locks, these may + // be implemented as synonyms to Lock() and Unlock(). So you can use + // these for efficiency, but don't use them anyplace where being able + // to do shared reads is necessary to avoid deadlock. + inline void ReaderLock(); // Block until free or shared then acquire a share + inline void ReaderUnlock(); // Release a read share of this Mutex + inline void WriterLock() { Lock(); } // Acquire an exclusive lock + inline void WriterUnlock() { Unlock(); } // Release a lock from WriterLock() + + private: + MutexType mutex_; + // We want to make sure that the compiler sets is_safe_ to true only + // when we tell it to, and never makes assumptions is_safe_ is + // always true. volatile is the most reliable way to do that. + volatile bool is_safe_; + // This indicates which constructor was called. + bool destroy_; + + inline void SetIsSafe() { is_safe_ = true; } + + // Catch the error of writing Mutex when intending MutexLock. + Mutex(Mutex* /*ignored*/) {} + // Disallow "evil" constructors + Mutex(const Mutex&); + void operator=(const Mutex&); +}; + +// Now the implementation of Mutex for various systems +#if defined(NO_THREADS) + +// When we don't have threads, we can be either reading or writing, +// but not both. We can have lots of readers at once (in no-threads +// mode, that's most likely to happen in recursive function calls), +// but only one writer. We represent this by having mutex_ be -1 when +// writing and a number > 0 when reading (and 0 when no lock is held). +// +// In debug mode, we assert these invariants, while in non-debug mode +// we do nothing, for efficiency. That's why everything is in an +// assert. + +Mutex::Mutex() : mutex_(0) { } +Mutex::Mutex(Mutex::LinkerInitialized) : mutex_(0) { } +Mutex::~Mutex() { assert(mutex_ == 0); } +void Mutex::Lock() { assert(--mutex_ == -1); } +void Mutex::Unlock() { assert(mutex_++ == -1); } +bool Mutex::TryLock() { if (mutex_) return false; Lock(); return true; } +void Mutex::ReaderLock() { assert(++mutex_ > 0); } +void Mutex::ReaderUnlock() { assert(mutex_-- > 0); } + +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) + +Mutex::Mutex() : destroy_(true) { + InitializeCriticalSection(&mutex_); + SetIsSafe(); +} +Mutex::Mutex(LinkerInitialized) : destroy_(false) { + InitializeCriticalSection(&mutex_); + SetIsSafe(); +} +Mutex::~Mutex() { if (destroy_) DeleteCriticalSection(&mutex_); } +void Mutex::Lock() { if (is_safe_) EnterCriticalSection(&mutex_); } +void Mutex::Unlock() { if (is_safe_) LeaveCriticalSection(&mutex_); } +bool Mutex::TryLock() { return is_safe_ ? 
+ TryEnterCriticalSection(&mutex_) != 0 : true; } +void Mutex::ReaderLock() { Lock(); } // we don't have read-write locks +void Mutex::ReaderUnlock() { Unlock(); } + +#elif defined(HAVE_PTHREAD) && defined(HAVE_RWLOCK) + +#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \ + if (is_safe_ && fncall(&mutex_) != 0) abort(); \ +} while (0) + +Mutex::Mutex() : destroy_(true) { + SetIsSafe(); + if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort(); +} +Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) { + SetIsSafe(); + if (is_safe_ && pthread_rwlock_init(&mutex_, NULL) != 0) abort(); +} +Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_rwlock_destroy); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_rwlock_wrlock); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } +bool Mutex::TryLock() { return is_safe_ ? + pthread_rwlock_trywrlock(&mutex_) == 0 : true; } +void Mutex::ReaderLock() { SAFE_PTHREAD(pthread_rwlock_rdlock); } +void Mutex::ReaderUnlock() { SAFE_PTHREAD(pthread_rwlock_unlock); } +#undef SAFE_PTHREAD + +#elif defined(HAVE_PTHREAD) + +#define SAFE_PTHREAD(fncall) do { /* run fncall if is_safe_ is true */ \ + if (is_safe_ && fncall(&mutex_) != 0) abort(); \ +} while (0) + +Mutex::Mutex() : destroy_(true) { + SetIsSafe(); + if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort(); +} +Mutex::Mutex(Mutex::LinkerInitialized) : destroy_(false) { + SetIsSafe(); + if (is_safe_ && pthread_mutex_init(&mutex_, NULL) != 0) abort(); +} +Mutex::~Mutex() { if (destroy_) SAFE_PTHREAD(pthread_mutex_destroy); } +void Mutex::Lock() { SAFE_PTHREAD(pthread_mutex_lock); } +void Mutex::Unlock() { SAFE_PTHREAD(pthread_mutex_unlock); } +bool Mutex::TryLock() { return is_safe_ ? + pthread_mutex_trylock(&mutex_) == 0 : true; } +void Mutex::ReaderLock() { Lock(); } +void Mutex::ReaderUnlock() { Unlock(); } +#undef SAFE_PTHREAD + +#endif + +// -------------------------------------------------------------------------- +// Some helper classes + +// MutexLock(mu) acquires mu when constructed and releases it when destroyed. +class MutexLock { + public: + explicit MutexLock(Mutex *mu) : mu_(mu) { mu_->Lock(); } + ~MutexLock() { mu_->Unlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + MutexLock(const MutexLock&); + void operator=(const MutexLock&); +}; + +// ReaderMutexLock and WriterMutexLock do the same, for rwlocks +class ReaderMutexLock { + public: + explicit ReaderMutexLock(Mutex *mu) : mu_(mu) { mu_->ReaderLock(); } + ~ReaderMutexLock() { mu_->ReaderUnlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + ReaderMutexLock(const ReaderMutexLock&); + void operator=(const ReaderMutexLock&); +}; + +class WriterMutexLock { + public: + explicit WriterMutexLock(Mutex *mu) : mu_(mu) { mu_->WriterLock(); } + ~WriterMutexLock() { mu_->WriterUnlock(); } + private: + Mutex * const mu_; + // Disallow "evil" constructors + WriterMutexLock(const WriterMutexLock&); + void operator=(const WriterMutexLock&); +}; + +// Catch bug where variable name is omitted, e.g. 
MutexLock (&mu); +#define MutexLock(x) COMPILE_ASSERT(0, mutex_lock_decl_missing_var_name) +#define ReaderMutexLock(x) COMPILE_ASSERT(0, rmutex_lock_decl_missing_var_name) +#define WriterMutexLock(x) COMPILE_ASSERT(0, wmutex_lock_decl_missing_var_name) + +} // namespace MUTEX_NAMESPACE + +using namespace MUTEX_NAMESPACE; + +#undef MUTEX_NAMESPACE + +#endif /* #define GOOGLE_SIMPLE_MUTEX_H_ */ diff --git a/third_party/tcmalloc/chromium/src/base/spinlock.cc b/third_party/tcmalloc/chromium/src/base/spinlock.cc new file mode 100644 index 0000000..48cdc89 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/spinlock.cc @@ -0,0 +1,126 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +#include <config.h> +#include <time.h> /* For nanosleep() */ +#ifdef HAVE_SCHED_H +#include <sched.h> /* For sched_yield() */ +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> /* For read() */ +#endif +#include <fcntl.h> /* for open(), O_RDONLY */ +#include <string.h> /* for strncmp */ +#include <errno.h> +#include "base/spinlock.h" +#include "base/cycleclock.h" +#include "base/sysinfo.h" /* for NumCPUs() */ + +// We can do contention-profiling of SpinLocks, but the code is in +// mutex.cc, which is not always linked in with spinlock. Hence we +// provide this weak definition, which is used if mutex.cc isn't linked in. +ATTRIBUTE_WEAK extern void SubmitSpinLockProfileData(const void *, int64); +void SubmitSpinLockProfileData(const void *, int64) {} + +static int adaptive_spin_count = 0; + +const base::LinkerInitialized SpinLock::LINKER_INITIALIZED = + base::LINKER_INITIALIZED; + +// The OS-specific header included below must provide two calls: +// Wait until *w becomes zero, atomically set it to 1 and return. +// static void SpinLockWait(volatile Atomic32 *w); +// +// Hint that a thread waiting in SpinLockWait() could now make progress. May +// do nothing. This call may not read or write *w; it must use only the +// address. 
+// static void SpinLockWake(volatile Atomic32 *w); +#if defined(_WIN32) +#include "base/spinlock_win32-inl.h" +#elif defined(__linux__) +#include "base/spinlock_linux-inl.h" +#else +#include "base/spinlock_posix-inl.h" +#endif + +namespace { +struct SpinLock_InitHelper { + SpinLock_InitHelper() { + // On multi-cpu machines, spin for longer before yielding + // the processor or sleeping. Reduces idle time significantly. + if (NumCPUs() > 1) { + adaptive_spin_count = 1000; + } + } +}; + +// Hook into global constructor execution: +// We do not do adaptive spinning before that, +// but nothing lock-intensive should be going on at that time. +static SpinLock_InitHelper init_helper; + +} // unnamed namespace + + +void SpinLock::SlowLock() { + int c = adaptive_spin_count; + + // Spin a few times in the hope that the lock holder releases the lock + while ((c > 0) && (lockword_ != 0)) { + c--; + } + + if (lockword_ == 1) { + int32 now = (CycleClock::Now() >> PROFILE_TIMESTAMP_SHIFT); + // Don't loose the lock: make absolutely sure "now" is not zero + now |= 1; + // Atomically replace the value of lockword_ with "now" if + // lockword_ is 1, thereby remembering the first timestamp to + // be recorded. + base::subtle::NoBarrier_CompareAndSwap(&lockword_, 1, now); + // base::subtle::NoBarrier_CompareAndSwap() returns: + // 0: the lock is/was available; nothing stored + // 1: our timestamp was stored + // > 1: an older timestamp is already in lockword_; nothing stored + } + + SpinLockWait(&lockword_); // wait until lock acquired; OS specific +} + +void SpinLock::SlowUnlock(int64 wait_timestamp) { + SpinLockWake(&lockword_); // wake waiter if necessary; OS specific + + // Collect contentionz profile info. Subtract one from wait_timestamp as + // antidote to "now |= 1;" in SlowLock(). + SubmitSpinLockProfileData(this, wait_timestamp - 1); +} diff --git a/third_party/tcmalloc/chromium/src/base/spinlock.h b/third_party/tcmalloc/chromium/src/base/spinlock.h new file mode 100644 index 0000000..dda59a6 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/spinlock.h @@ -0,0 +1,158 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +// +// Fast spinlocks (at least on x86, a lock/unlock pair is approximately +// half the cost of a Mutex because the unlock just does a store instead +// of a compare-and-swap which is expensive). + +// SpinLock is async signal safe. +// If used within a signal handler, all lock holders +// should block the signal even outside the signal handler. + +#ifndef BASE_SPINLOCK_H_ +#define BASE_SPINLOCK_H_ + +#include <config.h> +#include "base/basictypes.h" +#include "base/atomicops.h" +#include "base/dynamic_annotations.h" +#include "base/thread_annotations.h" + +class LOCKABLE SpinLock { + public: + SpinLock() : lockword_(0) { } + + // Special constructor for use with static SpinLock objects. E.g., + // + // static SpinLock lock(base::LINKER_INITIALIZED); + // + // When intialized using this constructor, we depend on the fact + // that the linker has already initialized the memory appropriately. + // A SpinLock constructed like this can be freely used from global + // initializers without worrying about the order in which global + // initializers run. + explicit SpinLock(base::LinkerInitialized /*x*/) { + // Does nothing; lockword_ is already initialized + } + + // Acquire this SpinLock. + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline void Lock() /*EXCLUSIVE_LOCK_FUNCTION()*/ { + if (Acquire_CompareAndSwap(&lockword_, 0, 1) != 0) { + SlowLock(); + } + ANNOTATE_RWLOCK_ACQUIRED(this, 1); + } + + // Acquire this SpinLock and return true if the acquisition can be + // done without blocking, else return false. If this SpinLock is + // free at the time of the call, TryLock will return true with high + // probability. + inline bool TryLock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { + bool res = (Acquire_CompareAndSwap(&lockword_, 0, 1) == 0); + if (res) { + ANNOTATE_RWLOCK_ACQUIRED(this, 1); + } + return res; + } + + // Release this SpinLock, which must be held by the calling thread. + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline void Unlock() /*UNLOCK_FUNCTION()*/ { + // This is defined in mutex.cc. + extern void SubmitSpinLockProfileData(const void *, int64); + + int64 wait_timestamp = static_cast<uint32>(lockword_); + ANNOTATE_RWLOCK_RELEASED(this, 1); + Release_Store(&lockword_, 0); + if (wait_timestamp != 1) { + // Collect contentionz profile info, and speed the wakeup of any waiter. + // The lockword_ value indicates when the waiter started waiting. + SlowUnlock(wait_timestamp); + } + } + + // Report if we think the lock can be held by this thread. + // When the lock is truly held by the invoking thread + // we will always return true. + // Indended to be used as CHECK(lock.IsHeld()); + inline bool IsHeld() const { + return lockword_ != 0; + } + + // The timestamp for contention lock profiling must fit into 31 bits. 
+ // as lockword_ is 32 bits and we loose an additional low-order bit due + // to the statement "now |= 1" in SlowLock(). + // To select 31 bits from the 64-bit cycle counter, we shift right by + // PROFILE_TIMESTAMP_SHIFT = 7. + // Using these 31 bits, we reduce granularity of time measurement to + // 256 cycles, and will loose track of wait time for waits greater than + // 109 seconds on a 5 GHz machine, longer for faster clock cycles. + // Waits this long should be very rare. + enum { PROFILE_TIMESTAMP_SHIFT = 7 }; + + static const base::LinkerInitialized LINKER_INITIALIZED; // backwards compat + private: + // Lock-state: 0 means unlocked; 1 means locked with no waiters; values + // greater than 1 indicate locked with waiters, where the value is the time + // the first waiter started waiting and is used for contention profiling. + volatile Atomic32 lockword_; + + void SlowLock(); + void SlowUnlock(int64 wait_timestamp); + + DISALLOW_EVIL_CONSTRUCTORS(SpinLock); +}; + +// Corresponding locker object that arranges to acquire a spinlock for +// the duration of a C++ scope. +class SCOPED_LOCKABLE SpinLockHolder { + private: + SpinLock* lock_; + public: + inline explicit SpinLockHolder(SpinLock* l) EXCLUSIVE_LOCK_FUNCTION(l) + : lock_(l) { + l->Lock(); + } + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline ~SpinLockHolder() /*UNLOCK_FUNCTION()*/ { lock_->Unlock(); } +}; +// Catch bug where variable name is omitted, e.g. SpinLockHolder (&lock); +#define SpinLockHolder(x) COMPILE_ASSERT(0, spin_lock_decl_missing_var_name) + + +#endif // BASE_SPINLOCK_H_ diff --git a/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h b/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h new file mode 100644 index 0000000..0df09a3 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/spinlock_linux-inl.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * This file is a Linux-specific part of spinlock.cc + */ + +#include <sched.h> +#include <time.h> +#include "base/linux_syscall_support.h" + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_PRIVATE_FLAG 128 + +static bool have_futex; +static int futex_private_flag = FUTEX_PRIVATE_FLAG; + +namespace { +static struct InitModule { + InitModule() { + int x = 0; + // futexes are ints, so we can use them only when + // that's the same size as the lockword_ in SpinLock. + have_futex = (sizeof (Atomic32) == sizeof (int) && + sys_futex(&x, FUTEX_WAKE, 1, 0) >= 0); + if (have_futex && + sys_futex(&x, FUTEX_WAKE | futex_private_flag, 1, 0) < 0) { + futex_private_flag = 0; + } + } +} init_module; +} // anonymous namespace + +static void SpinLockWait(volatile Atomic32 *w) { + int save_errno = errno; + struct timespec tm; + tm.tv_sec = 0; + if (have_futex) { + int value; + tm.tv_nsec = 1000000; // 1ms; really we're trying to sleep for one kernel + // clock tick + while ((value = base::subtle::Acquire_CompareAndSwap(w, 0, 1)) != 0) { + sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), + FUTEX_WAIT | futex_private_flag, + value, reinterpret_cast<struct kernel_timespec *>(&tm)); + } + } else { + tm.tv_nsec = 2000001; // above 2ms so linux 2.4 doesn't spin + if (base::subtle::NoBarrier_Load(w) != 0) { + sched_yield(); + } + while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) { + nanosleep(&tm, NULL); + } + } + errno = save_errno; +} + +static void SpinLockWake(volatile Atomic32 *w) { + if (have_futex) { + sys_futex(reinterpret_cast<int *>(const_cast<Atomic32 *>(w)), + FUTEX_WAKE | futex_private_flag, 1, 0); + } +} diff --git a/third_party/tcmalloc/chromium/src/base/spinlock_posix-inl.h b/third_party/tcmalloc/chromium/src/base/spinlock_posix-inl.h new file mode 100644 index 0000000..0d933c0 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/spinlock_posix-inl.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * This file is a Posix-specific part of spinlock.cc + */ + +#include <sched.h> +#include <time.h> + +static void SpinLockWait(volatile Atomic32 *w) { + int save_errno = errno; + struct timespec tm; + tm.tv_sec = 0; + tm.tv_nsec = 1000000; + if (base::subtle::NoBarrier_Load(w) != 0) { + sched_yield(); + } + while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) { + nanosleep(&tm, NULL); + } + errno = save_errno; +} + +static void SpinLockWake(volatile Atomic32 *w) { +} diff --git a/third_party/tcmalloc/chromium/src/base/spinlock_win32-inl.h b/third_party/tcmalloc/chromium/src/base/spinlock_win32-inl.h new file mode 100644 index 0000000..9058939 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/spinlock_win32-inl.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * This file is a Win32-specific part of spinlock.cc + */ + + +#include <windows.h> + +static void SpinLockWait(volatile Atomic32 *w) { + if (base::subtle::NoBarrier_Load(w) != 0) { + Sleep(0); + } + while (base::subtle::Acquire_CompareAndSwap(w, 0, 1) != 0) { + Sleep(1); + } +} + +static void SpinLockWake(volatile Atomic32 *w) { +} diff --git a/third_party/tcmalloc/chromium/src/base/stl_allocator.h b/third_party/tcmalloc/chromium/src/base/stl_allocator.h new file mode 100644 index 0000000..b0ddc68 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/stl_allocator.h @@ -0,0 +1,96 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Maxim Lifantsev + */ + + +#ifndef BASE_STL_ALLOCATOR_H_ +#define BASE_STL_ALLOCATOR_H_ + +#include <config.h> + +#include <limits> + +#include "base/basictypes.h" +#include "base/logging.h" + +// Generic allocator class for STL objects +// that uses a given type-less allocator Alloc, which must provide: +// static void* Alloc::Allocate(size_t size); +// static void Alloc::Free(void* ptr); +// +// STL_Allocator<T, MyAlloc> provides the same thread-safety +// guarantees as MyAlloc. +// +// Usage example: +// set<T, less<T>, STL_Allocator<T, MyAlloc> > my_set; +// CAVEAT: Parts of the code below are probably specific +// to the STL version(s) we are using. +// The code is simply lifted from what std::allocator<> provides. +template <typename T, class Alloc> +class STL_Allocator { + public: + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef T value_type; + + template <class T1> struct rebind { + typedef STL_Allocator<T1, Alloc> other; + }; + + STL_Allocator() { } + STL_Allocator(const STL_Allocator&) { } + template <class T1> STL_Allocator(const STL_Allocator<T1, Alloc>&) { } + ~STL_Allocator() { } + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + pointer allocate(size_type n, const void* = 0) { + RAW_DCHECK((n * sizeof(T)) / sizeof(T) == n, "n is too big to allocate"); + return static_cast<T*>(Alloc::Allocate(n * sizeof(T))); + } + void deallocate(pointer p, size_type /*n*/) { Alloc::Free(p); } + + size_type max_size() const { return size_t(-1) / sizeof(T); } + + void construct(pointer p, const T& val) { ::new(p) T(val); } + void destroy(pointer p) { p->~T(); } + + // There's no state, so these allocators are always equal + bool operator==(const STL_Allocator&) const { return true; } +}; + +#endif // BASE_STL_ALLOCATOR_H_ diff --git a/third_party/tcmalloc/chromium/src/base/sysinfo.cc b/third_party/tcmalloc/chromium/src/base/sysinfo.cc new file mode 100644 index 0000000..3919ba45 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/sysinfo.cc @@ -0,0 +1,901 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <config.h> +#if (defined(_WIN32) || defined(__MINGW32__)) && !defined(__CYGWIN__) && !defined(__CYGWIN32) +# define PLATFORM_WINDOWS 1 +#endif + +#include <stdlib.h> // for getenv() +#include <stdio.h> // for snprintf(), sscanf() +#include <string.h> // for memmove(), memchr(), etc. +#include <fcntl.h> // for open() +#include <errno.h> // for errno +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for read() +#endif +#if defined __MACH__ // Mac OS X, almost certainly +#include <mach-o/dyld.h> // for iterating over dll's in ProcMapsIter +#include <mach-o/loader.h> // for iterating over dll's in ProcMapsIter +#include <sys/types.h> +#include <sys/sysctl.h> // how we figure out numcpu's on OS X +#elif defined __FreeBSD__ +#include <sys/sysctl.h> +#elif defined __sun__ // Solaris +#include <procfs.h> // for, e.g., prmap_t +#elif defined(PLATFORM_WINDOWS) +#include <process.h> // for getpid() (actually, _getpid()) +#include <shlwapi.h> // for SHGetValueA() +#include <tlhelp32.h> // for Module32First() +#endif +#include "base/sysinfo.h" +#include "base/commandlineflags.h" +#include "base/logging.h" +#include "base/cycleclock.h" + +#ifdef PLATFORM_WINDOWS +#ifdef MODULEENTRY32 +// In a change from the usual W-A pattern, there is no A variant of +// MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. +// In unicode mode, tlhelp32.h #defines MODULEENTRY32 to be +// MODULEENTRY32W. These #undefs are the only way I see to get back +// access to the original, ascii struct (and related functions). +#undef MODULEENTRY32 +#undef Module32First +#undef Module32Next +#undef PMODULEENTRY32 +#undef LPMODULEENTRY32 +#endif /* MODULEENTRY32 */ +// MinGW doesn't seem to define this, perhaps some windowsen don't either. +#ifndef TH32CS_SNAPMODULE32 +#define TH32CS_SNAPMODULE32 0 +#endif /* TH32CS_SNAPMODULE32 */ +#endif /* PLATFORM_WINDOWS */ + +// Re-run fn until it doesn't cause EINTR. 
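+// For example, NO_INTR(close(fd)) retries close() for as long as it fails
+// with EINTR.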
+#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +// open/read/close can set errno, which may be illegal at this +// time, so prefer making the syscalls directly if we can. +#ifdef HAVE_SYS_SYSCALL_H +# include <sys/syscall.h> +# define safeopen(filename, mode) syscall(SYS_open, filename, mode) +# define saferead(fd, buffer, size) syscall(SYS_read, fd, buffer, size) +# define safeclose(fd) syscall(SYS_close, fd) +#else +# define safeopen(filename, mode) open(filename, mode) +# define saferead(fd, buffer, size) read(fd, buffer, size) +# define safeclose(fd) close(fd) +#endif + +// ---------------------------------------------------------------------- +// GetenvBeforeMain() +// GetUniquePathFromEnv() +// Some non-trivial getenv-related functions. +// ---------------------------------------------------------------------- + +const char* GetenvBeforeMain(const char* name) { + // static is ok because this function should only be called before + // main(), when we're single-threaded. + static char envbuf[16<<10]; +#ifndef PLATFORM_WINDOWS + if (*envbuf == '\0') { // haven't read the environ yet + int fd = safeopen("/proc/self/environ", O_RDONLY); + // The -2 below guarantees the last two bytes of the buffer will be \0\0 + if (fd == -1 || // unable to open the file, fall back onto libc + saferead(fd, envbuf, sizeof(envbuf) - 2) < 0) { // error reading file + RAW_VLOG(1, "Unable to open /proc/self/environ, falling back " + "on getenv(\"%s\"), which may not work", name); + if (fd != -1) safeclose(fd); + return getenv(name); + } + safeclose(fd); + } + const int namelen = strlen(name); + const char* p = envbuf; + while (*p != '\0') { // will happen at the \0\0 that terminates the buffer + // proc file has the format NAME=value\0NAME=value\0NAME=value\0... + const char* endp = (char*)memchr(p, '\0', sizeof(envbuf) - (p - envbuf)); + if (endp == NULL) // this entry isn't NUL terminated + return NULL; + else if (!memcmp(p, name, namelen) && p[namelen] == '=') // it's a match + return p + namelen+1; // point after = + p = endp + 1; + } + return NULL; // env var never found +#else + // TODO(mbelshe) - repeated calls to this function will overwrite the + // contents of the static buffer. + if (!GetEnvironmentVariableA(name, envbuf, sizeof(envbuf)-1)) + return NULL; + return envbuf; +#endif +} + +// This takes as an argument an environment-variable name (like +// CPUPROFILE) whose value is supposed to be a file-path, and sets +// path to that path, and returns true. If the env var doesn't exist, +// or is the empty string, leave path unchanged and returns false. +// The reason this is non-trivial is that this function handles munged +// pathnames. Here's why: +// +// If we're a child process of the 'main' process, we can't just use +// getenv("CPUPROFILE") -- the parent process will be using that path. +// Instead we append our pid to the pathname. How do we tell if we're a +// child process? Ideally we'd set an environment variable that all +// our children would inherit. But -- and this is seemingly a bug in +// gcc -- if you do a setenv() in a shared libarary in a global +// constructor, the environment setting is lost by the time main() is +// called. The only safe thing we can do in such a situation is to +// modify the existing envvar. So we do a hack: in the parent, we set +// the high bit of the 1st char of CPUPROFILE. In the child, we +// notice the high bit is set and append the pid(). 
This works +// assuming cpuprofile filenames don't normally have the high bit set +// in their first character! If that assumption is violated, we'll +// still get a profile, but one with an unexpected name. +// TODO(csilvers): set an envvar instead when we can do it reliably. +bool GetUniquePathFromEnv(const char* env_name, char* path) { + char* envval = getenv(env_name); + if (envval == NULL || *envval == '\0') + return false; + if (envval[0] & 128) { // high bit is set + snprintf(path, PATH_MAX, "%c%s_%u", // add pid and clear high bit + envval[0] & 127, envval+1, (unsigned int)(getpid())); + } else { + snprintf(path, PATH_MAX, "%s", envval); + envval[0] |= 128; // set high bit for kids to see + } + return true; +} + +// ---------------------------------------------------------------------- +// CyclesPerSecond() +// NumCPUs() +// It's important this not call malloc! -- they may be called at +// global-construct time, before we've set up all our proper malloc +// hooks and such. +// ---------------------------------------------------------------------- + +static double cpuinfo_cycles_per_second = 1.0; // 0.0 might be dangerous +static int cpuinfo_num_cpus = 1; // Conservative guess + +static void SleepForMilliseconds(int milliseconds) { +#ifdef PLATFORM_WINDOWS + _sleep(milliseconds); // Windows's _sleep takes milliseconds argument +#else + // Sleep for a few milliseconds + struct timespec sleep_time; + sleep_time.tv_sec = milliseconds / 1000; + sleep_time.tv_nsec = (milliseconds % 1000) * 1000000; + while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) + ; // Ignore signals and wait for the full interval to elapse. +#endif +} + +// Helper function estimates cycles/sec by observing cycles elapsed during +// sleep(). Using small sleep time decreases accuracy significantly. +static int64 EstimateCyclesPerSecond(const int estimate_time_ms) { + assert(estimate_time_ms > 0); + if (estimate_time_ms <= 0) + return 1; + double multiplier = 1000.0 / (double)estimate_time_ms; // scale by this much + + const int64 start_ticks = CycleClock::Now(); + SleepForMilliseconds(estimate_time_ms); + const int64 guess = int64(multiplier * (CycleClock::Now() - start_ticks)); + return guess; +} + +// WARNING: logging calls back to InitializeSystemInfo() so it must +// not invoke any logging code. Also, InitializeSystemInfo() can be +// called before main() -- in fact it *must* be since already_called +// isn't protected -- before malloc hooks are properly set up, so +// we make an effort not to call any routines which might allocate +// memory. + +static void InitializeSystemInfo() { + static bool already_called = false; // safe if we run before threads + if (already_called) return; + already_called = true; + + // I put in a never-called reference to EstimateCyclesPerSecond() here + // to silence the compiler for OS's that don't need it + if (0) EstimateCyclesPerSecond(0); + +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + char line[1024]; + char* err; + + // If CPU scaling is in effect, we want to use the *maximum* frequency, + // not whatever CPU speed some random processor happens to be using now. + bool saw_mhz = false; + const char* pname0 = "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"; + int fd0 = open(pname0, O_RDONLY); + if (fd0 != -1) { + memset(line, '\0', sizeof(line)); + read(fd0, line, sizeof(line)); + const int max_freq = strtol(line, &err, 10); + if (line[0] != '\0' && (*err == '\n' || *err == '\0')) { + // The value is in kHz. 
For example, on a 2GHz machine, the file + // contains the value "2000000". Historically this file contained no + // newline, but at some point the kernel started appending a newline. + cpuinfo_cycles_per_second = max_freq * 1000.0; + saw_mhz = true; + } + close(fd0); + } + + // Read /proc/cpuinfo for other values, and if there is no cpuinfo_max_freq. + const char* pname = "/proc/cpuinfo"; + int fd = open(pname, O_RDONLY); + if (fd == -1) { + perror(pname); + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + return; // TODO: use generic tester instead? + } + + double bogo_clock = 1.0; + int num_cpus = 0; + line[0] = line[1] = '\0'; + int chars_read = 0; + do { // we'll exit when the last read didn't read anything + // Move the next line to the beginning of the buffer + const int oldlinelen = strlen(line); + if (sizeof(line) == oldlinelen + 1) // oldlinelen took up entire line + line[0] = '\0'; + else // still other lines left to save + memmove(line, line + oldlinelen+1, sizeof(line) - (oldlinelen+1)); + // Terminate the new line, reading more if we can't find the newline + char* newline = strchr(line, '\n'); + if (newline == NULL) { + const int linelen = strlen(line); + const int bytes_to_read = sizeof(line)-1 - linelen; + assert(bytes_to_read > 0); // because the memmove recovered >=1 bytes + chars_read = read(fd, line + linelen, bytes_to_read); + line[linelen + chars_read] = '\0'; + newline = strchr(line, '\n'); + } + if (newline != NULL) + *newline = '\0'; + + if (!saw_mhz && strncmp(line, "cpu MHz", sizeof("cpu MHz")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) { + cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr[1] != '\0' && *err == '\0') + saw_mhz = true; + } + } else if (strncmp(line, "bogomips", sizeof("bogomips")-1) == 0) { + const char* freqstr = strchr(line, ':'); + if (freqstr) + bogo_clock = strtod(freqstr+1, &err) * 1000000.0; + if (freqstr == NULL || freqstr[1] == '\0' || *err != '\0') + bogo_clock = 1.0; + } else if (strncmp(line, "processor", sizeof("processor")-1) == 0) { + num_cpus++; // count up every time we see an "processor :" entry + } + } while (chars_read > 0); + close(fd); + + if (!saw_mhz) { + // If we didn't find anything better, we'll use bogomips, but + // we're not happy about it. + cpuinfo_cycles_per_second = bogo_clock; + } + if (cpuinfo_cycles_per_second == 0.0) { + cpuinfo_cycles_per_second = 1.0; // maybe unnecessary, but safe + } + if (num_cpus > 0) { + cpuinfo_num_cpus = num_cpus; + } + +#elif defined __FreeBSD__ + // For this sysctl to work, the machine must be configured without + // SMP, APIC, or APM support. hz should be 64-bit in freebsd 7.0 + // and later. Before that, it's a 32-bit quantity (and gives the + // wrong answer on machines faster than 2^32 Hz). 
See + // http://lists.freebsd.org/pipermail/freebsd-i386/2004-November/001846.html + // But also compare FreeBSD 7.0: + // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG70#L223 + // 231 error = sysctl_handle_quad(oidp, &freq, 0, req); + // To FreeBSD 6.3 (it's the same in 6-STABLE): + // http://fxr.watson.org/fxr/source/i386/i386/tsc.c?v=RELENG6#L131 + // 139 error = sysctl_handle_int(oidp, &freq, sizeof(freq), req); +#if __FreeBSD__ >= 7 + uint64_t hz = 0; +#else + unsigned int hz = 0; +#endif + size_t sz = sizeof(hz); + const char *sysctl_path = "machdep.tsc_freq"; + if ( sysctlbyname(sysctl_path, &hz, &sz, NULL, 0) != 0 ) { + fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n", + sysctl_path, strerror(errno)); + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); + } else { + cpuinfo_cycles_per_second = hz; + } + // TODO(csilvers): also figure out cpuinfo_num_cpus + +#elif defined(PLATFORM_WINDOWS) +# pragma comment(lib, "shlwapi.lib") // for SHGetValue() + // In NT, read MHz from the registry. If we fail to do so or we're in win9x + // then make a crude estimate. + OSVERSIONINFO os; + os.dwOSVersionInfoSize = sizeof(os); + DWORD data, data_size = sizeof(data); + if (GetVersionEx(&os) && + os.dwPlatformId == VER_PLATFORM_WIN32_NT && + SUCCEEDED(SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", NULL, &data, &data_size))) + cpuinfo_cycles_per_second = (int64)data * (int64)(1000 * 1000); // was mhz + else + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(500); // TODO <500? + + // Get the number of processors. + SYSTEM_INFO info; + GetSystemInfo(&info); + cpuinfo_num_cpus = info.dwNumberOfProcessors; + +#elif defined(__MACH__) && defined(__APPLE__) + // returning "mach time units" per second. the current number of elapsed + // mach time units can be found by calling uint64 mach_absolute_time(); + // while not as precise as actual CPU cycles, it is accurate in the face + // of CPU frequency scaling and multi-cpu/core machines. + // Our mac users have these types of machines, and accuracy + // (i.e. correctness) trumps precision. + // See cycleclock.h: CycleClock::Now(), which returns number of mach time + // units on Mac OS X. + mach_timebase_info_data_t timebase_info; + mach_timebase_info(&timebase_info); + double mach_time_units_per_nanosecond = + static_cast<double>(timebase_info.denom) / + static_cast<double>(timebase_info.numer); + cpuinfo_cycles_per_second = mach_time_units_per_nanosecond * 1e9; + + int num_cpus = 0; + size_t size = sizeof(num_cpus); + int numcpus_name[] = { CTL_HW, HW_NCPU }; + if (::sysctl(numcpus_name, arraysize(numcpus_name), &num_cpus, &size, 0, 0) + == 0 + && (size == sizeof(num_cpus))) + cpuinfo_num_cpus = num_cpus; + +#else + // Generic cycles per second counter + cpuinfo_cycles_per_second = EstimateCyclesPerSecond(1000); +#endif +} + +double CyclesPerSecond(void) { + InitializeSystemInfo(); + return cpuinfo_cycles_per_second; +} + +int NumCPUs(void) { + InitializeSystemInfo(); + return cpuinfo_num_cpus; +} + +// ---------------------------------------------------------------------- +// HasPosixThreads() +// Return true if we're running POSIX (e.g., NPTL on Linux) +// threads, as opposed to a non-POSIX thread libary. The thing +// that we care about is whether a thread's pid is the same as +// the thread that spawned it. If so, this function returns +// true. 
+// ---------------------------------------------------------------------- +bool HasPosixThreads() { +#if defined(__linux__) +#ifndef _CS_GNU_LIBPTHREAD_VERSION +#define _CS_GNU_LIBPTHREAD_VERSION 3 +#endif + char buf[32]; + // We assume that, if confstr() doesn't know about this name, then + // the same glibc is providing LinuxThreads. + if (confstr(_CS_GNU_LIBPTHREAD_VERSION, buf, sizeof(buf)) == 0) + return false; + return strncmp(buf, "NPTL", 4) == 0; +#elif defined(PLATFORM_WINDOWS) || defined(__CYGWIN__) || defined(__CYGWIN32__) + return false; +#else // other OS + return true; // Assume that everything else has Posix +#endif // else OS_LINUX +} + +// ---------------------------------------------------------------------- + +#if defined __linux__ || defined __FreeBSD__ || defined __sun__ || defined __CYGWIN__ || defined __CYGWIN32__ +static void ConstructFilename(const char* spec, pid_t pid, + char* buf, int buf_size) { + CHECK_LT(snprintf(buf, buf_size, + spec, + pid ? pid : getpid()), buf_size); +} +#endif + +// A templatized helper function instantiated for Mach (OS X) only. +// It can handle finding info for both 32 bits and 64 bits. +// Returns true if it successfully handled the hdr, false else. +#ifdef __MACH__ // Mac OS X, almost certainly +template<uint32_t kMagic, uint32_t kLCSegment, + typename MachHeader, typename SegmentCommand> +static bool NextExtMachHelper(const mach_header* hdr, + int current_image, int current_load_cmd, + uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev) { + static char kDefaultPerms[5] = "r-xp"; + if (hdr->magic != kMagic) + return false; + const char* lc = (const char *)hdr + sizeof(MachHeader); + // TODO(csilvers): make this not-quadradic (increment and hold state) + for (int j = 0; j < current_load_cmd; j++) // advance to *our* load_cmd + lc += ((const load_command *)lc)->cmdsize; + if (((const load_command *)lc)->cmd == kLCSegment) { + const intptr_t dlloff = _dyld_get_image_vmaddr_slide(current_image); + const SegmentCommand* sc = (const SegmentCommand *)lc; + if (start) *start = sc->vmaddr + dlloff; + if (end) *end = sc->vmaddr + sc->vmsize + dlloff; + if (flags) *flags = kDefaultPerms; // can we do better? + if (offset) *offset = sc->fileoff; + if (inode) *inode = 0; + if (filename) + *filename = const_cast<char*>(_dyld_get_image_name(current_image)); + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; // could we use sc->filesize? + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } + + return false; +} +#endif + +ProcMapsIterator::ProcMapsIterator(pid_t pid) { + Init(pid, NULL, false); +} + +ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer) { + Init(pid, buffer, false); +} + +ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer *buffer, + bool use_maps_backing) { + Init(pid, buffer, use_maps_backing); +} + +void ProcMapsIterator::Init(pid_t pid, Buffer *buffer, + bool use_maps_backing) { + pid_ = pid; + using_maps_backing_ = use_maps_backing; + dynamic_buffer_ = NULL; + if (!buffer) { + // If the user didn't pass in any buffer storage, allocate it + // now. This is the normal case; the signal handler passes in a + // static buffer. 
+ buffer = dynamic_buffer_ = new Buffer; + } else { + dynamic_buffer_ = NULL; + } + + ibuf_ = buffer->buf_; + + stext_ = etext_ = nextline_ = ibuf_; + ebuf_ = ibuf_ + Buffer::kBufSize - 1; + nextline_ = ibuf_; + +#if defined(__linux__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + if (use_maps_backing) { // don't bother with clever "self" stuff in this case + ConstructFilename("/proc/%d/maps_backing", pid, ibuf_, Buffer::kBufSize); + } else if (pid == 0) { + // We have to kludge a bit to deal with the args ConstructFilename + // expects. The 1 is never used -- it's only impt. that it's not 0. + ConstructFilename("/proc/self/maps", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/maps", pid, ibuf_, Buffer::kBufSize); + } + // No error logging since this can be called from the crash dump + // handler at awkward moments. Users should call Valid() before + // using. + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__FreeBSD__) + // We don't support maps_backing on freebsd + if (pid == 0) { + ConstructFilename("/proc/curproc/map", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize); + } + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__sun__) + if (pid == 0) { + ConstructFilename("/proc/self/map", 1, ibuf_, Buffer::kBufSize); + } else { + ConstructFilename("/proc/%d/map", pid, ibuf_, Buffer::kBufSize); + } + NO_INTR(fd_ = open(ibuf_, O_RDONLY)); +#elif defined(__MACH__) + current_image_ = _dyld_image_count(); // count down from the top + current_load_cmd_ = -1; +#elif defined(PLATFORM_WINDOWS) + snapshot_ = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | + TH32CS_SNAPMODULE32, + GetCurrentProcessId()); + memset(&module_, 0, sizeof(module_)); +#else + fd_ = -1; // so Valid() is always false +#endif + +} + +ProcMapsIterator::~ProcMapsIterator() { +#if defined(PLATFORM_WINDOWS) + if (snapshot_ != INVALID_HANDLE_VALUE) CloseHandle(snapshot_); +#elif defined(__MACH__) + // no cleanup necessary! +#else + if (fd_ >= 0) NO_INTR(close(fd_)); +#endif + delete dynamic_buffer_; +} + +bool ProcMapsIterator::Valid() const { +#if defined(PLATFORM_WINDOWS) + return snapshot_ != INVALID_HANDLE_VALUE; +#elif defined(__MACH__) + return 1; +#else + return fd_ != -1; +#endif +} + +bool ProcMapsIterator::Next(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename) { + return NextExt(start, end, flags, offset, inode, filename, NULL, NULL, + NULL, NULL, NULL); +} + +// This has too many arguments. It should really be building +// a map object and returning it. The problem is that this is called +// when the memory allocator state is undefined, hence the arguments. 
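+// For illustration, a typical Linux /proc/self/maps line such as
+//   08048000-0804c000 r-xp 00000000 03:01 3793678 /bin/cat
+// comes back from the parser below as start=0x08048000, end=0x0804c000,
+// flags="r-xp", offset=0, the device as major:minor 03:01, inode=3793678,
+// and filename="/bin/cat".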
+bool ProcMapsIterator::NextExt(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev) { + +#if defined(__linux__) || defined(__FreeBSD__) || defined(__CYGWIN__) || defined(__CYGWIN32__) + do { + // Advance to the start of the next line + stext_ = nextline_; + + // See if we have a complete line in the buffer already + nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ - stext_)); + if (!nextline_) { + // Shift/fill the buffer so we do have a line + int count = etext_ - stext_; + + // Move the current text to the start of the buffer + memmove(ibuf_, stext_, count); + stext_ = ibuf_; + etext_ = ibuf_ + count; + + int nread = 0; // fill up buffer with text + while (etext_ < ebuf_) { + NO_INTR(nread = read(fd_, etext_, ebuf_ - etext_)); + if (nread > 0) + etext_ += nread; + else + break; + } + + // Zero out remaining characters in buffer at EOF to avoid returning + // garbage from subsequent calls. + if (etext_ != ebuf_ && nread == 0) { + memset(etext_, 0, ebuf_ - etext_); + } + *etext_ = '\n'; // sentinel; safe because ibuf extends 1 char beyond ebuf + nextline_ = static_cast<char *>(memchr (stext_, '\n', etext_ + 1 - stext_)); + } + *nextline_ = 0; // turn newline into nul + nextline_ += ((nextline_ < etext_)? 1 : 0); // skip nul if not end of text + // stext_ now points at a nul-terminated line + uint64 tmpstart, tmpend, tmpoffset; + int64 tmpinode; + int major, minor; + unsigned filename_offset = 0; +#if defined(__linux__) + // for now, assume all linuxes have the same format + if (sscanf(stext_, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n", + start ? start : &tmpstart, + end ? end : &tmpend, + flags_, + offset ? offset : &tmpoffset, + &major, &minor, + inode ? inode : &tmpinode, &filename_offset) != 7) continue; +#elif defined(__CYGWIN__) || defined(__CYGWIN32__) + // cygwin is like linux, except the third field is the "entry point" + // rather than the offset (see format_process_maps at + // http://cygwin.com/cgi-bin/cvsweb.cgi/src/winsup/cygwin/fhandler_process.cc?rev=1.89&content-type=text/x-cvsweb-markup&cvsroot=src + // Offset is always be 0 on cygwin: cygwin implements an mmap + // by loading the whole file and then calling NtMapViewOfSection. + // Cygwin also seems to set its flags kinda randomly; use windows default. + char tmpflags[5]; + if (offset) + *offset = 0; + strcpy(flags_, "r-xp"); + if (sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n", + start ? start : &tmpstart, + end ? end : &tmpend, + tmpflags, + &tmpoffset, + &major, &minor, + inode ? inode : &tmpinode, &filename_offset) != 7) continue; +#elif defined(__FreeBSD__) + // For the format, see http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/fs/procfs/procfs_map.c?rev=1.31&content-type=text/x-cvsweb-markup + tmpstart = tmpend = tmpoffset = 0; + tmpinode = 0; + major = minor = 0; // can't get this info in freebsd + if (inode) + *inode = 0; // nor this + if (offset) + *offset = 0; // seems like this should be in there, but maybe not + // start end resident privateresident obj(?) prot refcnt shadowcnt + // flags copy_on_write needs_copy type filename: + // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat + if (sscanf(stext_, "0x%"SCNx64" 0x%"SCNx64" %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n", + start ? start : &tmpstart, + end ? 
end : &tmpend, + flags_, + &filename_offset) != 3) continue; +#endif + + // Depending on the Linux kernel being used, there may or may not be a space + // after the inode if there is no filename. sscanf will in such situations + // nondeterministically either fill in filename_offset or not (the results + // differ on multiple calls in the same run even with identical arguments). + // We don't want to wander off somewhere beyond the end of the string. + size_t stext_length = strlen(stext_); + if (filename_offset == 0 || filename_offset > stext_length) + filename_offset = stext_length; + + // We found an entry + if (flags) *flags = flags_; + if (filename) *filename = stext_ + filename_offset; + if (dev) *dev = minor | (major << 8); + + if (using_maps_backing_) { + // Extract and parse physical page backing info. + char *backing_ptr = stext_ + filename_offset + + strlen(stext_+filename_offset); + + // find the second '(' + int paren_count = 0; + while (--backing_ptr > stext_) { + if (*backing_ptr == '(') { + ++paren_count; + if (paren_count >= 2) { + uint64 tmp_file_mapping; + uint64 tmp_file_pages; + uint64 tmp_anon_mapping; + uint64 tmp_anon_pages; + + sscanf(backing_ptr+1, "F %"SCNx64" %"SCNd64") (A %"SCNx64" %"SCNd64")", + file_mapping ? file_mapping : &tmp_file_mapping, + file_pages ? file_pages : &tmp_file_pages, + anon_mapping ? anon_mapping : &tmp_anon_mapping, + anon_pages ? anon_pages : &tmp_anon_pages); + // null terminate the file name (there is a space + // before the first (. + backing_ptr[-1] = 0; + break; + } + } + } + } + + return true; + } while (etext_ > ibuf_); +#elif defined(__sun__) + // This is based on MA_READ == 4, MA_WRITE == 2, MA_EXEC == 1 + static char kPerms[8][4] = { "---", "--x", "-w-", "-wx", + "r--", "r-x", "rw-", "rwx" }; + COMPILE_ASSERT(MA_READ == 4, solaris_ma_read_must_equal_4); + COMPILE_ASSERT(MA_WRITE == 2, solaris_ma_write_must_equal_2); + COMPILE_ASSERT(MA_EXEC == 1, solaris_ma_exec_must_equal_1); + Buffer object_path; + int nread = 0; // fill up buffer with text + NO_INTR(nread = read(fd_, ibuf_, sizeof(prmap_t))); + if (nread == sizeof(prmap_t)) { + long inode_from_mapname = 0; + prmap_t* mapinfo = reinterpret_cast<prmap_t*>(ibuf_); + // Best-effort attempt to get the inode from the filename. I think the + // two middle ints are major and minor device numbers, but I'm not sure. + sscanf(mapinfo->pr_mapname, "ufs.%*d.%*d.%ld", &inode_from_mapname); + + if (pid_ == 0) { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/self/path/%s", mapinfo->pr_mapname), + Buffer::kBufSize); + } else { + CHECK_LT(snprintf(object_path.buf_, Buffer::kBufSize, + "/proc/%d/path/%s", pid_, mapinfo->pr_mapname), + Buffer::kBufSize); + } + ssize_t len = readlink(object_path.buf_, current_filename_, PATH_MAX); + CHECK_LT(len, PATH_MAX); + if (len < 0) + len = 0; + current_filename_[len] = '\0'; + + if (start) *start = mapinfo->pr_vaddr; + if (end) *end = mapinfo->pr_vaddr + mapinfo->pr_size; + if (flags) *flags = kPerms[mapinfo->pr_mflags & 7]; + if (offset) *offset = mapinfo->pr_offset; + if (inode) *inode = inode_from_mapname; + if (filename) *filename = current_filename_; + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } +#elif defined(__MACH__) + // We return a separate entry for each segment in the DLL. (TODO(csilvers): + // can we do better?) 
A DLL ("image") has load-commands, some of which + // talk about segment boundaries. + // cf image_for_address from http://svn.digium.com/view/asterisk/team/oej/minivoicemail/dlfcn.c?revision=53912 + for (; current_image_ >= 0; current_image_--) { + const mach_header* hdr = _dyld_get_image_header(current_image_); + if (!hdr) continue; + if (current_load_cmd_ < 0) // set up for this image + current_load_cmd_ = hdr->ncmds; // again, go from the top down + + // We start with the next load command (we've already looked at this one). + for (current_load_cmd_--; current_load_cmd_ >= 0; current_load_cmd_--) { +#ifdef MH_MAGIC_64 + if (NextExtMachHelper<MH_MAGIC_64, LC_SEGMENT_64, + struct mach_header_64, struct segment_command_64>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { + return true; + } +#endif + if (NextExtMachHelper<MH_MAGIC, LC_SEGMENT, + struct mach_header, struct segment_command>( + hdr, current_image_, current_load_cmd_, + start, end, flags, offset, inode, filename, + file_mapping, file_pages, anon_mapping, + anon_pages, dev)) { + return true; + } + } + // If we get here, no more load_cmd's in this image talk about + // segments. Go on to the next image. + } +#elif defined(PLATFORM_WINDOWS) + static char kDefaultPerms[5] = "r-xp"; + BOOL ok; + if (module_.dwSize == 0) { // only possible before first call + module_.dwSize = sizeof(module_); + ok = Module32First(snapshot_, &module_); + } else { + ok = Module32Next(snapshot_, &module_); + } + if (ok) { + uint64 base_addr = reinterpret_cast<DWORD_PTR>(module_.modBaseAddr); + if (start) *start = base_addr; + if (end) *end = base_addr + module_.modBaseSize; + if (flags) *flags = kDefaultPerms; + if (offset) *offset = 0; + if (inode) *inode = 0; + if (filename) *filename = module_.szExePath; + if (file_mapping) *file_mapping = 0; + if (file_pages) *file_pages = 0; + if (anon_mapping) *anon_mapping = 0; + if (anon_pages) *anon_pages = 0; + if (dev) *dev = 0; + return true; + } +#endif + + // We didn't find anything + return false; +} + +int ProcMapsIterator::FormatLine(char* buffer, int bufsize, + uint64 start, uint64 end, const char *flags, + uint64 offset, int64 inode, + const char *filename, dev_t dev) { + // We assume 'flags' looks like 'rwxp' or 'rwx'. + char r = (flags && flags[0] == 'r') ? 'r' : '-'; + char w = (flags && flags[0] && flags[1] == 'w') ? 'w' : '-'; + char x = (flags && flags[0] && flags[1] && flags[2] == 'x') ? 'x' : '-'; + // p always seems set on linux, so we set the default to 'p', not '-' + char p = (flags && flags[0] && flags[1] && flags[2] && flags[3] != 'p') + ? '-' : 'p'; + + const int rc = snprintf(buffer, bufsize, + "%08"PRIx64"-%08"PRIx64" %c%c%c%c %08"PRIx64" %02x:%02x %-11"PRId64" %s\n", + start, end, r,w,x,p, offset, + static_cast<int>(dev/256), static_cast<int>(dev%256), + inode, filename); + return (rc < 0 || rc >= bufsize) ? 0 : rc; +} + +namespace tcmalloc { + +// Helper to add the list of mapped shared libraries to a profile. +// Fill formatted "/proc/self/maps" contents into buffer 'buf' of size 'size' +// and return the actual size occupied in 'buf'. +// We do not provision for 0-terminating 'buf'. 
+int FillProcSelfMaps(char buf[], int size) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + int bytes_written = 0; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + bytes_written += it.FormatLine(buf + bytes_written, size - bytes_written, + start, end, flags, offset, inode, filename, + 0); + } + return bytes_written; +} + +// Dump the same data as FillProcSelfMaps reads to fd. +// It seems easier to repeat parts of FillProcSelfMaps here than to +// reuse it via a call. +void DumpProcSelfMaps(RawFD fd) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + ProcMapsIterator::Buffer linebuf; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_), + start, end, flags, offset, inode, filename, + 0); + RawWrite(fd, linebuf.buf_, written); + } +} + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/base/sysinfo.h b/third_party/tcmalloc/chromium/src/base/sysinfo.h new file mode 100644 index 0000000..0bcc1f5 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/sysinfo.h @@ -0,0 +1,233 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// All functions here are thread-hostile due to file caching unless +// commented otherwise. 
+ +#ifndef _SYSINFO_H_ +#define _SYSINFO_H_ + +#include <config.h> + +#include <time.h> +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) +#include <windows.h> // for DWORD +#include <TlHelp32.h> // for CreateToolhelp32Snapshot +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for pid_t +#endif +#include <stddef.h> // for size_t +#include <limits.h> // for PATH_MAX +#include "base/basictypes.h" +#include "base/logging.h" // for RawFD + +// This getenv function is safe to call before the C runtime is initialized. +// On Windows, it utilizes GetEnvironmentVariable() and on unix it uses +// /proc/self/environ instead calling getenv(). It's intended to be used in +// routines that run before main(), when the state required for getenv() may +// not be set up yet. In particular, errno isn't set up until relatively late +// (after the pthreads library has a chance to make it threadsafe), and +// getenv() doesn't work until then. +// On some platforms, this call will utilize the same, static buffer for +// repeated GetenvBeforeMain() calls. Callers should not expect pointers from +// this routine to be long lived. +// Note that on unix, /proc only has the environment at the time the +// application was started, so this routine ignores setenv() calls/etc. Also +// note it only reads the first 16K of the environment. +extern const char* GetenvBeforeMain(const char* name); + +// This takes as an argument an environment-variable name (like +// CPUPROFILE) whose value is supposed to be a file-path, and sets +// path to that path, and returns true. Non-trivial for surprising +// reasons, as documented in sysinfo.cc. path must have space PATH_MAX. +extern bool GetUniquePathFromEnv(const char* env_name, char* path); + +extern int NumCPUs(); + +// processor cycles per second of each processor. Thread-safe. +extern double CyclesPerSecond(void); + + +// Return true if we're running POSIX (e.g., NPTL on Linux) threads, +// as opposed to a non-POSIX thread libary. The thing that we care +// about is whether a thread's pid is the same as the thread that +// spawned it. If so, this function returns true. +// Thread-safe. +// Note: We consider false negatives to be OK. +bool HasPosixThreads(); + +#ifndef SWIG // SWIG doesn't like struct Buffer and variable arguments. + +// A ProcMapsIterator abstracts access to /proc/maps for a given +// process. Needs to be stack-allocatable and avoid using stdio/malloc +// so it can be used in the google stack dumper, heap-profiler, etc. +// +// On Windows and Mac OS X, this iterator iterates *only* over DLLs +// mapped into this process space. For Linux, FreeBSD, and Solaris, +// it iterates over *all* mapped memory regions, including anonymous +// mmaps. For other O/Ss, it is unlikely to work at all, and Valid() +// will always return false. Also note: this routine only works on +// FreeBSD if procfs is mounted: make sure this is in your /etc/fstab: +// proc /proc procfs rw 0 0 +class ProcMapsIterator { + public: + struct Buffer { +#ifdef __FreeBSD__ + // FreeBSD requires us to read all of the maps file at once, so + // we have to make a buffer that's "always" big enough + static const size_t kBufSize = 102400; +#else // a one-line buffer is good enough + static const size_t kBufSize = PATH_MAX + 1024; +#endif + char buf_[kBufSize]; + }; + + + // Create a new iterator for the specified pid. pid can be 0 for "self". 
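+  // Example use (same pattern as FillProcSelfMaps in sysinfo.cc):
+  //   ProcMapsIterator it(0);   // 0 means "current pid"
+  //   uint64 start, end, offset; int64 inode; char *flags, *filename;
+  //   while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) {
+  //     ... // one mapped region (or DLL, on Windows/Mac) per iteration
+  //   }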
+ explicit ProcMapsIterator(pid_t pid); + + // Create an iterator with specified storage (for use in signal + // handler). "buffer" should point to a ProcMapsIterator::Buffer + // buffer can be NULL in which case a bufer will be allocated. + ProcMapsIterator(pid_t pid, Buffer *buffer); + + // Iterate through maps_backing instead of maps if use_maps_backing + // is true. Otherwise the same as above. buffer can be NULL and + // it will allocate a buffer itself. + ProcMapsIterator(pid_t pid, Buffer *buffer, + bool use_maps_backing); + + // Returns true if the iterator successfully initialized; + bool Valid() const; + + // Returns a pointer to the most recently parsed line. Only valid + // after Next() returns true, and until the iterator is destroyed or + // Next() is called again. This may give strange results on non-Linux + // systems. Prefer FormatLine() if that may be a concern. + const char *CurrentLine() const { return stext_; } + + // Writes the "canonical" form of the /proc/xxx/maps info for a single + // line to the passed-in buffer. Returns the number of bytes written, + // or 0 if it was not able to write the complete line. (To guarantee + // success, buffer should have size at least Buffer::kBufSize.) + // Takes as arguments values set via a call to Next(). The + // "canonical" form of the line (taken from linux's /proc/xxx/maps): + // <start_addr(hex)>-<end_addr(hex)> <perms(rwxp)> <offset(hex)> + + // <major_dev(hex)>:<minor_dev(hex)> <inode> <filename> Note: the + // eg + // 08048000-0804c000 r-xp 00000000 03:01 3793678 /bin/cat + // If you don't have the dev_t (dev), feel free to pass in 0. + // (Next() doesn't return a dev_t, though NextExt does.) + // + // Note: if filename and flags were obtained via a call to Next(), + // then the output of this function is only valid if Next() returned + // true, and only until the iterator is destroyed or Next() is + // called again. (Since filename, at least, points into CurrentLine.) + static int FormatLine(char* buffer, int bufsize, + uint64 start, uint64 end, const char *flags, + uint64 offset, int64 inode, const char *filename, + dev_t dev); + + // Find the next entry in /proc/maps; return true if found or false + // if at the end of the file. + // + // Any of the result pointers can be NULL if you're not interested + // in those values. + // + // If "flags" and "filename" are passed, they end up pointing to + // storage within the ProcMapsIterator that is valid only until the + // iterator is destroyed or Next() is called again. The caller may + // modify the contents of these strings (up as far as the first NUL, + // and only until the subsequent call to Next()) if desired. + + // The offsets are all uint64 in order to handle the case of a + // 32-bit process running on a 64-bit kernel + // + // IMPORTANT NOTE: see top-of-class notes for details about what + // mapped regions Next() iterates over, depending on O/S. + // TODO(csilvers): make flags and filename const. 
+ bool Next(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename); + + bool NextExt(uint64 *start, uint64 *end, char **flags, + uint64 *offset, int64 *inode, char **filename, + uint64 *file_mapping, uint64 *file_pages, + uint64 *anon_mapping, uint64 *anon_pages, + dev_t *dev); + + ~ProcMapsIterator(); + + private: + void Init(pid_t pid, Buffer *buffer, bool use_maps_backing); + + char *ibuf_; // input buffer + char *stext_; // start of text + char *etext_; // end of text + char *nextline_; // start of next line + char *ebuf_; // end of buffer (1 char for a nul) +#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) + HANDLE snapshot_; // filehandle on dll info + // In a change from the usual W-A pattern, there is no A variant of + // MODULEENTRY32. Tlhelp32.h #defines the W variant, but not the A. + // We want the original A variants, and this #undef is the only + // way I see to get them. Redefining it when we're done prevents us + // from affecting other .cc files. +# ifdef MODULEENTRY32 // Alias of W +# undef MODULEENTRY32 + MODULEENTRY32 module_; // info about current dll (and dll iterator) +# define MODULEENTRY32 MODULEENTRY32W +# else // It's the ascii, the one we want. + MODULEENTRY32 module_; // info about current dll (and dll iterator) +# endif +#elif defined(__MACH__) + int current_image_; // dll's are called "images" in macos parlance + int current_load_cmd_; // the segment of this dll we're examining +#elif defined(__sun__) // Solaris + int fd_; + char current_filename_[PATH_MAX]; +#else + int fd_; // filehandle on /proc/*/maps +#endif + pid_t pid_; + char flags_[10]; + Buffer* dynamic_buffer_; // dynamically-allocated Buffer + bool using_maps_backing_; // true if we are looking at maps_backing instead of maps. +}; + +#endif /* #ifndef SWIG */ + +// Helper routines + +namespace tcmalloc { +int FillProcSelfMaps(char buf[], int size); +void DumpProcSelfMaps(RawFD fd); +} + +#endif /* #ifndef _SYSINFO_H_ */ diff --git a/third_party/tcmalloc/chromium/src/base/thread_annotations.h b/third_party/tcmalloc/chromium/src/base/thread_annotations.h new file mode 100644 index 0000000..ded13d6 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/thread_annotations.h @@ -0,0 +1,152 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Le-Chun Wu +// +// This header file contains the macro definitions for thread safety +// annotations that allow the developers to document the locking policies +// of their multi-threaded code. The annotations can also help program +// analysis tools to identify potential thread safety issues. +// +// The annotations are implemented using GCC's "attributes" extension. +// Using the macros defined here instead of the raw GCC attributes allows +// for portability and future compatibility. +// +// This functionality is not yet fully implemented in perftools, +// but may be one day. + +#ifndef BASE_THREAD_ANNOTATIONS_H_ +#define BASE_THREAD_ANNOTATIONS_H_ + +#if defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) && (!defined(SWIG)) + +// Document if a shared variable/field needs to be protected by a lock. +// GUARDED_BY allows the user to specify a particular lock that should be +// held when accessing the annotated variable, while GUARDED_VAR only +// indicates a shared variable should be guarded (by any lock). GUARDED_VAR +// is primarily used when the client cannot express the name of the lock. +#define GUARDED_BY(x) __attribute__ ((guarded_by(x))) +#define GUARDED_VAR __attribute__ ((guarded)) + +// Document if the memory location pointed to by a pointer should be guarded +// by a lock when dereferencing the pointer. Similar to GUARDED_VAR, +// PT_GUARDED_VAR is primarily used when the client cannot express the name +// of the lock. Note that a pointer variable to a shared memory location +// could itself be a shared variable. For example, if a shared global pointer +// q, which is guarded by mu1, points to a shared memory location that is +// guarded by mu2, q should be annotated as follows: +// int *q GUARDED_BY(mu1) PT_GUARDED_BY(mu2); +#define PT_GUARDED_BY(x) __attribute__ ((point_to_guarded_by(x))) +#define PT_GUARDED_VAR __attribute__ ((point_to_guarded)) + +// Document the acquisition order between locks that can be held +// simultaneously by a thread. For any two locks that need to be annotated +// to establish an acquisition order, only one of them needs the annotation. +// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER +// and ACQUIRED_BEFORE.) +#define ACQUIRED_AFTER(...) __attribute__ ((acquired_after(__VA_ARGS__))) +#define ACQUIRED_BEFORE(...) __attribute__ ((acquired_before(__VA_ARGS__))) + +// The following three annotations document the lock requirements for +// functions/methods. + +// Document if a function expects certain locks to be held before it is called +#define EXCLUSIVE_LOCKS_REQUIRED(...) \ + __attribute__ ((exclusive_locks_required(__VA_ARGS__))) + +#define SHARED_LOCKS_REQUIRED(...) \ + __attribute__ ((shared_locks_required(__VA_ARGS__))) + +// Document the locks acquired in the body of the function. These locks +// cannot be held when calling this function (as google3's Mutex locks are +// non-reentrant). +#define LOCKS_EXCLUDED(...) 
__attribute__ ((locks_excluded(__VA_ARGS__))) + +// Document the lock the annotated function returns without acquiring it. +#define LOCK_RETURNED(x) __attribute__ ((lock_returned(x))) + +// Document if a class/type is a lockable type (such as the Mutex class). +#define LOCKABLE __attribute__ ((lockable)) + +// Document if a class is a scoped lockable type (such as the MutexLock class). +#define SCOPED_LOCKABLE __attribute__ ((scoped_lockable)) + +// The following annotations specify lock and unlock primitives. +#define EXCLUSIVE_LOCK_FUNCTION(...) \ + __attribute__ ((exclusive_lock(__VA_ARGS__))) + +#define SHARED_LOCK_FUNCTION(...) \ + __attribute__ ((shared_lock(__VA_ARGS__))) + +#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \ + __attribute__ ((exclusive_trylock(__VA_ARGS__))) + +#define SHARED_TRYLOCK_FUNCTION(...) \ + __attribute__ ((shared_trylock(__VA_ARGS__))) + +#define UNLOCK_FUNCTION(...) __attribute__ ((unlock(__VA_ARGS__))) + +// An escape hatch for thread safety analysis to ignore the annotated function. +#define NO_THREAD_SAFETY_ANALYSIS __attribute__ ((no_thread_safety_analysis)) + + +#else + +// When the compiler is not GCC, these annotations are simply no-ops. + +// NOTE: in theory, the macros that take "arg" below *could* take +// multiple arguments, but in practice so far they only take one. +// Since not all non-gcc compilers support ... -- notably MSVC 7.1 -- +// I just hard-code in a single arg. If this assumption ever breaks, +// we can change it back to "...", or handle it some other way. + +#define GUARDED_BY(x) // no-op +#define GUARDED_VAR // no-op +#define PT_GUARDED_BY(x) // no-op +#define PT_GUARDED_VAR // no-op +#define ACQUIRED_AFTER(arg) // no-op +#define ACQUIRED_BEFORE(arg) // no-op +#define EXCLUSIVE_LOCKS_REQUIRED(arg) // no-op +#define SHARED_LOCKS_REQUIRED(arg) // no-op +#define LOCKS_EXCLUDED(arg) // no-op +#define LOCK_RETURNED(x) // no-op +#define LOCKABLE // no-op +#define SCOPED_LOCKABLE // no-op +#define EXCLUSIVE_LOCK_FUNCTION(arg) // no-op +#define SHARED_LOCK_FUNCTION(arg) // no-op +#define EXCLUSIVE_TRYLOCK_FUNCTION(arg) // no-op +#define SHARED_TRYLOCK_FUNCTION(arg) // no-op +#define UNLOCK_FUNCTION(arg) // no-op +#define NO_THREAD_SAFETY_ANALYSIS // no-op + +#endif // defined(__GNUC__) && defined(__SUPPORT_TS_ANNOTATION__) + // && !defined(SWIG) + +#endif // BASE_THREAD_ANNOTATIONS_H_ diff --git a/third_party/tcmalloc/chromium/src/base/thread_lister.c b/third_party/tcmalloc/chromium/src/base/thread_lister.c new file mode 100644 index 0000000..bc180db --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/thread_lister.c @@ -0,0 +1,77 @@ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +#include "config.h" +#include <stdio.h> /* needed for NULL on some powerpc platforms (?!) */ +#ifdef HAVE_SYS_PRCTL +# include <sys/prctl.h> +#endif +#include "base/thread_lister.h" +#include "base/linuxthreads.h" +/* Include other thread listers here that define THREADS macro + * only when they can provide a good implementation. + */ + +#ifndef THREADS + +/* Default trivial thread lister for single-threaded applications, + * or if the multi-threading code has not been ported, yet. + */ + +int ListAllProcessThreads(void *parameter, + ListAllProcessThreadsCallBack callback, ...) { + int rc; + va_list ap; + pid_t pid; + +#ifdef HAVE_SYS_PRCTL + int dumpable = prctl(PR_GET_DUMPABLE, 0); + if (!dumpable) + prctl(PR_SET_DUMPABLE, 1); +#endif + va_start(ap, callback); + pid = getpid(); + rc = callback(parameter, 1, &pid, ap); + va_end(ap); +#ifdef HAVE_SYS_PRCTL + if (!dumpable) + prctl(PR_SET_DUMPABLE, 0); +#endif + return rc; +} + +int ResumeAllProcessThreads(int num_threads, pid_t *thread_pids) { + return 1; +} + +#endif /* ifndef THREADS */ diff --git a/third_party/tcmalloc/chromium/src/base/thread_lister.h b/third_party/tcmalloc/chromium/src/base/thread_lister.h new file mode 100644 index 0000000..6afe4dd --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/thread_lister.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2005-2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Markus Gutschke + */ + +#ifndef _THREAD_LISTER_H +#define _THREAD_LISTER_H + +#include <stdarg.h> +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (*ListAllProcessThreadsCallBack)(void *parameter, + int num_threads, + pid_t *thread_pids, + va_list ap); + +/* This function gets the list of all linux threads of the current process + * passes them to the 'callback' along with the 'parameter' pointer; at the + * call back call time all the threads are paused via + * PTRACE_ATTACH. + * The callback is executed from a separate thread which shares only the + * address space, the filesystem, and the filehandles with the caller. Most + * notably, it does not share the same pid and ppid; and if it terminates, + * the rest of the application is still there. 'callback' is supposed to do + * or arrange for ResumeAllProcessThreads. This happens automatically, if + * the thread raises a synchronous signal (e.g. SIGSEGV); asynchronous + * signals are blocked. If the 'callback' decides to unblock them, it must + * ensure that they cannot terminate the application, or that + * ResumeAllProcessThreads will get called. + * It is an error for the 'callback' to make any library calls that could + * acquire locks. Most notably, this means that most system calls have to + * avoid going through libc. Also, this means that it is not legal to call + * exit() or abort(). + * We return -1 on error and the return value of 'callback' on success. + */ +int ListAllProcessThreads(void *parameter, + ListAllProcessThreadsCallBack callback, ...); + +/* This function resumes the list of all linux threads that + * ListAllProcessThreads pauses before giving to its callback. + * The function returns non-zero if at least one thread was + * suspended and has now been resumed. + */ +int ResumeAllProcessThreads(int num_threads, pid_t *thread_pids); + +#ifdef __cplusplus +} +#endif + +#endif /* _THREAD_LISTER_H */ diff --git a/third_party/tcmalloc/chromium/src/base/vdso_support.cc b/third_party/tcmalloc/chromium/src/base/vdso_support.cc new file mode 100644 index 0000000..ddaca37 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/vdso_support.cc @@ -0,0 +1,569 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// + +#include "base/vdso_support.h" + +#ifdef HAVE_VDSO_SUPPORT // defined in vdso_support.h + +#include <fcntl.h> + +#include "base/atomicops.h" // for MemoryBarrier +#include "base/logging.h" +#include "base/linux_syscall_support.h" +#include "base/dynamic_annotations.h" +#include "base/basictypes.h" // for COMPILE_ASSERT + +using base::subtle::MemoryBarrier; + +#ifndef AT_SYSINFO_EHDR +#define AT_SYSINFO_EHDR 33 +#endif + +// From binutils/include/elf/common.h (this doesn't appear to be documented +// anywhere else). +// +// /* This flag appears in a Versym structure. It means that the symbol +// is hidden, and is only visible with an explicit version number. +// This is a GNU extension. */ +// #define VERSYM_HIDDEN 0x8000 +// +// /* This is the mask for the rest of the Versym information. */ +// #define VERSYM_VERSION 0x7fff + +#define VERSYM_VERSION 0x7fff + +namespace base { + +namespace { +template <int N> class ElfClass { + public: + static const int kElfClass = -1; + static int ElfBind(const ElfW(Sym) *) { + CHECK(false); // << "Unexpected word size"; + return 0; + } + static int ElfType(const ElfW(Sym) *) { + CHECK(false); // << "Unexpected word size"; + return 0; + } +}; + +template <> class ElfClass<32> { + public: + static const int kElfClass = ELFCLASS32; + static int ElfBind(const ElfW(Sym) *symbol) { + return ELF32_ST_BIND(symbol->st_info); + } + static int ElfType(const ElfW(Sym) *symbol) { + return ELF32_ST_TYPE(symbol->st_info); + } +}; + +template <> class ElfClass<64> { + public: + static const int kElfClass = ELFCLASS64; + static int ElfBind(const ElfW(Sym) *symbol) { + return ELF64_ST_BIND(symbol->st_info); + } + static int ElfType(const ElfW(Sym) *symbol) { + return ELF64_ST_TYPE(symbol->st_info); + } +}; + +typedef ElfClass<__WORDSIZE> CurrentElfClass; + +// Extract an element from one of the ELF tables, cast it to desired type. +// This is just a simple arithmetic and a glorified cast. +// Callers are responsible for bounds checking. 
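+// For example, GetPhdr() below uses
+//   GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize, i)
+// to return the i-th program header.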
+template <class T> +const T* GetTableElement(const ElfW(Ehdr) *ehdr, + ElfW(Off) table_offset, + ElfW(Word) element_size, + size_t index) { + return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr) + + table_offset + + index * element_size); +} +} // namespace + +const void *const VDSOSupport::kInvalidBase = + reinterpret_cast<const void *>(~0L); + +const void *VDSOSupport::vdso_base_ = kInvalidBase; +VDSOSupport::GetCpuFn VDSOSupport::getcpu_fn_ = &InitAndGetCPU; + +VDSOSupport::ElfMemImage::ElfMemImage(const void *base) { + CHECK(base != kInvalidBase); + Init(base); +} + +int VDSOSupport::ElfMemImage::GetNumSymbols() const { + if (!hash_) { + return 0; + } + // See http://www.caldera.com/developers/gabi/latest/ch5.dynamic.html#hash + return hash_[1]; +} + +const ElfW(Sym) *VDSOSupport::ElfMemImage::GetDynsym(int index) const { + CHECK_LT(index, GetNumSymbols()); + return dynsym_ + index; +} + +const ElfW(Versym) *VDSOSupport::ElfMemImage::GetVersym(int index) const { + CHECK_LT(index, GetNumSymbols()); + return versym_ + index; +} + +const ElfW(Phdr) *VDSOSupport::ElfMemImage::GetPhdr(int index) const { + CHECK_LT(index, ehdr_->e_phnum); + return GetTableElement<ElfW(Phdr)>(ehdr_, + ehdr_->e_phoff, + ehdr_->e_phentsize, + index); +} + +const char *VDSOSupport::ElfMemImage::GetDynstr(ElfW(Word) offset) const { + CHECK_LT(offset, strsize_); + return dynstr_ + offset; +} + +const void *VDSOSupport::ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const { + if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) { + // Symbol corresponds to "special" (e.g. SHN_ABS) section. + return reinterpret_cast<const void *>(sym->st_value); + } + CHECK_LT(link_base_, sym->st_value); + return GetTableElement<char>(ehdr_, 0, 1, sym->st_value) - link_base_; +} + +const ElfW(Verdef) *VDSOSupport::ElfMemImage::GetVerdef(int index) const { + CHECK_LE(index, verdefnum_); + const ElfW(Verdef) *version_definition = verdef_; + while (version_definition->vd_ndx < index && version_definition->vd_next) { + const char *const version_definition_as_char = + reinterpret_cast<const char *>(version_definition); + version_definition = + reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char + + version_definition->vd_next); + } + return version_definition->vd_ndx == index ? version_definition : NULL; +} + +const ElfW(Verdaux) *VDSOSupport::ElfMemImage::GetVerdefAux( + const ElfW(Verdef) *verdef) const { + return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1); +} + +const char *VDSOSupport::ElfMemImage::GetVerstr(ElfW(Word) offset) const { + CHECK_LT(offset, strsize_); + return dynstr_ + offset; +} + +void VDSOSupport::ElfMemImage::Init(const void *base) { + ehdr_ = NULL; + dynsym_ = NULL; + dynstr_ = NULL; + versym_ = NULL; + verdef_ = NULL; + hash_ = NULL; + strsize_ = 0; + verdefnum_ = 0; + link_base_ = ~0L; // Sentinel: PT_LOAD .p_vaddr can't possibly be this. 
+ if (!base) { + return; + } + const char *const base_as_char = reinterpret_cast<const char *>(base); + if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 || + base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) { + RAW_DCHECK(false, "no ELF magic"); // at %p", base); + return; + } + int elf_class = base_as_char[EI_CLASS]; + if (elf_class != CurrentElfClass::kElfClass) { + DCHECK_EQ(elf_class, CurrentElfClass::kElfClass); + return; + } + switch (base_as_char[EI_DATA]) { + case ELFDATA2LSB: { + if (__LITTLE_ENDIAN != __BYTE_ORDER) { + DCHECK_EQ(__LITTLE_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; + return; + } + break; + } + case ELFDATA2MSB: { + if (__BIG_ENDIAN != __BYTE_ORDER) { + DCHECK_EQ(__BIG_ENDIAN, __BYTE_ORDER); // << ": wrong byte order"; + return; + } + break; + } + default: { + RAW_DCHECK(false, "unexpected data encoding"); // << base_as_char[EI_DATA]; + return; + } + } + + ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base); + const ElfW(Phdr) *dynamic_program_header = NULL; + for (int i = 0; i < ehdr_->e_phnum; ++i) { + const ElfW(Phdr) *const program_header = GetPhdr(i); + switch (program_header->p_type) { + case PT_LOAD: + if (link_base_ == ~0L) { + link_base_ = program_header->p_vaddr; + } + break; + case PT_DYNAMIC: + dynamic_program_header = program_header; + break; + } + } + if (link_base_ == ~0L || !dynamic_program_header) { + RAW_DCHECK(~0L != link_base_, "no PT_LOADs in VDSO"); + RAW_DCHECK(dynamic_program_header, "no PT_DYNAMIC in VDSO"); + // Mark this image as not present. Can not recur infinitely. + Init(0); + return; + } + ptrdiff_t relocation = + base_as_char - reinterpret_cast<const char *>(link_base_); + ElfW(Dyn) *dynamic_entry = + reinterpret_cast<ElfW(Dyn) *>(dynamic_program_header->p_vaddr + + relocation); + bool fake_vdso = false; // Assume we are dealing with the real VDSO. + for (ElfW(Dyn) *de = dynamic_entry; de->d_tag != DT_NULL; ++de) { + ElfW(Sxword) tag = de->d_tag; + if (tag == DT_PLTGOT || tag == DT_RELA || tag == DT_JMPREL || + tag == DT_NEEDED || tag == DT_RPATH || tag == DT_VERNEED || + tag == DT_INIT || tag == DT_FINI) { + /* Real vdso can not reasonably have any of the above entries. */ + fake_vdso = true; + break; + } + } + for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) { + ElfW(Xword) value = dynamic_entry->d_un.d_val; + if (fake_vdso) { + // A complication: in the real VDSO, dynamic entries are not relocated + // (it wasn't loaded by a dynamic loader). But when testing with a + // "fake" dlopen()ed vdso library, the loader relocates some (but + // not all!) of them before we get here. + if (dynamic_entry->d_tag == DT_VERDEF) { + // The only dynamic entry (of the ones we care about) libc-2.3.6 + // loader doesn't relocate. + value += relocation; + } + } else { + // Real VDSO. Everything needs to be relocated. + value += relocation; + } + switch (dynamic_entry->d_tag) { + case DT_HASH: + hash_ = reinterpret_cast<ElfW(Word) *>(value); + break; + case DT_SYMTAB: + dynsym_ = reinterpret_cast<ElfW(Sym) *>(value); + break; + case DT_STRTAB: + dynstr_ = reinterpret_cast<const char *>(value); + break; + case DT_VERSYM: + versym_ = reinterpret_cast<ElfW(Versym) *>(value); + break; + case DT_VERDEF: + verdef_ = reinterpret_cast<ElfW(Verdef) *>(value); + break; + case DT_VERDEFNUM: + verdefnum_ = dynamic_entry->d_un.d_val; + break; + case DT_STRSZ: + strsize_ = dynamic_entry->d_un.d_val; + break; + default: + // Unrecognized entries explicitly ignored. 
+ break; + } + } + if (!hash_ || !dynsym_ || !dynstr_ || !versym_ || + !verdef_ || !verdefnum_ || !strsize_) { + RAW_DCHECK(hash_, "invalid VDSO (no DT_HASH)"); + RAW_DCHECK(dynsym_, "invalid VDSO (no DT_SYMTAB)"); + RAW_DCHECK(dynstr_, "invalid VDSO (no DT_STRTAB)"); + RAW_DCHECK(versym_, "invalid VDSO (no DT_VERSYM)"); + RAW_DCHECK(verdef_, "invalid VDSO (no DT_VERDEF)"); + RAW_DCHECK(verdefnum_, "invalid VDSO (no DT_VERDEFNUM)"); + RAW_DCHECK(strsize_, "invalid VDSO (no DT_STRSZ)"); + // Mark this image as not present. Can not recur infinitely. + Init(0); + return; + } +} + +VDSOSupport::VDSOSupport() + // If vdso_base_ is still set to kInvalidBase, we got here + // before VDSOSupport::Init has been called. Call it now. + : image_(vdso_base_ == kInvalidBase ? Init() : vdso_base_) { +} + +// NOTE: we can't use GoogleOnceInit() below, because we can be +// called by tcmalloc, and none of the *once* stuff may be functional yet. +// +// In addition, we hope that the VDSOSupportHelper constructor +// causes this code to run before there are any threads, and before +// InitGoogle() has executed any chroot or setuid calls. +// +// Finally, even if there is a race here, it is harmless, because +// the operation should be idempotent. +const void *VDSOSupport::Init() { + if (vdso_base_ == kInvalidBase) { + // Valgrind zaps AT_SYSINFO_EHDR and friends from the auxv[] + // on stack, and so glibc works as if VDSO was not present. + // But going directly to kernel via /proc/self/auxv below bypasses + // Valgrind zapping. So we check for Valgrind separately. + if (RunningOnValgrind()) { + vdso_base_ = NULL; + getcpu_fn_ = &GetCPUViaSyscall; + return NULL; + } + int fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + // Kernel too old to have a VDSO. + vdso_base_ = NULL; + getcpu_fn_ = &GetCPUViaSyscall; + return NULL; + } + ElfW(auxv_t) aux; + while (read(fd, &aux, sizeof(aux)) == sizeof(aux)) { + if (aux.a_type == AT_SYSINFO_EHDR) { + COMPILE_ASSERT(sizeof(vdso_base_) == sizeof(aux.a_un.a_val), + unexpected_sizeof_pointer_NE_sizeof_a_val); + vdso_base_ = reinterpret_cast<void *>(aux.a_un.a_val); + break; + } + } + close(fd); + if (vdso_base_ == kInvalidBase) { + // Didn't find AT_SYSINFO_EHDR in auxv[]. + vdso_base_ = NULL; + } + } + GetCpuFn fn = &GetCPUViaSyscall; // default if VDSO not present. + if (vdso_base_) { + VDSOSupport vdso; + SymbolInfo info; + if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) { + // Casting from an int to a pointer is not legal C++. To emphasize + // this, we use a C-style cast rather than a C++-style cast. + fn = (GetCpuFn)(info.address); + } + } + // Subtle: this code runs outside of any locks; prevent compiler + // from assigning to getcpu_fn_ more than once. + MemoryBarrier(); + getcpu_fn_ = fn; + return vdso_base_; +} + +const void *VDSOSupport::SetBase(const void *base) { + const void *old_base = vdso_base_; + vdso_base_ = base; + image_.Init(base); + // Also reset getcpu_fn_, so GetCPU could be tested with simulated VDSO. 
+ getcpu_fn_ = &InitAndGetCPU; + return old_base; +} + +bool VDSOSupport::LookupSymbol(const char *name, + const char *version, + int type, + SymbolInfo *info) const { + for (SymbolIterator it = begin(); it != end(); ++it) { + if (strcmp(it->name, name) == 0 && strcmp(it->version, version) == 0 && + CurrentElfClass::ElfType(it->symbol) == type) { + if (info) { + *info = *it; + } + return true; + } + } + return false; +} + +bool VDSOSupport::LookupSymbolByAddress(const void *address, + SymbolInfo *info_out) const { + for (SymbolIterator it = begin(); it != end(); ++it) { + const char *const symbol_start = + reinterpret_cast<const char *>(it->address); + const char *const symbol_end = symbol_start + it->symbol->st_size; + if (symbol_start <= address && address < symbol_end) { + if (info_out) { + // Client wants to know details for that symbol (the usual case). + if (CurrentElfClass::ElfBind(it->symbol) == STB_GLOBAL) { + // Strong symbol; just return it. + *info_out = *it; + return true; + } else { + // Weak or local. Record it, but keep looking for a strong one. + *info_out = *it; + } + } else { + // Client only cares if there is an overlapping symbol. + return true; + } + } + } + return false; +} + +VDSOSupport::SymbolIterator::SymbolIterator(const void *const image, int index) + : index_(index), image_(image) { +} + +const VDSOSupport::SymbolInfo *VDSOSupport::SymbolIterator::operator->() const { + return &info_; +} + +const VDSOSupport::SymbolInfo& VDSOSupport::SymbolIterator::operator*() const { + return info_; +} + +bool VDSOSupport::SymbolIterator::operator==(const SymbolIterator &rhs) const { + return this->image_ == rhs.image_ && this->index_ == rhs.index_; +} + +bool VDSOSupport::SymbolIterator::operator!=(const SymbolIterator &rhs) const { + return !(*this == rhs); +} + +VDSOSupport::SymbolIterator &VDSOSupport::SymbolIterator::operator++() { + this->Update(1); + return *this; +} + +VDSOSupport::SymbolIterator VDSOSupport::begin() const { + SymbolIterator it(&image_, 0); + it.Update(0); + return it; +} + +VDSOSupport::SymbolIterator VDSOSupport::end() const { + return SymbolIterator(&image_, image_.GetNumSymbols()); +} + +void VDSOSupport::SymbolIterator::Update(int increment) { + const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_); + CHECK(image->IsPresent() || increment == 0); + if (!image->IsPresent()) { + return; + } + index_ += increment; + if (index_ >= image->GetNumSymbols()) { + index_ = image->GetNumSymbols(); + return; + } + const ElfW(Sym) *symbol = image->GetDynsym(index_); + const ElfW(Versym) *version_symbol = image->GetVersym(index_); + CHECK(symbol && version_symbol); + const char *const symbol_name = image->GetDynstr(symbol->st_name); + const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION; + const ElfW(Verdef) *version_definition = NULL; + const char *version_name = ""; + if (symbol->st_shndx == SHN_UNDEF) { + // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and + // version_index could well be greater than verdefnum_, so calling + // GetVerdef(version_index) may trigger assertion. + } else { + version_definition = image->GetVerdef(version_index); + } + if (version_definition) { + // I am expecting 1 or 2 auxiliary entries: 1 for the version itself, + // optional 2nd if the version has a parent. 
+ CHECK_LE(1, version_definition->vd_cnt); + CHECK_LE(version_definition->vd_cnt, 2); + const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition); + version_name = image->GetVerstr(version_aux->vda_name); + } + info_.name = symbol_name; + info_.version = version_name; + info_.address = image->GetSymAddr(symbol); + info_.symbol = symbol; +} + +// NOLINT on 'long' because this routine mimics kernel api. +long VDSOSupport::GetCPUViaSyscall(unsigned *cpu, void *, void *) { // NOLINT +#if defined(__NR_getcpu) + return sys_getcpu(cpu, NULL, NULL); +#else + // x86_64 never implemented sys_getcpu(), except as a VDSO call. + errno = ENOSYS; + return -1; +#endif +} + +// Use fast __vdso_getcpu if available. +long VDSOSupport::InitAndGetCPU(unsigned *cpu, void *x, void *y) { // NOLINT + Init(); + CHECK_NE(getcpu_fn_, &InitAndGetCPU); // << "Init() did not set getcpu_fn_"; + return (*getcpu_fn_)(cpu, x, y); +} + +// This function must be very fast, and may be called from very +// low level (e.g. tcmalloc). Hence I avoid things like +// GoogleOnceInit() and ::operator new. +int GetCPU(void) { + unsigned cpu; + int ret_code = (*VDSOSupport::getcpu_fn_)(&cpu, NULL, NULL); + return ret_code == 0 ? cpu : ret_code; +} + +// We need to make sure VDSOSupport::Init() is called before +// the main() runs, since it might do something like setuid or +// chroot. If VDSOSupport +// is used in any global constructor, this will happen, since +// VDSOSupport's constructor calls Init. But if not, we need to +// ensure it here, with a global constructor of our own. This +// is an allowed exception to the normal rule against non-trivial +// global constructors. +static class VDSOInitHelper { + public: + VDSOInitHelper() { VDSOSupport::Init(); } +} vdso_init_helper; +} + +#endif // HAVE_VDSO_SUPPORT diff --git a/third_party/tcmalloc/chromium/src/base/vdso_support.h b/third_party/tcmalloc/chromium/src/base/vdso_support.h new file mode 100644 index 0000000..c47b3c5 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/base/vdso_support.h @@ -0,0 +1,182 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// Author: ppluzhnikov@google.com (Paul Pluzhnikov) +// +// Allow dynamic symbol lookup in the kernel VDSO page. +// +// VDSO stands for "Virtual Dynamic Shared Object" -- a page of +// executable code, which looks like a shared library, but doesn't +// necessarily exist anywhere on disk, and which gets mmap()ed into +// every process by kernels which support VDSO, such as 2.6.x for 32-bit +// executables, and 2.6.24 and above for 64-bit executables. +// +// More details could be found here: +// http://www.trilithium.com/johan/2005/08/linux-gate/ +// +// VDSOSupport -- a class representing kernel VDSO (if present). +// +// Example usage: +// VDSOSupport vdso; +// VDSOSupport::SymbolInfo info; +// typedef (*FN)(unsigned *, void *, void *); +// FN fn = NULL; +// if (vdso.LookupSymbol("__vdso_getcpu", "LINUX_2.6", STT_FUNC, &info)) { +// fn = reinterpret_cast<FN>(info.address); +// } + +#ifndef BASE_VDSO_SUPPORT_H_ +#define BASE_VDSO_SUPPORT_H_ + +#include <config.h> +#ifdef HAVE_FEATURES_H +#include <features.h> // for __GLIBC__ +#endif + +// Maybe one day we can rewrite this file not to require the elf +// symbol extensions in glibc, but for right now we need them. 
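As a reading aid for this header, here is a minimal, hypothetical usage sketch of the GetCPU() helper declared below. It is not part of the patch; it assumes an ELF/glibc build where HAVE_VDSO_SUPPORT ends up defined, and its error handling simply mirrors the comments in this header (a negative return with errno == ENOSYS when the kernel has no getcpu support).

#include <errno.h>
#include <stdio.h>
#include "base/vdso_support.h"

int main() {
#ifdef HAVE_VDSO_SUPPORT
  // Uses __vdso_getcpu when the kernel exports a VDSO,
  // otherwise falls back to the getcpu syscall internally.
  const int cpu = base::GetCPU();
  if (cpu >= 0) {
    printf("running on CPU %d\n", cpu);
  } else if (errno == ENOSYS) {
    printf("kernel does not support getcpu\n");
  }
#endif
  return 0;
}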
+#if defined(__ELF__) && defined(__GLIBC__) + +#define HAVE_VDSO_SUPPORT 1 + +#include <stdlib.h> // for NULL +#include <link.h> // for ElfW +#include "base/basictypes.h" + +namespace base { + +// NOTE: this class may be used from within tcmalloc, and can not +// use any memory allocation routines. +class VDSOSupport { + public: + // Sentinel: there could never be a VDSO at this address. + static const void *const kInvalidBase; + + // Information about a single vdso symbol. + // All pointers are into .dynsym, .dynstr, or .text of the VDSO. + // Do not free() them or modify through them. + struct SymbolInfo { + const char *name; // E.g. "__vdso_getcpu" + const char *version; // E.g. "LINUX_2.6", could be "" + // for unversioned symbol. + const void *address; // Relocated symbol address. + const ElfW(Sym) *symbol; // Symbol in the dynamic symbol table. + }; + + // Supports iteration over all dynamic symbols. + class SymbolIterator { + public: + friend struct VDSOSupport; + const SymbolInfo *operator->() const; + const SymbolInfo &operator*() const; + SymbolIterator& operator++(); + bool operator!=(const SymbolIterator &rhs) const; + bool operator==(const SymbolIterator &rhs) const; + private: + SymbolIterator(const void *const image, int index); + void Update(int incr); + SymbolInfo info_; + int index_; + const void *const image_; + }; + + VDSOSupport(); + + // Answers whether we have a vdso at all. + bool IsPresent() const { return image_.IsPresent(); } + + // Allow to iterate over all VDSO symbols. + SymbolIterator begin() const; + SymbolIterator end() const; + + // Look up versioned dynamic symbol in the kernel VDSO. + // Returns false if VDSO is not present, or doesn't contain given + // symbol/version/type combination. + // If info_out != NULL, additional details are filled in. + bool LookupSymbol(const char *name, const char *version, + int symbol_type, SymbolInfo *info_out) const; + + // Find info about symbol (if any) which overlaps given address. + // Returns true if symbol was found; false if VDSO isn't present + // or doesn't have a symbol overlapping given address. + // If info_out != NULL, additional details are filled in. + bool LookupSymbolByAddress(const void *address, SymbolInfo *info_out) const; + + // Used only for testing. Replace real VDSO base with a mock. + // Returns previous value of vdso_base_. After you are done testing, + // you are expected to call SetBase() with previous value, in order to + // reset state to the way it was. + const void *SetBase(const void *s); + + // Computes vdso_base_ and returns it. Should be called as early as + // possible; before any thread creation, chroot or setuid. + static const void *Init(); + + private: + // An in-memory ELF image (may not exist on disk). 
+ class ElfMemImage { + public: + explicit ElfMemImage(const void *base); + void Init(const void *base); + bool IsPresent() const { return ehdr_ != NULL; } + const ElfW(Phdr)* GetPhdr(int index) const; + const ElfW(Sym)* GetDynsym(int index) const; + const ElfW(Versym)* GetVersym(int index) const; + const ElfW(Verdef)* GetVerdef(int index) const; + const ElfW(Verdaux)* GetVerdefAux(const ElfW(Verdef) *verdef) const; + const char* GetDynstr(ElfW(Word) offset) const; + const void* GetSymAddr(const ElfW(Sym) *sym) const; + const char* GetVerstr(ElfW(Word) offset) const; + int GetNumSymbols() const; + private: + const ElfW(Ehdr) *ehdr_; + const ElfW(Sym) *dynsym_; + const ElfW(Versym) *versym_; + const ElfW(Verdef) *verdef_; + const ElfW(Word) *hash_; + const char *dynstr_; + size_t strsize_; + size_t verdefnum_; + ElfW(Addr) link_base_; // Link-time base (p_vaddr of first PT_LOAD). + }; + + // image_ represents VDSO ELF image in memory. + // image_.ehdr_ == NULL implies there is no VDSO. + ElfMemImage image_; + + // Cached value of auxv AT_SYSINFO_EHDR, computed once. + // This is a tri-state: + // kInvalidBase => value hasn't been determined yet. + // 0 => there is no VDSO. + // else => vma of VDSO Elf{32,64}_Ehdr. + static const void *vdso_base_; + + // NOLINT on 'long' because these routines mimic kernel api. + // The 'cache' parameter may be used by some versions of the kernel, + // and should be NULL or point to a static buffer containing at + // least two 'long's. + static long InitAndGetCPU(unsigned *cpu, void *cache, // NOLINT 'long'. + void *unused); + static long GetCPUViaSyscall(unsigned *cpu, void *cache, // NOLINT 'long'. + void *unused); + typedef long (*GetCpuFn)(unsigned *cpu, void *cache, // NOLINT 'long'. + void *unused); + + // This function pointer may point to InitAndGetCPU, + // GetCPUViaSyscall, or __vdso_getcpu at different stages of initialization. + static GetCpuFn getcpu_fn_; + + friend int GetCPU(void); // Needs access to getcpu_fn_. + + DISALLOW_COPY_AND_ASSIGN(VDSOSupport); +}; + +// Same as sched_getcpu() on later glibc versions. +// Return current CPU, using (fast) __vdso_getcpu@LINUX_2.6 if present, +// otherwise use syscall(SYS_getcpu,...). +// May return -1 with errno == ENOSYS if the kernel doesn't +// support SYS_getcpu. +int GetCPU(); +} // namespace base + +#endif // __ELF__ and __GLIBC__ + +#endif // BASE_VDSO_SUPPORT_H_ diff --git a/third_party/tcmalloc/chromium/src/central_freelist.cc b/third_party/tcmalloc/chromium/src/central_freelist.cc new file mode 100644 index 0000000..674ff9b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/central_freelist.cc @@ -0,0 +1,313 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include "config.h" +#include "central_freelist.h" + +#include "linked_list.h" +#include "static_vars.h" + +namespace tcmalloc { + +void CentralFreeList::Init(size_t cl) { + size_class_ = cl; + tcmalloc::DLL_Init(&empty_); + tcmalloc::DLL_Init(&nonempty_); + counter_ = 0; + + cache_size_ = 1; + used_slots_ = 0; + ASSERT(cache_size_ <= kNumTransferEntries); +} + +void CentralFreeList::ReleaseListToSpans(void* start) { + while (start) { + void *next = SLL_Next(start); + ReleaseToSpans(start); + start = next; + } +} + +void CentralFreeList::ReleaseToSpans(void* object) { + const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; + Span* span = Static::pageheap()->GetDescriptor(p); + ASSERT(span != NULL); + ASSERT(span->refcount > 0); + + // If span is empty, move it to non-empty list + if (span->objects == NULL) { + tcmalloc::DLL_Remove(span); + tcmalloc::DLL_Prepend(&nonempty_, span); + Event(span, 'N', 0); + } + + // The following check is expensive, so it is disabled by default + if (false) { + // Check that object does not occur in list + int got = 0; + for (void* p = span->objects; p != NULL; p = *((void**) p)) { + ASSERT(p != object); + got++; + } + ASSERT(got + span->refcount == + (span->length<<kPageShift) / + Static::sizemap()->ByteSizeForClass(span->sizeclass)); + } + + counter_++; + span->refcount--; + if (span->refcount == 0) { + Event(span, '#', 0); + counter_ -= ((span->length<<kPageShift) / + Static::sizemap()->ByteSizeForClass(span->sizeclass)); + tcmalloc::DLL_Remove(span); + + // Release central list lock while operating on pageheap + lock_.Unlock(); + { + SpinLockHolder h(Static::pageheap_lock()); + Static::pageheap()->Delete(span); + } + lock_.Lock(); + } else { + *(reinterpret_cast<void**>(object)) = span->objects; + span->objects = object; + } +} + +bool CentralFreeList::EvictRandomSizeClass( + int locked_size_class, bool force) { + static int race_counter = 0; + int t = race_counter++; // Updated without a lock, but who cares. + if (t >= kNumClasses) { + while (t >= kNumClasses) { + t -= kNumClasses; + } + race_counter = t; + } + ASSERT(t >= 0); + ASSERT(t < kNumClasses); + if (t == locked_size_class) return false; + return Static::central_cache()[t].ShrinkCache(locked_size_class, force); +} + +bool CentralFreeList::MakeCacheSpace() { + // Is there room in the cache? + if (used_slots_ < cache_size_) return true; + // Check if we can expand this cache? + if (cache_size_ == kNumTransferEntries) return false; + // Ok, we'll try to grab an entry from some other size class. 
+ if (EvictRandomSizeClass(size_class_, false) || + EvictRandomSizeClass(size_class_, true)) { + // Succeeded in evicting, we're going to make our cache larger. + cache_size_++; + return true; + } + return false; +} + + +namespace { +class LockInverter { + private: + SpinLock *held_, *temp_; + public: + inline explicit LockInverter(SpinLock* held, SpinLock *temp) + : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); } + inline ~LockInverter() { temp_->Unlock(); held_->Lock(); } +}; +} + +// This function is marked as NO_THREAD_SAFETY_ANALYSIS because it uses +// LockInverter to release one lock and acquire another in scoped-lock +// style, which our current annotation/analysis does not support. +bool CentralFreeList::ShrinkCache(int locked_size_class, bool force) + NO_THREAD_SAFETY_ANALYSIS { + // Start with a quick check without taking a lock. + if (cache_size_ == 0) return false; + // We don't evict from a full cache unless we are 'forcing'. + if (force == false && used_slots_ == cache_size_) return false; + + // Grab lock, but first release the other lock held by this thread. We use + // the lock inverter to ensure that we never hold two size class locks + // concurrently. That can create a deadlock because there is no well + // defined nesting order. + LockInverter li(&Static::central_cache()[locked_size_class].lock_, &lock_); + ASSERT(used_slots_ <= cache_size_); + ASSERT(0 <= cache_size_); + if (cache_size_ == 0) return false; + if (used_slots_ == cache_size_) { + if (force == false) return false; + // ReleaseListToSpans releases the lock, so we have to make all the + // updates to the central list before calling it. + cache_size_--; + used_slots_--; + ReleaseListToSpans(tc_slots_[used_slots_].head); + return true; + } + cache_size_--; + return true; +} + +void CentralFreeList::InsertRange(void *start, void *end, int N) { + SpinLockHolder h(&lock_); + if (N == Static::sizemap()->num_objects_to_move(size_class_) && + MakeCacheSpace()) { + int slot = used_slots_++; + ASSERT(slot >=0); + ASSERT(slot < kNumTransferEntries); + TCEntry *entry = &tc_slots_[slot]; + entry->head = start; + entry->tail = end; + return; + } + ReleaseListToSpans(start); +} + +int CentralFreeList::RemoveRange(void **start, void **end, int N) { + ASSERT(N > 0); + lock_.Lock(); + if (N == Static::sizemap()->num_objects_to_move(size_class_) && + used_slots_ > 0) { + int slot = --used_slots_; + ASSERT(slot >= 0); + TCEntry *entry = &tc_slots_[slot]; + *start = entry->head; + *end = entry->tail; + lock_.Unlock(); + return N; + } + + int result = 0; + void* head = NULL; + void* tail = NULL; + // TODO: Prefetch multiple TCEntries? 
+ tail = FetchFromSpansSafe(); + if (tail != NULL) { + SLL_SetNext(tail, NULL); + head = tail; + result = 1; + while (result < N) { + void *t = FetchFromSpans(); + if (!t) break; + SLL_Push(&head, t); + result++; + } + } + lock_.Unlock(); + *start = head; + *end = tail; + return result; +} + + +void* CentralFreeList::FetchFromSpansSafe() { + void *t = FetchFromSpans(); + if (!t) { + Populate(); + t = FetchFromSpans(); + } + return t; +} + +void* CentralFreeList::FetchFromSpans() { + if (tcmalloc::DLL_IsEmpty(&nonempty_)) return NULL; + Span* span = nonempty_.next; + + ASSERT(span->objects != NULL); + span->refcount++; + void* result = span->objects; + span->objects = *(reinterpret_cast<void**>(result)); + if (span->objects == NULL) { + // Move to empty list + tcmalloc::DLL_Remove(span); + tcmalloc::DLL_Prepend(&empty_, span); + Event(span, 'E', 0); + } + counter_--; + return result; +} + +// Fetch memory from the system and add to the central cache freelist. +void CentralFreeList::Populate() { + // Release central list lock while operating on pageheap + lock_.Unlock(); + const size_t npages = Static::sizemap()->class_to_pages(size_class_); + + Span* span; + { + SpinLockHolder h(Static::pageheap_lock()); + span = Static::pageheap()->New(npages); + if (span) Static::pageheap()->RegisterSizeClass(span, size_class_); + } + if (span == NULL) { + MESSAGE("tcmalloc: allocation failed", npages << kPageShift); + lock_.Lock(); + return; + } + ASSERT(span->length == npages); + // Cache sizeclass info eagerly. Locking is not necessary. + // (Instead of being eager, we could just replace any stale info + // about this span, but that seems to be no better in practice.) + for (int i = 0; i < npages; i++) { + Static::pageheap()->CacheSizeClass(span->start + i, size_class_); + } + + // Split the block into pieces and add to the free-list + // TODO: coloring of objects to avoid cache conflicts? + void** tail = &span->objects; + char* ptr = reinterpret_cast<char*>(span->start << kPageShift); + char* limit = ptr + (npages << kPageShift); + const size_t size = Static::sizemap()->ByteSizeForClass(size_class_); + int num = 0; + while (ptr + size <= limit) { + *tail = ptr; + tail = reinterpret_cast<void**>(ptr); + ptr += size; + num++; + } + ASSERT(ptr <= limit); + *tail = NULL; + span->refcount = 0; // No sub-object in use yet + + // Add span to list of non-empty spans + lock_.Lock(); + tcmalloc::DLL_Prepend(&nonempty_, span); + counter_ += num; +} + +int CentralFreeList::tc_length() { + SpinLockHolder h(&lock_); + return used_slots_ * Static::sizemap()->num_objects_to_move(size_class_); +} + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/central_freelist.h b/third_party/tcmalloc/chromium/src/central_freelist.h new file mode 100644 index 0000000..2e6a31b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/central_freelist.h @@ -0,0 +1,175 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_CENTRAL_FREELIST_H_ +#define TCMALLOC_CENTRAL_FREELIST_H_ + +#include "config.h" +#include "base/thread_annotations.h" +#include "base/spinlock.h" +#include "common.h" +#include "span.h" + +namespace tcmalloc { + +// Data kept per size-class in central cache. +class CentralFreeList { + public: + void Init(size_t cl); + + // These methods all do internal locking. + + // Insert the specified range into the central freelist. N is the number of + // elements in the range. RemoveRange() is the opposite operation. + void InsertRange(void *start, void *end, int N); + + // Returns the actual number of fetched elements and sets *start and *end. + int RemoveRange(void **start, void **end, int N); + + // Returns the number of free objects in cache. + int length() { + SpinLockHolder h(&lock_); + return counter_; + } + + // Returns the number of free objects in the transfer cache. + int tc_length(); + + private: + // TransferCache is used to cache transfers of + // sizemap.num_objects_to_move(size_class) back and forth between + // thread caches and the central cache for a given size class. + struct TCEntry { + void *head; // Head of chain of objects. + void *tail; // Tail of chain of objects. + }; + + // A central cache freelist can have anywhere from 0 to kNumTransferEntries + // slots to put link list chains into. To keep memory usage bounded the total + // number of TCEntries across size classes is fixed. Currently each size + // class is initially given one TCEntry which also means that the maximum any + // one class can have is kNumClasses. + static const int kNumTransferEntries = kNumClasses; + + // REQUIRES: lock_ is held + // Remove object from cache and return. + // Return NULL if no free entries in cache. + void* FetchFromSpans() EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ is held + // Remove object from cache and return. Fetches + // from pageheap if cache is empty. Only returns + // NULL on allocation failure. + void* FetchFromSpansSafe() EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ is held + // Release a linked list of objects to spans. + // May temporarily release lock_. + void ReleaseListToSpans(void *start) EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ is held + // Release an object to spans. + // May temporarily release lock_. + void ReleaseToSpans(void* object) EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ is held + // Populate cache by fetching from the page heap. + // May temporarily release lock_. 
+ void Populate() EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock is held. + // Tries to make room for a TCEntry. If the cache is full it will try to + // expand it at the cost of some other cache size. Return false if there is + // no space. + bool MakeCacheSpace() EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // REQUIRES: lock_ for locked_size_class is held. + // Picks a "random" size class to steal TCEntry slot from. In reality it + // just iterates over the sizeclasses but does so without taking a lock. + // Returns true on success. + // May temporarily lock a "random" size class. + static bool EvictRandomSizeClass(int locked_size_class, bool force); + + // REQUIRES: lock_ is *not* held. + // Tries to shrink the Cache. If force is true it will relase objects to + // spans if it allows it to shrink the cache. Return false if it failed to + // shrink the cache. Decrements cache_size_ on succeess. + // May temporarily take lock_. If it takes lock_, the locked_size_class + // lock is released to keep the thread from holding two size class locks + // concurrently which could lead to a deadlock. + bool ShrinkCache(int locked_size_class, bool force) LOCKS_EXCLUDED(lock_); + + // This lock protects all the data members. cached_entries and cache_size_ + // may be looked at without holding the lock. + SpinLock lock_; + + // We keep linked lists of empty and non-empty spans. + size_t size_class_; // My size class + Span empty_; // Dummy header for list of empty spans + Span nonempty_; // Dummy header for list of non-empty spans + size_t counter_; // Number of free objects in cache entry + + // Here we reserve space for TCEntry cache slots. Since one size class can + // end up getting all the TCEntries quota in the system we just preallocate + // sufficient number of entries here. + TCEntry tc_slots_[kNumTransferEntries]; + + // Number of currently used cached entries in tc_slots_. This variable is + // updated under a lock but can be read without one. + int32_t used_slots_; + // The current number of slots for this size class. This is an + // adaptive value that is increased if there is lots of traffic + // on a given size class. + int32_t cache_size_; +}; + +// Pads each CentralCache object to multiple of 64 bytes. Since some +// compilers (such as MSVC) don't like it when the padding is 0, I use +// template specialization to remove the padding entirely when +// sizeof(CentralFreeList) is a multiple of 64. +template<int kFreeListSizeMod64> +class CentralFreeListPaddedTo : public CentralFreeList { + private: + char pad_[64 - kFreeListSizeMod64]; +}; + +template<> +class CentralFreeListPaddedTo<0> : public CentralFreeList { +}; + +class CentralFreeListPadded : public CentralFreeListPaddedTo< + sizeof(CentralFreeList) % 64> { +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_CENTRAL_FREELIST_H_ diff --git a/third_party/tcmalloc/chromium/src/common.cc b/third_party/tcmalloc/chromium/src/common.cc new file mode 100644 index 0000000..04723b1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/common.cc @@ -0,0 +1,208 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include "config.h" +#include "system-alloc.h" +#include "config.h" +#include "common.h" + +namespace tcmalloc { + +// Note: the following only works for "n"s that fit in 32-bits, but +// that is fine since we only use it for small sizes. +static inline int LgFloor(size_t n) { + int log = 0; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + size_t x = n >> shift; + if (x != 0) { + n = x; + log += shift; + } + } + ASSERT(n == 1); + return log; +} + +int SizeMap::NumMoveSize(size_t size) { + if (size == 0) return 0; + // Use approx 64k transfers between thread and central caches. + int num = static_cast<int>(64.0 * 1024.0 / size); + if (num < 2) num = 2; + + // Avoid bringing too many objects into small object free lists. + // If this value is too large: + // - We waste memory with extra objects sitting in the thread caches. + // - The central freelist holds its lock for too long while + // building a linked list of objects, slowing down the allocations + // of other threads. + // If this value is too small: + // - We go to the central freelist too often and we have to acquire + // its lock each time. + // This value strikes a balance between the constraints above. + if (num > 32) num = 32; + + return num; +} + +// Initialize the mapping arrays +void SizeMap::Init() { + // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] + if (ClassIndex(0) < 0) { + CRASH("Invalid class index %d for size 0\n", ClassIndex(0)); + } + if (ClassIndex(kMaxSize) >= sizeof(class_array_)) { + CRASH("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize)); + } + + // Compute the size classes we want to use + int sc = 1; // Next size class to assign + int alignment = kAlignment; + CHECK_CONDITION(kAlignment <= 16); + int last_lg = -1; + for (size_t size = kAlignment; size <= kMaxSize; size += alignment) { + int lg = LgFloor(size); + if (lg > last_lg) { + // Increase alignment every so often to reduce number of size classes. + if (size >= 2048) { + // Cap alignment at 256 for large sizes + alignment = 256; + } else if (size >= 128) { + // Space wasted due to alignment is at most 1/8, i.e., 12.5%. 
+ alignment = size / 8; + } else if (size >= 16) { + // We need an alignment of at least 16 bytes to satisfy + // requirements for some SSE types. + alignment = 16; + } + CHECK_CONDITION(size < 16 || alignment >= 16); + CHECK_CONDITION((alignment & (alignment - 1)) == 0); + last_lg = lg; + } + CHECK_CONDITION((size % alignment) == 0); + + // Allocate enough pages so leftover is less than 1/8 of total. + // This bounds wasted space to at most 12.5%. + size_t psize = kPageSize; + while ((psize % size) > (psize >> 3)) { + psize += kPageSize; + } + const size_t my_pages = psize >> kPageShift; + + if (sc > 1 && my_pages == class_to_pages_[sc-1]) { + // See if we can merge this into the previous class without + // increasing the fragmentation of the previous class. + const size_t my_objects = (my_pages << kPageShift) / size; + const size_t prev_objects = (class_to_pages_[sc-1] << kPageShift) + / class_to_size_[sc-1]; + if (my_objects == prev_objects) { + // Adjust last class to include this size + class_to_size_[sc-1] = size; + continue; + } + } + + // Add new class + class_to_pages_[sc] = my_pages; + class_to_size_[sc] = size; + sc++; + } + if (sc != kNumClasses) { + CRASH("wrong number of size classes: found %d instead of %d\n", + sc, int(kNumClasses)); + } + + // Initialize the mapping arrays + int next_size = 0; + for (int c = 1; c < kNumClasses; c++) { + const int max_size_in_class = class_to_size_[c]; + for (int s = next_size; s <= max_size_in_class; s += kAlignment) { + class_array_[ClassIndex(s)] = c; + } + next_size = max_size_in_class + kAlignment; + } + + // Double-check sizes just to be safe + for (size_t size = 0; size <= kMaxSize; size++) { + const int sc = SizeClass(size); + if (sc <= 0 || sc >= kNumClasses) { + CRASH("Bad size class %d for %" PRIuS "\n", sc, size); + } + if (sc > 1 && size <= class_to_size_[sc-1]) { + CRASH("Allocating unnecessarily large class %d for %" PRIuS + "\n", sc, size); + } + const size_t s = class_to_size_[sc]; + if (size > s) { + CRASH("Bad size %" PRIuS " for %" PRIuS " (sc = %d)\n", s, size, sc); + } + if (s == 0) { + CRASH("Bad size %" PRIuS " for %" PRIuS " (sc = %d)\n", s, size, sc); + } + } + + // Initialize the num_objects_to_move array. + for (size_t cl = 1; cl < kNumClasses; ++cl) { + num_objects_to_move_[cl] = NumMoveSize(ByteSizeForClass(cl)); + } +} + +void SizeMap::Dump(TCMalloc_Printer* out) { + // Dump class sizes and maximum external wastage per size class + for (size_t cl = 1; cl < kNumClasses; ++cl) { + const int alloc_size = class_to_pages_[cl] << kPageShift; + const int alloc_objs = alloc_size / class_to_size_[cl]; + const int min_used = (class_to_size_[cl-1] + 1) * alloc_objs; + const int max_waste = alloc_size - min_used; + out->printf("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n", + int(cl), + int(class_to_size_[cl-1] + 1), + int(class_to_size_[cl]), + int(class_to_pages_[cl] << kPageShift), + max_waste * 100.0 / alloc_size + ); + } +} + +// Metadata allocator -- keeps stats about how many bytes allocated. 
+static uint64_t metadata_system_bytes_ = 0; +void* MetaDataAlloc(size_t bytes) { + void* result = TCMalloc_SystemAlloc(bytes, NULL); + if (result != NULL) { + metadata_system_bytes_ += bytes; + } + return result; +} + +uint64_t metadata_system_bytes() { return metadata_system_bytes_; } + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/common.h b/third_party/tcmalloc/chromium/src/common.h new file mode 100644 index 0000000..92c582f --- /dev/null +++ b/third_party/tcmalloc/chromium/src/common.h @@ -0,0 +1,197 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Common definitions for tcmalloc code. + +#ifndef TCMALLOC_COMMON_H_ +#define TCMALLOC_COMMON_H_ + +#include "config.h" +#include <stddef.h> +#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif +#include <stdarg.h> +#include "base/commandlineflags.h" +#include "internal_logging.h" + +// Type that can hold a page number +typedef uintptr_t PageID; + +// Type that can hold the length of a run of pages +typedef uintptr_t Length; + +//------------------------------------------------------------------- +// Configuration +//------------------------------------------------------------------- + +// Not all possible combinations of the following parameters make +// sense. In particular, if kMaxSize increases, you may have to +// increase kNumClasses as well. +static const size_t kPageShift = 12; +static const size_t kPageSize = 1 << kPageShift; +static const size_t kMaxSize = 8u * kPageSize; +static const size_t kAlignment = 8; +static const size_t kNumClasses = 61; + +// Maximum length we allow a per-thread free-list to have before we +// move objects from it into the corresponding central free-list. We +// want this big to avoid locking the central free-list too often. It +// should not hurt to make this list somewhat big because the +// scavenging code will shrink it down when its contents are not in use. 
+static const int kMaxDynamicFreeListLength = 8192; + +static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; + +namespace tcmalloc { + +// Convert byte size into pages. This won't overflow, but may return +// an unreasonably large value if bytes is huge enough. +inline Length pages(size_t bytes) { + return (bytes >> kPageShift) + + ((bytes & (kPageSize - 1)) > 0 ? 1 : 0); +} + +// Size-class information + mapping +class SizeMap { + private: + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. + int num_objects_to_move_[kNumClasses]; + + //------------------------------------------------------------------- + // Mapping from size to size_class and vice versa + //------------------------------------------------------------------- + + // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an + // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. + // So for these larger sizes we have an array indexed by ceil(size/128). + // + // We flatten both logical arrays into one physical array and use + // arithmetic to compute an appropriate index. The constants used by + // ClassIndex() were selected to make the flattening work. + // + // Examples: + // Size Expression Index + // ------------------------------------------------------- + // 0 (0 + 7) / 8 0 + // 1 (1 + 7) / 8 1 + // ... + // 1024 (1024 + 7) / 8 128 + // 1025 (1025 + 127 + (120<<7)) / 128 129 + // ... + // 32768 (32768 + 127 + (120<<7)) / 128 376 + static const int kMaxSmallSize = 1024; + unsigned char class_array_[377]; + + // Compute index of the class_array[] entry for a given size + static inline int ClassIndex(int s) { + ASSERT(0 <= s); + ASSERT(s <= kMaxSize); + const bool big = (s > kMaxSmallSize); + const int add_amount = big ? (127 + (120<<7)) : 7; + const int shift_amount = big ? 7 : 3; + return (s + add_amount) >> shift_amount; + } + + int NumMoveSize(size_t size); + + // Mapping from size class to max size storable in that class + size_t class_to_size_[kNumClasses]; + + // Mapping from size class to number of pages to allocate at a time + size_t class_to_pages_[kNumClasses]; + + public: + // Constructor should do nothing since we rely on explicit Init() + // call, which may or may not be called before the constructor runs. + SizeMap() { } + + // Initialize the mapping arrays + void Init(); + + inline int SizeClass(int size) { + return class_array_[ClassIndex(size)]; + } + + // Get the byte-size for a specified class + inline size_t ByteSizeForClass(size_t cl) { + return class_to_size_[cl]; + } + + // Mapping from size class to max size storable in that class + inline size_t class_to_size(size_t cl) { + return class_to_size_[cl]; + } + + // Mapping from size class to number of pages to allocate at a time + inline size_t class_to_pages(size_t cl) { + return class_to_pages_[cl]; + } + + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. 
+ inline int num_objects_to_move(size_t cl) { + return num_objects_to_move_[cl]; + } + + // Dump contents of the computed size map + void Dump(TCMalloc_Printer* out); +}; + +// Allocates "bytes" worth of memory and returns it. Increments +// metadata_system_bytes appropriately. May return NULL if allocation +// fails. Requires pageheap_lock is held. +void* MetaDataAlloc(size_t bytes); + +// Returns the total number of bytes allocated from the system. +// Requires pageheap_lock is held. +uint64_t metadata_system_bytes(); + +// size/depth are made the same size as a pointer so that some generic +// code below can conveniently cast them back and forth to void*. +static const int kMaxStackDepth = 31; +struct StackTrace { + uintptr_t size; // Size of object + uintptr_t depth; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_COMMON_H_ diff --git a/third_party/tcmalloc/chromium/src/config.h.in b/third_party/tcmalloc/chromium/src/config.h.in new file mode 100644 index 0000000..1b1a3d9 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/config.h.in @@ -0,0 +1,236 @@ +/* src/config.h.in. Generated from configure.ac by autoheader. */ + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +#undef HAVE_BUILTIN_STACK_POINTER + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +#undef HAVE_CONFLICT_SIGNAL_H + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +#undef HAVE_CYGWIN_SIGNAL_H + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#undef HAVE_DECL_CFREE + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#undef HAVE_DECL_MEMALIGN + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#undef HAVE_DECL_POSIX_MEMALIGN + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#undef HAVE_DECL_PVALLOC + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#undef HAVE_DECL_UNAME + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#undef HAVE_DECL_VALLOC + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if the system has the type `Elf32_Versym'. */ +#undef HAVE_ELF32_VERSYM + +/* Define to 1 if you have the <execinfo.h> header file. */ +#undef HAVE_EXECINFO_H + +/* Define to 1 if you have the <fcntl.h> header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the <features.h> header file. */ +#undef HAVE_FEATURES_H + +/* Define to 1 if you have the `geteuid' function. */ +#undef HAVE_GETEUID + +/* Define to 1 if you have the `getpagesize' function. */ +#undef HAVE_GETPAGESIZE + +/* Define to 1 if you have the <glob.h> header file. */ +#undef HAVE_GLOB_H + +/* Define to 1 if you have the <grp.h> header file. */ +#undef HAVE_GRP_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <libunwind.h> header file. */ +#undef HAVE_LIBUNWIND_H + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#undef HAVE_LINUX_PTRACE_H + +/* Define to 1 if you have the <malloc.h> header file. */ +#undef HAVE_MALLOC_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have a working `mmap' system call. 
*/ +#undef HAVE_MMAP + +/* define if the compiler implements namespaces */ +#undef HAVE_NAMESPACES + +/* Define to 1 if you have the <poll.h> header file. */ +#undef HAVE_POLL_H + +/* define if libc has program_invocation_name */ +#undef HAVE_PROGRAM_INVOCATION_NAME + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Define to 1 if you have the <pwd.h> header file. */ +#undef HAVE_PWD_H + +/* Define to 1 if you have the `sbrk' function. */ +#undef HAVE_SBRK + +/* Define to 1 if you have the <sched.h> header file. */ +#undef HAVE_SCHED_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#undef HAVE_STRUCT_MALLINFO + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#undef HAVE_SYS_PRCTL_H + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#undef HAVE_SYS_RESOURCE_H + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#undef HAVE_SYS_SYSCALL_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +#undef HAVE_SYS_UCONTEXT_H + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#undef HAVE_SYS_WAIT_H + +/* Define to 1 if compiler supports __thread */ +#undef HAVE_TLS + +/* Define to 1 if you have the <ucontext.h> header file. */ +#undef HAVE_UCONTEXT_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the <unwind.h> header file. */ +#undef HAVE_UNWIND_H + +/* define if your compiler has __attribute__ */ +#undef HAVE___ATTRIBUTE__ + +/* Define to 1 if the system has the type `__int64'. */ +#undef HAVE___INT64 + +/* prefix where we look for installed files */ +#undef INSTALL_PREFIX + +/* Define to 1 if int32_t is equivalent to intptr_t */ +#undef INT32_EQUALS_INTPTR + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* How to access the PC from a struct ucontext */ +#undef PC_FROM_UCONTEXT + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. 
*/ +#undef PERFTOOLS_DLL_DECL + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIdS + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIuS + +/* printf format code for printing a size_t and ssize_t */ +#undef PRIxS + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* the namespace where STL code like vector<> is defined */ +#undef STL_NAMESPACE + +/* Version number of package */ +#undef VERSION + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + + +#ifdef __MINGW32__ +#include "windows/mingw.h" +#endif + diff --git a/third_party/tcmalloc/chromium/src/config_for_unittests.h b/third_party/tcmalloc/chromium/src/config_for_unittests.h new file mode 100644 index 0000000..b418dbf --- /dev/null +++ b/third_party/tcmalloc/chromium/src/config_for_unittests.h @@ -0,0 +1,64 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Craig Silverstein +// +// This file is needed for windows -- unittests are not part of the +// perftools dll, but still want to include config.h just like the +// dll does, so they can use internal tools and APIs for testing. +// +// The problem is that config.h declares PERFTOOLS_DLL_DECL to be +// for exporting symbols, but the unittest needs to *import* symbols +// (since it's not the dll). +// +// The solution is to have this file, which is just like config.h but +// sets PERFTOOLS_DLL_DECL to do a dllimport instead of a dllexport. 
+// +// The reason we need this extra PERFTOOLS_DLL_DECL_FOR_UNITTESTS +// variable is in case people want to set PERFTOOLS_DLL_DECL explicitly +// to something other than __declspec(dllexport). In that case, they +// may want to use something other than __declspec(dllimport) for the +// unittest case. For that, we allow folks to define both +// PERFTOOLS_DLL_DECL and PERFTOOLS_DLL_DECL_FOR_UNITTESTS explicitly. +// +// NOTE: This file is equivalent to config.h on non-windows systems, +// which never defined PERFTOOLS_DLL_DECL_FOR_UNITTESTS and always +// define PERFTOOLS_DLL_DECL to the empty string. + +#include "config.h" + +#undef PERFTOOLS_DLL_DECL +#ifdef PERFTOOLS_DLL_DECL_FOR_UNITTESTS +# define PERFTOOLS_DLL_DECL PERFTOOLS_DLL_DECL_FOR_UNITTESTS +#else +# define PERFTOOLS_DLL_DECL // if DLL_DECL_FOR_UNITTESTS isn't defined, use "" +#endif diff --git a/third_party/tcmalloc/chromium/src/debugallocation.cc b/third_party/tcmalloc/chromium/src/debugallocation.cc new file mode 100644 index 0000000..47fef16 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/debugallocation.cc @@ -0,0 +1,1391 @@ +// Copyright (c) 2000, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Urs Holzle <opensource@google.com> + +#include "config.h" +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif +#include <pthread.h> +#include <stdio.h> +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#include <stdarg.h> +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <sys/types.h> +#include <sys/stat.h> +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <errno.h> +#include <string.h> + +#include "base/commandlineflags.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include "google/malloc_extension.h" +#include "google/malloc_hook.h" +#include "google/stacktrace.h" +#include "addressmap-inl.h" +#include "malloc_hook-inl.h" +#include "symbolize.h" + +#ifdef TCMALLOC_FOR_DEBUGALLOCATION +#include "tcmalloc.cc" +#else +#include "base/spinlock.h" +// Else we already have a SpinLock defined in tcmalloc/internal_spinlock.h +#endif + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +// ========================================================================= // + +DEFINE_bool(malloctrace, + EnvToBool("TCMALLOC_TRACE", false), + "Enables memory (de)allocation tracing to /tmp/google.alloc."); +#ifdef HAVE_MMAP +DEFINE_bool(malloc_page_fence, + EnvToBool("TCMALLOC_PAGE_FENCE", false), + "Enables putting of memory allocations at page boundaries " + "with a guard page following the allocation (to catch buffer " + "overruns right when they happen)."); +DEFINE_bool(malloc_page_fence_never_reclaim, + EnvToBool("TCMALLOC_PAGE_FRANCE_NEVER_RECLAIM", false), + "Enables making the virtual address space inaccessible " + "upon a deallocation instead of returning it and reusing later."); +#else +DEFINE_bool(malloc_page_fence, false, "Not usable (requires mmap)"); +DEFINE_bool(malloc_page_fence_never_reclaim, false, "Not usable (required mmap)"); +#endif +DEFINE_bool(malloc_reclaim_memory, + EnvToBool("TCMALLOC_RECLAIM_MEMORY", true), + "If set to false, we never return memory to malloc " + "when an object is deallocated. This ensures that all " + "heap object addresses are unique."); +DEFINE_int32(max_free_queue_size, + EnvToInt("TCMALLOC_MAX_FREE_QUEUE_SIZE", 10*1024*1024), + "If greater than 0, keep freed blocks in a queue instead of " + "releasing them to the allocator immediately. Release them when " + "the total size of all blocks in the queue would otherwise exceed " + "this limit."); + +DEFINE_bool(symbolize_stacktrace, + EnvToBool("TCMALLOC_SYMBOLIZE_STACKTRACE", true), + "Symbolize the stack trace when provided (on some error exits)"); + +// ========================================================================= // + +// A safe version of printf() that does not do any allocation and +// uses very little stack space. +static void TracePrintf(int fd, const char *fmt, ...) 
+ __attribute__ ((__format__ (__printf__, 2, 3))); + +// +// GNU has some weird "weak aliasing" thing that permits us to define our +// own malloc(), free(), and realloc() which can use the normal versions of +// of themselves by calling __libc_malloc(), __libc_free(), and +// __libc_realloc(). +// +extern "C" { + extern void* __libc_malloc(size_t size); + extern void __libc_free(void* ptr); + extern void* __libc_realloc(void* ptr, size_t size); + extern void* __libc_calloc(size_t nmemb, size_t size); + extern int __libc_mallopt(int cmd, int value); +#ifdef HAVE_STRUCT_MALLINFO + extern struct mallinfo __libc_mallinfo(void); +#endif +} + +// Define the malloc/free/mallopt/mallinfo implementations +// we will be working on top of. +// TODO(csilvers): provide debugallocation on top of libc alloc, +// so this #ifdef might sometimes be false. +#ifdef TCMALLOC_FOR_DEBUGALLOCATION + +// The do_* functions are defined in tcmalloc.cc, +// which is included before this file +// when TCMALLOC_FOR_DEBUGALLOCATION is defined. +#define BASE_MALLOC do_malloc +#define BASE_FREE do_free +#define BASE_MALLOPT do_mallopt +#define BASE_MALLINFO do_mallinfo + +#else + +// We are working on top of standard libc's malloc library +#define BASE_MALLOC __libc_malloc +#define BASE_FREE __libc_free +#define BASE_MALLOPT __libc_mallopt +#define BASE_MALLINFO __libc_mallinfo + +#endif + +// ========================================================================= // + +class MallocBlock; + +// A circular buffer to hold freed blocks of memory. MallocBlock::Deallocate +// (below) pushes blocks into this queue instead of returning them to the +// underlying allocator immediately. See MallocBlock::Deallocate for more +// information. +// +// We can't use an STL class for this because we need to be careful not to +// perform any heap de-allocations in any of the code in this class, since the +// code in MallocBlock::Deallocate is not re-entrant. +template <typename QueueEntry> +class FreeQueue { + public: + FreeQueue() : q_front_(0), q_back_(0) {} + + bool Full() { + return (q_front_ + 1) % kFreeQueueSize == q_back_; + } + + void Push(QueueEntry block) { + q_[q_front_] = block; + q_front_ = (q_front_ + 1) % kFreeQueueSize; + } + + QueueEntry Pop() { + const QueueEntry& ret = q_[q_back_]; + q_back_ = (q_back_ + 1) % kFreeQueueSize; + return ret; + } + + size_t size() const { + return (q_front_ - q_back_ + kFreeQueueSize) % kFreeQueueSize; + } + + private: + // Maximum number of blocks kept in the free queue before being freed. + static const int kFreeQueueSize = 1024; + + QueueEntry q_[kFreeQueueSize]; + int q_front_; + int q_back_; +}; + +struct MallocBlockQueueEntry { + MallocBlockQueueEntry() : block(NULL), size(0), + num_deleter_pcs(0), deleter_threadid(0) {} + MallocBlockQueueEntry(MallocBlock* b, size_t s) : block(b), size(s) { + if (FLAGS_max_free_queue_size != 0) { + // Adjust the number of frames to skip (4) if you change the + // location of this call. + num_deleter_pcs = + GetStackTrace(deleter_pcs, + sizeof(deleter_pcs) / sizeof(deleter_pcs[0]), + 4); + deleter_threadid = pthread_self(); + } else { + num_deleter_pcs = 0; + // Zero is an illegal pthread id by my reading of the pthread + // implementation: + deleter_threadid = 0; + } + } + + MallocBlock* block; + size_t size; + + // When deleted and put in the free queue, we (flag-controlled) + // record the stack so that if corruption is later found, we can + // print the deleter's stack. 
(These three vars add 144 bytes of + // overhead under the LP64 data model.) + void* deleter_pcs[16]; + int num_deleter_pcs; + pthread_t deleter_threadid; +}; + +class MallocBlock { + public: // allocation type constants + + // Different allocation types we distinguish. + // Note: The lower 4 bits are not random: we index kAllocName array + // by these values masked with kAllocTypeMask; + // the rest are "random" magic bits to help catch memory corruption. + static const int kMallocType = 0xEFCDAB90; + static const int kNewType = 0xFEBADC81; + static const int kArrayNewType = 0xBCEADF72; + + private: // constants + + // A mask used on alloc types above to get to 0, 1, 2 + static const int kAllocTypeMask = 0x3; + // An additional bit to set in AllocType constants + // to mark now deallocated regions. + static const int kDeallocatedTypeBit = 0x4; + + // For better memory debugging, we initialize all storage to known + // values, and overwrite the storage when it's deallocated: + // Byte that fills uninitialized storage. + static const int kMagicUninitializedByte = 0xAB; + // Byte that fills deallocated storage. + // NOTE: tcmalloc.cc depends on the value of kMagicDeletedByte + // to work around a bug in the pthread library. + static const int kMagicDeletedByte = 0xCD; + // An int (type of alloc_type_ below) in a deallocated storage + // filled with kMagicDeletedByte. + static const int kMagicDeletedInt = 0xCDCDCDCD | ((0xCDCDCDCD << 16) << 16); + // Initializer works for 32 and 64 bit ints; + // "<< 16 << 16" is to fool gcc from issuing a warning + // when ints are 32 bits. + + // NOTE: on Linux, you can enable malloc debugging support in libc by + // setting the environment variable MALLOC_CHECK_ to 1 before you + // start the program (see man malloc). + + // We use either BASE_MALLOC or mmap to make the actual allocation. In + // order to remember which one of the two was used for any block, we store an + // appropriate magic word next to the block. + static const int kMagicMalloc = 0xDEADBEEF; + static const int kMagicMMap = 0xABCDEFAB; + + // This array will be filled with 0xCD, for use with memcmp. + static unsigned char kMagicDeletedBuffer[1024]; + static bool deleted_buffer_initialized_; + + private: // data layout + + // The four fields size1_,offset_,magic1_,alloc_type_ + // should together occupy a multiple of 8 bytes. + size_t size1_; + size_t offset_; // normally 0 unless memaligned memory + // see comments in memalign() and FromRawPointer(). + int magic1_; + int alloc_type_; + // here comes the actual data (variable length) + // ... + // then come the size2_ and magic2_, or a full page of mprotect-ed memory + // if the malloc_page_fence feature is enabled. + size_t size2_; + int magic2_; + + private: // static data and helpers + + // Allocation map: stores the allocation type for each allocated object, + // or the type or'ed with kDeallocatedTypeBit + // for each formerly allocated object. + typedef AddressMap<int> AllocMap; + static AllocMap* alloc_map_; + // This protects alloc_map_ and consistent state of metadata + // for each still-allocated object in it. + // We use spin locks instead of pthread_mutex_t locks + // to prevent crashes via calls to pthread_mutex_(un)lock + // for the (de)allocations coming from pthreads initialization itself. + static SpinLock alloc_map_lock_; + + // A queue of freed blocks. 
Instead of releasing blocks to the allocator + // immediately, we put them in a queue, freeing them only when necessary + // to keep the total size of all the freed blocks below the limit set by + // FLAGS_max_free_queue_size. + static FreeQueue<MallocBlockQueueEntry>* free_queue_; + + static size_t free_queue_size_; // total size of blocks in free_queue_ + // protects free_queue_ and free_queue_size_ + static SpinLock free_queue_lock_; + + // Names of allocation types (kMallocType, kNewType, kArrayNewType) + static const char* const kAllocName[]; + // Names of corresponding deallocation types + static const char* const kDeallocName[]; + + static const char* AllocName(int type) { + return kAllocName[type & kAllocTypeMask]; + } + + static const char* DeallocName(int type) { + return kDeallocName[type & kAllocTypeMask]; + } + + private: // helper accessors + + bool IsMMapped() const { return kMagicMMap == magic1_; } + + bool IsValidMagicValue(int value) const { + return kMagicMMap == value || kMagicMalloc == value; + } + + static size_t real_malloced_size(size_t size) { + return size + sizeof(MallocBlock); + } + static size_t real_mmapped_size(size_t size) { + return size + MallocBlock::data_offset(); + } + + size_t real_size() { + return IsMMapped() ? real_mmapped_size(size1_) : real_malloced_size(size1_); + } + + // NOTE: if the block is mmapped (that is, we're using the + // malloc_page_fence option) then there's no size2 or magic2 + // (instead, the guard page begins where size2 would be). + + size_t* size2_addr() { return (size_t*)((char*)&size2_ + size1_); } + const size_t* size2_addr() const { + return (const size_t*)((char*)&size2_ + size1_); + } + + int* magic2_addr() { return (int*)(size2_addr() + 1); } + const int* magic2_addr() const { return (const int*)(size2_addr() + 1); } + + private: // other helpers + + void Initialize(size_t size, int type) { + RAW_CHECK(IsValidMagicValue(magic1_), ""); + // record us as allocated in the map + alloc_map_lock_.Lock(); + if (!alloc_map_) { + void* p = BASE_MALLOC(sizeof(AllocMap)); + alloc_map_ = new(p) AllocMap(BASE_MALLOC, BASE_FREE); + } + alloc_map_->Insert(data_addr(), type); + // initialize us + size1_ = size; + offset_ = 0; + alloc_type_ = type; + if (!IsMMapped()) { + *magic2_addr() = magic1_; + *size2_addr() = size; + } + alloc_map_lock_.Unlock(); + memset(data_addr(), kMagicUninitializedByte, size); + if (!IsMMapped()) { + RAW_CHECK(size1_ == *size2_addr(), "should hold"); + RAW_CHECK(magic1_ == *magic2_addr(), "should hold"); + } + } + + size_t CheckAndClear(int type) { + alloc_map_lock_.Lock(); + CheckLocked(type); + if (!IsMMapped()) { + RAW_CHECK(size1_ == *size2_addr(), "should hold"); + } + // record us as deallocated in the map + alloc_map_->Insert(data_addr(), type | kDeallocatedTypeBit); + alloc_map_lock_.Unlock(); + // clear us + const size_t size = real_size(); + memset(this, kMagicDeletedByte, size); + return size; + } + + void CheckLocked(int type) const { + int map_type = 0; + const int* found_type = + alloc_map_ != NULL ? 
alloc_map_->Find(data_addr()) : NULL; + if (found_type == NULL) { + RAW_LOG(FATAL, "memory allocation bug: object at %p " + "has never been allocated", data_addr()); + } else { + map_type = *found_type; + } + if ((map_type & kDeallocatedTypeBit) != 0) { + RAW_LOG(FATAL, "memory allocation bug: object at %p " + "has been already deallocated (it was allocated with %s)", + data_addr(), AllocName(map_type & ~kDeallocatedTypeBit)); + } + if (alloc_type_ == kMagicDeletedInt) { + RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " + "has been corrupted; or else the object has been already " + "deallocated and our memory map has been corrupted", + data_addr()); + } + if (!IsValidMagicValue(magic1_)) { + RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " + "has been corrupted; " + "or else our memory map has been corrupted and this is a " + "deallocation for not (currently) heap-allocated object", + data_addr()); + } + if (!IsMMapped()) { + if (size1_ != *size2_addr()) { + RAW_LOG(FATAL, "memory stomping bug: a word after object at %p " + "has been corrupted", data_addr()); + } + if (!IsValidMagicValue(*magic2_addr())) { + RAW_LOG(FATAL, "memory stomping bug: a word after object at %p " + "has been corrupted", data_addr()); + } + } + if (alloc_type_ != type) { + if ((alloc_type_ != MallocBlock::kMallocType) && + (alloc_type_ != MallocBlock::kNewType) && + (alloc_type_ != MallocBlock::kArrayNewType)) { + RAW_LOG(FATAL, "memory stomping bug: a word before object at %p " + "has been corrupted", data_addr()); + } + RAW_LOG(FATAL, "memory allocation/deallocation mismatch at %p: " + "allocated with %s being deallocated with %s", + data_addr(), AllocName(alloc_type_), DeallocName(type)); + } + if (alloc_type_ != map_type) { + RAW_LOG(FATAL, "memory stomping bug: our memory map has been corrupted : " + "allocation at %p made with %s " + "is recorded in the map to be made with %s", + data_addr(), AllocName(alloc_type_), AllocName(map_type)); + } + } + + public: // public accessors + + void* data_addr() { return (void*)&size2_; } + const void* data_addr() const { return (const void*)&size2_; } + + static size_t data_offset() { return OFFSETOF_MEMBER(MallocBlock, size2_); } + + size_t data_size() const { return size1_; } + + void set_offset(int offset) { this->offset_ = offset; } + + public: // our main interface + + static MallocBlock* Allocate(size_t size, int type) { + // Prevent an integer overflow / crash with large allocation sizes. + // TODO - Note that for a e.g. 64-bit size_t, max_size_t may not actually + // be the maximum value, depending on how the compiler treats ~0. The worst + // practical effect is that allocations are limited to 4Gb or so, even if + // the address space could take more. + static size_t max_size_t = ~0; + if (size < 0 || size > max_size_t - sizeof(MallocBlock)) { + RAW_LOG(ERROR, "Massive size passed to malloc: %"PRIuS"", size); + return NULL; + } + MallocBlock* b = NULL; + const bool use_malloc_page_fence = FLAGS_malloc_page_fence; +#ifdef HAVE_MMAP + if (use_malloc_page_fence) { + // Put the block towards the end of the page and make the next page + // inaccessible. This will catch buffer overrun right when it happens. 
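      // Worked example (illustrative; assumes a 4096-byte page and the 24-byte
      // LP64 header): for size == 1000, sz == 1024 and num_pages == 2, so we
      // mmap 8192 bytes at p and protect the second page.  The block lands at
      // p + 3072, the client data spans [p + 3096, p + 4096), and the first
      // byte written past the allocation faults on the PROT_NONE guard page.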
+ size_t sz = real_mmapped_size(size); + int pagesize = getpagesize(); + int num_pages = (sz + pagesize - 1) / pagesize + 1; + char* p = (char*) mmap(NULL, num_pages * pagesize, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (p == MAP_FAILED) { + // If the allocation fails, abort rather than returning NULL to + // malloc. This is because in most cases, the program will run out + // of memory in this mode due to tremendous amount of wastage. There + // is no point in propagating the error elsewhere. + RAW_LOG(FATAL, "Out of memory: possibly due to page fence overhead: %s", + strerror(errno)); + } + // Mark the page after the block inaccessible + if (mprotect(p + (num_pages - 1) * pagesize, pagesize, PROT_NONE)) { + RAW_LOG(FATAL, "Guard page setup failed: %s", strerror(errno)); + } + b = (MallocBlock*) (p + (num_pages - 1) * pagesize - sz); + } else { + b = (MallocBlock*) BASE_MALLOC(real_malloced_size(size)); + } +#else + b = (MallocBlock*) BASE_MALLOC(real_malloced_size(size)); +#endif + + // It would be nice to output a diagnostic on allocation failure + // here, but logging (other than FATAL) requires allocating + // memory, which could trigger a nasty recursion. Instead, preserve + // malloc semantics and return NULL on failure. + if (b != NULL) { + b->magic1_ = use_malloc_page_fence ? kMagicMMap : kMagicMalloc; + b->Initialize(size, type); + } + return b; + } + + void Deallocate(int type) { + if (IsMMapped()) { // have to do this before CheckAndClear +#ifdef HAVE_MMAP + int size = CheckAndClear(type); + int pagesize = getpagesize(); + int num_pages = (size + pagesize - 1) / pagesize + 1; + char* p = (char*) this; + if (FLAGS_malloc_page_fence_never_reclaim || + !FLAGS_malloc_reclaim_memory) { + mprotect(p - (num_pages - 1) * pagesize + size, + num_pages * pagesize, PROT_NONE); + } else { + munmap(p - (num_pages - 1) * pagesize + size, num_pages * pagesize); + } +#endif + } else { + const size_t size = CheckAndClear(type); + if (FLAGS_malloc_reclaim_memory) { + // Instead of freeing the block immediately, push it onto a queue of + // recently freed blocks. Free only enough blocks to keep from + // exceeding the capacity of the queue or causing the total amount of + // un-released memory in the queue from exceeding + // FLAGS_max_free_queue_size. + ProcessFreeQueue(this, size, FLAGS_max_free_queue_size); + } + } + } + + static size_t FreeQueueSize() { + SpinLockHolder l(&free_queue_lock_); + return free_queue_size_; + } + + static void ProcessFreeQueue(MallocBlock* b, size_t size, + int max_free_queue_size) { + SpinLockHolder l(&free_queue_lock_); + if (free_queue_ == NULL) + free_queue_ = new FreeQueue<MallocBlockQueueEntry>; + RAW_CHECK(!free_queue_->Full(), "Free queue mustn't be full!"); + + if (b != NULL) { + free_queue_size_ += size + sizeof(MallocBlockQueueEntry); + MallocBlockQueueEntry new_entry(b, size); + free_queue_->Push(new_entry); + } + + // Free blocks until the total size of unfreed blocks no longer exceeds + // max_free_queue_size, and the free queue has at least one free + // space in it. + while (free_queue_size_ > max_free_queue_size || free_queue_->Full()) { + MallocBlockQueueEntry cur = free_queue_->Pop(); + CheckForDanglingWrites(cur); + free_queue_size_ -= cur.size + sizeof(MallocBlockQueueEntry); + BASE_FREE(cur.block); + } + RAW_CHECK(free_queue_size_ >= 0, "Free queue size went negative!"); + } + + static void CheckForDanglingWrites(const MallocBlockQueueEntry& queue_entry) { + // Initialize the buffer if necessary. 
+ if (!deleted_buffer_initialized_) { + // This is threadsafe. We hold free_queue_lock_. + memset(kMagicDeletedBuffer, 0xcd, sizeof(kMagicDeletedBuffer)); + deleted_buffer_initialized_ = true; + } + + const unsigned char* p = + reinterpret_cast<unsigned char*>(queue_entry.block); + + static const size_t size_of_buffer = sizeof(kMagicDeletedBuffer); + const size_t size = queue_entry.size; + const size_t buffers = size / size_of_buffer; + const size_t remainder = size % size_of_buffer; + size_t buffer_idx; + for (buffer_idx = 0; buffer_idx < buffers; ++buffer_idx) { + CheckForCorruptedBuffer(queue_entry, buffer_idx, p, size_of_buffer); + p += size_of_buffer; + } + CheckForCorruptedBuffer(queue_entry, buffer_idx, p, remainder); + } + + static void CheckForCorruptedBuffer(const MallocBlockQueueEntry& queue_entry, + size_t buffer_idx, + const unsigned char* buffer, + size_t size_of_buffer) { + if (memcmp(buffer, kMagicDeletedBuffer, size_of_buffer) == 0) { + return; + } + + RAW_LOG(ERROR, + "Found a corrupted memory buffer in MallocBlock (may be offset " + "from user ptr): buffer index: %zd, buffer ptr: %p, size of " + "buffer: %zd", buffer_idx, buffer, size_of_buffer); + + // The magic deleted buffer should only be 1024 bytes, but in case + // this changes, let's put an upper limit on the number of debug + // lines we'll output: + if (size_of_buffer <= 1024) { + for (int i = 0; i < size_of_buffer; ++i) { + if (buffer[i] != 0xcd) { + RAW_LOG(ERROR, "Buffer byte %d is 0x%02x (should be 0xcd).", + i, buffer[i]); + } + } + } else { + RAW_LOG(ERROR, "Buffer too large to print corruption."); + } + + const MallocBlock* b = queue_entry.block; + const size_t size = queue_entry.size; + if (queue_entry.num_deleter_pcs > 0) { + TracePrintf(STDERR_FILENO, "Deleted by thread %p\n", + reinterpret_cast<void*>( + PRINTABLE_PTHREAD(queue_entry.deleter_threadid))); + + SymbolMap symbolization_table; + const int num_symbols = queue_entry.num_deleter_pcs; // short alias name + for (int i = 0; i < num_symbols; i++) { + // Symbolizes the previous address of pc because pc may be in the + // next function. This may happen when the function ends with + // a call to a function annotated noreturn (e.g. CHECK). + uintptr_t pc = + reinterpret_cast<uintptr_t>(queue_entry.deleter_pcs[i]) - 1; + symbolization_table[pc] = ""; + } + int sym_buffer_len = kSymbolSize * num_symbols; + char *sym_buffer = new char[sym_buffer_len]; + if (FLAGS_symbolize_stacktrace) + Symbolize(sym_buffer, sym_buffer_len, &symbolization_table); + for (int i = 0; i < num_symbols; i++) { + uintptr_t pc = + reinterpret_cast<uintptr_t>(queue_entry.deleter_pcs[i]) - 1; + TracePrintf(STDERR_FILENO, " @ %p %s\n", + reinterpret_cast<void*>(pc), symbolization_table[pc]); + } + } else { + RAW_LOG(ERROR, + "Skipping the printing of the deleter's stack! Its stack was " + "not found; either the corruption occurred too early in " + "execution to obtain a stack trace or --max_free_queue_size was " + "set to 0."); + } + + RAW_LOG(FATAL, + "Memory was written to after being freed. MallocBlock: %p, user " + "ptr: %p, size: %zd. If you can't find the source of the error, " + "try using valgrind or purify, or study the output of the " + "deleter's stack printed above.", b, b->data_addr(), size); + } + + static MallocBlock* FromRawPointer(void* p) { + const size_t data_offset = MallocBlock::data_offset(); + // Find the header just before client's memory. 
+ MallocBlock *mb = reinterpret_cast<MallocBlock *>( + reinterpret_cast<char *>(p) - data_offset); + // If mb->offset_ is zero (common case), mb is the real header. If + // mb->offset_ is non-zero, this block was allocated by memalign, and + // mb->offset_ is the distance backwards to the real header from mb, + // which is a fake header. The following subtraction works for both zero + // and non-zero values. + return reinterpret_cast<MallocBlock *>( + reinterpret_cast<char *>(mb) - mb->offset_); + } + static const MallocBlock* FromRawPointer(const void* p) { + // const-safe version: we just cast about + return FromRawPointer(const_cast<void*>(p)); + } + + void Check(int type) { + alloc_map_lock_.Lock(); + CheckLocked(type); + alloc_map_lock_.Unlock(); + } + + static bool CheckEverything() { + alloc_map_lock_.Lock(); + if (alloc_map_ != NULL) alloc_map_->Iterate(CheckCallback, 0); + alloc_map_lock_.Unlock(); + return true; // if we get here, we're okay + } + + static bool MemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + memset(histogram, 0, kMallocHistogramSize * sizeof(int)); + alloc_map_lock_.Lock(); + stats_blocks_ = 0; + stats_total_ = 0; + stats_histogram_ = histogram; + if (alloc_map_ != NULL) alloc_map_->Iterate(StatsCallback, 0); + *blocks = stats_blocks_; + *total = stats_total_; + alloc_map_lock_.Unlock(); + return true; + } + + private: // helpers for CheckEverything and MemoryStats + + static void CheckCallback(const void* ptr, int* type, int dummy) { + if ((*type & kDeallocatedTypeBit) == 0) { + FromRawPointer(ptr)->CheckLocked(*type); + } + } + + // Accumulation variables for StatsCallback protected by alloc_map_lock_ + static int stats_blocks_; + static size_t stats_total_; + static int* stats_histogram_; + + static void StatsCallback(const void* ptr, int* type, int dummy) { + if ((*type & kDeallocatedTypeBit) == 0) { + const MallocBlock* b = FromRawPointer(ptr); + b->CheckLocked(*type); + ++stats_blocks_; + size_t mysize = b->size1_; + int entry = 0; + stats_total_ += mysize; + while (mysize) { + ++entry; + mysize >>= 1; + } + RAW_CHECK(entry < kMallocHistogramSize, + "kMallocHistogramSize should be at least as large as log2 " + "of the maximum process memory size"); + stats_histogram_[entry] += 1; + } + } +}; + +void DanglingWriteChecker() { + // Clear out the remaining free queue to check for dangling writes. + MallocBlock::ProcessFreeQueue(NULL, 0, 0); +} + +// ========================================================================= // + +const int MallocBlock::kMagicMalloc; +const int MallocBlock::kMagicMMap; + +MallocBlock::AllocMap* MallocBlock::alloc_map_ = NULL; +SpinLock MallocBlock::alloc_map_lock_(SpinLock::LINKER_INITIALIZED); + +FreeQueue<MallocBlockQueueEntry>* MallocBlock::free_queue_ = NULL; +size_t MallocBlock::free_queue_size_ = 0; +SpinLock MallocBlock::free_queue_lock_(SpinLock::LINKER_INITIALIZED); + +unsigned char MallocBlock::kMagicDeletedBuffer[1024]; +bool MallocBlock::deleted_buffer_initialized_ = false; + +const char* const MallocBlock::kAllocName[] = { + "malloc", + "new", + "new []", + NULL, +}; + +const char* const MallocBlock::kDeallocName[] = { + "free", + "delete", + "delete []", + NULL, +}; + +int MallocBlock::stats_blocks_; +size_t MallocBlock::stats_total_; +int* MallocBlock::stats_histogram_; + +// ========================================================================= // + +// The following cut-down version of printf() avoids +// using stdio or ostreams. 
+// This is to guarantee no recursive calls into +// the allocator and to bound the stack space consumed. (The pthread +// manager thread in linuxthreads has a very small stack, +// so fprintf can't be called.) +static void TracePrintf(int fd, const char *fmt, ...) { + char buf[64]; + int i = 0; + va_list ap; + va_start(ap, fmt); + const char *p = fmt; + char numbuf[25]; + numbuf[sizeof(numbuf)-1] = 0; + while (*p != '\0') { // until end of format string + char *s = &numbuf[sizeof(numbuf)-1]; + if (p[0] == '%' && p[1] != 0) { // handle % formats + int64 l = 0; + unsigned long base = 0; + if (*++p == 's') { // %s + s = va_arg(ap, char *); + } else if (*p == 'l' && p[1] == 'd') { // %ld + l = va_arg(ap, long); + base = 10; + p++; + } else if (*p == 'l' && p[1] == 'u') { // %lu + l = va_arg(ap, unsigned long); + base = 10; + p++; + } else if (*p == 'z' && p[1] == 'u') { // %zu + l = va_arg(ap, size_t); + base = 10; + p++; + } else if (*p == 'u') { // %u + l = va_arg(ap, unsigned int); + base = 10; + } else if (*p == 'd') { // %d + l = va_arg(ap, int); + base = 10; + } else if (*p == 'p') { // %p + l = va_arg(ap, intptr_t); + base = 16; + } else { + write(STDERR_FILENO, "Unimplemented TracePrintf format\n", 33); + write(STDERR_FILENO, p, 2); + write(STDERR_FILENO, "\n", 1); + abort(); + } + p++; + if (base != 0) { + bool minus = (l < 0 && base == 10); + uint64 ul = minus? -l : l; + do { + *--s = "0123456789abcdef"[ul % base]; + ul /= base; + } while (ul != 0); + if (base == 16) { + *--s = 'x'; + *--s = '0'; + } else if (minus) { + *--s = '-'; + } + } + } else { // handle normal characters + *--s = *p++; + } + while (*s != 0) { + if (i == sizeof(buf)) { + write(fd, buf, i); + i = 0; + } + buf[i++] = *s++; + } + } + if (i != 0) { + write(fd, buf, i); + } + va_end(ap); +} + +// Return the file descriptor we're writing a log to +static int TraceFd() { + static int trace_fd = -1; + if (trace_fd == -1) { // Open the trace file on the first call + trace_fd = open("/tmp/google.alloc", O_CREAT|O_TRUNC|O_WRONLY, 0666); + if (trace_fd == -1) { + trace_fd = 2; + TracePrintf(trace_fd, + "Can't open /tmp/google.alloc. Logging to stderr.\n"); + } + // Add a header to the log. + TracePrintf(trace_fd, "Trace started: %lu\n", + static_cast<unsigned long>(time(NULL))); + TracePrintf(trace_fd, + "func\tsize\tptr\tthread_id\tstack pcs for tools/symbolize\n"); + } + return trace_fd; +} + +// Print the hex stack dump on a single line. PCs are separated by tabs. +static void TraceStack(void) { + void *pcs[16]; + int n = GetStackTrace(pcs, sizeof(pcs)/sizeof(pcs[0]), 0); + for (int i = 0; i != n; i++) { + TracePrintf(TraceFd(), "\t%p", pcs[i]); + } +} + +// This protects MALLOC_TRACE, to make sure its info is atomically written. +static SpinLock malloc_trace_lock(SpinLock::LINKER_INITIALIZED); + +#define MALLOC_TRACE(name, size, addr) \ + do { \ + if (FLAGS_malloctrace) { \ + SpinLockHolder l(&malloc_trace_lock); \ + TracePrintf(TraceFd(), "%s\t%"PRIuS"\t%p\t%"GPRIuPTHREAD, \ + name, size, addr, PRINTABLE_PTHREAD(pthread_self())); \ + TraceStack(); \ + TracePrintf(TraceFd(), "\n"); \ + } \ + } while (0) + +// ========================================================================= // + +// Write the characters buf[0, ..., size-1] to +// the malloc trace buffer. +// This function is intended for debugging, +// and is not declared in any header file. +// You must insert a declaration of it by hand when you need +// to use it. 
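// For example (an illustrative sketch; the message is hypothetical), a
// debugging call site elsewhere would add:
//
//   extern void __malloctrace_write(const char *buf, size_t size);
//   ...
//   static const char kMsg[] = "=== entering phase two ===\n";
//   __malloctrace_write(kMsg, sizeof(kMsg) - 1);  // no-op unless --malloctrace is set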
+void __malloctrace_write(const char *buf, size_t size) { + if (FLAGS_malloctrace) { + write(TraceFd(), buf, size); + } +} + +// ========================================================================= // + +// General debug allocation/deallocation + +static inline void* DebugAllocate(size_t size, int type) { + MallocBlock* ptr = MallocBlock::Allocate(size, type); + if (ptr == NULL) return NULL; + MALLOC_TRACE("malloc", size, ptr->data_addr()); + return ptr->data_addr(); +} + +static inline void DebugDeallocate(void* ptr, int type) { + MALLOC_TRACE("free", + (ptr != 0 ? MallocBlock::FromRawPointer(ptr)->data_size() : 0), + ptr); + if (ptr) MallocBlock::FromRawPointer(ptr)->Deallocate(type); +} + +// ========================================================================= // + +// Alloc/free stuff for debug hooks for malloc & friends + +// CAVEAT: The code structure below ensures that MallocHook methods are always +// called from the stack frame of the invoked allocation function. +// heap-checker.cc depends on this to start a stack trace from +// the call to the (de)allocation function. + +// Put all callers of MallocHook::Invoke* in this module into +// ATTRIBUTE_SECTION(google_malloc) section, +// so that MallocHook::GetCallerStackTrace can function accurately: + +extern "C" { + void* malloc(size_t size) __THROW ATTRIBUTE_SECTION(google_malloc); + void free(void* ptr) __THROW ATTRIBUTE_SECTION(google_malloc); + void* realloc(void* ptr, size_t size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* calloc(size_t nmemb, size_t size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void cfree(void* ptr) __THROW ATTRIBUTE_SECTION(google_malloc); + + void* memalign(size_t __alignment, size_t __size) __THROW + ATTRIBUTE_SECTION(google_malloc); + int posix_memalign(void** ptr, size_t align, size_t size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* valloc(size_t __size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* pvalloc(size_t __size) __THROW + ATTRIBUTE_SECTION(google_malloc); +} + +static void *MemalignOverride(size_t align, size_t size, + const void *caller) __THROW + ATTRIBUTE_SECTION(google_malloc); + +void* operator new(size_t size) + ATTRIBUTE_SECTION(google_malloc); +void* operator new(size_t size, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); +void operator delete(void* p) __THROW + ATTRIBUTE_SECTION(google_malloc); +void operator delete(void* p, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); +void* operator new[](size_t size) + ATTRIBUTE_SECTION(google_malloc); +void* operator new[](size_t size, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); +void operator delete[](void* p) __THROW + ATTRIBUTE_SECTION(google_malloc); +void operator delete[](void* p, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); + +extern "C" void* malloc(size_t size) __THROW { + void* ptr = DebugAllocate(size, MallocBlock::kMallocType); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +extern "C" void free(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType); +} + +extern "C" void* realloc(void* ptr, size_t size) __THROW { + if (ptr == NULL) { + ptr = DebugAllocate(size, MallocBlock::kMallocType); + MallocHook::InvokeNewHook(ptr, size); + return ptr; + } + if (size == 0) { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType); + return NULL; + } + MallocBlock* old = MallocBlock::FromRawPointer(ptr); + 
old->Check(MallocBlock::kMallocType); + MallocBlock* p = MallocBlock::Allocate(size, MallocBlock::kMallocType); + + // If realloc fails we are to leave the old block untouched and + // return null + if (p == NULL) return NULL; + + memcpy(p->data_addr(), old->data_addr(), + (old->data_size() < size) ? old->data_size() : size); + MallocHook::InvokeDeleteHook(ptr); + MallocHook::InvokeNewHook(p->data_addr(), size); + DebugDeallocate(ptr, MallocBlock::kMallocType); + MALLOC_TRACE("realloc", p->data_size(), p->data_addr()); + return p->data_addr(); +} + +extern "C" void* calloc(size_t count, size_t size) __THROW { + // Overflow check + const size_t total_size = count * size; + if (size != 0 && total_size / size != count) return NULL; + + void* block = DebugAllocate(total_size, MallocBlock::kMallocType); + MallocHook::InvokeNewHook(block, total_size); + if (block) memset(block, 0, total_size); + return block; +} + +extern "C" void cfree(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kMallocType); +} + +// Round "value" up to next "alignment" boundary. +// Requires that "alignment" be a power of two. +static intptr_t RoundUp(intptr_t value, intptr_t alignment) { + return (value + alignment - 1) & ~(alignment - 1); +} + +static void *do_debug_memalign(size_t alignment, size_t size) { + // Allocate >= size bytes aligned on "alignment" boundary + // "alignment" is a power of two. + void *p = 0; + RAW_CHECK((alignment & (alignment-1)) == 0, "must be power of two"); + const size_t data_offset = MallocBlock::data_offset(); + // Allocate "alignment-1" extra bytes to ensure alignment is possible, and + // a further data_offset bytes for an additional fake header. + size_t extra_bytes = data_offset + alignment - 1; + if (size + extra_bytes < size) return NULL; // Overflow + p = DebugAllocate(size + extra_bytes, MallocBlock::kMallocType); + if (p != 0) { + intptr_t orig_p = reinterpret_cast<intptr_t>(p); + // Leave data_offset bytes for fake header, and round up to meet + // alignment. + p = reinterpret_cast<void *>(RoundUp(orig_p + data_offset, alignment)); + // Create a fake header block with an offset_ that points back to the + // real header. FromRawPointer uses this value. + MallocBlock *fake_hdr = reinterpret_cast<MallocBlock *>( + reinterpret_cast<char *>(p) - data_offset); + // offset_ is distance between real and fake headers. + // p is now end of fake header (beginning of client area), + // and orig_p is the end of the real header, so offset_ + // is their difference. + fake_hdr->set_offset(reinterpret_cast<intptr_t>(p) - orig_p); + } + return p; +} + +// Override __libc_memalign in libc on linux boxes. +// They have a bug in libc that causes them (very rarely) to allocate +// with __libc_memalign() yet deallocate with free(). +// This function is an exception to the rule of calling MallocHook method +// from the stack frame of the allocation function; +// heap-checker handles this special case explicitly. 
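// Worked example for do_debug_memalign above (illustrative; assumes the
// 24-byte LP64 header): memalign(64, 100) asks DebugAllocate for
// 100 + 24 + 63 == 187 bytes.  If that returns client memory at
// orig_p == 0x1008, then p == RoundUp(0x1008 + 24, 64) == 0x1040, the fake
// header sits at p - 24 == 0x1028, and its offset_ is p - orig_p == 0x38.
// FromRawPointer later subtracts that offset_ from the fake header and lands
// on the real header at 0xff0, exactly data_offset bytes before orig_p.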
+static void *MemalignOverride(size_t align, size_t size, + const void *caller) __THROW { + void *p = do_debug_memalign(align, size); + MallocHook::InvokeNewHook(p, size); + return p; +} +void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; + +extern "C" void* memalign(size_t align, size_t size) __THROW { + void *p = do_debug_memalign(align, size); + MallocHook::InvokeNewHook(p, size); + return p; +} + +// Implementation taken from tcmalloc/tcmalloc.cc +extern "C" int posix_memalign(void** result_ptr, + size_t align, size_t size) __THROW { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } + + void* result = do_debug_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + if (result == NULL) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +extern "C" void* valloc(size_t size) __THROW { + // Allocate >= size bytes starting on a page boundary + void *p = do_debug_memalign(getpagesize(), size); + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" void* pvalloc(size_t size) __THROW { + // Round size up to a multiple of pages + // then allocate memory on a page boundary + int pagesize = getpagesize(); + size = RoundUp(size, pagesize); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } + void *p = do_debug_memalign(pagesize, size); + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" int mallopt(int cmd, int value) { + return BASE_MALLOPT(cmd, value); +} + +#ifdef HAVE_STRUCT_MALLINFO +extern "C" struct mallinfo mallinfo(void) { + return BASE_MALLINFO(); +} +#endif + +// ========================================================================= // + +// Alloc/free stuff for debug operator new & friends + +// This is mostly the same a cpp_alloc in tcmalloc.cc. +inline void* cpp_debug_alloc(size_t size, int new_type, bool nothrow) { + for (;;) { + void* p = DebugAllocate(size, new_type); +#ifdef PREANSINEW + return p; +#else + if (p == NULL) { // allocation failed + // Get the current new handler. NB: this function is not + // thread-safe. We make a feeble stab at making it so here, but + // this lock only protects against tcmalloc interfering with + // itself, not with other libraries calling set_new_handler. + std::new_handler nh; + { + SpinLockHolder h(&set_new_handler_lock); + nh = std::set_new_handler(0); + (void) std::set_new_handler(nh); + } +#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + if (nh) { + // Since exceptions are disabled, we don't really know if new_handler + // failed. Assume it will abort if it fails. + (*nh)(); + continue; + } + return 0; +#else + // If no new_handler is established, the allocation failed. + if (!nh) { + if (nothrow) return 0; + throw std::bad_alloc(); + } + // Otherwise, try the new_handler. If it returns, retry the + // allocation. If it throws std::bad_alloc, fail the allocation. + // if it throws something else, don't interfere. 
+ try { + (*nh)(); + } catch (const std::bad_alloc&) { + if (!nothrow) throw; + return p; + } +#endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + } else { // allocation success + return p; + } +#endif // PREANSINEW + } +} + +void* operator new(size_t size) { + void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, false); + MallocHook::InvokeNewHook(ptr, size); + if (ptr == NULL) { + RAW_LOG(FATAL, "Unable to allocate %"PRIuS" bytes: new failed.", size); + } + return ptr; +} + +void* operator new(size_t size, const std::nothrow_t&) __THROW { + void* ptr = cpp_debug_alloc(size, MallocBlock::kNewType, true); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +void operator delete(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kNewType); +} + +// Compilers use this, though I can't see how it differs from normal delete. +void operator delete(void* ptr, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kNewType); +} + +// ========================================================================= // + +// Alloc/free stuff for debug operator new[] & friends + +void* operator new[](size_t size) { + void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, false); + MallocHook::InvokeNewHook(ptr, size); + if (ptr == NULL) { + RAW_LOG(FATAL, "Unable to allocate %"PRIuS" bytes: new[] failed.", size); + } + return ptr; +} + +void* operator new[](size_t size, const std::nothrow_t&) __THROW { + void* ptr = cpp_debug_alloc(size, MallocBlock::kArrayNewType, true); + MallocHook::InvokeNewHook(ptr, size); + return ptr; +} + +void operator delete[](void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kArrayNewType); +} + +// Compilers use this, though I can't see how it differs from normal delete. +void operator delete[](void* ptr, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(ptr); + DebugDeallocate(ptr, MallocBlock::kArrayNewType); +} + +// ========================================================================= // + +// The following functions may be called via MallocExtension::instance() +// for memory verification and statistics. 
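// For example (an illustrative sketch; the test function is hypothetical):
//
//   #include <assert.h>
//   #include <stdlib.h>
//   #include <google/malloc_extension.h>
//
//   void SmokeTestDebugAllocator() {
//     void* p = malloc(100);
//     MallocExtension* ext = MallocExtension::instance();
//     assert(ext->VerifyMallocMemory(p));       // re-checks this block's magic words
//     assert(ext->VerifyAllMemory());           // walks every live block in alloc_map_
//     assert(ext->GetAllocatedSize(p) == 100);  // debugallocation knows the exact size
//     size_t live = 0;                          // excludes blocks parked in the free queue
//     ext->GetNumericProperty("generic.current_allocated_bytes", &live);
//     free(p);
//   }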
+#ifdef TCMALLOC_FOR_DEBUGALLOCATION +// Inherit from tcmalloc's version +typedef TCMallocImplementation ParentImplementation; +#else +// Inherit from default version +typedef MallocExtension ParentImplementation; +#endif + +class DebugMallocImplementation : public ParentImplementation { + public: + virtual bool GetNumericProperty(const char* name, size_t* value) { + bool result = ParentImplementation::GetNumericProperty(name, value); + if (result && (strcmp(name, "generic.current_allocated_bytes") == 0)) { + // Subtract bytes kept in the free queue + size_t qsize = MallocBlock::FreeQueueSize(); + if (*value >= qsize) { + *value -= qsize; + } + } + return result; + } + + virtual bool VerifyNewMemory(void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kNewType); + return true; + } + + virtual bool VerifyArrayNewMemory(void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kArrayNewType); + return true; + } + + virtual bool VerifyMallocMemory(void* p) { + if (p) MallocBlock::FromRawPointer(p)->Check(MallocBlock::kMallocType); + return true; + } + + virtual bool VerifyAllMemory() { + return MallocBlock::CheckEverything(); + } + + virtual bool MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + return MallocBlock::MemoryStats(blocks, total, histogram); + } + + virtual size_t GetAllocatedSize(void* p) { + if (p) { + return MallocBlock::FromRawPointer(p)->data_size(); + } + return 0; + } + virtual size_t GetEstimatedAllocatedSize(size_t size) { + return size; + } + }; + +static DebugMallocImplementation debug_malloc_implementation; + +REGISTER_MODULE_INITIALIZER(debugallocation, { + MallocExtension::Register(&debug_malloc_implementation); + + // When the program exits, check all blocks still in the free queue for + // corruption. + atexit(DanglingWriteChecker); +}); + +#ifdef TCMALLOC_FOR_DEBUGALLOCATION + +// Redefine malloc_stats to use tcmalloc's implementation: +extern "C" void malloc_stats(void) { + do_malloc_stats(); +} + +// Some library routines on RedHat 9 allocate memory using malloc() +// and free it using __libc_free() (or vice-versa). Since we provide +// our own implementations of malloc/free using tcmalloc.cc, +// we need to make sure that the __libc_XXX variants +// also point to the same implementations. +// +// Note: this might not override __libc_XXX calls withing libc itself, +// but it can be important for other libraries that mention these functions +// or when this code is LD_PRELOAD-ed. +// TODO: In case these __libc_* definitions do not actually matter, +// they should go away from here and from tcmalloc/tcmalloc.cc. 
+// +extern "C" { + void* __libc_malloc(size_t size) { return malloc(size); } + void __libc_free(void* ptr) { free(ptr); } + void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } + void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); } + void __libc_cfree(void* ptr) { cfree(ptr); } + void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); } + void* __libc_valloc(size_t size) { return valloc(size); } + void* __libc_pvalloc(size_t size) { return pvalloc(size); } + int __posix_memalign(void** r, size_t a, size_t s) { + return posix_memalign(r, a, s); + } +} + +#endif // #ifdef TCMALLOC_FOR_DEBUGALLOCATION diff --git a/third_party/tcmalloc/chromium/src/getpc.h b/third_party/tcmalloc/chromium/src/getpc.h new file mode 100644 index 0000000..9fb2e16 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/getpc.h @@ -0,0 +1,186 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// This is an internal header file used by profiler.cc. It defines +// the single (inline) function GetPC. GetPC is used in a signal +// handler to figure out the instruction that was being executed when +// the signal-handler was triggered. +// +// To get this, we use the ucontext_t argument to the signal-handler +// callback, which holds the full context of what was going on when +// the signal triggered. How to get from a ucontext_t to a Program +// Counter is OS-dependent. + +#ifndef BASE_GETPC_H_ +#define BASE_GETPC_H_ + +#include "config.h" + +// On many linux systems, we may need _GNU_SOURCE to get access to +// the defined constants that define the register we want to see (eg +// REG_EIP). Note this #define must come first! +#define _GNU_SOURCE 1 +// If #define _GNU_SOURCE causes problems, this might work instead. +// It will cause problems for FreeBSD though!, because it turns off +// the needed __BSD_VISIBLE. 
+//#define _XOPEN_SOURCE 500 + +#include <string.h> // for memcmp +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> // for ucontext_t (and also mcontext_t) +#elif defined(HAVE_CYGWIN_SIGNAL_H) +#include <cygwin/signal.h> +typedef ucontext ucontext_t; +#endif + + +// Take the example where function Foo() calls function Bar(). For +// many architectures, Bar() is responsible for setting up and tearing +// down its own stack frame. In that case, it's possible for the +// interrupt to happen when execution is in Bar(), but the stack frame +// is not properly set up (either before it's done being set up, or +// after it's been torn down but before Bar() returns). In those +// cases, the stack trace cannot see the caller function anymore. +// +// GetPC can try to identify this situation, on architectures where it +// might occur, and unwind the current function call in that case to +// avoid false edges in the profile graph (that is, edges that appear +// to show a call skipping over a function). To do this, we hard-code +// in the asm instructions we might see when setting up or tearing +// down a stack frame. +// +// This is difficult to get right: the instructions depend on the +// processor, the compiler ABI, and even the optimization level. This +// is a best effort patch -- if we fail to detect such a situation, or +// mess up the PC, nothing happens; the returned PC is not used for +// any further processing. +struct CallUnrollInfo { + // Offset from (e)ip register where this instruction sequence + // should be matched. Interpreted as bytes. Offset 0 is the next + // instruction to execute. Be extra careful with negative offsets in + // architectures of variable instruction length (like x86) - it is + // not that easy as taking an offset to step one instruction back! + int pc_offset; + // The actual instruction bytes. Feel free to make it larger if you + // need a longer sequence. + char ins[16]; + // How many bytes to match from ins array? + int ins_size; + // The offset from the stack pointer (e)sp where to look for the + // call return address. Interpreted as bytes. + int return_sp_offset; +}; + + +// The dereferences needed to get the PC from a struct ucontext were +// determined at configure time, and stored in the macro +// PC_FROM_UCONTEXT in config.h. The only thing we need to do here, +// then, is to do the magic call-unrolling for systems that support it. + +// -- Special case 1: linux x86, for which we have CallUnrollInfo +#if defined(__linux) && defined(__i386) && defined(__GNUC__) +static const CallUnrollInfo callunrollinfo[] = { + // Entry to a function: push %ebp; mov %esp,%ebp + // Top-of-stack contains the caller IP. + { 0, + {0x55, 0x89, 0xe5}, 3, + 0 + }, + // Entry to a function, second instruction: push %ebp; mov %esp,%ebp + // Top-of-stack contains the old frame, caller IP is +4. + { -1, + {0x55, 0x89, 0xe5}, 3, + 4 + }, + // Return from a function: RET. + // Top-of-stack contains the caller IP. + { 0, + {0xc3}, 1, + 0 + } +}; + +inline void* GetPC(const ucontext_t& signal_ucontext) { + // See comment above struct CallUnrollInfo. Only try instruction + // flow matching if both eip and esp looks reasonable. 
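  // For example (illustrative): eip == 0x0804abcd passes the eip part of the
  // test below because its upper half-word is neither all zeros nor all ones,
  // while values such as 0x00000042 or 0xffffab12 are rejected as implausible
  // code addresses and we simply return eip unmodified.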
+ const int eip = signal_ucontext.uc_mcontext.gregs[REG_EIP]; + const int esp = signal_ucontext.uc_mcontext.gregs[REG_ESP]; + if ((eip & 0xffff0000) != 0 && (~eip & 0xffff0000) != 0 && + (esp & 0xffff0000) != 0) { + char* eip_char = reinterpret_cast<char*>(eip); + for (int i = 0; i < sizeof(callunrollinfo)/sizeof(*callunrollinfo); ++i) { + if (!memcmp(eip_char + callunrollinfo[i].pc_offset, + callunrollinfo[i].ins, callunrollinfo[i].ins_size)) { + // We have a match. + void **retaddr = (void**)(esp + callunrollinfo[i].return_sp_offset); + return *retaddr; + } + } + } + return (void*)eip; +} + +// Special case #2: Windows, which has to do something totally different. +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +// If this is ever implemented, probably the way to do it is to have +// profiler.cc use a high-precision timer via timeSetEvent: +// http://msdn2.microsoft.com/en-us/library/ms712713.aspx +// We'd use it in mode TIME_CALLBACK_FUNCTION/TIME_PERIODIC. +// The callback function would be something like prof_handler, but +// alas the arguments are different: no ucontext_t! I don't know +// how we'd get the PC (using StackWalk64?) +// http://msdn2.microsoft.com/en-us/library/ms680650.aspx + +#include "base/logging.h" // for RAW_LOG +#ifndef HAVE_CYGWIN_SIGNAL_H +typedef int ucontext_t; +#endif + +inline void* GetPC(const struct ucontext_t& signal_ucontext) { + RAW_LOG(ERROR, "GetPC is not yet implemented on Windows\n"); + return NULL; +} + +// Normal cases. If this doesn't compile, it's probably because +// PC_FROM_UCONTEXT is the empty string. You need to figure out +// the right value for your system, and add it to the list in +// configure.ac (or set it manually in your config.h). +#else +inline void* GetPC(const ucontext_t& signal_ucontext) { + return (void*)signal_ucontext.PC_FROM_UCONTEXT; // defined in config.h +} + +#endif + +#endif // BASE_GETPC_H_ diff --git a/third_party/tcmalloc/chromium/src/google/heap-checker.h b/third_party/tcmalloc/chromium/src/google/heap-checker.h new file mode 100644 index 0000000..c0ee8a8 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/heap-checker.h @@ -0,0 +1,418 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Maxim Lifantsev (with design ideas by Sanjay Ghemawat) +// +// +// Module for detecing heap (memory) leaks. +// +// For full(er) information, see doc/heap_checker.html +// +// This module can be linked into programs with +// no slowdown caused by this unless you activate the leak-checker: +// +// 1. Set the environment variable HEAPCHEK to _type_ before +// running the program. +// +// _type_ is usually "normal" but can also be "minimal", "strict", or +// "draconian". (See the html file for other options, like 'local'.) +// +// After that, just run your binary. If the heap-checker detects +// a memory leak at program-exit, it will print instructions on how +// to track down the leak. + +#ifndef BASE_HEAP_CHECKER_H_ +#define BASE_HEAP_CHECKER_H_ + +#include <sys/types.h> // for size_t +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. +#ifndef _MSC_VER +#include <stdint.h> // for uintptr_t +#endif +#include <stdarg.h> // for va_list +#include <vector> + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + + +// The class is thread-safe with respect to all the provided static methods, +// as well as HeapLeakChecker objects: they can be accessed by multiple threads. +class PERFTOOLS_DLL_DECL HeapLeakChecker { + public: + + // ----------------------------------------------------------------------- // + // Static functions for working with (whole-program) leak checking. + + // If heap leak checking is currently active in some mode + // e.g. if leak checking was started (and is still active now) + // due to HEAPCHECK=... defined in the environment. + // The return value reflects iff HeapLeakChecker objects manually + // constructed right now will be doing leak checking or nothing. + // Note that we can go from active to inactive state during InitGoogle() + // if FLAGS_heap_check gets set to "" by some code before/during InitGoogle(). + static bool IsActive(); + + // Return pointer to the whole-program checker if it has been created + // and NULL otherwise. + // Once GlobalChecker() returns non-NULL that object will not disappear and + // will be returned by all later GlobalChecker calls. + // This is mainly to access BytesLeaked() and ObjectsLeaked() (see below) + // for the whole-program checker after one calls NoGlobalLeaks() + // or similar and gets false. + static HeapLeakChecker* GlobalChecker(); + + // Do whole-program leak check now (if it was activated for this binary); + // return false only if it was activated and has failed. + // The mode of the check is controlled by the command-line flags. + // This method can be called repeatedly. 
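  // (Illustrative: a long-running test or server can call
  // HeapLeakChecker::NoGlobalLeaks() from a periodic self-check and treat a
  // false return as "whole-program leak checking was active and found leaks".)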
+ // Things like GlobalChecker()->SameHeap() can also be called explicitly + // to do the desired flavor of the check. + static bool NoGlobalLeaks(); + + // If whole-program checker if active, + // cancel its automatic execution after main() exits. + // This requires that some leak check (e.g. NoGlobalLeaks()) + // has been called at least once on the whole-program checker. + static void CancelGlobalCheck(); + + // ----------------------------------------------------------------------- // + // Non-static functions for starting and doing leak checking. + + // Start checking and name the leak check performed. + // The name is used in naming dumped profiles + // and needs to be unique only within your binary. + // It must also be a string that can be a part of a file name, + // in particular not contain path expressions. + explicit HeapLeakChecker(const char *name); + + // Destructor (verifies that some *NoLeaks or *SameHeap method + // has been called at least once). + ~HeapLeakChecker(); + + // These used to be different but are all the same now: they return + // true iff all memory allocated since this HeapLeakChecker object + // was constructor is still reachable from global state. + // + // Because we fork to convert addresses to symbol-names, and forking + // is not thread-safe, and we may be called in a threaded context, + // we do not try to symbolize addresses when called manually. + bool NoLeaks() { return DoNoLeaks(DO_NOT_SYMBOLIZE); } + + // These forms are obsolete; use NoLeaks() instead. + // TODO(csilvers): mark with ATTRIBUTE_DEPRECATED. + bool QuickNoLeaks() { return NoLeaks(); } + bool BriefNoLeaks() { return NoLeaks(); } + bool SameHeap() { return NoLeaks(); } + bool QuickSameHeap() { return NoLeaks(); } + bool BriefSameHeap() { return NoLeaks(); } + + // Detailed information about the number of leaked bytes and objects + // (both of these can be negative as well). + // These are available only after a *SameHeap or *NoLeaks + // method has been called. + // Note that it's possible for both of these to be zero + // while SameHeap() or NoLeaks() returned false in case + // of a heap state change that is significant + // but preserves the byte and object counts. + ssize_t BytesLeaked() const; + ssize_t ObjectsLeaked() const; + + // ----------------------------------------------------------------------- // + // Static helpers to make us ignore certain leaks. + + // Scoped helper class. Should be allocated on the stack inside a + // block of code. Any heap allocations done in the code block + // covered by the scoped object (including in nested function calls + // done by the code block) will not be reported as leaks. This is + // the recommended replacement for the GetDisableChecksStart() and + // DisableChecksToHereFrom() routines below. + // + // Example: + // void Foo() { + // HeapLeakChecker::Disabler disabler; + // ... code that allocates objects whose leaks should be ignored ... + // } + // + // REQUIRES: Destructor runs in same thread as constructor + class Disabler { + public: + Disabler(); + ~Disabler(); + private: + Disabler(const Disabler&); // disallow copy + void operator=(const Disabler&); // and assign + }; + + // Ignore an object located at 'ptr' (can go at the start or into the object) + // as well as all heap objects (transitively) referenced from it + // for the purposes of heap leak checking. 
+ // If 'ptr' does not point to an active allocated object + // at the time of this call, it is ignored; + // but if it does, the object must not get deleted from the heap later on; + // it must also be not already ignored at the time of this call. + // + // See also HiddenPointer, below, if you need to prevent a pointer from + // being traversed by the heap checker but do not wish to transitively + // whitelist objects referenced through it. + static void IgnoreObject(const void* ptr); + + // Undo what an earlier IgnoreObject() call promised and asked to do. + // At the time of this call 'ptr' must point at or inside of an active + // allocated object which was previously registered with IgnoreObject(). + static void UnIgnoreObject(const void* ptr); + + // ----------------------------------------------------------------------- // + // Initialization; to be called from main() only. + + // Full starting of recommended whole-program checking. + static void InternalInitStart(); + + // ----------------------------------------------------------------------- // + // Internal types defined in .cc + + class Allocator; + struct RangeValue; + + private: + + // ----------------------------------------------------------------------- // + // Various helpers + + // Create the name of the heap profile file. + // Should be deleted via Allocator::Free(). + char* MakeProfileNameLocked(); + + // Helper for constructors + void Create(const char *name, bool make_start_snapshot); + + enum ShouldSymbolize { SYMBOLIZE, DO_NOT_SYMBOLIZE }; + + // Helper for *NoLeaks and *SameHeap + bool DoNoLeaks(ShouldSymbolize should_symbolize); + + // These used to be public, but they are now deprecated. + // Will remove entirely when all internal uses are fixed. + // In the meantime, use friendship so the unittest can still test them. + static void* GetDisableChecksStart(); + static void DisableChecksToHereFrom(const void* start_address); + static void DisableChecksIn(const char* pattern); + friend void RangeDisabledLeaks(); + friend void NamedTwoDisabledLeaks(); + friend void* RunNamedDisabledLeaks(void*); + friend void TestHeapLeakCheckerNamedDisabling(); + friend int main(int, char**); + + + // Helper for DisableChecksIn + static void DisableChecksInLocked(const char* pattern); + + // Disable checks based on stack trace entry at a depth <= + // max_depth. Used to hide allocations done inside some special + // libraries. + static void DisableChecksFromToLocked(const void* start_address, + const void* end_address, + int max_depth); + + // Helper for DoNoLeaks to ignore all objects reachable from all live data + static void IgnoreAllLiveObjectsLocked(const void* self_stack_top); + + // Callback we pass to ListAllProcessThreads (see thread_lister.h) + // that is invoked when all threads of our process are found and stopped. + // The call back does the things needed to ignore live data reachable from + // thread stacks and registers for all our threads + // as well as do other global-live-data ignoring + // (via IgnoreNonThreadLiveObjectsLocked) + // during the quiet state of all threads being stopped. + // For the argument meaning see the comment by ListAllProcessThreads. + // Here we only use num_threads and thread_pids, that ListAllProcessThreads + // fills for us with the number and pids of all the threads of our process + // it found and attached to. 
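// Editorial note, not part of the original header: an illustration of the
// IgnoreObject()/UnIgnoreObject() pair documented earlier in this header;
// MyCache is a hypothetical long-lived type reachable only through a
// hidden registry:
//
//   MyCache* cache = new MyCache;
//   HeapLeakChecker::IgnoreObject(cache);     // cache and everything it
//                                             // references is not a leak
//   // ... later, while the object is still live ...
//   HeapLeakChecker::UnIgnoreObject(cache);
//   delete cache;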
+ static int IgnoreLiveThreadsLocked(void* parameter, + int num_threads, + pid_t* thread_pids, + va_list ap); + + // Helper for IgnoreAllLiveObjectsLocked and IgnoreLiveThreadsLocked + // that we prefer to execute from IgnoreLiveThreadsLocked + // while all threads are stopped. + // This helper does live object discovery and ignoring + // for all objects that are reachable from everything + // not related to thread stacks and registers. + static void IgnoreNonThreadLiveObjectsLocked(); + + // Helper for IgnoreNonThreadLiveObjectsLocked and IgnoreLiveThreadsLocked + // to discover and ignore all heap objects + // reachable from currently considered live objects + // (live_objects static global variable in our .cc file). + // "name", "name2" are two strings that we print one after another + // in a debug message to describe what kind of live object sources + // are being used. + static void IgnoreLiveObjectsLocked(const char* name, const char* name2); + + // Runs REGISTER_HEAPCHECK_CLEANUP cleanups and potentially + // calls DoMainHeapCheck + static void RunHeapCleanups(); + + // Do the overall whole-program heap leak check if needed; + // returns true if it did the leak check. + static bool DoMainHeapCheck(); + + // Type of task for UseProcMapsLocked + enum ProcMapsTask { + RECORD_GLOBAL_DATA, + DISABLE_LIBRARY_ALLOCS + }; + + // Success/Error Return codes for UseProcMapsLocked. + enum ProcMapsResult { + PROC_MAPS_USED, + CANT_OPEN_PROC_MAPS, + NO_SHARED_LIBS_IN_PROC_MAPS + }; + + // Read /proc/self/maps, parse it, and do the 'proc_maps_task' for each line. + static ProcMapsResult UseProcMapsLocked(ProcMapsTask proc_maps_task); + + // A ProcMapsTask to disable allocations from 'library' + // that is mapped to [start_address..end_address) + // (only if library is a certain system library). + static void DisableLibraryAllocsLocked(const char* library, + uintptr_t start_address, + uintptr_t end_address); + + // Return true iff "*ptr" points to a heap object + // ("*ptr" can point at the start or inside of a heap object + // so that this works e.g. for pointers to C++ arrays, C++ strings, + // multiple-inherited objects, or pointers to members). + // If so, we also fill *object_size for this object + // and move "*ptr" to point to the very start of the heap object. + static inline bool HaveOnHeapLocked(const void** ptr, size_t* object_size); + + // Helper to shut down the heap leak checker when it's not needed + // or can't function properly. + static void TurnItselfOffLocked(); + + // Internally-used c-tor to start whole-executable checking. + HeapLeakChecker(); + + // ----------------------------------------------------------------------- // + // Friends and externally accessed helpers. + + // Helper for VerifyHeapProfileTableStackGet in the unittest + // to get the recorded allocation caller for ptr, + // which must be a heap object. + static const void* GetAllocCaller(void* ptr); + friend void VerifyHeapProfileTableStackGet(); + + // This gets to execute before constructors for all global objects + static void BeforeConstructorsLocked(); + friend void HeapLeakChecker_BeforeConstructors(); + + // This gets to execute after destructors for all global objects + friend void HeapLeakChecker_AfterDestructors(); + + // ----------------------------------------------------------------------- // + // Member data.
+ + class SpinLock* lock_; // to make HeapLeakChecker objects thread-safe + const char* name_; // our remembered name (we own it) + // NULL means this leak checker is a noop + + // Snapshot taken when the checker was created. May be NULL + // for the global heap checker object. We use void* instead of + // HeapProfileTable::Snapshot* to avoid including heap-profile-table.h. + void* start_snapshot_; + + bool has_checked_; // if we have done the leak check, so these are ready: + ssize_t inuse_bytes_increase_; // bytes-in-use increase for this checker + ssize_t inuse_allocs_increase_; // allocations-in-use increase + // for this checker + bool keep_profiles_; // iff we should keep the heap profiles we've made + + // ----------------------------------------------------------------------- // + + // Disallow "evil" constructors. + HeapLeakChecker(const HeapLeakChecker&); + void operator=(const HeapLeakChecker&); +}; + + +// Holds a pointer that will not be traversed by the heap checker. +// Contrast with HeapLeakChecker::IgnoreObject(o), in which o and +// all objects reachable from o are ignored by the heap checker. +template <class T> +class HiddenPointer { + public: + explicit HiddenPointer(T* t) + : masked_t_(reinterpret_cast<uintptr_t>(t) ^ kHideMask) { + } + // Returns unhidden pointer. Be careful where you save the result. + T* get() const { return reinterpret_cast<T*>(masked_t_ ^ kHideMask); } + + private: + // Arbitrary value, but not such that xor'ing with it is likely + // to map one valid pointer to another valid pointer: + static const uintptr_t kHideMask = + static_cast<uintptr_t>(0xF03A5F7BF03A5F7Bll); + uintptr_t masked_t_; +}; + +// A class that exists solely to run its destructor. This class should not be +// used directly, but instead by the REGISTER_HEAPCHECK_CLEANUP macro below. +class PERFTOOLS_DLL_DECL HeapCleaner { + public: + typedef void (*void_function)(void); + HeapCleaner(void_function f); + static void RunHeapCleanups(); + private: + static std::vector<void_function>* heap_cleanups_; +}; + +// A macro to declare module heap check cleanup tasks +// (they run only if we are doing heap leak checking.) +// 'body' should be the cleanup code to run. 'name' doesn't matter, +// but must be unique amongst all REGISTER_HEAPCHECK_CLEANUP calls. +#define REGISTER_HEAPCHECK_CLEANUP(name, body) \ + namespace { \ + void heapcheck_cleanup_##name() { body; } \ + static HeapCleaner heapcheck_cleaner_##name(&heapcheck_cleanup_##name); \ + } + +#endif // BASE_HEAP_CHECKER_H_ diff --git a/third_party/tcmalloc/chromium/src/google/heap-profiler.h b/third_party/tcmalloc/chromium/src/google/heap-profiler.h new file mode 100644 index 0000000..5efaf64 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/heap-profiler.h @@ -0,0 +1,103 @@ +/* Copyright (c) 2005, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + * + * Module for heap-profiling. + * + * For full(er) information, see doc/heapprofile.html + * + * This module can be linked into your program with + * no slowdown caused by this unless you activate the profiler + * using one of the following methods: + * + * 1. Before starting the program, set the environment variable + * "HEAPPROFILE" to be the name of the file to which the profile + * data should be written. + * + * 2. Programmatically, start and stop the profiler using the + * routines "HeapProfilerStart(filename)" and "HeapProfilerStop()". + * + */ + +#ifndef BASE_HEAP_PROFILER_H_ +#define BASE_HEAP_PROFILER_H_ + +#include <stddef.h> + +/* Annoying stuff for windows; makes sure clients can import these functions */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Start profiling and arrange to write profile data to file names + * of the form: "prefix.0000", "prefix.0001", ... + */ +PERFTOOLS_DLL_DECL void HeapProfilerStart(const char* prefix); + +/* Returns true if we are currently profiling the heap. This is true + * between calls to HeapProfilerStart() and HeapProfilerStop(), and + * also if the program has been run with HEAPPROFILER, or some other + * way to turn on whole-program profiling. + */ +bool IsHeapProfilerRunning(); + +/* Stop heap profiling. Can be restarted again with HeapProfilerStart(), + * but the currently accumulated profiling information will be cleared. + */ +PERFTOOLS_DLL_DECL void HeapProfilerStop(); + +/* Dump a profile now - can be used for dumping at a hopefully + * quiescent state in your program, in order to more easily track down + * memory leaks. Will include the reason in the logged message + */ +PERFTOOLS_DLL_DECL void HeapProfilerDump(const char *reason); + +/* Generate current heap profiling information. + * Returns an empty string when heap profiling is not active. + * The returned pointer is a '\0'-terminated string allocated using malloc() + * and should be free()-ed as soon as the caller does not need it anymore. 
+ */ +PERFTOOLS_DLL_DECL char* GetHeapProfile(); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* BASE_HEAP_PROFILER_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/malloc_extension.h b/third_party/tcmalloc/chromium/src/google/malloc_extension.h new file mode 100644 index 0000000..0342843 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/malloc_extension.h @@ -0,0 +1,296 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Extra extensions exported by some malloc implementations. These +// extensions are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// extensions, and it will get default versions that do nothing. +// +// NOTE FOR C USERS: If you wish to use this functionality from within +// a C program, see malloc_extension_c.h. + +#ifndef BASE_MALLOC_EXTENSION_H_ +#define BASE_MALLOC_EXTENSION_H_ + +#include <stddef.h> +// I can't #include config.h in this public API file, but I should +// really use configure (and make malloc_extension.h a .in file) to +// figure out if the system has stdint.h or not. But I'm lazy, so +// for now I'm assuming it's a problem only with MSVC. +#ifndef _MSC_VER +#include <stdint.h> +#endif +#include <string> + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +static const int kMallocHistogramSize = 64; + +// One day, we could support other types of writers (perhaps for C?) +typedef std::string MallocExtensionWriter; + +namespace base { +struct MallocRange; +} + +// The default implementations of the following routines do nothing. +// All implementations should be thread-safe; the current one +// (TCMallocImplementation) is. 
+class PERFTOOLS_DLL_DECL MallocExtension { + public: + virtual ~MallocExtension(); + + // Call this very early in the program execution -- say, in a global + // constructor -- to set up parameters and state needed by all + // instrumented malloc implementations. One example: this routine + // sets environment variables to tell STL to use libc's malloc() + // instead of doing its own memory management. This is safe to call + // multiple times, as long as each time is before threads start up. + static void Initialize(); + + // See "verify_memory.h" for what these routines do + virtual bool VerifyAllMemory(); + virtual bool VerifyNewMemory(void* p); + virtual bool VerifyArrayNewMemory(void* p); + virtual bool VerifyMallocMemory(void* p); + virtual bool MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]); + + // Get a human-readable description of the current state of the malloc + // data structures. The state is stored as a null-terminated string + // in a prefix of "buffer[0,buffer_length-1]". + // REQUIRES: buffer_length > 0. + virtual void GetStats(char* buffer, int buffer_length); + + // Outputs to "writer" a sample of live objects and the stack traces + // that allocated these objects. The format of the returned output + // is equivalent to the output of the heap profiler and can + // therefore be passed to "pprof". + virtual void GetHeapSample(MallocExtensionWriter* writer); + + // Outputs to "writer" the stack traces that caused growth in the + // address space size. The format of the returned output is + // equivalent to the output of the heap profiler and can therefore + // be passed to "pprof". + virtual void GetHeapGrowthStacks(MallocExtensionWriter* writer); + + // Invokes func(arg, range) for every controlled memory + // range. *range is filled in with information about the range. + // + // This is a best-effort interface useful only for performance + // analysis. The implementation may not call func at all. + typedef void (RangeFunction)(void*, const base::MallocRange*); + virtual void Ranges(void* arg, RangeFunction func); + + // ------------------------------------------------------------------- + // Control operations for getting and setting malloc implementation + // specific parameters. Some currently useful properties: + // + // generic + // ------- + // "generic.current_allocated_bytes" + // Number of bytes currently allocated by application + // This property is not writable. + // + // "generic.heap_size" + // Number of bytes in the heap == + // current_allocated_bytes + + // fragmentation + + // freed memory regions + // This property is not writable. + // + // tcmalloc + // -------- + // "tcmalloc.max_total_thread_cache_bytes" + // Upper limit on total number of bytes stored across all + // per-thread caches. Default: 16MB. + // + // "tcmalloc.current_total_thread_cache_bytes" + // Number of bytes used across all thread caches. + // This property is not writable. + // + // "tcmalloc.slack_bytes" + // Number of bytes allocated from system, but not currently in + // use by malloced objects. I.e., bytes available for + // allocation without needing more bytes from system. It is + // the sum of pageheap_free_bytes and pageheap_unmapped_bytes. + // This property is not writable. + // + // "tcmalloc.pageheap_free_bytes" + // Number of bytes in free, mapped pages in pageheap + // This property is not writable.
+ // + // "tcmalloc.pageheap_unmapped_bytes" + // Number of bytes in free, unmapped pages in pageheap + // This property is not writable. + // + // ------------------------------------------------------------------- + + // Get the named "property"'s value. Returns true if the property + // is known. Returns false if the property is not a valid property + // name for the current malloc implementation. + // REQUIRES: property != NULL; value != NULL + virtual bool GetNumericProperty(const char* property, size_t* value); + + // Set the named "property"'s value. Returns true if the property + // is known and writable. Returns false if the property is not a + // valid property name for the current malloc implementation, or + // is not writable. + // REQUIRES: property != NULL + virtual bool SetNumericProperty(const char* property, size_t value); + + // Mark the current thread as "idle". This routine may optionally + // be called by threads as a hint to the malloc implementation that + // any thread-specific resources should be released. Note: this may + // be an expensive routine, so it should not be called too often. + // + // Also, if the code that calls this routine will go to sleep for + // a while, it should take care to not allocate anything between + // the call to this routine and the beginning of the sleep. + // + // Most malloc implementations ignore this routine. + virtual void MarkThreadIdle(); + + // Mark the current thread as "busy". This routine should be + // called after MarkThreadIdle() if the thread will now do more + // work. If this method is not called, performance may suffer. + // + // Most malloc implementations ignore this routine. + virtual void MarkThreadBusy(); + + // Try to release num_bytes of free memory back to the operating + // system for reuse. Use this extension with caution -- to get this + // memory back may require faulting pages back in by the OS, and + // that may be slow. (Currently only implemented in tcmalloc.) + // A negative values for num_bytes results in a noop. + virtual void ReleaseToSystem(ssize_t num_bytes); + + // Same as ReleaseToSystem() but release as much memory as possible. + virtual void ReleaseFreeMemory(); + + // Sets the rate at which we release unused memory to the system. + // Zero means we never release memory back to the system. Increase + // this flag to return memory faster; decrease it to return memory + // slower. Reasonable rates are in the range [0,10]. (Currently + // only implemented in tcmalloc). + virtual void SetMemoryReleaseRate(double rate); + + // Gets the release rate. Returns a value < 0 if unknown. + virtual double GetMemoryReleaseRate(); + + // Returns the estimated number of bytes that will be allocated for + // a request of "size" bytes. This is an estimate: an allocation of + // SIZE bytes may reserve more bytes, but will never reserve less. + // (Currently only implemented in tcmalloc, other implementations + // always return SIZE.) + virtual size_t GetEstimatedAllocatedSize(size_t size); + + // Returns the actual number N of bytes reserved by tcmalloc for the + // pointer p. The client is allowed to use the range of bytes + // [p, p+N) in any way it wishes (i.e. N is the "usable size" of this + // allocation). This number may be equal to or greater than the number + // of bytes requested when p was allocated. 
+ // p must have been allocated by this malloc implementation, + // must not be an interior pointer -- that is, must be exactly + // the pointer returned to by malloc() et al., not some offset + // from that -- and should not have been freed yet. p may be NULL. + // (Currently only implemented in tcmalloc; other implementations + // will return 0.) + virtual size_t GetAllocatedSize(void* p); + + // The current malloc implementation. Always non-NULL. + static MallocExtension* instance(); + + // Change the malloc implementation. Typically called by the + // malloc implementation during initialization. + static void Register(MallocExtension* implementation); + + protected: + // Get a list of stack traces of sampled allocation points. Returns + // a pointer to a "new[]-ed" result array, and stores the sample + // period in "sample_period". + // + // The state is stored as a sequence of adjacent entries + // in the returned array. Each entry has the following form: + // uintptr_t count; // Number of objects with following trace + // uintptr_t size; // Total size of objects with following trace + // uintptr_t depth; // Number of PC values in stack trace + // void* stack[depth]; // PC values that form the stack trace + // + // The list of entries is terminated by a "count" of 0. + // + // It is the responsibility of the caller to "delete[]" the returned array. + // + // May return NULL to indicate no results. + // + // This is an internal extension. Callers should use the more + // convenient "GetHeapSample(string*)" method defined above. + virtual void** ReadStackTraces(int* sample_period); + + // Like ReadStackTraces(), but returns stack traces that caused growth + // in the address space size. + virtual void** ReadHeapGrowthStackTraces(); +}; + +namespace base { + +// Information passed per range. More fields may be added later. +struct MallocRange { + enum Type { + INUSE, // Application is using this range + FREE, // Range is currently free + UNMAPPED, // Backing physical memory has been returned to the OS + UNKNOWN, + // More enum values may be added in the future + }; + + uintptr_t address; // Address of range + size_t length; // Byte length of range + Type type; // Type of this range + double fraction; // Fraction of range that is being used (0 if !INUSE) + + // Perhaps add the following: + // - stack trace if this range was sampled + // - heap growth stack trace if applicable to this range + // - age when allocated (for inuse) or freed (if not in use) +}; + +} // namespace base + +#endif // BASE_MALLOC_EXTENSION_H_ diff --git a/third_party/tcmalloc/chromium/src/google/malloc_extension_c.h b/third_party/tcmalloc/chromium/src/google/malloc_extension_c.h new file mode 100644 index 0000000..95f7f4c --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/malloc_extension_c.h @@ -0,0 +1,87 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * -- + * Author: Craig Silverstein + * + * C shims for the C++ malloc_extension.h. See malloc_extension.h for + * details. Note these C shims always work on + * MallocExtension::instance(); it is not possible to have more than + * one MallocExtension object in C applications. + */ + +#ifndef _MALLOC_EXTENSION_C_H_ +#define _MALLOC_EXTENSION_C_H_ + +#include <stddef.h> +#include <sys/types.h> + +/* Annoying stuff for windows -- makes sure clients can import these fns */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define kMallocExtensionHistogramSize 64 + +PERFTOOLS_DLL_DECL int MallocExtension_VerifyAllMemory(void); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyNewMemory(void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyArrayNewMemory(void* p); +PERFTOOLS_DLL_DECL int MallocExtension_VerifyMallocMemory(void* p); +PERFTOOLS_DLL_DECL int MallocExtension_MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocExtensionHistogramSize]); +PERFTOOLS_DLL_DECL void MallocExtension_GetStats(char* buffer, int buffer_length); + +/* TODO(csilvers): write a C version of these routines, that perhaps + * takes a function ptr and a void *. + */ +/* void MallocExtension_GetHeapSample(string* result); */ +/* void MallocExtension_GetHeapGrowthStacks(string* result); */ + +PERFTOOLS_DLL_DECL int MallocExtension_GetNumericProperty(const char* property, size_t* value); +PERFTOOLS_DLL_DECL int MallocExtension_SetNumericProperty(const char* property, size_t value); +PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadIdle(void); +PERFTOOLS_DLL_DECL void MallocExtension_MarkThreadBusy(void); +PERFTOOLS_DLL_DECL void MallocExtension_ReleaseToSystem(ssize_t num_bytes); +PERFTOOLS_DLL_DECL void MallocExtension_ReleaseFreeMemory(void); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetEstimatedAllocatedSize(size_t size); +PERFTOOLS_DLL_DECL size_t MallocExtension_GetAllocatedSize(void* p); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* _MALLOC_EXTENSION_C_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/malloc_hook.h b/third_party/tcmalloc/chromium/src/google/malloc_hook.h new file mode 100644 index 0000000..f2713e9 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/malloc_hook.h @@ -0,0 +1,196 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Some of our malloc implementations can invoke the following hooks +// whenever memory is allocated or deallocated. If the hooks are +// NULL, they are not invoked. MallocHook is thread-safe, and things +// you do before calling SetFooHook(MyHook) are visible to any +// resulting calls to MyHook. Hooks must be thread-safe, and if you +// write: +// +// MallocHook::NewHook old_new_hook_ = NULL; +// ... +// old_new_hook_ = MallocHook::SetNewHook(&MyNewHook); +// +// old_new_hook_ could still be NULL the first couple times MyNewHook +// is called. +// +// One important user of these hooks is the heap profiler. +// +// CAVEAT: If you add new MallocHook::Invoke* calls (not for chaining hooks), +// then those calls must be directly in the code of the (de)allocation +// function that is provided to the user and that function must have +// an ATTRIBUTE_SECTION(malloc_hook) attribute. +// +// Note: Get*Hook() and Invoke*Hook() functions are defined in +// malloc_hook-inl.h. If you need to get or invoke a hook (which you +// shouldn't unless you're part of tcmalloc), be sure to #include +// malloc_hook-inl.h in addition to malloc_hook.h. +// +// NOTE FOR C USERS: If you want to use malloc_hook functionality from +// a C program, #include malloc_hook_c.h instead of this file. +// +// TODO(csilvers): support a non-inlined function called +// Assert*HookIs()? This is the context in which I normally see +// Get*Hook() called in non-tcmalloc code. + +#ifndef _MALLOC_HOOK_H_ +#define _MALLOC_HOOK_H_ + +#include <stddef.h> +#include <sys/types.h> +extern "C" { +#include <google/malloc_hook_c.h> // a C version of the malloc_hook interface +} + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +// Note: malloc_hook_c.h defines MallocHook_*Hook and +// MallocHook_Set*Hook. 
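// Editorial note, not part of the original header: a hypothetical hook along
// the lines of the SetNewHook pattern sketched above (MyNewHook is
// illustrative only and must itself be thread-safe; a real hook should also
// avoid allocating memory):
//
//   static void MyNewHook(const void* ptr, size_t size) {
//     fprintf(stderr, "alloc %zu bytes at %p\n", size, ptr);
//   }
//   // ... during initialization ...
//   MallocHook::NewHook old_hook = MallocHook::SetNewHook(&MyNewHook);
//   // ... later, to restore whatever was installed before ...
//   MallocHook::SetNewHook(old_hook);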
The version of these inside the MallocHook +// class are defined in terms of the malloc_hook_c version. See +// malloc_hook_c.h for details of these types/functions. + +class PERFTOOLS_DLL_DECL MallocHook { + public: + // The NewHook is invoked whenever an object is allocated. + // It may be passed NULL if the allocator returned NULL. + typedef MallocHook_NewHook NewHook; + inline static NewHook GetNewHook(); + inline static NewHook SetNewHook(NewHook hook) { + return MallocHook_SetNewHook(hook); + } + inline static void InvokeNewHook(const void* p, size_t s); + + // The DeleteHook is invoked whenever an object is deallocated. + // It may be passed NULL if the caller is trying to delete NULL. + typedef MallocHook_DeleteHook DeleteHook; + inline static DeleteHook GetDeleteHook(); + inline static DeleteHook SetDeleteHook(DeleteHook hook) { + return MallocHook_SetDeleteHook(hook); + } + inline static void InvokeDeleteHook(const void* p); + + // The PreMmapHook is invoked with mmap or mmap64 arguments just + // before the call is actually made. Such a hook may be useful + // in memory limited contexts, to catch allocations that will exceed + // a memory limit, and take outside actions to increase that limit. + typedef MallocHook_PreMmapHook PreMmapHook; + inline static PreMmapHook GetPreMmapHook(); + inline static PreMmapHook SetPreMmapHook(PreMmapHook hook) { + return MallocHook_SetPreMmapHook(hook); + } + inline static void InvokePreMmapHook(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + + // The MmapHook is invoked whenever a region of memory is mapped. + // It may be passed MAP_FAILED if the mmap failed. + typedef MallocHook_MmapHook MmapHook; + inline static MmapHook GetMmapHook(); + inline static MmapHook SetMmapHook(MmapHook hook) { + return MallocHook_SetMmapHook(hook); + } + inline static void InvokeMmapHook(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + + // The MunmapHook is invoked whenever a region of memory is unmapped. + typedef MallocHook_MunmapHook MunmapHook; + inline static MunmapHook GetMunmapHook(); + inline static MunmapHook SetMunmapHook(MunmapHook hook) { + return MallocHook_SetMunmapHook(hook); + } + inline static void InvokeMunmapHook(const void* p, size_t size); + + // The MremapHook is invoked whenever a region of memory is remapped. + typedef MallocHook_MremapHook MremapHook; + inline static MremapHook GetMremapHook(); + inline static MremapHook SetMremapHook(MremapHook hook) { + return MallocHook_SetMremapHook(hook); + } + inline static void InvokeMremapHook(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); + + // The PreSbrkHook is invoked just before sbrk is called -- except when + // the increment is 0. This is because sbrk(0) is often called + // to get the top of the memory stack, and is not actually a + // memory-allocation call. It may be useful in memory-limited contexts, + // to catch allocations that will exceed the limit and take outside + // actions to increase such a limit. + typedef MallocHook_PreSbrkHook PreSbrkHook; + inline static PreSbrkHook GetPreSbrkHook(); + inline static PreSbrkHook SetPreSbrkHook(PreSbrkHook hook) { + return MallocHook_SetPreSbrkHook(hook); + } + inline static void InvokePreSbrkHook(ptrdiff_t increment); + + // The SbrkHook is invoked whenever sbrk is called -- except when + // the increment is 0. 
This is because sbrk(0) is often called + // to get the top of the memory stack, and is not actually a + // memory-allocation call. + typedef MallocHook_SbrkHook SbrkHook; + inline static SbrkHook GetSbrkHook(); + inline static SbrkHook SetSbrkHook(SbrkHook hook) { + return MallocHook_SetSbrkHook(hook); + } + inline static void InvokeSbrkHook(const void* result, ptrdiff_t increment); + + // Get the current stack trace. Try to skip all routines up to and + // and including the caller of MallocHook::Invoke*. + // Use "skip_count" (similarly to GetStackTrace from stacktrace.h) + // as a hint about how many routines to skip if better information + // is not available. + inline static int GetCallerStackTrace(void** result, int max_depth, + int skip_count) { + return MallocHook_GetCallerStackTrace(result, max_depth, skip_count); + } +}; + +#endif /* _MALLOC_HOOK_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/malloc_hook_c.h b/third_party/tcmalloc/chromium/src/google/malloc_hook_c.h new file mode 100644 index 0000000..c870dae --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/malloc_hook_c.h @@ -0,0 +1,93 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * -- + * Author: Craig Silverstein + * + * C shims for the C++ malloc_hook.h. See malloc_hook.h for details + * on how to use these. + */ + +#ifndef _MALLOC_HOOK_C_H_ +#define _MALLOC_HOOK_C_H_ + +#include <stddef.h> +#include <sys/types.h> + +/* Get the current stack trace. Try to skip all routines up to and + * and including the caller of MallocHook::Invoke*. + * Use "skip_count" (similarly to GetStackTrace from stacktrace.h) + * as a hint about how many routines to skip if better information + * is not available. 
+ */ +int MallocHook_GetCallerStackTrace(void** result, int max_depth, + int skip_count); + + +typedef void (*MallocHook_NewHook)(const void* ptr, size_t size); +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook); + +typedef void (*MallocHook_DeleteHook)(const void* ptr); +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook); + +typedef void (*MallocHook_PreMmapHook)(const void *start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); +MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook); + +typedef void (*MallocHook_MmapHook)(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook); + +typedef void (*MallocHook_MunmapHook)(const void* ptr, size_t size); +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook); + +typedef void (*MallocHook_MremapHook)(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr); +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook); + +typedef void (*MallocHook_PreSbrkHook)(ptrdiff_t increment); +MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook); + +typedef void (*MallocHook_SbrkHook)(const void* result, ptrdiff_t increment); +MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook); + +#endif /* _MALLOC_HOOK_C_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/profiler.h b/third_party/tcmalloc/chromium/src/google/profiler.h new file mode 100644 index 0000000..74b936f --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/profiler.h @@ -0,0 +1,166 @@ +/* Copyright (c) 2005, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + * + * Module for CPU profiling based on periodic pc-sampling. 
+ * + * For full(er) information, see doc/cpuprofile.html + * + * This module can be linked into your program with + * no slowdown unless you activate the profiler + * using one of the following methods: + * + * 1. Before starting the program, set the environment variable + * "PROFILE" to be the name of the file to which the profile + * data should be written. + * + * 2. Programmatically, start and stop the profiler using the + * routines "ProfilerStart(filename)" and "ProfilerStop()". + * + * + * (Note: if using Linux 2.4 or earlier, only the main thread may be + * profiled.) + * + * Use pprof to view the resulting profile output. + * % pprof <path_to_executable> <profile_file_name> + * % pprof --gv <path_to_executable> <profile_file_name> + * + * These functions are thread-safe. + */ + +#ifndef BASE_PROFILER_H_ +#define BASE_PROFILER_H_ + +#include <time.h> /* For time_t */ + +/* Annoying stuff for windows; makes sure clients can import these functions */ +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Profiler options, for use with ProfilerStartWithOptions. To use: + * + * struct ProfilerOptions options; + * memset(&options, 0, sizeof options); + * + * then fill in fields as needed. + * + * This structure is intended to be usable from C code, so no constructor + * is provided to initialize it. (Use memset as described above). + */ +struct ProfilerOptions { + /* Filter function and argument. + * + * If filter_in_thread is not NULL, when a profiling tick is delivered + * the profiler will call: + * + * (*filter_in_thread)(filter_in_thread_arg) + * + * If it returns nonzero, the sample will be included in the profile. + * Note that filter_in_thread runs in a signal handler, so must be + * async-signal-safe. + * + * A typical use would be to set up filter results for each thread + * in the system before starting the profiler, then to make + * filter_in_thread be a very simple function which retrieves those + * results in an async-signal-safe way. Retrieval could be done + * using thread-specific data, or using a shared data structure that + * supports async-signal-safe lookups. + */ + int (*filter_in_thread)(void *arg); + void *filter_in_thread_arg; +}; + +/* Start profiling and write profile info into fname. + * + * This is equivalent to calling ProfilerStartWithOptions(fname, NULL). + */ +PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname); + +/* Start profiling and write profile info into fname. + * + * The profiler is configured using the options given by 'options'. + * Options which are not specified are given default values. + * + * 'options' may be NULL, in which case all are given default values. + * + * Returns nonzero if profiling was started successfully, and zero otherwise. + */ +PERFTOOLS_DLL_DECL int ProfilerStartWithOptions( + const char *fname, const struct ProfilerOptions *options); + +/* Stop profiling. Can be started again with ProfilerStart(), but + * the currently accumulated profiling data will be cleared. + */ +PERFTOOLS_DLL_DECL void ProfilerStop(); + +/* Flush any currently buffered profiling state to the profile file. + * Has no effect if the profiler has not been started. + */ +PERFTOOLS_DLL_DECL void ProfilerFlush(); + + +/* DEPRECATED: these functions were used to enable/disable profiling + * in the current thread, but no longer do anything.
+ */ +PERFTOOLS_DLL_DECL void ProfilerEnable(); +PERFTOOLS_DLL_DECL void ProfilerDisable(); + +/* Returns nonzero if profiling is currently enabled, zero if it's not. */ +PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads(); + +/* Routine for registering new threads with the profiler. + */ +PERFTOOLS_DLL_DECL void ProfilerRegisterThread(); + +/* Stores state about profiler's current status into "*state". */ +struct ProfilerState { + int enabled; /* Is profiling currently enabled? */ + time_t start_time; /* If enabled, when was profiling started? */ + char profile_name[1024]; /* Name of profile file being written, or '\0' */ + int samples_gathered; /* Number of samples gathered so far (or 0) */ +}; +PERFTOOLS_DLL_DECL void ProfilerGetCurrentState(struct ProfilerState* state); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* BASE_PROFILER_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/stacktrace.h b/third_party/tcmalloc/chromium/src/google/stacktrace.h new file mode 100644 index 0000000..8188ce3 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/stacktrace.h @@ -0,0 +1,116 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Routines to extract the current stack trace. These functions are +// thread-safe. + +#ifndef GOOGLE_STACKTRACE_H_ +#define GOOGLE_STACKTRACE_H_ + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + + +// Skips the most recent "skip_count" stack frames (also skips the +// frame generated for the "GetStackFrames" routine itself), and then +// records the pc values for up to the next "max_depth" frames in +// "pcs", and the corresponding stack frame sizes in "sizes". Returns +// the number of values recorded in "pcs"/"sizes".
+// +// Example: +// main() { foo(); } +// foo() { bar(); } +// bar() { +// void* pcs[10]; +// int sizes[10]; +// int depth = GetStackFrames(pcs, sizes, 10, 1); +// } +// +// The GetStackFrames call will skip the frame for "bar". It will +// return 2 and will produce pc values that map to the following +// procedures: +// pcs[0] foo +// pcs[1] main +// (Actually, there may be a few more entries after "main" to account for +// startup procedures.) +// And corresponding stack frame sizes will also be recorded: +// sizes[0] 16 +// sizes[1] 16 +// (Stack frame sizes of 16 above are just for illustration purposes.) +// Stack frame sizes of 0 or less indicate that those frame sizes couldn't +// be identified. +// +// This routine may return fewer stack frame entries than are +// available. Also note that "pcs" and "sizes" must both be non-NULL. +extern PERFTOOLS_DLL_DECL int GetStackFrames(void** pcs, int* sizes, int max_depth, + int skip_count); + +// Same as above, but to be used from a signal handler. The "uc" parameter +// should be the pointer to ucontext_t which was passed as the 3rd parameter +// to sa_sigaction signal handler. It may help the unwinder to get a +// better stack trace under certain conditions. The "uc" may safely be NULL. +extern PERFTOOLS_DLL_DECL int GetStackFramesWithContext(void** pcs, int* sizes, int max_depth, + int skip_count, const void *uc); + +// This is similar to the GetStackFrames routine, except that it returns +// the stack trace only, and not the stack frame sizes as well. +// Example: +// main() { foo(); } +// foo() { bar(); } +// bar() { +// void* result[10]; +// int depth = GetStackTrace(result, 10, 1); +// } +// +// This produces: +// result[0] foo +// result[1] main +// .... ... +// +// "result" must not be NULL. +extern PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth, + int skip_count); + +// Same as above, but to be used from a signal handler. The "uc" parameter +// should be the pointer to ucontext_t which was passed as the 3rd parameter +// to sa_sigaction signal handler. It may help the unwinder to get a +// better stack trace under certain conditions. The "uc" may safely be NULL. +extern PERFTOOLS_DLL_DECL int GetStackTraceWithContext(void** result, int max_depth, + int skip_count, const void *uc); + +#endif /* GOOGLE_STACKTRACE_H_ */ diff --git a/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in new file mode 100644 index 0000000..cf62c70 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/google/tcmalloc.h.in @@ -0,0 +1,108 @@ +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW /* I guess we're not on a glibc system */ +# define __THROW /* __THROW is just an optimization, so ok to make it "" */ +#endif + +// Define the version number so folks can check against it +#define TC_VERSION_MAJOR @TC_VERSION_MAJOR@ +#define TC_VERSION_MINOR @TC_VERSION_MINOR@ +#define TC_VERSION_PATCH "@TC_VERSION_PATCH@" +#define TC_VERSION_STRING "google-perftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@" + +#include <stdlib.h> // for struct mallinfo, if it's defined + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +#include <new> // for nothrow_t +extern "C" { +#endif + // Returns a human-readable version string. If major, minor, + // and/or patch are not NULL, they are set to the major version, + // minor version, and patch-code (a string, usually ""). 
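// Editorial note, not part of the original header: an illustrative call
// sequence for the tc_* entry points declared just below (the sizes and
// variable names are arbitrary):
//
//   int major, minor;
//   const char* patch;
//   const char* ver = tc_version(&major, &minor, &patch);  // human-readable version
//   void* p = tc_malloc(64);                               // same semantics as malloc(64)
//   tc_free(p);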
+ PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) __THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) __THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) __THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) __THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) __THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) __THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) __THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW; +#if @ac_cv_have_struct_mallinfo@ + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW; +#endif + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; + + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) __THROW; +} +#endif + +#endif // #ifndef TCMALLOC_TCMALLOC_H_ diff --git a/third_party/tcmalloc/chromium/src/heap-checker-bcad.cc b/third_party/tcmalloc/chromium/src/heap-checker-bcad.cc new file mode 100644 index 0000000..87d3d87 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/heap-checker-bcad.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. 
+// +// Author: Maxim Lifantsev +// +// A file to ensure that components of heap leak checker run before +// all global object constructors and after all global object +// destructors. +// +// This file must be the last library any binary links against. +// Otherwise, the heap checker may not be able to run early enough to +// catalog all the global objects in your program. If this happens, +// and later in the program you allocate memory and have one of these +// "uncataloged" global objects point to it, the heap checker will +// consider that allocation to be a leak, even though it's not (since +// the allocated object is reachable from global data and hence "live"). + +#include <stdlib.h> // for abort() +#include <google/malloc_extension.h> + +// A dummy variable to refer from heap-checker.cc. This is to make +// sure this file is not optimized out by the linker. +bool heap_leak_checker_bcad_variable; + +extern void HeapLeakChecker_BeforeConstructors(); // in heap-checker.cc +extern void HeapLeakChecker_AfterDestructors(); // in heap-checker.cc + +// A helper class to ensure that some components of heap leak checking +// can happen before construction and after destruction +// of all global/static objects. +class HeapLeakCheckerGlobalPrePost { + public: + HeapLeakCheckerGlobalPrePost() { + if (count_ == 0) { + HeapLeakChecker_BeforeConstructors(); + // This needs to be called before the first allocation of an STL + // object, but after libc is done setting up threads (because it + // calls setenv, which requires a thread-aware errno). By + // putting it here, we hope it's the first bit of code executed + // after the libc global-constructor code. + MallocExtension::Initialize(); + } + ++count_; + } + ~HeapLeakCheckerGlobalPrePost() { + if (count_ <= 0) abort(); + --count_; + if (count_ == 0) HeapLeakChecker_AfterDestructors(); + } + private: + // Counter of constructions/destructions of objects of this class + // (just in case there are more than one of them). + static int count_; +}; + +int HeapLeakCheckerGlobalPrePost::count_ = 0; + +// The early-construction/late-destruction global object. +static const HeapLeakCheckerGlobalPrePost heap_leak_checker_global_pre_post; diff --git a/third_party/tcmalloc/chromium/src/heap-checker.cc b/third_party/tcmalloc/chromium/src/heap-checker.cc new file mode 100644 index 0000000..59288e6 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/heap-checker.cc @@ -0,0 +1,2369 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Maxim Lifantsev +// + +#include "config.h" + +#include <fcntl.h> // for O_RDONLY (we use syscall to do actual reads) +#include <string.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <assert.h> + +#ifdef HAVE_LINUX_PTRACE_H +#include <linux/ptrace.h> +#endif +#ifdef HAVE_SYS_SYSCALL_H +#include <sys/syscall.h> +#endif +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +#include <wtypes.h> +#include <winbase.h> +#undef ERROR // windows defines these as macros, which can cause trouble +#undef max +#undef min +#endif + +#include <string> +#include <vector> +#include <map> +#include <set> +#include <algorithm> +#include <functional> + +#include <google/heap-checker.h> + +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/logging.h" +#include <google/stacktrace.h> +#include "base/commandlineflags.h" +#include "base/elfcore.h" // for i386_regs +#include "base/thread_lister.h" +#include "heap-profile-table.h" +#include "base/low_level_alloc.h" +#include "malloc_hook-inl.h" +#include <google/malloc_hook.h> +#include <google/malloc_extension.h> +#include "maybe_threads.h" +#include "memory_region_map.h" +#include "base/spinlock.h" +#include "base/sysinfo.h" +#include "base/stl_allocator.h" + +using std::string; +using std::basic_string; +using std::pair; +using std::map; +using std::set; +using std::vector; +using std::swap; +using std::make_pair; +using std::min; +using std::max; +using std::less; +using std::char_traits; + +// This is the default if you don't link in -lprofiler +extern "C" { +ATTRIBUTE_WEAK PERFTOOLS_DLL_DECL bool ProfilingIsEnabledForAllThreads(); +bool ProfilingIsEnabledForAllThreads() { return false; } +} + +//---------------------------------------------------------------------- +// Flags that control heap-checking +//---------------------------------------------------------------------- + +DEFINE_string(heap_check, + EnvToString("HEAPCHECK", ""), + "The heap leak checking to be done over the whole executable: " + "\"minimal\", \"normal\", \"strict\", " + "\"draconian\", \"as-is\", and \"local\" " + " or the empty string are the supported choices. 
" + "(See HeapLeakChecker::InternalInitStart for details.)"); + +DEFINE_bool(heap_check_report, true, "Obsolete"); + +DEFINE_bool(heap_check_before_constructors, + true, + "deprecated; pretty much always true now"); + +DEFINE_bool(heap_check_after_destructors, + EnvToBool("HEAP_CHECK_AFTER_DESTRUCTORS", false), + "If overall heap check is to end after global destructors " + "or right after all REGISTER_HEAPCHECK_CLEANUP's"); + +DEFINE_bool(heap_check_strict_check, true, "Obsolete"); + +DEFINE_bool(heap_check_ignore_global_live, + EnvToBool("HEAP_CHECK_IGNORE_GLOBAL_LIVE", true), + "If overall heap check is to ignore heap objects reachable " + "from the global data"); + +DEFINE_bool(heap_check_identify_leaks, + EnvToBool("HEAP_CHECK_IDENTIFY_LEAKS", false), + "If heap check should generate the addresses of the leaked " + "objects in the memory leak profiles. This may be useful " + "in tracking down leaks where only a small fraction of " + "objects allocated at the same stack trace are leaked."); + +DEFINE_bool(heap_check_ignore_thread_live, + EnvToBool("HEAP_CHECK_IGNORE_THREAD_LIVE", true), + "If set to true, objects reachable from thread stacks " + "and registers are not reported as leaks"); + +DEFINE_bool(heap_check_test_pointer_alignment, + EnvToBool("HEAP_CHECK_TEST_POINTER_ALIGNMENT", false), + "Set to true to check if the found leak can be due to " + "use of unaligned pointers"); + +// A reasonable default to handle pointers inside of typical class objects: +// Too low and we won't be able to traverse pointers to normally-used +// nested objects and base parts of multiple-inherited objects. +// Too high and it will both slow down leak checking (FindInsideAlloc +// in HaveOnHeapLocked will get slower when there are large on-heap objects) +// and make it probabilistically more likely to miss leaks +// of large-sized objects. +static const int64 kHeapCheckMaxPointerOffset = 1024; +DEFINE_int64(heap_check_max_pointer_offset, + EnvToInt("HEAP_CHECK_MAX_POINTER_OFFSET", + kHeapCheckMaxPointerOffset), + "Largest pointer offset for which we traverse " + "pointers going inside of heap allocated objects. " + "Set to -1 to use the actual largest heap object size."); + +DEFINE_bool(heap_check_run_under_gdb, + EnvToBool("HEAP_CHECK_RUN_UNDER_GDB", false), + "If false, turns off heap-checking library when running under gdb " + "(normally, set to 'true' only when debugging the heap-checker)"); + +DEFINE_int32(heap_check_delay_seconds, 0, + "Number of seconds to delay on-exit heap checking." + " If you set this flag," + " you may also want to set exit_timeout_seconds in order to" + " avoid exit timeouts.\n" + "NOTE: This flag is to be used only to help diagnose issues" + " where it is suspected that the heap checker is reporting" + " false leaks that will disappear if the heap checker delays" + " its checks. Report any such issues to the heap-checker" + " maintainer(s)."); + +//---------------------------------------------------------------------- + +DEFINE_string(heap_profile_pprof, + EnvToString("PPROF_PATH", "pprof"), + "OBSOLETE; not used"); + +DEFINE_string(heap_check_dump_directory, + EnvToString("HEAP_CHECK_DUMP_DIRECTORY", "/tmp"), + "Directory to put heap-checker leak dump information"); + + +//---------------------------------------------------------------------- +// HeapLeakChecker global data +//---------------------------------------------------------------------- + +// Global lock for all the global data of this module. 
+static SpinLock heap_checker_lock(SpinLock::LINKER_INITIALIZED);
+
+//----------------------------------------------------------------------
+
+// Heap profile prefix for leak checking profiles.
+// Gets assigned once when leak checking is turned on, then never modified.
+static const string* profile_name_prefix = NULL;
+
+// Whole-program heap leak checker.
+// Gets assigned once when leak checking is turned on,
+// then main_heap_checker is never deleted.
+static HeapLeakChecker* main_heap_checker = NULL;
+
+// Whether we will use main_heap_checker to do a check at program exit
+// automatically. In any case the user can ask for more checks on
+// main_heap_checker via GlobalChecker().
+static bool do_main_heap_check = false;
+
+// The heap profile we use to collect info about the heap.
+// This is created in HeapLeakChecker::BeforeConstructorsLocked
+// together with setting heap_checker_on (below) to true
+// and registering our new/delete malloc hooks;
+// similarly all are unset in HeapLeakChecker::TurnItselfOffLocked.
+static HeapProfileTable* heap_profile = NULL;
+
+// If we are doing (or going to do) any kind of heap-checking.
+static bool heap_checker_on = false;
+
+// pid of the process that does whole-program heap leak checking
+static pid_t heap_checker_pid = 0;
+
+// If we did heap profiling during global constructors execution
+static bool constructor_heap_profiling = false;
+
+// RAW_VLOG level we dump key INFO messages at. If you want to turn
+// off these messages, set the environment variable PERFTOOLS_VERBOSE=-1.
+static const int heap_checker_info_level = 0;
+
+//----------------------------------------------------------------------
+
+// Alignment at which all pointers in memory are supposed to be located;
+// use 1 if any alignment is ok.
+// The heap_check_test_pointer_alignment flag controls whether we also
+// try the value of 1.
+// The larger this can be, the smaller the chance of missing real leaks.
+static const size_t kPointerSourceAlignment = sizeof(void*);
+
+// Cancel our InitialMallocHook_* if present.
+static void CancelInitialMallocHooks();  // defined below
+
+//----------------------------------------------------------------------
+// HeapLeakChecker's own memory allocator that is
+// independent of the normal program allocator.
+//----------------------------------------------------------------------
+
+// Wrapper of LowLevelAlloc for STL_Allocator and direct use.
+// We always access this class while holding heap_checker_lock;
+// in particular, this lets us protect the period when threads are stopped
+// at random spots with ListAllProcessThreads by heap_checker_lock,
+// without worrying about the lock in LowLevelAlloc::Arena.
+// We rely on the fact that we use our own arena with its own lock here.
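+//
+// For example, the container typedefs later in this file plug this class
+// into STL_Allocator roughly as follows (a sketch of the pattern only,
+// mirroring the IgnoredObjectsMap typedef below):
+//   typedef map<uintptr_t, size_t, less<uintptr_t>,
+//               STL_Allocator<pair<const uintptr_t, size_t>,
+//                             HeapLeakChecker::Allocator>
+//              > IgnoredObjectsMap;
+// so all of their nodes are carved out of the private LowLevelAlloc arena
+// rather than out of the normal heap that is being leak-checked.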
+class HeapLeakChecker::Allocator { + public: + static void Init() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_DCHECK(arena_ == NULL, ""); + arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena()); + } + static void Shutdown() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + if (!LowLevelAlloc::DeleteArena(arena_) || alloc_count_ != 0) { + RAW_LOG(FATAL, "Internal heap checker leak of %d objects", alloc_count_); + } + } + static int alloc_count() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + return alloc_count_; + } + static void* Allocate(size_t n) { + RAW_DCHECK(arena_ && heap_checker_lock.IsHeld(), ""); + void* p = LowLevelAlloc::AllocWithArena(n, arena_); + if (p) alloc_count_ += 1; + return p; + } + static void Free(void* p) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + if (p) alloc_count_ -= 1; + LowLevelAlloc::Free(p); + } + // destruct, free, and make *p to be NULL + template<typename T> static void DeleteAndNull(T** p) { + (*p)->~T(); + Free(*p); + *p = NULL; + } + template<typename T> static void DeleteAndNullIfNot(T** p) { + if (*p != NULL) DeleteAndNull(p); + } + private: + static LowLevelAlloc::Arena* arena_; + static int alloc_count_; +}; + +LowLevelAlloc::Arena* HeapLeakChecker::Allocator::arena_ = NULL; +int HeapLeakChecker::Allocator::alloc_count_ = 0; + +//---------------------------------------------------------------------- +// HeapLeakChecker live object tracking components +//---------------------------------------------------------------------- + +// Cases of live object placement we distinguish +enum ObjectPlacement { + MUST_BE_ON_HEAP, // Must point to a live object of the matching size in the + // heap_profile map of the heap when we get to it + IGNORED_ON_HEAP, // Is a live (ignored) object on heap + MAYBE_LIVE, // Is a piece of writable memory from /proc/self/maps + IN_GLOBAL_DATA, // Is part of global data region of the executable + THREAD_DATA, // Part of a thread stack and a thread descriptor with TLS + THREAD_REGISTERS, // Values in registers of some thread +}; + +// Information about an allocated object +struct AllocObject { + const void* ptr; // the object + uintptr_t size; // its size + ObjectPlacement place; // where ptr points to + + AllocObject(const void* p, size_t s, ObjectPlacement l) + : ptr(p), size(s), place(l) { } +}; + +// All objects (memory ranges) ignored via HeapLeakChecker::IgnoreObject +// Key is the object's address; value is its size. +typedef map<uintptr_t, size_t, less<uintptr_t>, + STL_Allocator<pair<const uintptr_t, size_t>, + HeapLeakChecker::Allocator> + > IgnoredObjectsMap; +static IgnoredObjectsMap* ignored_objects = NULL; + +// All objects (memory ranges) that we consider to be the sources of pointers +// to live (not leaked) objects. +// At different times this holds (what can be reached from) global data regions +// and the objects we've been told to ignore. +// For any AllocObject::ptr "live_objects" is supposed to contain at most one +// record at any time. We maintain this by checking with the heap_profile map +// of the heap and removing the live heap objects we've handled from it. +// This vector is maintained as a stack and the frontier of reachable +// live heap objects in our flood traversal of them. 
+typedef vector<AllocObject,
+               STL_Allocator<AllocObject, HeapLeakChecker::Allocator>
+              > LiveObjectsStack;
+static LiveObjectsStack* live_objects = NULL;
+
+// A special string type that uses our allocator
+typedef basic_string<char, char_traits<char>,
+                     STL_Allocator<char, HeapLeakChecker::Allocator>
+                    > HCL_string;
+
+// A placeholder to fill in the starting values for live_objects
+// for each library so we can keep the library-name association for logging.
+typedef map<HCL_string, LiveObjectsStack, less<HCL_string>,
+            STL_Allocator<pair<const HCL_string, LiveObjectsStack>,
+                          HeapLeakChecker::Allocator>
+           > LibraryLiveObjectsStacks;
+static LibraryLiveObjectsStacks* library_live_objects = NULL;
+
+// Value stored in the map of disabled address ranges;
+// its key is the end of the address range.
+// We'll ignore allocations with a return address in a disabled range
+// if the address occurs at 'max_depth' or less in the stack trace.
+struct HeapLeakChecker::RangeValue {
+  uintptr_t start_address;  // the start of the range
+  int       max_depth;      // the maximal stack depth to disable at
+};
+typedef map<uintptr_t, HeapLeakChecker::RangeValue, less<uintptr_t>,
+            STL_Allocator<pair<const uintptr_t, HeapLeakChecker::RangeValue>,
+                          HeapLeakChecker::Allocator>
+           > DisabledRangeMap;
+// The disabled program counter address ranges for profile dumping
+// that are registered with HeapLeakChecker::DisableChecksFromToLocked.
+static DisabledRangeMap* disabled_ranges = NULL;
+
+// Set of stack tops.
+// These are used to consider live only appropriate chunks of the memory areas
+// that are used for stacks (and maybe thread-specific data as well)
+// so that we do not treat pointers from outdated stack frames as live.
+typedef set<uintptr_t, less<uintptr_t>,
+            STL_Allocator<uintptr_t, HeapLeakChecker::Allocator>
+           > StackTopSet;
+static StackTopSet* stack_tops = NULL;
+
+// A map of ranges of code addresses for the system libraries
+// that can mmap/mremap/sbrk-allocate memory regions for stacks
+// and thread-local storage that we want to consider as live global data.
+// Maps from the end address to the start address.
+typedef map<uintptr_t, uintptr_t, less<uintptr_t>,
+            STL_Allocator<pair<const uintptr_t, uintptr_t>,
+                          HeapLeakChecker::Allocator>
+           > GlobalRegionCallerRangeMap;
+static GlobalRegionCallerRangeMap* global_region_caller_ranges = NULL;
+
+// TODO(maxim): make our big data structs into their own modules
+
+// Disabler is implemented by keeping track of a per-thread count
+// of active Disabler objects.  Any objects allocated while the
+// count > 0 are not reported.
+
+#ifdef HAVE_TLS
+
+static __thread int thread_disable_counter
+// The "initial exec" model is faster than the default TLS model, at
+// the cost that you can't dlopen this library.  But dlopen on heap-checker
+// doesn't work anyway -- it must run before main -- so this is a good
+// trade-off.
+# ifdef HAVE___ATTRIBUTE__
+   __attribute__ ((tls_model ("initial-exec")))
+# endif
+   ;
+inline int get_thread_disable_counter() {
+  return thread_disable_counter;
+}
+inline void set_thread_disable_counter(int value) {
+  thread_disable_counter = value;
+}
+
+#else  // #ifdef HAVE_TLS
+
+static pthread_key_t thread_disable_counter_key;
+static int main_thread_counter;   // storage for use before main()
+static bool use_main_thread_counter = true;
+
+// TODO(csilvers): this is called from NewHook, in the middle of malloc().
+// If perftools_pthread_getspecific calls malloc, that will lead to an
+// infinite loop.
I don't know how to fix that, so I hope it never happens! +inline int get_thread_disable_counter() { + if (use_main_thread_counter) // means we're running really early + return main_thread_counter; + void* p = perftools_pthread_getspecific(thread_disable_counter_key); + return (intptr_t)p; // kinda evil: store the counter directly in the void* +} + +inline void set_thread_disable_counter(int value) { + if (use_main_thread_counter) { // means we're running really early + main_thread_counter = value; + return; + } + intptr_t pointer_sized_value = value; + // kinda evil: store the counter directly in the void* + void* p = (void*)pointer_sized_value; + // NOTE: this may call malloc, which will call NewHook which will call + // get_thread_disable_counter() which will call pthread_getspecific(). I + // don't know if anything bad can happen if we call getspecific() in the + // middle of a setspecific() call. It seems to work ok in practice... + perftools_pthread_setspecific(thread_disable_counter_key, p); +} + +// The idea here is that this initializer will run pretty late: after +// pthreads have been totally set up. At this point we can call +// pthreads routines, so we set those up. +class InitThreadDisableCounter { + public: + InitThreadDisableCounter() { + perftools_pthread_key_create(&thread_disable_counter_key, NULL); + // Set up the main thread's value, which we have a special variable for. + void* p = (void*)main_thread_counter; // store the counter directly + perftools_pthread_setspecific(thread_disable_counter_key, p); + use_main_thread_counter = false; + } +}; +InitThreadDisableCounter init_thread_disable_counter; + +#endif // #ifdef HAVE_TLS + +HeapLeakChecker::Disabler::Disabler() { + // It is faster to unconditionally increment the thread-local + // counter than to check whether or not heap-checking is on + // in a thread-safe manner. + int counter = get_thread_disable_counter(); + set_thread_disable_counter(counter + 1); + RAW_VLOG(1, "Increasing thread disable counter to %d", counter + 1); +} + +HeapLeakChecker::Disabler::~Disabler() { + int counter = get_thread_disable_counter(); + RAW_DCHECK(counter > 0, ""); + if (counter > 0) { + set_thread_disable_counter(counter - 1); + RAW_VLOG(1, "Decreasing thread disable counter to %d", counter); + } else { + RAW_VLOG(0, "Thread disable counter underflow : %d", counter); + } +} + +//---------------------------------------------------------------------- + +// The size of the largest heap object allocated so far. +static size_t max_heap_object_size = 0; +// The possible range of addresses that can point +// into one of the elements of heap_objects. 
+static uintptr_t min_heap_address = uintptr_t(-1LL); +static uintptr_t max_heap_address = 0; + +//---------------------------------------------------------------------- + +// Simple casting helpers for uintptr_t and void*: +template<typename T> +inline static const void* AsPtr(T addr) { + return reinterpret_cast<void*>(addr); +} +inline static uintptr_t AsInt(const void* ptr) { + return reinterpret_cast<uintptr_t>(ptr); +} + +//---------------------------------------------------------------------- + +// Our hooks for MallocHook +static void NewHook(const void* ptr, size_t size) { + if (ptr != NULL) { + const int counter = get_thread_disable_counter(); + const bool ignore = (counter > 0); + RAW_VLOG(7, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size, + int(counter)); + { SpinLockHolder l(&heap_checker_lock); + if (size > max_heap_object_size) max_heap_object_size = size; + uintptr_t addr = AsInt(ptr); + if (addr < min_heap_address) min_heap_address = addr; + addr += size; + if (addr > max_heap_address) max_heap_address = addr; + if (heap_checker_on) { + heap_profile->RecordAlloc(ptr, size, 0); + if (ignore) { + heap_profile->MarkAsIgnored(ptr); + } + } + } + RAW_VLOG(8, "Alloc Recorded: %p of %"PRIuS"", ptr, size); + } +} + +static void DeleteHook(const void* ptr) { + if (ptr != NULL) { + RAW_VLOG(7, "Recording Free %p", ptr); + { SpinLockHolder l(&heap_checker_lock); + if (heap_checker_on) heap_profile->RecordFree(ptr); + } + RAW_VLOG(8, "Free Recorded: %p", ptr); + } +} + +//---------------------------------------------------------------------- + +enum StackDirection { + GROWS_TOWARDS_HIGH_ADDRESSES, + GROWS_TOWARDS_LOW_ADDRESSES, + UNKNOWN_DIRECTION +}; + +// Determine which way the stack grows: + +static StackDirection ATTRIBUTE_NOINLINE GetStackDirection( + const uintptr_t *const ptr) { + uintptr_t x; + if (&x < ptr) + return GROWS_TOWARDS_LOW_ADDRESSES; + if (ptr < &x) + return GROWS_TOWARDS_HIGH_ADDRESSES; + + RAW_CHECK(0, ""); // Couldn't determine the stack direction. + + return UNKNOWN_DIRECTION; +} + +// Direction of stack growth (will initialize via GetStackDirection()) +static StackDirection stack_direction = UNKNOWN_DIRECTION; + +// This routine is called for every thread stack we know about to register it. 
+static void RegisterStackLocked(const void* top_ptr) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); + RAW_VLOG(1, "Thread stack at %p", top_ptr); + uintptr_t top = AsInt(top_ptr); + stack_tops->insert(top); // add for later use + + // make sure stack_direction is initialized + if (stack_direction == UNKNOWN_DIRECTION) { + stack_direction = GetStackDirection(&top); + } + + // Find memory region with this stack + MemoryRegionMap::Region region; + if (MemoryRegionMap::FindAndMarkStackRegion(top, ®ion)) { + // Make the proper portion of the stack live: + if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + top_ptr, region.end_addr - top); + live_objects->push_back(AllocObject(top_ptr, region.end_addr - top, + THREAD_DATA)); + } else { // GROWS_TOWARDS_HIGH_ADDRESSES + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + AsPtr(region.start_addr), + top - region.start_addr); + live_objects->push_back(AllocObject(AsPtr(region.start_addr), + top - region.start_addr, + THREAD_DATA)); + } + // not in MemoryRegionMap, look in library_live_objects: + } else if (FLAGS_heap_check_ignore_global_live) { + for (LibraryLiveObjectsStacks::iterator lib = library_live_objects->begin(); + lib != library_live_objects->end(); ++lib) { + for (LiveObjectsStack::iterator span = lib->second.begin(); + span != lib->second.end(); ++span) { + uintptr_t start = AsInt(span->ptr); + uintptr_t end = start + span->size; + if (start <= top && top < end) { + RAW_VLOG(2, "Stack at %p is inside /proc/self/maps chunk %p..%p", + top_ptr, AsPtr(start), AsPtr(end)); + // Shrink start..end region by chopping away the memory regions in + // MemoryRegionMap that land in it to undo merging of regions + // in /proc/self/maps, so that we correctly identify what portion + // of start..end is actually the stack region. + uintptr_t stack_start = start; + uintptr_t stack_end = end; + // can optimize-away this loop, but it does not run often + RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); + for (MemoryRegionMap::RegionIterator r = + MemoryRegionMap::BeginRegionLocked(); + r != MemoryRegionMap::EndRegionLocked(); ++r) { + if (top < r->start_addr && r->start_addr < stack_end) { + stack_end = r->start_addr; + } + if (stack_start < r->end_addr && r->end_addr <= top) { + stack_start = r->end_addr; + } + } + if (stack_start != start || stack_end != end) { + RAW_VLOG(2, "Stack at %p is actually inside memory chunk %p..%p", + top_ptr, AsPtr(stack_start), AsPtr(stack_end)); + } + // Make the proper portion of the stack live: + if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) { + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + top_ptr, stack_end - top); + live_objects->push_back( + AllocObject(top_ptr, stack_end - top, THREAD_DATA)); + } else { // GROWS_TOWARDS_HIGH_ADDRESSES + RAW_VLOG(2, "Live stack at %p of %"PRIuPTR" bytes", + AsPtr(stack_start), top - stack_start); + live_objects->push_back( + AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA)); + } + lib->second.erase(span); // kill the rest of the region + // Put the non-stack part(s) of the region back: + if (stack_start != start) { + lib->second.push_back(AllocObject(AsPtr(start), stack_start - start, + MAYBE_LIVE)); + } + if (stack_end != end) { + lib->second.push_back(AllocObject(AsPtr(stack_end), end - stack_end, + MAYBE_LIVE)); + } + return; + } + } + } + RAW_LOG(ERROR, "Memory region for stack at %p not found. 
" + "Will likely report false leak positives.", top_ptr); + } +} + +// Iterator for heap allocation map data to make ignored objects "live" +// (i.e., treated as roots for the mark-and-sweep phase) +static void MakeIgnoredObjectsLiveCallbackLocked( + const void* ptr, const HeapProfileTable::AllocInfo& info) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + if (info.ignored) { + live_objects->push_back(AllocObject(ptr, info.object_size, + MUST_BE_ON_HEAP)); + } +} + +// Iterator for heap allocation map data to make objects allocated from +// disabled regions of code to be live. +static void MakeDisabledLiveCallbackLocked( + const void* ptr, const HeapProfileTable::AllocInfo& info) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + bool stack_disable = false; + bool range_disable = false; + for (int depth = 0; depth < info.stack_depth; depth++) { + uintptr_t addr = AsInt(info.call_stack[depth]); + if (disabled_ranges) { + DisabledRangeMap::const_iterator iter + = disabled_ranges->upper_bound(addr); + if (iter != disabled_ranges->end()) { + RAW_DCHECK(iter->first > addr, ""); + if (iter->second.start_address < addr && + iter->second.max_depth > depth) { + range_disable = true; // in range; dropping + break; + } + } + } + } + if (stack_disable || range_disable) { + uintptr_t start_address = AsInt(ptr); + uintptr_t end_address = start_address + info.object_size; + StackTopSet::const_iterator iter + = stack_tops->lower_bound(start_address); + if (iter != stack_tops->end()) { + RAW_DCHECK(*iter >= start_address, ""); + if (*iter < end_address) { + // We do not disable (treat as live) whole allocated regions + // if they are used to hold thread call stacks + // (i.e. when we find a stack inside). + // The reason is that we'll treat as live the currently used + // stack portions anyway (see RegisterStackLocked), + // and the rest of the region where the stack lives can well + // contain outdated stack variables which are not live anymore, + // hence should not be treated as such. + RAW_VLOG(2, "Not %s-disabling %"PRIuS" bytes at %p" + ": have stack inside: %p", + (stack_disable ? "stack" : "range"), + info.object_size, ptr, AsPtr(*iter)); + return; + } + } + RAW_VLOG(2, "%s-disabling %"PRIuS" bytes at %p", + (stack_disable ? "Stack" : "Range"), info.object_size, ptr); + live_objects->push_back(AllocObject(ptr, info.object_size, + MUST_BE_ON_HEAP)); + } +} + +// This function takes some fields from a /proc/self/maps line: +// +// start_address start address of a memory region. +// end_address end address of a memory region +// permissions rwx + private/shared bit +// filename filename of the mapped file +// +// If the region is not writeable, then it cannot have any heap +// pointers in it, otherwise we record it as a candidate live region +// to get filtered later. +static void RecordGlobalDataLocked(uintptr_t start_address, + uintptr_t end_address, + const char* permissions, + const char* filename) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + // Ignore non-writeable regions. + if (strchr(permissions, 'w') == NULL) return; + if (filename == NULL || *filename == '\0') filename = "UNNAMED"; + RAW_VLOG(2, "Looking into %s: 0x%" PRIxPTR "..0x%" PRIxPTR, + filename, start_address, end_address); + (*library_live_objects)[filename]. + push_back(AllocObject(AsPtr(start_address), + end_address - start_address, + MAYBE_LIVE)); +} + +// See if 'library' from /proc/self/maps has base name 'library_base' +// i.e. contains it and has '.' or '-' after it. 
+static bool IsLibraryNamed(const char* library, const char* library_base) { + const char* p = strstr(library, library_base); + size_t sz = strlen(library_base); + return p != NULL && (p[sz] == '.' || p[sz] == '-'); +} + +// static +void HeapLeakChecker::DisableLibraryAllocsLocked(const char* library, + uintptr_t start_address, + uintptr_t end_address) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + int depth = 0; + // TODO(maxim): maybe this should be extended to also use objdump + // and pick the text portion of the library more precisely. + if (IsLibraryNamed(library, "/libpthread") || + // libpthread has a lot of small "system" leaks we don't care about. + // In particular it allocates memory to store data supplied via + // pthread_setspecific (which can be the only pointer to a heap object). + IsLibraryNamed(library, "/libdl") || + // library loaders leak some "system" heap that we don't care about + IsLibraryNamed(library, "/libcrypto") + // Sometimes libcrypto of OpenSSH is compiled with -fomit-frame-pointer + // (any library can be, of course, but this one often is because speed + // is so important for making crypto usable). We ignore all its + // allocations because we can't see the call stacks. We'd prefer + // to ignore allocations done in files/symbols that match + // "default_malloc_ex|default_realloc_ex" + // but that doesn't work when the end-result binary is stripped. + ) { + depth = 1; // only disable allocation calls directly from the library code + } else if (IsLibraryNamed(library, "/ld") + // library loader leaks some "system" heap + // (e.g. thread-local storage) that we don't care about + ) { + depth = 2; // disable allocation calls directly from the library code + // and at depth 2 from it. + // We need depth 2 here solely because of a libc bug that + // forces us to jump through __memalign_hook and MemalignOverride hoops + // in tcmalloc.cc. + // Those buggy __libc_memalign() calls are in ld-linux.so and happen for + // thread-local storage allocations that we want to ignore here. + // We go with the depth-2 hack as a workaround for this libc bug: + // otherwise we'd need to extend MallocHook interface + // so that correct stack depth adjustment can be propagated from + // the exceptional case of MemalignOverride. + // Using depth 2 here should not mask real leaks because ld-linux.so + // does not call user code. + } + if (depth) { + RAW_VLOG(1, "Disabling allocations from %s at depth %d:", library, depth); + DisableChecksFromToLocked(AsPtr(start_address), AsPtr(end_address), depth); + if (IsLibraryNamed(library, "/libpthread") || + IsLibraryNamed(library, "/libdl") || + IsLibraryNamed(library, "/ld")) { + RAW_VLOG(1, "Global memory regions made by %s will be live data", + library); + if (global_region_caller_ranges == NULL) { + global_region_caller_ranges = + new(Allocator::Allocate(sizeof(GlobalRegionCallerRangeMap))) + GlobalRegionCallerRangeMap; + } + global_region_caller_ranges + ->insert(make_pair(end_address, start_address)); + } + } +} + +// static +HeapLeakChecker::ProcMapsResult HeapLeakChecker::UseProcMapsLocked( + ProcMapsTask proc_maps_task) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + // Need to provide own scratch memory to ProcMapsIterator: + ProcMapsIterator::Buffer buffer; + ProcMapsIterator it(0, &buffer); + if (!it.Valid()) { + int errsv = errno; + RAW_LOG(ERROR, "Could not open /proc/self/maps: errno=%d. 
" + "Libraries will not be handled correctly.", errsv); + return CANT_OPEN_PROC_MAPS; + } + uint64 start_address, end_address, file_offset; + int64 inode; + char *permissions, *filename; + bool saw_shared_lib = false; + while (it.Next(&start_address, &end_address, &permissions, + &file_offset, &inode, &filename)) { + if (start_address >= end_address) { + // Warn if a line we can be interested in is ill-formed: + if (inode != 0) { + RAW_LOG(ERROR, "Errors reading /proc/self/maps. " + "Some global memory regions will not " + "be handled correctly."); + } + // Silently skip other ill-formed lines: some are possible + // probably due to the interplay of how /proc/self/maps is updated + // while we read it in chunks in ProcMapsIterator and + // do things in this loop. + continue; + } + // Determine if any shared libraries are present. + if (inode != 0 && strstr(filename, "lib") && strstr(filename, ".so")) { + saw_shared_lib = true; + } + switch (proc_maps_task) { + case DISABLE_LIBRARY_ALLOCS: + // All lines starting like + // "401dc000-4030f000 r??p 00132000 03:01 13991972 lib/bin" + // identify a data and code sections of a shared library or our binary + if (inode != 0 && strncmp(permissions, "r-xp", 4) == 0) { + DisableLibraryAllocsLocked(filename, start_address, end_address); + } + break; + case RECORD_GLOBAL_DATA: + RecordGlobalDataLocked(start_address, end_address, + permissions, filename); + break; + default: + RAW_CHECK(0, ""); + } + } + if (!saw_shared_lib) { + RAW_LOG(ERROR, "No shared libs detected. Will likely report false leak " + "positives for statically linked executables."); + return NO_SHARED_LIBS_IN_PROC_MAPS; + } + return PROC_MAPS_USED; +} + +// Total number and size of live objects dropped from the profile; +// (re)initialized in IgnoreAllLiveObjectsLocked. +static int64 live_objects_total; +static int64 live_bytes_total; + +// pid of the thread that is doing the current leak check +// (protected by our lock; IgnoreAllLiveObjectsLocked sets it) +static pid_t self_thread_pid = 0; + +// Status of our thread listing callback execution +// (protected by our lock; used from within IgnoreAllLiveObjectsLocked) +static enum { + CALLBACK_NOT_STARTED, + CALLBACK_STARTED, + CALLBACK_COMPLETED, +} thread_listing_status = CALLBACK_NOT_STARTED; + +// Ideally to avoid deadlocks this function should not result in any libc +// or other function calls that might need to lock a mutex: +// It is called when all threads of a process are stopped +// at arbitrary points thus potentially holding those locks. +// +// In practice we are calling some simple i/o and sprintf-type library functions +// for logging messages, but use only our own LowLevelAlloc::Arena allocator. +// +// This is known to be buggy: the library i/o function calls are able to cause +// deadlocks when they request a lock that a stopped thread happens to hold. +// This issue as far as we know have so far not resulted in any deadlocks +// in practice, so for now we are taking our chance that the deadlocks +// have insignificant frequency. +// +// If such deadlocks become a problem we should make the i/o calls +// into appropriately direct system calls (or eliminate them), +// in particular write() is not safe and vsnprintf() is potentially dangerous +// due to reliance on locale functions (these are called through RAW_LOG +// and in other ways). 
+// +/*static*/ int HeapLeakChecker::IgnoreLiveThreadsLocked(void* parameter, + int num_threads, + pid_t* thread_pids, + va_list /*ap*/) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + thread_listing_status = CALLBACK_STARTED; + RAW_VLOG(2, "Found %d threads (from pid %d)", num_threads, getpid()); + + if (FLAGS_heap_check_ignore_global_live) { + UseProcMapsLocked(RECORD_GLOBAL_DATA); + } + + // We put the registers from other threads here + // to make pointers stored in them live. + vector<void*, STL_Allocator<void*, Allocator> > thread_registers; + + int failures = 0; + for (int i = 0; i < num_threads; ++i) { + // the leak checking thread itself is handled + // specially via self_thread_stack, not here: + if (thread_pids[i] == self_thread_pid) continue; + RAW_VLOG(2, "Handling thread with pid %d", thread_pids[i]); +#if defined(HAVE_LINUX_PTRACE_H) && defined(HAVE_SYS_SYSCALL_H) && defined(DUMPER) + i386_regs thread_regs; +#define sys_ptrace(r, p, a, d) syscall(SYS_ptrace, (r), (p), (a), (d)) + // We use sys_ptrace to avoid thread locking + // because this is called from ListAllProcessThreads + // when all but this thread are suspended. + if (sys_ptrace(PTRACE_GETREGS, thread_pids[i], NULL, &thread_regs) == 0) { + // Need to use SP to get all the data from the very last stack frame: + COMPILE_ASSERT(sizeof(thread_regs.SP) == sizeof(void*), + SP_register_does_not_look_like_a_pointer); + RegisterStackLocked(reinterpret_cast<void*>(thread_regs.SP)); + // Make registers live (just in case PTRACE_ATTACH resulted in some + // register pointers still being in the registers and not on the stack): + for (void** p = reinterpret_cast<void**>(&thread_regs); + p < reinterpret_cast<void**>(&thread_regs + 1); ++p) { + RAW_VLOG(3, "Thread register %p", *p); + thread_registers.push_back(*p); + } + } else { + failures += 1; + } +#else + failures += 1; +#endif + } + // Use all the collected thread (stack) liveness sources: + IgnoreLiveObjectsLocked("threads stack data", ""); + if (thread_registers.size()) { + // Make thread registers be live heap data sources. 
+ // we rely here on the fact that vector is in one memory chunk: + RAW_VLOG(2, "Live registers at %p of %"PRIuS" bytes", + &thread_registers[0], thread_registers.size() * sizeof(void*)); + live_objects->push_back(AllocObject(&thread_registers[0], + thread_registers.size() * sizeof(void*), + THREAD_REGISTERS)); + IgnoreLiveObjectsLocked("threads register data", ""); + } + // Do all other liveness walking while all threads are stopped: + IgnoreNonThreadLiveObjectsLocked(); + // Can now resume the threads: + ResumeAllProcessThreads(num_threads, thread_pids); + thread_listing_status = CALLBACK_COMPLETED; + return failures; +} + +// Stack top of the thread that is doing the current leak check +// (protected by our lock; IgnoreAllLiveObjectsLocked sets it) +static const void* self_thread_stack_top; + +// static +void HeapLeakChecker::IgnoreNonThreadLiveObjectsLocked() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); + RAW_VLOG(2, "Handling self thread with pid %d", self_thread_pid); + // Register our own stack: + + // Important that all stack ranges (including the one here) + // are known before we start looking at them + // in MakeDisabledLiveCallbackLocked: + RegisterStackLocked(self_thread_stack_top); + IgnoreLiveObjectsLocked("stack data", ""); + + // Make objects we were told to ignore live: + if (ignored_objects) { + for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin(); + object != ignored_objects->end(); ++object) { + const void* ptr = AsPtr(object->first); + RAW_VLOG(2, "Ignored live object at %p of %"PRIuS" bytes", + ptr, object->second); + live_objects-> + push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP)); + // we do this liveness check for ignored_objects before doing any + // live heap walking to make sure it does not fail needlessly: + size_t object_size; + if (!(heap_profile->FindAlloc(ptr, &object_size) && + object->second == object_size)) { + RAW_LOG(FATAL, "Object at %p of %"PRIuS" bytes from an" + " IgnoreObject() has disappeared", ptr, object->second); + } + } + IgnoreLiveObjectsLocked("ignored objects", ""); + } + + // Treat objects that were allocated when a Disabler was live as + // roots. I.e., if X was allocated while a Disabler was active, + // and Y is reachable from X, arrange that neither X nor Y are + // treated as leaks. + heap_profile->IterateAllocs(MakeIgnoredObjectsLiveCallbackLocked); + IgnoreLiveObjectsLocked("disabled objects", ""); + + // Make code-address-disabled objects live and ignored: + // This in particular makes all thread-specific data live + // because the basic data structure to hold pointers to thread-specific data + // is allocated from libpthreads and we have range-disabled that + // library code with UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS); + // so now we declare all thread-specific data reachable from there as live. 
+ heap_profile->IterateAllocs(MakeDisabledLiveCallbackLocked); + IgnoreLiveObjectsLocked("disabled code", ""); + + // Actually make global data live: + if (FLAGS_heap_check_ignore_global_live) { + bool have_null_region_callers = false; + for (LibraryLiveObjectsStacks::iterator l = library_live_objects->begin(); + l != library_live_objects->end(); ++l) { + RAW_CHECK(live_objects->empty(), ""); + // Process library_live_objects in l->second + // filtering them by MemoryRegionMap: + // It's safe to iterate over MemoryRegionMap + // w/o locks here as we are inside MemoryRegionMap::Lock(): + RAW_DCHECK(MemoryRegionMap::LockIsHeld(), ""); + // The only change to MemoryRegionMap possible in this loop + // is region addition as a result of allocating more memory + // for live_objects. This won't invalidate the RegionIterator + // or the intent of the loop. + // --see the comment by MemoryRegionMap::BeginRegionLocked(). + for (MemoryRegionMap::RegionIterator region = + MemoryRegionMap::BeginRegionLocked(); + region != MemoryRegionMap::EndRegionLocked(); ++region) { + // "region" from MemoryRegionMap is to be subtracted from + // (tentatively live) regions in l->second + // if it has a stack inside or it was allocated by + // a non-special caller (not one covered by a range + // in global_region_caller_ranges). + // This will in particular exclude all memory chunks used + // by the heap itself as well as what's been allocated with + // any allocator on top of mmap. + bool subtract = true; + if (!region->is_stack && global_region_caller_ranges) { + if (region->caller() == static_cast<uintptr_t>(NULL)) { + have_null_region_callers = true; + } else { + GlobalRegionCallerRangeMap::const_iterator iter + = global_region_caller_ranges->upper_bound(region->caller()); + if (iter != global_region_caller_ranges->end()) { + RAW_DCHECK(iter->first > region->caller(), ""); + if (iter->second < region->caller()) { // in special region + subtract = false; + } + } + } + } + if (subtract) { + // The loop puts the result of filtering l->second into live_objects: + for (LiveObjectsStack::const_iterator i = l->second.begin(); + i != l->second.end(); ++i) { + // subtract *region from *i + uintptr_t start = AsInt(i->ptr); + uintptr_t end = start + i->size; + if (region->start_addr <= start && end <= region->end_addr) { + // full deletion due to subsumption + } else if (start < region->start_addr && + region->end_addr < end) { // cutting-out split + live_objects->push_back(AllocObject(i->ptr, + region->start_addr - start, + IN_GLOBAL_DATA)); + live_objects->push_back(AllocObject(AsPtr(region->end_addr), + end - region->end_addr, + IN_GLOBAL_DATA)); + } else if (region->end_addr > start && + region->start_addr <= start) { // cut from start + live_objects->push_back(AllocObject(AsPtr(region->end_addr), + end - region->end_addr, + IN_GLOBAL_DATA)); + } else if (region->start_addr > start && + region->start_addr < end) { // cut from end + live_objects->push_back(AllocObject(i->ptr, + region->start_addr - start, + IN_GLOBAL_DATA)); + } else { // pass: no intersection + live_objects->push_back(AllocObject(i->ptr, i->size, + IN_GLOBAL_DATA)); + } + } + // Move live_objects back into l->second + // for filtering by the next region. 
+ live_objects->swap(l->second); + live_objects->clear(); + } + } + // Now get and use live_objects from the final version of l->second: + if (VLOG_IS_ON(2)) { + for (LiveObjectsStack::const_iterator i = l->second.begin(); + i != l->second.end(); ++i) { + RAW_VLOG(2, "Library live region at %p of %"PRIuPTR" bytes", + i->ptr, i->size); + } + } + live_objects->swap(l->second); + IgnoreLiveObjectsLocked("in globals of\n ", l->first.c_str()); + } + if (have_null_region_callers) { + RAW_LOG(ERROR, "Have memory regions w/o callers: " + "might report false leaks"); + } + Allocator::DeleteAndNull(&library_live_objects); + } +} + +// Callback for ListAllProcessThreads in IgnoreAllLiveObjectsLocked below +// to test/verify that we have just the one main thread, in which case +// we can do everything in that main thread, +// so that CPU profiler can collect all its samples. +// Returns the number of threads in the process. +static int IsOneThread(void* parameter, int num_threads, + pid_t* thread_pids, va_list ap) { + if (num_threads != 1) { + RAW_LOG(WARNING, "Have threads: Won't CPU-profile the bulk of leak " + "checking work happening in IgnoreLiveThreadsLocked!"); + } + ResumeAllProcessThreads(num_threads, thread_pids); + return num_threads; +} + +// Dummy for IgnoreAllLiveObjectsLocked below. +// Making it global helps with compiler warnings. +static va_list dummy_ap; + +// static +void HeapLeakChecker::IgnoreAllLiveObjectsLocked(const void* self_stack_top) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_CHECK(live_objects == NULL, ""); + live_objects = new(Allocator::Allocate(sizeof(LiveObjectsStack))) + LiveObjectsStack; + stack_tops = new(Allocator::Allocate(sizeof(StackTopSet))) StackTopSet; + // reset the counts + live_objects_total = 0; + live_bytes_total = 0; + // Reduce max_heap_object_size to FLAGS_heap_check_max_pointer_offset + // for the time of leak check. + // FLAGS_heap_check_max_pointer_offset caps max_heap_object_size + // to manage reasonably low chances of random bytes + // appearing to be pointing into large actually leaked heap objects. + const size_t old_max_heap_object_size = max_heap_object_size; + max_heap_object_size = ( + FLAGS_heap_check_max_pointer_offset != -1 + ? min(size_t(FLAGS_heap_check_max_pointer_offset), max_heap_object_size) + : max_heap_object_size); + // Record global data as live: + if (FLAGS_heap_check_ignore_global_live) { + library_live_objects = + new(Allocator::Allocate(sizeof(LibraryLiveObjectsStacks))) + LibraryLiveObjectsStacks; + } + // Ignore all thread stacks: + thread_listing_status = CALLBACK_NOT_STARTED; + bool need_to_ignore_non_thread_objects = true; + self_thread_pid = getpid(); + self_thread_stack_top = self_stack_top; + if (FLAGS_heap_check_ignore_thread_live) { + // In case we are doing CPU profiling we'd like to do all the work + // in the main thread, not in the special thread created by + // ListAllProcessThreads, so that CPU profiler can collect all its samples. + // The machinery of ListAllProcessThreads conflicts with the CPU profiler + // by also relying on signals and ::sigaction. + // We can do this (run everything in the main thread) safely + // only if there's just the main thread itself in our process. 
+ // This variable reflects these two conditions: + bool want_and_can_run_in_main_thread = + ProfilingIsEnabledForAllThreads() && + ListAllProcessThreads(NULL, IsOneThread) == 1; + // When the normal path of ListAllProcessThreads below is taken, + // we fully suspend the threads right here before any liveness checking + // and keep them suspended for the whole time of liveness checking + // inside of the IgnoreLiveThreadsLocked callback. + // (The threads can't (de)allocate due to lock on the delete hook but + // if not suspended they could still mess with the pointer + // graph while we walk it). + int r = want_and_can_run_in_main_thread + ? IgnoreLiveThreadsLocked(NULL, 1, &self_thread_pid, dummy_ap) + : ListAllProcessThreads(NULL, IgnoreLiveThreadsLocked); + need_to_ignore_non_thread_objects = r < 0; + if (r < 0) { + RAW_LOG(WARNING, "Thread finding failed with %d errno=%d", r, errno); + if (thread_listing_status == CALLBACK_COMPLETED) { + RAW_LOG(INFO, "Thread finding callback " + "finished ok; hopefully everything is fine"); + need_to_ignore_non_thread_objects = false; + } else if (thread_listing_status == CALLBACK_STARTED) { + RAW_LOG(FATAL, "Thread finding callback was " + "interrupted or crashed; can't fix this"); + } else { // CALLBACK_NOT_STARTED + RAW_LOG(ERROR, "Could not find thread stacks. " + "Will likely report false leak positives."); + } + } else if (r != 0) { + RAW_LOG(ERROR, "Thread stacks not found for %d threads. " + "Will likely report false leak positives.", r); + } else { + RAW_VLOG(2, "Thread stacks appear to be found for all threads"); + } + } else { + RAW_LOG(WARNING, "Not looking for thread stacks; " + "objects reachable only from there " + "will be reported as leaks"); + } + // Do all other live data ignoring here if we did not do it + // within thread listing callback with all threads stopped. + if (need_to_ignore_non_thread_objects) { + if (FLAGS_heap_check_ignore_global_live) { + UseProcMapsLocked(RECORD_GLOBAL_DATA); + } + IgnoreNonThreadLiveObjectsLocked(); + } + if (live_objects_total) { + RAW_VLOG(1, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes", + live_objects_total, live_bytes_total); + } + // Free these: we made them here and heap_profile never saw them + Allocator::DeleteAndNull(&live_objects); + Allocator::DeleteAndNull(&stack_tops); + max_heap_object_size = old_max_heap_object_size; // reset this var +} + +// Alignment at which we should consider pointer positions +// in IgnoreLiveObjectsLocked. Use 1 if any alignment is ok. +static size_t pointer_source_alignment = kPointerSourceAlignment; +// Global lock for HeapLeakChecker::DoNoLeaks +// to protect pointer_source_alignment. +static SpinLock alignment_checker_lock(SpinLock::LINKER_INITIALIZED); + +// This function changes the live bits in the heap_profile-table's state: +// we only record the live objects to be skipped. +// +// When checking if a byte sequence points to a heap object we use +// HeapProfileTable::FindInsideAlloc to handle both pointers to +// the start and inside of heap-allocated objects. +// The "inside" case needs to be checked to support +// at least the following relatively common cases: +// - C++ arrays allocated with new FooClass[size] for classes +// with destructors have their size recorded in a sizeof(int) field +// before the place normal pointers point to. +// - basic_string<>-s for e.g. the C++ library of gcc 3.4 +// have the meta-info in basic_string<...>::_Rep recorded +// before the place normal pointers point to. 
+// - Multiply-inherited objects have their pointers, when cast to
+//   different base classes, pointing inside of the actually
+//   allocated object.
+// - Sometimes reachability pointers point to member objects of heap objects,
+//   and then those member objects point to the full heap object.
+// - Third party UnicodeString: it stores a 32-bit refcount
+//   (in both 32-bit and 64-bit binaries) as the first uint32
+//   in the allocated memory and a normal pointer points at
+//   the second uint32 behind the refcount.
+// By finding these additional objects here
+// we slightly increase the chance to mistake random memory bytes
+// for a pointer and miss a leak in a particular run of a binary.
+//
+/*static*/ void HeapLeakChecker::IgnoreLiveObjectsLocked(const char* name,
+                                                         const char* name2) {
+  RAW_DCHECK(heap_checker_lock.IsHeld(), "");
+  int64 live_object_count = 0;
+  int64 live_byte_count = 0;
+  while (!live_objects->empty()) {
+    const char* object =
+      reinterpret_cast<const char*>(live_objects->back().ptr);
+    size_t size = live_objects->back().size;
+    const ObjectPlacement place = live_objects->back().place;
+    live_objects->pop_back();
+    if (place == MUST_BE_ON_HEAP && heap_profile->MarkAsLive(object)) {
+      live_object_count += 1;
+      live_byte_count += size;
+    }
+    RAW_VLOG(4, "Looking for heap pointers in %p of %"PRIuS" bytes",
+             object, size);
+    const char* const whole_object = object;
+    size_t const whole_size = size;
+    // Try interpreting any byte sequence in object,size as a heap pointer:
+    const size_t remainder = AsInt(object) % pointer_source_alignment;
+    if (remainder) {
+      object += pointer_source_alignment - remainder;
+      if (size >= pointer_source_alignment - remainder) {
+        size -= pointer_source_alignment - remainder;
+      } else {
+        size = 0;
+      }
+    }
+    if (size < sizeof(void*)) continue;
+    const char* const max_object = object + size - sizeof(void*);
+    while (object <= max_object) {
+      // potentially unaligned load:
+      const uintptr_t addr = *reinterpret_cast<const uintptr_t*>(object);
+      // Do a fast check before the more expensive HaveOnHeapLocked lookup:
+      // this code runs for all memory words that are potentially pointers:
+      const bool can_be_on_heap =
+        // Order tests by the likelihood of the test failing in 64/32 bit modes.
+        // Yes, this matters: we either lose 5..6% speed in 32-bit mode
+        // (which is already slower) or slow down by a factor of 1.5..1.91
+        // in 64-bit mode.
+        // After the alignment test got dropped the above performance figures
+        // must have changed; might need to revisit this.
+#if defined(__x86_64__)
+        addr <= max_heap_address &&  // <= is for 0-sized object with max addr
+        min_heap_address <= addr;
+#else
+        min_heap_address <= addr &&
+        addr <= max_heap_address;  // <= is for 0-sized object with max addr
+#endif
+      if (can_be_on_heap) {
+        const void* ptr = reinterpret_cast<const void*>(addr);
+        // Too expensive (inner loop): manually uncomment when debugging:
+        // RAW_VLOG(8, "Trying pointer to %p at %p", ptr, object);
+        size_t object_size;
+        if (HaveOnHeapLocked(&ptr, &object_size) &&
+            heap_profile->MarkAsLive(ptr)) {
+          // We take the (hopefully low) risk here of encountering by accident
+          // a byte sequence in memory that matches an address of
+          // a heap object which is in fact leaked.
+          // I.e. in very rare and probably not repeatable/lasting cases
+          // we might miss some real heap memory leaks.
+ RAW_VLOG(5, "Found pointer to %p of %"PRIuS" bytes at %p " + "inside %p of size %"PRIuS"", + ptr, object_size, object, whole_object, whole_size); + if (VLOG_IS_ON(6)) { + // log call stacks to help debug how come something is not a leak + HeapProfileTable::AllocInfo alloc; + bool r = heap_profile->FindAllocDetails(ptr, &alloc); + r = r; // suppress compiler warning in non-debug mode + RAW_DCHECK(r, ""); // sanity + RAW_LOG(INFO, "New live %p object's alloc stack:", ptr); + for (int i = 0; i < alloc.stack_depth; ++i) { + RAW_LOG(INFO, " @ %p", alloc.call_stack[i]); + } + } + live_object_count += 1; + live_byte_count += object_size; + live_objects->push_back(AllocObject(ptr, object_size, + IGNORED_ON_HEAP)); + } + } + object += pointer_source_alignment; + } + } + live_objects_total += live_object_count; + live_bytes_total += live_byte_count; + if (live_object_count) { + RAW_VLOG(1, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s", + live_object_count, live_byte_count, name, name2); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker leak check disabling components +//---------------------------------------------------------------------- + +// static +void HeapLeakChecker::DisableChecksIn(const char* pattern) { + RAW_LOG(WARNING, "DisableChecksIn(%s) is ignored", pattern); +} + +// static +void HeapLeakChecker::IgnoreObject(const void* ptr) { + SpinLockHolder l(&heap_checker_lock); + if (!heap_checker_on) return; + size_t object_size; + if (!HaveOnHeapLocked(&ptr, &object_size)) { + RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr); + } else { + RAW_VLOG(1, "Going to ignore live object at %p of %"PRIuS" bytes", + ptr, object_size); + if (ignored_objects == NULL) { + ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap))) + IgnoredObjectsMap; + } + if (!ignored_objects->insert(make_pair(AsInt(ptr), object_size)).second) { + RAW_LOG(FATAL, "Object at %p is already being ignored", ptr); + } + } +} + +// static +void HeapLeakChecker::UnIgnoreObject(const void* ptr) { + SpinLockHolder l(&heap_checker_lock); + if (!heap_checker_on) return; + size_t object_size; + if (!HaveOnHeapLocked(&ptr, &object_size)) { + RAW_LOG(FATAL, "No live heap object at %p to un-ignore", ptr); + } else { + bool found = false; + if (ignored_objects) { + IgnoredObjectsMap::iterator object = ignored_objects->find(AsInt(ptr)); + if (object != ignored_objects->end() && object_size == object->second) { + ignored_objects->erase(object); + found = true; + RAW_VLOG(1, "Now not going to ignore live object " + "at %p of %"PRIuS" bytes", ptr, object_size); + } + } + if (!found) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr); + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker non-static functions +//---------------------------------------------------------------------- + +char* HeapLeakChecker::MakeProfileNameLocked() { + RAW_DCHECK(lock_->IsHeld(), ""); + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + const int len = profile_name_prefix->size() + strlen(name_) + 5 + + strlen(HeapProfileTable::kFileExt) + 1; + char* file_name = reinterpret_cast<char*>(Allocator::Allocate(len)); + snprintf(file_name, len, "%s.%s-end%s", + profile_name_prefix->c_str(), name_, + HeapProfileTable::kFileExt); + return file_name; +} + +void HeapLeakChecker::Create(const char *name, bool make_start_snapshot) { + SpinLockHolder l(lock_); + name_ = NULL; // checker is inactive + start_snapshot_ = NULL; + 
has_checked_ = false; + inuse_bytes_increase_ = 0; + inuse_allocs_increase_ = 0; + keep_profiles_ = false; + char* n = new char[strlen(name) + 1]; // do this before we lock + IgnoreObject(n); // otherwise it might be treated as live due to our stack + { // Heap activity in other threads is paused for this whole scope. + SpinLockHolder al(&alignment_checker_lock); + SpinLockHolder hl(&heap_checker_lock); + MemoryRegionMap::LockHolder ml; + if (heap_checker_on && profile_name_prefix != NULL) { + RAW_DCHECK(strchr(name, '/') == NULL, "must be a simple name"); + memcpy(n, name, strlen(name) + 1); + name_ = n; // checker is active + if (make_start_snapshot) { + start_snapshot_ = heap_profile->TakeSnapshot(); + } + + const HeapProfileTable::Stats& t = heap_profile->total(); + const size_t start_inuse_bytes = t.alloc_size - t.free_size; + const size_t start_inuse_allocs = t.allocs - t.frees; + RAW_VLOG(1, "Start check \"%s\" profile: %"PRIuS" bytes " + "in %"PRIuS" objects", + name_, start_inuse_bytes, start_inuse_allocs); + } else { + RAW_LOG(WARNING, "Heap checker is not active, " + "hence checker \"%s\" will do nothing!", name); + RAW_LOG(WARNING, "To activate set the HEAPCHECK environment variable.\n"); + } + } + if (name_ == NULL) { + UnIgnoreObject(n); + delete[] n; // must be done after we unlock + } +} + +HeapLeakChecker::HeapLeakChecker(const char *name) : lock_(new SpinLock) { + RAW_DCHECK(strcmp(name, "_main_") != 0, "_main_ is reserved"); + Create(name, true/*create start_snapshot_*/); +} + +HeapLeakChecker::HeapLeakChecker() : lock_(new SpinLock) { + if (FLAGS_heap_check_before_constructors) { + // We want to check for leaks of objects allocated during global + // constructors (i.e., objects allocated already). So we do not + // create a baseline snapshot and hence check for leaks of objects + // that may have already been created. + Create("_main_", false); + } else { + // We want to ignore leaks of objects allocated during global + // constructors (i.e., objects allocated already). So we snapshot + // the current heap contents and use them as a baseline that is + // not reported by the leak checker. + Create("_main_", true); + } +} + +ssize_t HeapLeakChecker::BytesLeaked() const { + SpinLockHolder l(lock_); + if (!has_checked_) { + RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call"); + } + return inuse_bytes_increase_; +} + +ssize_t HeapLeakChecker::ObjectsLeaked() const { + SpinLockHolder l(lock_); + if (!has_checked_) { + RAW_LOG(FATAL, "*NoLeaks|SameHeap must execute before this call"); + } + return inuse_allocs_increase_; +} + +// Save pid of main thread for using in naming dump files +static int32 main_thread_pid = getpid(); +#ifdef HAVE_PROGRAM_INVOCATION_NAME +extern char* program_invocation_name; +extern char* program_invocation_short_name; +static const char* invocation_name() { return program_invocation_short_name; } +static string invocation_path() { return program_invocation_name; } +#else +static const char* invocation_name() { return "<your binary>"; } +static string invocation_path() { return "<your binary>"; } +#endif + +// Prints commands that users can run to get more information +// about the reported leaks. +static void SuggestPprofCommand(const char* pprof_file_arg) { + // Extra help information to print for the user when the test is + // being run in a way where the straightforward pprof command will + // not suffice. 
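+  //
+  // For illustration only (the binary path, pid and dump directory below are
+  // made up; the real values come from invocation_path() and the profile
+  // file name passed in), the printed suggestion looks roughly like:
+  //   pprof /path/to/my_binary "/tmp/my_binary.1234._main_-end.heap" \
+  //       --inuse_objects --lines --heapcheck \
+  //       --edgefraction=1e-10 --nodefraction=1e-10 --gv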
+ string extra_help; + + // Common header info to print for remote runs + const string remote_header = + "This program is being executed remotely and therefore the pprof\n" + "command printed above will not work. Either run this program\n" + "locally, or adjust the pprof command as follows to allow it to\n" + "work on your local machine:\n"; + + // Extra command for fetching remote data + string fetch_cmd; + + RAW_LOG(WARNING, + "\n\n" + "If the preceding stack traces are not enough to find " + "the leaks, try running THIS shell command:\n\n" + "%s%s %s \"%s\" --inuse_objects --lines --heapcheck " + " --edgefraction=1e-10 --nodefraction=1e-10 --gv\n" + "\n" + "%s" + "If you are still puzzled about why the leaks are " + "there, try rerunning this program with " + "HEAP_CHECK_TEST_POINTER_ALIGNMENT=1 and/or with " + "HEAP_CHECK_MAX_POINTER_OFFSET=-1\n" + "If the leak report occurs in a small fraction of runs, " + "try running with TCMALLOC_MAX_FREE_QUEUE_SIZE of few hundred MB " + "or with TCMALLOC_RECLAIM_MEMORY=false, " // only works for debugalloc + "it might help find leaks more repeatably\n", + fetch_cmd.c_str(), + "pprof", // works as long as pprof is on your path + invocation_path().c_str(), + pprof_file_arg, + extra_help.c_str() + ); +} + +bool HeapLeakChecker::DoNoLeaks(ShouldSymbolize should_symbolize) { + SpinLockHolder l(lock_); + // The locking also helps us keep the messages + // for the two checks close together. + SpinLockHolder al(&alignment_checker_lock); + + // thread-safe: protected by alignment_checker_lock + static bool have_disabled_hooks_for_symbolize = false; + // Once we've checked for leaks and symbolized the results once, it's + // not safe to do it again. This is because in order to symbolize + // safely, we had to disable all the malloc hooks here, so we no + // longer can be confident we've collected all the data we need. + if (have_disabled_hooks_for_symbolize) { + RAW_LOG(FATAL, "Must not call heap leak checker manually after " + " program-exit's automatic check."); + } + + HeapProfileTable::Snapshot* leaks = NULL; + char* pprof_file = NULL; + + { + // Heap activity in other threads is paused during this function + // (i.e. until we got all profile difference info). + SpinLockHolder l(&heap_checker_lock); + if (heap_checker_on == false) { + if (name_ != NULL) { // leak checking enabled when created the checker + RAW_LOG(WARNING, "Heap leak checker got turned off after checker " + "\"%s\" has been created, no leak check is being done for it!", + name_); + } + return true; + } + + // Keep track of number of internally allocated objects so we + // can detect leaks in the heap-leak-checket itself + const int initial_allocs = Allocator::alloc_count(); + + if (name_ == NULL) { + RAW_LOG(FATAL, "Heap leak checker must not be turned on " + "after construction of a HeapLeakChecker"); + } + + MemoryRegionMap::LockHolder ml; + int a_local_var; // Use our stack ptr to make stack data live: + + // Sanity check that nobody is messing with the hooks we need: + // Important to have it here: else we can misteriously SIGSEGV + // in IgnoreLiveObjectsLocked inside ListAllProcessThreads's callback + // by looking into a region that got unmapped w/o our knowledge. + MemoryRegionMap::CheckMallocHooks(); + if (MallocHook::GetNewHook() != NewHook || + MallocHook::GetDeleteHook() != DeleteHook) { + RAW_LOG(FATAL, "Had our new/delete MallocHook-s replaced. " + "Are you using another MallocHook client? 
" + "Use --heap_check=\"\" to avoid this conflict."); + } + + // Make the heap profile, other threads are locked out. + HeapProfileTable::Snapshot* base = + reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_); + IgnoreAllLiveObjectsLocked(&a_local_var); + leaks = heap_profile->NonLiveSnapshot(base); + + inuse_bytes_increase_ = static_cast<ssize_t>(leaks->total().alloc_size); + inuse_allocs_increase_ = static_cast<ssize_t>(leaks->total().allocs); + if (leaks->Empty()) { + heap_profile->ReleaseSnapshot(leaks); + leaks = NULL; + + // We can only check for internal leaks along the no-user-leak + // path since in the leak path we temporarily release + // heap_checker_lock and another thread can come in and disturb + // allocation counts. + if (Allocator::alloc_count() != initial_allocs) { + RAW_LOG(FATAL, "Internal HeapChecker leak of %d objects ; %d -> %d", + Allocator::alloc_count() - initial_allocs, + initial_allocs, Allocator::alloc_count()); + } + } else if (FLAGS_heap_check_test_pointer_alignment) { + // Try with reduced pointer aligment + pointer_source_alignment = 1; + IgnoreAllLiveObjectsLocked(&a_local_var); + HeapProfileTable::Snapshot* leaks_wo_align = + heap_profile->NonLiveSnapshot(base); + pointer_source_alignment = kPointerSourceAlignment; + if (leaks_wo_align->Empty()) { + RAW_LOG(WARNING, "Found no leaks without pointer alignment: " + "something might be placing pointers at " + "unaligned addresses! This needs to be fixed."); + } else { + RAW_LOG(INFO, "Found leaks without pointer alignment as well: " + "unaligned pointers must not be the cause of leaks."); + RAW_LOG(INFO, "--heap_check_test_pointer_alignment did not help " + "to diagnose the leaks."); + } + heap_profile->ReleaseSnapshot(leaks_wo_align); + } + + if (leaks != NULL) { + pprof_file = MakeProfileNameLocked(); + } + } + + has_checked_ = true; + if (leaks == NULL) { + if (FLAGS_heap_check_max_pointer_offset == -1) { + RAW_LOG(WARNING, + "Found no leaks without max_pointer_offset restriction: " + "it's possible that the default value of " + "heap_check_max_pointer_offset flag is too low. " + "Do you use pointers with larger than that offsets " + "pointing in the middle of heap-allocated objects?"); + } + const HeapProfileTable::Stats& stats = heap_profile->total(); + RAW_VLOG(heap_checker_info_level, + "No leaks found for check \"%s\" " + "(but no 100%% guarantee that there aren't any): " + "found %"PRId64" reachable heap objects of %"PRId64" bytes", + name_, + int64(stats.allocs - stats.frees), + int64(stats.alloc_size - stats.free_size)); + } else { + if (should_symbolize == SYMBOLIZE) { + // To turn addresses into symbols, we need to fork, which is a + // problem if both parent and child end up trying to call the + // same malloc-hooks we've set up, at the same time. To avoid + // trouble, we turn off the hooks before symbolizing. Note that + // this makes it unsafe to ever leak-report again! Luckily, we + // typically only want to report once in a program's run, at the + // very end. 
+ if (MallocHook::GetNewHook() == NewHook) + MallocHook::SetNewHook(NULL); + if (MallocHook::GetDeleteHook() == DeleteHook) + MallocHook::SetDeleteHook(NULL); + MemoryRegionMap::Shutdown(); + // Make sure all the hooks really got unset: + RAW_CHECK(MallocHook::GetNewHook() == NULL, ""); + RAW_CHECK(MallocHook::GetDeleteHook() == NULL, ""); + RAW_CHECK(MallocHook::GetMmapHook() == NULL, ""); + RAW_CHECK(MallocHook::GetSbrkHook() == NULL, ""); + have_disabled_hooks_for_symbolize = true; + leaks->ReportLeaks(name_, pprof_file, true); // true = should_symbolize + } else { + leaks->ReportLeaks(name_, pprof_file, false); + } + if (FLAGS_heap_check_identify_leaks) { + leaks->ReportIndividualObjects(); + } + + SuggestPprofCommand(pprof_file); + + { + SpinLockHolder l(&heap_checker_lock); + heap_profile->ReleaseSnapshot(leaks); + Allocator::Free(pprof_file); + } + } + + return (leaks == NULL); +} + +HeapLeakChecker::~HeapLeakChecker() { + if (name_ != NULL) { // had leak checking enabled when created the checker + if (!has_checked_) { + RAW_LOG(FATAL, "Some *NoLeaks|SameHeap method" + " must be called on any created HeapLeakChecker"); + } + + // Deallocate any snapshot taken at start + if (start_snapshot_ != NULL) { + SpinLockHolder l(&heap_checker_lock); + heap_profile->ReleaseSnapshot( + reinterpret_cast<HeapProfileTable::Snapshot*>(start_snapshot_)); + } + + UnIgnoreObject(name_); + delete[] name_; + name_ = NULL; + } + delete lock_; +} + +//---------------------------------------------------------------------- +// HeapLeakChecker overall heap check components +//---------------------------------------------------------------------- + +// static +bool HeapLeakChecker::IsActive() { + SpinLockHolder l(&heap_checker_lock); + return heap_checker_on; +} + +vector<HeapCleaner::void_function>* HeapCleaner::heap_cleanups_ = NULL; + +// When a HeapCleaner object is intialized, add its function to the static list +// of cleaners to be run before leaks checking. +HeapCleaner::HeapCleaner(void_function f) { + if (heap_cleanups_ == NULL) + heap_cleanups_ = new vector<HeapCleaner::void_function>; + heap_cleanups_->push_back(f); +} + +// Run all of the cleanup functions and delete the vector. +void HeapCleaner::RunHeapCleanups() { + if (!heap_cleanups_) + return; + for (int i = 0; i < heap_cleanups_->size(); i++) { + void (*f)(void) = (*heap_cleanups_)[i]; + f(); + } + delete heap_cleanups_; + heap_cleanups_ = NULL; +} + +// Program exit heap cleanup registered with atexit(). +// Will not get executed when we crash on a signal. +// +/*static*/ void HeapLeakChecker::RunHeapCleanups() { + { SpinLockHolder l(&heap_checker_lock); + // can get here (via forks?) with other pids + if (heap_checker_pid != getpid()) return; + } + HeapCleaner::RunHeapCleanups(); + if (!FLAGS_heap_check_after_destructors) DoMainHeapCheck(); +} + +// defined below +static int GetCommandLineFrom(const char* file, char* cmdline, int size); + +static bool internal_init_start_has_run = false; + +// Called exactly once, before main() (but hopefully just before). +// This picks a good unique name for the dumped leak checking heap profiles. +// +// Because we crash when InternalInitStart is called more than once, +// it's fine that we hold heap_checker_lock only around pieces of +// this function: this is still enough for thread-safety w.r.t. other functions +// of this module. +// We can't hold heap_checker_lock throughout because it would deadlock +// on a memory allocation since our new/delete hooks can be on. 
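+//
+// For context, an illustrative way to exercise this path (the binary name and
+// test body are made up; NoLeaks() is the public wrapper around DoNoLeaks()
+// declared in heap-checker.h):
+//   HEAPCHECK=normal ./my_binary      # whole-program leak check at exit
+//   HEAPCHECK=local  ./my_binary      # only explicitly created checkers run
+// and, under "local" mode, wrapping the code under test:
+//   HeapLeakChecker checker("my_test");
+//   RunTheCodeUnderTest();            // hypothetical
+//   if (!checker.NoLeaks()) RAW_LOG(FATAL, "leaks found");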
+// +/*static*/ void HeapLeakChecker::InternalInitStart() { + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(!internal_init_start_has_run, "Only one call is expected"); + internal_init_start_has_run = true; + + if (FLAGS_heap_check.empty()) { + // turns out we do not need checking in the end; can stop profiling + TurnItselfOffLocked(); + return; + } + } + + // Changing this to false can be useful when debugging heap-checker itself: + if (!FLAGS_heap_check_run_under_gdb) { + // See if heap checker should turn itself off because we are + // running under gdb (to avoid conflicts over ptrace-ing rights): + char name_buf[15+15]; + snprintf(name_buf, sizeof(name_buf), + "/proc/%d/cmdline", static_cast<int>(getppid())); + char cmdline[1024*8]; // /proc/*/cmdline is at most 4Kb anyway usually + int size = GetCommandLineFrom(name_buf, cmdline, sizeof(cmdline)-1); + cmdline[size] = '\0'; + // look for "gdb" in the executable's name: + const char* last = strrchr(cmdline, '/'); + if (last) last += 1; + else last = cmdline; + if (strncmp(last, "gdb", 3) == 0) { + RAW_LOG(WARNING, "We seem to be running under gdb; will turn itself off"); + SpinLockHolder l(&heap_checker_lock); + TurnItselfOffLocked(); + return; + } + } + + { SpinLockHolder l(&heap_checker_lock); + if (!constructor_heap_profiling) { + RAW_LOG(FATAL, "Can not start so late. You have to enable heap checking " + "with HEAPCHECK=<mode>."); + } + } + + // Set all flags + if (FLAGS_heap_check == "minimal") { + // The least we can check. + FLAGS_heap_check_before_constructors = false; // from after main + // (ignore more) + FLAGS_heap_check_after_destructors = false; // to after cleanup + // (most data is live) + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (FLAGS_heap_check == "normal") { + // Faster than 'minimal' and not much stricter. + FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = false; // to after cleanup + // (most data is live) + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (FLAGS_heap_check == "strict") { + // A bit stricter than 'normal': global destructors must fully clean up + // after themselves if they are present. + FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = true; // to after destructors + // (less data live) + FLAGS_heap_check_ignore_thread_live = true; // ignore all live + FLAGS_heap_check_ignore_global_live = true; // ignore all live + } else if (FLAGS_heap_check == "draconian") { + // Drop not very portable and not very exact live heap flooding. 
+ FLAGS_heap_check_before_constructors = true; // from no profile (fast) + FLAGS_heap_check_after_destructors = true; // to after destructors + // (need them) + FLAGS_heap_check_ignore_thread_live = false; // no live flood (stricter) + FLAGS_heap_check_ignore_global_live = false; // no live flood (stricter) + } else if (FLAGS_heap_check == "as-is") { + // do nothing: use other flags as is + } else if (FLAGS_heap_check == "local") { + // do nothing + } else { + RAW_LOG(FATAL, "Unsupported heap_check flag: %s", + FLAGS_heap_check.c_str()); + } + { SpinLockHolder l(&heap_checker_lock); + RAW_DCHECK(heap_checker_pid == getpid(), ""); + heap_checker_on = true; + RAW_DCHECK(heap_profile, ""); + ProcMapsResult pm_result = UseProcMapsLocked(DISABLE_LIBRARY_ALLOCS); + // might neeed to do this more than once + // if one later dynamically loads libraries that we want disabled + if (pm_result != PROC_MAPS_USED) { // can't function + TurnItselfOffLocked(); + return; + } + } + + // make a good place and name for heap profile leak dumps + string* profile_prefix = + new string(FLAGS_heap_check_dump_directory + "/" + invocation_name()); + + // Finalize prefix for dumping leak checking profiles. + const int32 our_pid = getpid(); // safest to call getpid() outside lock + { SpinLockHolder l(&heap_checker_lock); + // main_thread_pid might still be 0 if this function is being called before + // global constructors. In that case, our pid *is* the main pid. + if (main_thread_pid == 0) + main_thread_pid = our_pid; + } + char pid_buf[15]; + snprintf(pid_buf, sizeof(pid_buf), ".%d", main_thread_pid); + *profile_prefix += pid_buf; + { SpinLockHolder l(&heap_checker_lock); + RAW_DCHECK(profile_name_prefix == NULL, ""); + profile_name_prefix = profile_prefix; + } + + // Make sure new/delete hooks are installed properly + // and heap profiler is indeed able to keep track + // of the objects being allocated. + // We test this to make sure we are indeed checking for leaks. + char* test_str = new char[5]; + size_t size; + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(heap_profile->FindAlloc(test_str, &size), + "our own new/delete not linked?"); + } + delete[] test_str; + { SpinLockHolder l(&heap_checker_lock); + // This check can fail when it should not if another thread allocates + // into this same spot right this moment, + // which is unlikely since this code runs in InitGoogle. + RAW_CHECK(!heap_profile->FindAlloc(test_str, &size), + "our own new/delete not linked?"); + } + // If we crash in the above code, it probably means that + // "nm <this_binary> | grep new" will show that tcmalloc's new/delete + // implementation did not get linked-in into this binary + // (i.e. nm will list __builtin_new and __builtin_vec_new as undefined). + // If this happens, it is a BUILD bug to be fixed. 
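+
+  // A sketch of the resulting dump-file naming (directory, binary name and
+  // pid are illustrative):
+  //   profile_name_prefix = FLAGS_heap_check_dump_directory + "/"
+  //                         + invocation_name() + ".<main pid>"
+  //                       e.g. "/tmp/my_binary.1234"
+  //   MakeProfileNameLocked() later appends ".<checker name>-end.heap",
+  //   giving e.g. "/tmp/my_binary.1234._main_-end.heap" for the main checker.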
+ + RAW_VLOG(heap_checker_info_level, + "WARNING: Perftools heap leak checker is active " + "-- Performance may suffer"); + + if (FLAGS_heap_check != "local") { + // Schedule registered heap cleanup + atexit(RunHeapCleanups); + HeapLeakChecker* main_hc = new HeapLeakChecker(); + SpinLockHolder l(&heap_checker_lock); + RAW_DCHECK(main_heap_checker == NULL, + "Repeated creation of main_heap_checker"); + main_heap_checker = main_hc; + do_main_heap_check = true; + } + + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(heap_checker_on && constructor_heap_profiling, + "Leak checking is expected to be fully turned on now"); + } + + // For binaries built in debug mode, this will set release queue of + // debugallocation.cc to 100M to make it less likely for real leaks to + // be hidden due to reuse of heap memory object addresses. + // Running a test with --malloc_reclaim_memory=0 would help find leaks even + // better, but the test might run out of memory as a result. + // The scenario is that a heap object at address X is allocated and freed, + // but some other data-structure still retains a pointer to X. + // Then the same heap memory is used for another object, which is leaked, + // but the leak is not noticed due to the pointer to the original object at X. + // TODO(csilvers): support this in some manner. +#if 0 + SetCommandLineOptionWithMode("max_free_queue_size", "104857600", // 100M + SET_FLAG_IF_DEFAULT); +#endif +} + +// We want this to run early as well, but not so early as +// ::BeforeConstructors (we want flag assignments to have already +// happened, for instance). Initializer-registration does the trick. +REGISTER_MODULE_INITIALIZER(init_start, HeapLeakChecker::InternalInitStart()); + +// static +bool HeapLeakChecker::DoMainHeapCheck() { + if (FLAGS_heap_check_delay_seconds > 0) { + sleep(FLAGS_heap_check_delay_seconds); + } + { SpinLockHolder l(&heap_checker_lock); + if (!do_main_heap_check) return false; + RAW_DCHECK(heap_checker_pid == getpid(), ""); + do_main_heap_check = false; // will do it now; no need to do it more + } + + if (!NoGlobalLeaks()) { + if (FLAGS_heap_check_identify_leaks) { + RAW_LOG(FATAL, "Whole-program memory leaks found."); + } + RAW_LOG(ERROR, "Exiting with error code (instead of crashing) " + "because of whole-program memory leaks"); + _exit(1); // we don't want to call atexit() routines! + } + return true; +} + +// static +HeapLeakChecker* HeapLeakChecker::GlobalChecker() { + SpinLockHolder l(&heap_checker_lock); + return main_heap_checker; +} + +// static +bool HeapLeakChecker::NoGlobalLeaks() { + // we never delete or change main_heap_checker once it's set: + HeapLeakChecker* main_hc = GlobalChecker(); + if (main_hc) { + RAW_VLOG(1, "Checking for whole-program memory leaks"); + // The program is over, so it's safe to symbolize addresses (which + // requires a fork) because no serious work is expected to be done + // after this. Symbolizing is really useful -- knowing what + // function has a leak is better than knowing just an address -- + // and while we can only safely symbolize once in a program run, + // now is the time (after all, there's no "later" that would be better). 
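+    // Illustrative call chain for the automatic whole-program check (the
+    // exact conditions are in the functions above and below):
+    //   atexit(RunHeapCleanups) -> DoMainHeapCheck() -> NoGlobalLeaks()
+    //       -> main_heap_checker->DoNoLeaks(SYMBOLIZE)
+    // or, when --heap_check_after_destructors is set,
+    //   HeapLeakChecker_AfterDestructors() -> DoMainHeapCheck() -> ...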
+ return main_hc->DoNoLeaks(SYMBOLIZE); + } + return true; +} + +// static +void HeapLeakChecker::CancelGlobalCheck() { + SpinLockHolder l(&heap_checker_lock); + if (do_main_heap_check) { + RAW_VLOG(heap_checker_info_level, + "Canceling the automatic at-exit whole-program memory leak check"); + do_main_heap_check = false; + } +} + +//---------------------------------------------------------------------- +// HeapLeakChecker global constructor/destructor ordering components +//---------------------------------------------------------------------- + +static bool in_initial_malloc_hook = false; + +#ifdef HAVE___ATTRIBUTE__ // we need __attribute__((weak)) for this to work +#define INSTALLED_INITIAL_MALLOC_HOOKS + +void HeapLeakChecker_BeforeConstructors(); // below + +// Helper for InitialMallocHook_* below +static inline void InitHeapLeakCheckerFromMallocHook() { + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(!in_initial_malloc_hook, + "Something did not reset initial MallocHook-s"); + in_initial_malloc_hook = true; + } + // Initialize heap checker on the very first allocation/mmap/sbrk call: + HeapLeakChecker_BeforeConstructors(); + { SpinLockHolder l(&heap_checker_lock); + in_initial_malloc_hook = false; + } +} + +// These will owerwrite the weak definitions in malloc_hook.cc: + +// Important to have this to catch the first allocation call from the binary: +extern void InitialMallocHook_New(const void* ptr, size_t size) { + InitHeapLeakCheckerFromMallocHook(); + // record this first allocation as well (if we need to): + MallocHook::InvokeNewHook(ptr, size); +} + +// Important to have this to catch the first mmap call (say from tcmalloc): +extern void InitialMallocHook_MMap(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + InitHeapLeakCheckerFromMallocHook(); + // record this first mmap as well (if we need to): + MallocHook::InvokeMmapHook( + result, start, size, protection, flags, fd, offset); +} + +// Important to have this to catch the first sbrk call (say from tcmalloc): +extern void InitialMallocHook_Sbrk(const void* result, ptrdiff_t increment) { + InitHeapLeakCheckerFromMallocHook(); + // record this first sbrk as well (if we need to): + MallocHook::InvokeSbrkHook(result, increment); +} + +// static +void CancelInitialMallocHooks() { + if (MallocHook::GetNewHook() == InitialMallocHook_New) { + MallocHook::SetNewHook(NULL); + } + RAW_DCHECK(MallocHook::GetNewHook() == NULL, ""); + if (MallocHook::GetMmapHook() == InitialMallocHook_MMap) { + MallocHook::SetMmapHook(NULL); + } + RAW_DCHECK(MallocHook::GetMmapHook() == NULL, ""); + if (MallocHook::GetSbrkHook() == InitialMallocHook_Sbrk) { + MallocHook::SetSbrkHook(NULL); + } + RAW_DCHECK(MallocHook::GetSbrkHook() == NULL, ""); +} + +#else + +// static +void CancelInitialMallocHooks() {} + +#endif + +// static +void HeapLeakChecker::BeforeConstructorsLocked() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_CHECK(!constructor_heap_profiling, + "BeforeConstructorsLocked called multiple times"); + CancelInitialMallocHooks(); + // Set hooks early to crash if 'new' gets called before we make heap_profile, + // and make sure no other hooks existed: + if (MallocHook::SetNewHook(NewHook) != NULL || + MallocHook::SetDeleteHook(DeleteHook) != NULL) { + RAW_LOG(FATAL, "Had other new/delete MallocHook-s set. 
" + "Somehow leak checker got activated " + "after something else have set up these hooks."); + } + constructor_heap_profiling = true; + MemoryRegionMap::Init(1); + // Set up MemoryRegionMap with (at least) one caller stack frame to record + // (important that it's done before HeapProfileTable creation below). + Allocator::Init(); + RAW_CHECK(heap_profile == NULL, ""); + heap_profile = new(Allocator::Allocate(sizeof(HeapProfileTable))) + HeapProfileTable(&Allocator::Allocate, &Allocator::Free); + RAW_VLOG(1, "Starting tracking the heap"); + heap_checker_on = true; +} + +// static +void HeapLeakChecker::TurnItselfOffLocked() { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + // Set FLAGS_heap_check to "", for users who test for it + if (!FLAGS_heap_check.empty()) // be a noop in the common case + FLAGS_heap_check.clear(); // because clear() could allocate memory + if (constructor_heap_profiling) { + RAW_CHECK(heap_checker_on, ""); + RAW_VLOG(heap_checker_info_level, "Turning perftools heap leak checking off"); + heap_checker_on = false; + // Unset our hooks checking they were the ones set: + if (MallocHook::SetNewHook(NULL) != NewHook || + MallocHook::SetDeleteHook(NULL) != DeleteHook) { + RAW_LOG(FATAL, "Had our new/delete MallocHook-s replaced. " + "Are you using another MallocHook client?"); + } + Allocator::DeleteAndNull(&heap_profile); + // free our optional global data: + Allocator::DeleteAndNullIfNot(&ignored_objects); + Allocator::DeleteAndNullIfNot(&disabled_ranges); + Allocator::DeleteAndNullIfNot(&global_region_caller_ranges); + Allocator::Shutdown(); + MemoryRegionMap::Shutdown(); + } + RAW_CHECK(!heap_checker_on, ""); +} + +// Read in the command line from 'file' into 'cmdline' and return the size read +// 'size' is the space available in 'cmdline'. +// We need this because we don't yet have argv/argc. +// CAVEAT: 'file' (some /proc/*/cmdline) usually contains the command line +// already truncated (to 4K on Linux). +// Arguments in cmdline will be '\0'-terminated, +// the first one will be the binary's name. +static int GetCommandLineFrom(const char* file, char* cmdline, int size) { + // This routine is only used to check if we're running under gdb, so + // it's ok if this #if fails and the routine is a no-op. + // + // This function is called before memory allocation hooks are set up + // so we must not have any memory allocations in it. We use syscall + // versions of open/read/close here because we don't trust the non-syscall + // versions: they might 'accidentally' cause a memory allocation. + // Here's a real-life problem scenario we had: + // 1) A program LD_PRELOADed a library called list_file_used.a + // 2) list_file_used intercepted open/read/close and called dlsym() + // 3) dlsym() called pthread_setspecific() which called malloc(). + // This malloced memory is 'hidden' from the heap-checker. By + // definition, this thread-local data is live, and everything it points + // to is live (not a memory leak) as well. But because this memory + // was hidden from the heap-checker, everything it points to was + // taken to be orphaned, and therefore, a memory leak. +#if defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) + // Use a win32 call to get the command line. 
+ const char* command_line = ::GetCommandLine(); + strncpy(cmdline, command_line, size); + cmdline[size - 1] = '\0'; + return strlen(cmdline); +#elif defined(HAVE_SYS_SYSCALL_H) + int fd = syscall(SYS_open, file, O_RDONLY); + int result = 0; + if (fd >= 0) { + ssize_t r; + while ((r = syscall(SYS_read, fd, cmdline + result, size)) > 0) { + result += r; + size -= r; + } + syscall(SYS_close, fd); + } + return result; +#else + return 0; +#endif +} + +extern bool heap_leak_checker_bcad_variable; // in heap-checker-bcad.cc + +static bool has_called_before_constructors = false; + +void HeapLeakChecker_BeforeConstructors() { + SpinLockHolder l(&heap_checker_lock); + // We can be called from several places: the first mmap/sbrk/alloc call + // or the first global c-tor from heap-checker-bcad.cc: + // Do not re-execute initialization: + if (has_called_before_constructors) return; + has_called_before_constructors = true; + + heap_checker_pid = getpid(); // set it always + heap_leak_checker_bcad_variable = true; + // just to reference it, so that heap-checker-bcad.o is linked in + + // This function can be called *very* early, before the normal + // global-constructor that sets FLAGS_verbose. Set it manually now, + // so the RAW_LOG messages here are controllable. + const char* verbose_str = GetenvBeforeMain("PERFTOOLS_VERBOSE"); + if (verbose_str && atoi(verbose_str)) { // different than the default of 0? + FLAGS_verbose = atoi(verbose_str); + } + + bool need_heap_check = true; + // The user indicates a desire for heap-checking via the HEAPCHECK + // environment variable. If it's not set, there's no way to do + // heap-checking. + if (!GetenvBeforeMain("HEAPCHECK")) { + need_heap_check = false; + } +#ifdef HAVE_GETEUID + if (need_heap_check && getuid() != geteuid()) { + // heap-checker writes out files. Thus, for security reasons, we don't + // recognize the env. var. to turn on heap-checking if we're setuid. + RAW_LOG(WARNING, ("HeapChecker: ignoring HEAPCHECK because " + "program seems to be setuid\n")); + need_heap_check = false; + } +#endif + if (need_heap_check) { + HeapLeakChecker::BeforeConstructorsLocked(); + } else { // cancel our initial hooks + CancelInitialMallocHooks(); + } +} + +// This function is executed after all global object destructors run. +void HeapLeakChecker_AfterDestructors() { + { SpinLockHolder l(&heap_checker_lock); + // can get here (via forks?) with other pids + if (heap_checker_pid != getpid()) return; + } + if (FLAGS_heap_check_after_destructors) { + if (HeapLeakChecker::DoMainHeapCheck()) { + const struct timespec sleep_time = { 0, 500000000 }; // 500 ms + nanosleep(&sleep_time, NULL); + // Need this hack to wait for other pthreads to exit. + // Otherwise tcmalloc find errors + // on a free() call from pthreads. 
+ } + } + SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(!do_main_heap_check, "should have done it"); +} + +//---------------------------------------------------------------------- +// HeapLeakChecker disabling helpers +//---------------------------------------------------------------------- + +// These functions are at the end of the file to prevent their inlining: + +// static +void HeapLeakChecker::DisableChecksFromToLocked(const void* start_address, + const void* end_address, + int max_depth) { + RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + RAW_DCHECK(start_address < end_address, ""); + if (disabled_ranges == NULL) { + disabled_ranges = new(Allocator::Allocate(sizeof(DisabledRangeMap))) + DisabledRangeMap; + } + RangeValue value; + value.start_address = AsInt(start_address); + value.max_depth = max_depth; + if (disabled_ranges->insert(make_pair(AsInt(end_address), value)).second) { + RAW_VLOG(1, "Disabling leak checking in stack traces " + "under frame addresses between %p..%p", + start_address, end_address); + } else { // check that this is just a verbatim repetition + RangeValue const& val = disabled_ranges->find(AsInt(end_address))->second; + if (val.max_depth != value.max_depth || + val.start_address != value.start_address) { + RAW_LOG(FATAL, "Two DisableChecksToHereFrom calls conflict: " + "(%p, %p, %d) vs. (%p, %p, %d)", + AsPtr(val.start_address), end_address, val.max_depth, + start_address, end_address, max_depth); + } + } +} + +// static +inline bool HeapLeakChecker::HaveOnHeapLocked(const void** ptr, + size_t* object_size) { + // Commented-out because HaveOnHeapLocked is very performance-critical: + // RAW_DCHECK(heap_checker_lock.IsHeld(), ""); + const uintptr_t addr = AsInt(*ptr); + if (heap_profile->FindInsideAlloc( + *ptr, max_heap_object_size, ptr, object_size)) { + RAW_VLOG(7, "Got pointer into %p at +%"PRIuPTR" offset", + *ptr, addr - AsInt(*ptr)); + return true; + } + return false; +} + +// static +const void* HeapLeakChecker::GetAllocCaller(void* ptr) { + // this is used only in the unittest, so the heavy checks are fine + HeapProfileTable::AllocInfo info; + { SpinLockHolder l(&heap_checker_lock); + RAW_CHECK(heap_profile->FindAllocDetails(ptr, &info), ""); + } + RAW_CHECK(info.stack_depth >= 1, ""); + return info.call_stack[0]; +} diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.cc b/third_party/tcmalloc/chromium/src/heap-profile-table.cc new file mode 100644 index 0000000..ce65c47 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/heap-profile-table.cc @@ -0,0 +1,600 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Maxim Lifantsev (refactoring) +// + +#include <config.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#include <fcntl.h> // for open() +#ifdef HAVE_GLOB_H +#include <glob.h> +#ifndef GLOB_NOMATCH // true on some old cygwins +# define GLOB_NOMATCH 0 +#endif +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> // for PRIxPTR +#endif +#ifdef HAVE_POLL_H +#include <poll.h> +#endif +#include <errno.h> +#include <stdarg.h> +#include <string> +#include <map> +#include <algorithm> // for sort(), equal(), and copy() + +#include "heap-profile-table.h" + +#include "base/logging.h" +#include "raw_printer.h" +#include "symbolize.h" +#include <google/stacktrace.h> +#include <google/malloc_hook.h> +#include "base/commandlineflags.h" +#include "base/logging.h" // for the RawFD I/O commands +#include "base/sysinfo.h" + +using std::sort; +using std::equal; +using std::copy; +using std::string; +using std::map; + +using tcmalloc::FillProcSelfMaps; // from sysinfo.h +using tcmalloc::DumpProcSelfMaps; // from sysinfo.h + +//---------------------------------------------------------------------- + +DEFINE_bool(cleanup_old_heap_profiles, + EnvToBool("HEAP_PROFILE_CLEANUP", true), + "At initialization time, delete old heap profiles."); + +//---------------------------------------------------------------------- + +// header of the dumped heap profile +static const char kProfileHeader[] = "heap profile: "; +static const char kProcSelfMapsHeader[] = "\nMAPPED_LIBRARIES:\n"; + +//---------------------------------------------------------------------- + +const char HeapProfileTable::kFileExt[] = ".heap"; + +//---------------------------------------------------------------------- + +static const int kHashTableSize = 179999; // Size for table_. 
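+// An illustrative sizing note: table_ is an array of kHashTableSize chain-head
+// pointers, so on a 64-bit build it takes roughly
+//   179999 * sizeof(Bucket*) = 179999 * 8 bytes ~= 1.4 MB,
+// allocated once in the constructor via alloc_; Bucket records then chain
+// off these heads via their next pointers.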
+/*static*/ const int HeapProfileTable::kMaxStackDepth = 32; + +//---------------------------------------------------------------------- + +// We strip out different number of stack frames in debug mode +// because less inlining happens in that case +#ifdef NDEBUG +static const int kStripFrames = 2; +#else +static const int kStripFrames = 3; +#endif + +// For sorting Stats or Buckets by in-use space +static bool ByAllocatedSpace(HeapProfileTable::Stats* a, + HeapProfileTable::Stats* b) { + // Return true iff "a" has more allocated space than "b" + return (a->alloc_size - a->free_size) > (b->alloc_size - b->free_size); +} + +//---------------------------------------------------------------------- + +HeapProfileTable::HeapProfileTable(Allocator alloc, DeAllocator dealloc) + : alloc_(alloc), dealloc_(dealloc) { + // Make the table + const int table_bytes = kHashTableSize * sizeof(*table_); + table_ = reinterpret_cast<Bucket**>(alloc_(table_bytes)); + memset(table_, 0, table_bytes); + // Make allocation map + allocation_ = + new(alloc_(sizeof(AllocationMap))) AllocationMap(alloc_, dealloc_); + // init the rest: + memset(&total_, 0, sizeof(total_)); + num_buckets_ = 0; +} + +HeapProfileTable::~HeapProfileTable() { + // free allocation map + allocation_->~AllocationMap(); + dealloc_(allocation_); + allocation_ = NULL; + // free hash table + for (int b = 0; b < kHashTableSize; b++) { + for (Bucket* x = table_[b]; x != 0; /**/) { + Bucket* b = x; + x = x->next; + dealloc_(b->stack); + dealloc_(b); + } + } + dealloc_(table_); + table_ = NULL; +} + +HeapProfileTable::Bucket* HeapProfileTable::GetBucket(int depth, + const void* const key[]) { + // Make hash-value + uintptr_t h = 0; + for (int i = 0; i < depth; i++) { + h += reinterpret_cast<uintptr_t>(key[i]); + h += h << 10; + h ^= h >> 6; + } + h += h << 3; + h ^= h >> 11; + + // Lookup stack trace in table + unsigned int buck = ((unsigned int) h) % kHashTableSize; + for (Bucket* b = table_[buck]; b != 0; b = b->next) { + if ((b->hash == h) && + (b->depth == depth) && + equal(key, key + depth, b->stack)) { + return b; + } + } + + // Create new bucket + const size_t key_size = sizeof(key[0]) * depth; + const void** kcopy = reinterpret_cast<const void**>(alloc_(key_size)); + copy(key, key + depth, kcopy); + Bucket* b = reinterpret_cast<Bucket*>(alloc_(sizeof(Bucket))); + memset(b, 0, sizeof(*b)); + b->hash = h; + b->depth = depth; + b->stack = kcopy; + b->next = table_[buck]; + table_[buck] = b; + num_buckets_++; + return b; +} + +void HeapProfileTable::RecordAlloc(const void* ptr, size_t bytes, + int skip_count) { + void* key[kMaxStackDepth]; + int depth = MallocHook::GetCallerStackTrace( + key, kMaxStackDepth, kStripFrames + skip_count + 1); + RecordAllocWithStack(ptr, bytes, depth, key); +} + +void HeapProfileTable::RecordAllocWithStack( + const void* ptr, size_t bytes, int stack_depth, + const void* const call_stack[]) { + Bucket* b = GetBucket(stack_depth, call_stack); + b->allocs++; + b->alloc_size += bytes; + total_.allocs++; + total_.alloc_size += bytes; + + AllocValue v; + v.set_bucket(b); // also did set_live(false); set_ignore(false) + v.bytes = bytes; + allocation_->Insert(ptr, v); +} + +void HeapProfileTable::RecordFree(const void* ptr) { + AllocValue v; + if (allocation_->FindAndRemove(ptr, &v)) { + Bucket* b = v.bucket(); + b->frees++; + b->free_size += v.bytes; + total_.frees++; + total_.free_size += v.bytes; + } +} + +bool HeapProfileTable::FindAlloc(const void* ptr, size_t* object_size) const { + const AllocValue* alloc_value = 
allocation_->Find(ptr); + if (alloc_value != NULL) *object_size = alloc_value->bytes; + return alloc_value != NULL; +} + +bool HeapProfileTable::FindAllocDetails(const void* ptr, + AllocInfo* info) const { + const AllocValue* alloc_value = allocation_->Find(ptr); + if (alloc_value != NULL) { + info->object_size = alloc_value->bytes; + info->call_stack = alloc_value->bucket()->stack; + info->stack_depth = alloc_value->bucket()->depth; + } + return alloc_value != NULL; +} + +bool HeapProfileTable::FindInsideAlloc(const void* ptr, + size_t max_size, + const void** object_ptr, + size_t* object_size) const { + const AllocValue* alloc_value = + allocation_->FindInside(&AllocValueSize, max_size, ptr, object_ptr); + if (alloc_value != NULL) *object_size = alloc_value->bytes; + return alloc_value != NULL; +} + +bool HeapProfileTable::MarkAsLive(const void* ptr) { + AllocValue* alloc = allocation_->FindMutable(ptr); + if (alloc && !alloc->live()) { + alloc->set_live(true); + return true; + } + return false; +} + +void HeapProfileTable::MarkAsIgnored(const void* ptr) { + AllocValue* alloc = allocation_->FindMutable(ptr); + if (alloc) { + alloc->set_ignore(true); + } +} + +// We'd be happier using snprintfer, but we don't to reduce dependencies. +int HeapProfileTable::UnparseBucket(const Bucket& b, + char* buf, int buflen, int bufsize, + const char* extra, + Stats* profile_stats) { + if (profile_stats != NULL) { + profile_stats->allocs += b.allocs; + profile_stats->alloc_size += b.alloc_size; + profile_stats->frees += b.frees; + profile_stats->free_size += b.free_size; + } + int printed = + snprintf(buf + buflen, bufsize - buflen, "%6d: %8"PRId64" [%6d: %8"PRId64"] @%s", + b.allocs - b.frees, + b.alloc_size - b.free_size, + b.allocs, + b.alloc_size, + extra); + // If it looks like the snprintf failed, ignore the fact we printed anything + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + for (int d = 0; d < b.depth; d++) { + printed = snprintf(buf + buflen, bufsize - buflen, " 0x%08" PRIxPTR, + reinterpret_cast<uintptr_t>(b.stack[d])); + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + } + printed = snprintf(buf + buflen, bufsize - buflen, "\n"); + if (printed < 0 || printed >= bufsize - buflen) return buflen; + buflen += printed; + return buflen; +} + +HeapProfileTable::Bucket** +HeapProfileTable::MakeSortedBucketList() const { + Bucket** list = + reinterpret_cast<Bucket**>(alloc_(sizeof(Bucket) * num_buckets_)); + + int n = 0; + for (int b = 0; b < kHashTableSize; b++) { + for (Bucket* x = table_[b]; x != 0; x = x->next) { + list[n++] = x; + } + } + RAW_DCHECK(n == num_buckets_, ""); + + sort(list, list + num_buckets_, ByAllocatedSpace); + + return list; +} + +void HeapProfileTable::IterateOrderedAllocContexts( + AllocContextIterator callback) const { + Bucket** list = MakeSortedBucketList(); + AllocContextInfo info; + for (int i = 0; i < num_buckets_; ++i) { + *static_cast<Stats*>(&info) = *static_cast<Stats*>(list[i]); + info.stack_depth = list[i]->depth; + info.call_stack = list[i]->stack; + callback(info); + } + dealloc_(list); +} + +int HeapProfileTable::FillOrderedProfile(char buf[], int size) const { + Bucket** list = MakeSortedBucketList(); + + // Our file format is "bucket, bucket, ..., bucket, proc_self_maps_info". + // In the cases buf is too small, we'd rather leave out the last + // buckets than leave out the /proc/self/maps info. 
To ensure that, + // we actually print the /proc/self/maps info first, then move it to + // the end of the buffer, then write the bucket info into whatever + // is remaining, and then move the maps info one last time to close + // any gaps. Whew! + int map_length = snprintf(buf, size, "%s", kProcSelfMapsHeader); + if (map_length < 0 || map_length >= size) return 0; + map_length += FillProcSelfMaps(buf + map_length, size - map_length); + RAW_DCHECK(map_length <= size, ""); + char* const map_start = buf + size - map_length; // move to end + memmove(map_start, buf, map_length); + size -= map_length; + + Stats stats; + memset(&stats, 0, sizeof(stats)); + int bucket_length = snprintf(buf, size, "%s", kProfileHeader); + if (bucket_length < 0 || bucket_length >= size) return 0; + bucket_length = UnparseBucket(total_, buf, bucket_length, size, + " heapprofile", &stats); + for (int i = 0; i < num_buckets_; i++) { + bucket_length = UnparseBucket(*list[i], buf, bucket_length, size, "", + &stats); + } + RAW_DCHECK(bucket_length < size, ""); + + dealloc_(list); + + RAW_DCHECK(buf + bucket_length <= map_start, ""); + memmove(buf + bucket_length, map_start, map_length); // close the gap + + return bucket_length + map_length; +} + +inline +void HeapProfileTable::DumpNonLiveIterator(const void* ptr, AllocValue* v, + const DumpArgs& args) { + if (v->live()) { + v->set_live(false); + return; + } + if (v->ignore()) { + return; + } + Bucket b; + memset(&b, 0, sizeof(b)); + b.allocs = 1; + b.alloc_size = v->bytes; + b.depth = v->bucket()->depth; + b.stack = v->bucket()->stack; + char buf[1024]; + int len = UnparseBucket(b, buf, 0, sizeof(buf), "", args.profile_stats); + RawWrite(args.fd, buf, len); +} + +// Callback from NonLiveSnapshot; adds entry to arg->dest +// if not the entry is not live and is not present in arg->base. 
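+//
+// A simplified sketch of how the heap-leak checker drives this (the real
+// sequence, with locking and live-object flooding, is in heap-checker.cc):
+//   Snapshot* base  = table->TakeSnapshot();         // at checker creation
+//   ... run the code under test; mark reachable objects via MarkAsLive() ...
+//   Snapshot* leaks = table->NonLiveSnapshot(base);  // filtered by AddIfNonLive
+//   ... leaks->ReportLeaks(...); table->ReleaseSnapshot(leaks); ...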
+void HeapProfileTable::AddIfNonLive(const void* ptr, AllocValue* v, + AddNonLiveArgs* arg) { + if (v->live()) { + v->set_live(false); + } else { + if (arg->base != NULL && arg->base->map_.Find(ptr) != NULL) { + // Present in arg->base, so do not save + } else { + arg->dest->Add(ptr, *v); + } + } +} + +bool HeapProfileTable::WriteProfile(const char* file_name, + const Bucket& total, + AllocationMap* allocations) { + RAW_VLOG(1, "Dumping non-live heap profile to %s", file_name); + RawFD fd = RawOpenForWriting(file_name); + if (fd != kIllegalRawFD) { + RawWrite(fd, kProfileHeader, strlen(kProfileHeader)); + char buf[512]; + int len = UnparseBucket(total, buf, 0, sizeof(buf), " heapprofile", + NULL); + RawWrite(fd, buf, len); + const DumpArgs args(fd, NULL); + allocations->Iterate<const DumpArgs&>(DumpNonLiveIterator, args); + RawWrite(fd, kProcSelfMapsHeader, strlen(kProcSelfMapsHeader)); + DumpProcSelfMaps(fd); + RawClose(fd); + return true; + } else { + RAW_LOG(ERROR, "Failed dumping filtered heap profile to %s", file_name); + return false; + } +} + +void HeapProfileTable::CleanupOldProfiles(const char* prefix) { + if (!FLAGS_cleanup_old_heap_profiles) + return; + string pattern = string(prefix) + ".*" + kFileExt; +#if defined(HAVE_GLOB_H) + glob_t g; + const int r = glob(pattern.c_str(), GLOB_ERR, NULL, &g); + if (r == 0 || r == GLOB_NOMATCH) { + const int prefix_length = strlen(prefix); + for (int i = 0; i < g.gl_pathc; i++) { + const char* fname = g.gl_pathv[i]; + if ((strlen(fname) >= prefix_length) && + (memcmp(fname, prefix, prefix_length) == 0)) { + RAW_VLOG(1, "Removing old heap profile %s", fname); + unlink(fname); + } + } + } + globfree(&g); +#else /* HAVE_GLOB_H */ + RAW_LOG(WARNING, "Unable to remove old heap profiles (can't run glob())"); +#endif +} + +HeapProfileTable::Snapshot* HeapProfileTable::TakeSnapshot() { + Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_); + allocation_->Iterate(AddToSnapshot, s); + return s; +} + +void HeapProfileTable::ReleaseSnapshot(Snapshot* s) { + s->~Snapshot(); + dealloc_(s); +} + +// Callback from TakeSnapshot; adds a single entry to snapshot +void HeapProfileTable::AddToSnapshot(const void* ptr, AllocValue* v, + Snapshot* snapshot) { + snapshot->Add(ptr, *v); +} + +HeapProfileTable::Snapshot* HeapProfileTable::NonLiveSnapshot( + Snapshot* base) { + RAW_VLOG(2, "NonLiveSnapshot input: %d %d\n", + int(total_.allocs - total_.frees), + int(total_.alloc_size - total_.free_size)); + + Snapshot* s = new (alloc_(sizeof(Snapshot))) Snapshot(alloc_, dealloc_); + AddNonLiveArgs args; + args.dest = s; + args.base = base; + allocation_->Iterate<AddNonLiveArgs*>(AddIfNonLive, &args); + RAW_VLOG(2, "NonLiveSnapshot output: %d %d\n", + int(s->total_.allocs - s->total_.frees), + int(s->total_.alloc_size - s->total_.free_size)); + return s; +} + +// Information kept per unique bucket seen +struct HeapProfileTable::Snapshot::Entry { + int count; + int bytes; + Bucket* bucket; + Entry() : count(0), bytes(0) { } + + // Order by decreasing bytes + bool operator<(const Entry& x) const { + return this->bytes > x.bytes; + } +}; + +// State used to generate leak report. We keep a mapping from Bucket pointer +// the collected stats for that bucket. +struct HeapProfileTable::Snapshot::ReportState { + map<Bucket*, Entry> buckets_; +}; + +// Callback from ReportLeaks; updates ReportState. 
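+// For example (sizes made up): three leaked objects of 16, 16 and 32 bytes
+// allocated from the same call stack share one Bucket*, so they collapse into
+// a single Entry with count == 3 and bytes == 64, which ReportLeaks() prints
+// as one "Leak of 64 bytes in 3 objects allocated from:" entry.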
+void HeapProfileTable::Snapshot::ReportCallback(const void* ptr, + AllocValue* v, + ReportState* state) { + Entry* e = &state->buckets_[v->bucket()]; // Creates empty Entry first time + e->bucket = v->bucket(); + e->count++; + e->bytes += v->bytes; +} + +void HeapProfileTable::Snapshot::ReportLeaks(const char* checker_name, + const char* filename, + bool should_symbolize) { + // This is only used by the heap leak checker, but is intimately + // tied to the allocation map that belongs in this module and is + // therefore placed here. + RAW_LOG(ERROR, "Leak check %s detected leaks of %"PRIuS" bytes " + "in %"PRIuS" objects", + checker_name, + size_t(total_.alloc_size), + size_t(total_.allocs)); + + // Group objects by Bucket + ReportState state; + map_.Iterate(&ReportCallback, &state); + + // Sort buckets by decreasing leaked size + const int n = state.buckets_.size(); + Entry* entries = new Entry[n]; + int dst = 0; + for (map<Bucket*,Entry>::const_iterator iter = state.buckets_.begin(); + iter != state.buckets_.end(); + ++iter) { + entries[dst++] = iter->second; + } + sort(entries, entries + n); + + // Report a bounded number of leaks to keep the leak report from + // growing too long. + const int to_report = (n > 20) ? 20 : n; + RAW_LOG(ERROR, "The %d largest leaks:", to_report); + + // Print + SymbolMap symbolization_table; + int num_symbols = 0; + for (int i = 0; i < to_report; i++) { + const Entry& e = entries[i]; + for (int j = 0; j < e.bucket->depth; j++) { + const void* pc = e.bucket->stack[j]; + symbolization_table[reinterpret_cast<uintptr_t>(pc)] = ""; + num_symbols++; + } + } + static const int kBufSize = 2<<10; + char buffer[kBufSize]; + int sym_buffer_len = kSymbolSize * num_symbols; + char *sym_buffer = new char[sym_buffer_len]; + if (should_symbolize) + Symbolize(sym_buffer, sym_buffer_len, &symbolization_table); + for (int i = 0; i < to_report; i++) { + const Entry& e = entries[i]; + base::RawPrinter printer(buffer, kBufSize); + printer.Printf("Leak of %d bytes in %d objects allocated from:\n", + e.bytes, e.count); + for (int j = 0; j < e.bucket->depth; j++) { + const void* pc = e.bucket->stack[j]; + printer.Printf("\t@ %p %s\n", + pc, symbolization_table[reinterpret_cast<uintptr_t>(pc)]); + } + RAW_LOG(ERROR, "%s", buffer); + } + delete[] sym_buffer; + + if (to_report < n) { + RAW_LOG(ERROR, "Skipping leaks numbered %d..%d", + to_report, n-1); + } + delete[] entries; + + // TODO: Dump the sorted Entry list instead of dumping raw data? + // (should be much shorter) + if (!HeapProfileTable::WriteProfile(filename, total_, &map_)) { + RAW_LOG(ERROR, "Could not write pprof profile to %s", filename); + } +} + +void HeapProfileTable::Snapshot::ReportObject(const void* ptr, + AllocValue* v, + char* unused) { + // Perhaps also log the allocation stack trace (unsymbolized) + // on this line in case somebody finds it useful. + RAW_LOG(ERROR, "leaked %"PRIuS" byte object %p", v->bytes, ptr); +} + +void HeapProfileTable::Snapshot::ReportIndividualObjects() { + char unused; + map_.Iterate(ReportObject, &unused); +} diff --git a/third_party/tcmalloc/chromium/src/heap-profile-table.h b/third_party/tcmalloc/chromium/src/heap-profile-table.h new file mode 100644 index 0000000..92d237e --- /dev/null +++ b/third_party/tcmalloc/chromium/src/heap-profile-table.h @@ -0,0 +1,384 @@ +// Copyright (c) 2006, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Maxim Lifantsev (refactoring) +// + +#ifndef BASE_HEAP_PROFILE_TABLE_H_ +#define BASE_HEAP_PROFILE_TABLE_H_ + +#include "addressmap-inl.h" +#include "base/basictypes.h" +#include "base/logging.h" // for RawFD + +// Table to maintain a heap profile data inside, +// i.e. the set of currently active heap memory allocations. +// thread-unsafe and non-reentrant code: +// each instance object must be used by one thread +// at a time w/o self-recursion. +// +// TODO(maxim): add a unittest for this class. +class HeapProfileTable { + public: + + // Extension to be used for heap pforile files. + static const char kFileExt[]; + + // Longest stack trace we record. Defined in the .cc file. + static const int kMaxStackDepth; + + // data types ---------------------------- + + // Profile stats. + struct Stats { + int32 allocs; // Number of allocation calls + int32 frees; // Number of free calls + int64 alloc_size; // Total size of all allocated objects so far + int64 free_size; // Total size of all freed objects so far + + // semantic equality + bool Equivalent(const Stats& x) const { + return allocs - frees == x.allocs - x.frees && + alloc_size - free_size == x.alloc_size - x.free_size; + } + }; + + // Info we can return about an allocation. + struct AllocInfo { + size_t object_size; // size of the allocation + const void* const* call_stack; // call stack that made the allocation call + int stack_depth; // depth of call_stack + bool live; + bool ignored; + }; + + // Info we return about an allocation context. + // An allocation context is a unique caller stack trace + // of an allocation operation. + struct AllocContextInfo : public Stats { + int stack_depth; // Depth of stack trace + const void* const* call_stack; // Stack trace + }; + + // Memory (de)allocator interface we'll use. 
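+  // As an illustration only, a trivial pair satisfying the interface below:
+  //   static void* RawAlloc(size_t size) { return malloc(size); }
+  //   static void  RawFree(void* ptr)    { free(ptr); }
+  //   HeapProfileTable table(&RawAlloc, &RawFree);
+  // Real clients (e.g. the heap-leak checker) pass allocators that bypass
+  // the hooked malloc path so that profiling does not recurse on itself.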
+ typedef void* (*Allocator)(size_t size); + typedef void (*DeAllocator)(void* ptr); + + // interface --------------------------- + + HeapProfileTable(Allocator alloc, DeAllocator dealloc); + ~HeapProfileTable(); + + // Record an allocation at 'ptr' of 'bytes' bytes. + // skip_count gives the number of stack frames between this call + // and the memory allocation function that was asked to do the allocation. + void RecordAlloc(const void* ptr, size_t bytes, int skip_count); + + // Direct version of RecordAlloc when the caller stack to use + // is already known: call_stack of depth stack_depth. + void RecordAllocWithStack(const void* ptr, size_t bytes, + int stack_depth, const void* const call_stack[]); + + // Record the deallocation of memory at 'ptr'. + void RecordFree(const void* ptr); + + // Return true iff we have recorded an allocation at 'ptr'. + // If yes, fill *object_size with the allocation byte size. + bool FindAlloc(const void* ptr, size_t* object_size) const; + // Same as FindAlloc, but fills all of *info. + bool FindAllocDetails(const void* ptr, AllocInfo* info) const; + + // Return true iff "ptr" points into a recorded allocation + // If yes, fill *object_ptr with the actual allocation address + // and *object_size with the allocation byte size. + // max_size specifies largest currently possible allocation size. + bool FindInsideAlloc(const void* ptr, size_t max_size, + const void** object_ptr, size_t* object_size) const; + + // If "ptr" points to a recorded allocation and it's not marked as live + // mark it as live and return true. Else return false. + // All allocations start as non-live. + bool MarkAsLive(const void* ptr); + + // If "ptr" points to a recorded allocation, mark it as "ignored". + // Ignored objects are treated like other objects, except that they + // are skipped in heap checking reports. + void MarkAsIgnored(const void* ptr); + + // Return current total (de)allocation statistics. + const Stats& total() const { return total_; } + + // Allocation data iteration callback: gets passed object pointer and + // fully-filled AllocInfo. + typedef void (*AllocIterator)(const void* ptr, const AllocInfo& info); + + // Iterate over the allocation profile data calling "callback" + // for every allocation. + void IterateAllocs(AllocIterator callback) const { + allocation_->Iterate(MapArgsAllocIterator, callback); + } + + // Allocation context profile data iteration callback + typedef void (*AllocContextIterator)(const AllocContextInfo& info); + + // Iterate over the allocation context profile data calling "callback" + // for every allocation context. Allocation contexts are ordered by the + // size of allocated space. + void IterateOrderedAllocContexts(AllocContextIterator callback) const; + + // Fill profile data into buffer 'buf' of size 'size' + // and return the actual size occupied by the dump in 'buf'. + // The profile buckets are dumped in the decreasing order + // of currently allocated bytes. + // We do not provision for 0-terminating 'buf'. + int FillOrderedProfile(char buf[], int size) const; + + // Cleanup any old profile files matching prefix + ".*" + kFileExt. + static void CleanupOldProfiles(const char* prefix); + + // Return a snapshot of the current contents of *this. + // Caller must call ReleaseSnapshot() on result when no longer needed. + // The result is only valid while this exists and until + // the snapshot is discarded by calling ReleaseSnapshot(). + class Snapshot; + Snapshot* TakeSnapshot(); + + // Release a previously taken snapshot. 
snapshot must not + // be used after this call. + void ReleaseSnapshot(Snapshot* snapshot); + + // Return a snapshot of every non-live, non-ignored object in *this. + // If "base" is non-NULL, skip any objects present in "base". + // As a side-effect, clears the "live" bit on every live object in *this. + // Caller must call ReleaseSnapshot() on result when no longer needed. + Snapshot* NonLiveSnapshot(Snapshot* base); + + private: + + // data types ---------------------------- + + // Hash table bucket to hold (de)allocation stats + // for a given allocation call stack trace. + struct Bucket : public Stats { + uintptr_t hash; // Hash value of the stack trace + int depth; // Depth of stack trace + const void** stack; // Stack trace + Bucket* next; // Next entry in hash-table + }; + + // Info stored in the address map + struct AllocValue { + // Access to the stack-trace bucket + Bucket* bucket() const { + return reinterpret_cast<Bucket*>(bucket_rep & ~uintptr_t(kMask)); + } + // This also does set_live(false). + void set_bucket(Bucket* b) { bucket_rep = reinterpret_cast<uintptr_t>(b); } + size_t bytes; // Number of bytes in this allocation + + // Access to the allocation liveness flag (for leak checking) + bool live() const { return bucket_rep & kLive; } + void set_live(bool l) { + bucket_rep = (bucket_rep & ~uintptr_t(kLive)) | (l ? kLive : 0); + } + + // Should this allocation be ignored if it looks like a leak? + bool ignore() const { return bucket_rep & kIgnore; } + void set_ignore(bool r) { + bucket_rep = (bucket_rep & ~uintptr_t(kIgnore)) | (r ? kIgnore : 0); + } + + private: + // We store a few bits in the bottom bits of bucket_rep. + // (Alignment is at least four, so we have at least two bits.) + static const int kLive = 1; + static const int kIgnore = 2; + static const int kMask = kLive | kIgnore; + + uintptr_t bucket_rep; + }; + + // helper for FindInsideAlloc + static size_t AllocValueSize(const AllocValue& v) { return v.bytes; } + + typedef AddressMap<AllocValue> AllocationMap; + + // Arguments that need to be passed DumpNonLiveIterator callback below. + struct DumpArgs { + RawFD fd; // file to write to + Stats* profile_stats; // stats to update (may be NULL) + + DumpArgs(RawFD a, Stats* d) + : fd(a), profile_stats(d) { } + }; + + // helpers ---------------------------- + + // Unparse bucket b and print its portion of profile dump into buf. + // We return the amount of space in buf that we use. We start printing + // at buf + buflen, and promise not to go beyond buf + bufsize. + // We do not provision for 0-terminating 'buf'. + // + // If profile_stats is non-NULL, we update *profile_stats by + // counting bucket b. + // + // "extra" is appended to the unparsed bucket. Typically it is empty, + // but may be set to something like " heapprofile" for the total + // bucket to indicate the type of the profile. + static int UnparseBucket(const Bucket& b, + char* buf, int buflen, int bufsize, + const char* extra, + Stats* profile_stats); + + // Get the bucket for the caller stack trace 'key' of depth 'depth' + // creating the bucket if needed. + Bucket* GetBucket(int depth, const void* const key[]); + + // Helper for IterateAllocs to do callback signature conversion + // from AllocationMap::Iterate to AllocIterator. 
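+  //
+  // Purely illustrative (hypothetical client code, not part of this class):
+  // the AllocIterator shape this helper adapts to is what callers of
+  // IterateAllocs() supply, e.g. a callback that sums live bytes:
+  //
+  //   static size_t live_bytes = 0;
+  //   static void SumLiveBytes(const void* ptr, const AllocInfo& info) {
+  //     live_bytes += info.object_size;
+  //   }
+  //   ...
+  //   table.IterateAllocs(SumLiveBytes);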
+ static void MapArgsAllocIterator(const void* ptr, AllocValue* v, + AllocIterator callback) { + AllocInfo info; + info.object_size = v->bytes; + info.call_stack = v->bucket()->stack; + info.stack_depth = v->bucket()->depth; + info.live = v->live(); + info.ignored = v->ignore(); + callback(ptr, info); + } + + // Helper for DumpNonLiveProfile to do object-granularity + // heap profile dumping. It gets passed to AllocationMap::Iterate. + inline static void DumpNonLiveIterator(const void* ptr, AllocValue* v, + const DumpArgs& args); + + // Helper for IterateOrderedAllocContexts and FillOrderedProfile. + // Creates a sorted list of Buckets whose length is num_buckets_. + // The caller is responsible for dellocating the returned list. + Bucket** MakeSortedBucketList() const; + + // Helper for TakeSnapshot. Saves object to snapshot. + static void AddToSnapshot(const void* ptr, AllocValue* v, Snapshot* s); + + // Arguments passed to AddIfNonLive + struct AddNonLiveArgs { + Snapshot* dest; + Snapshot* base; + }; + + // Helper for NonLiveSnapshot. Adds the object to the destination + // snapshot if it is non-live. + static void AddIfNonLive(const void* ptr, AllocValue* v, + AddNonLiveArgs* arg); + + // Write contents of "*allocations" as a heap profile to + // "file_name". "total" must contain the total of all entries in + // "*allocations". + static bool WriteProfile(const char* file_name, + const Bucket& total, + AllocationMap* allocations); + + // data ---------------------------- + + // Memory (de)allocator that we use. + Allocator alloc_; + DeAllocator dealloc_; + + // Overall profile stats; we use only the Stats part, + // but make it a Bucket to pass to UnparseBucket. + Bucket total_; + + // Bucket hash table. + // We hand-craft one instead of using one of the pre-written + // ones because we do not want to use malloc when operating on the table. + // It is only few lines of code, so no big deal. + Bucket** table_; + int num_buckets_; + + // Map of all currently allocated objects we know about. + AllocationMap* allocation_; + + DISALLOW_EVIL_CONSTRUCTORS(HeapProfileTable); +}; + +class HeapProfileTable::Snapshot { + public: + const Stats& total() const { return total_; } + + // Report anything in this snapshot as a leak. + // May use new/delete for temporary storage. + // If should_symbolize is true, will fork (which is not threadsafe) + // to turn addresses into symbol names. Set to false for maximum safety. + // Also writes a heap profile to "filename" that contains + // all of the objects in this snapshot. + void ReportLeaks(const char* checker_name, const char* filename, + bool should_symbolize); + + // Report the addresses of all leaked objects. + // May use new/delete for temporary storage. + void ReportIndividualObjects(); + + bool Empty() const { + return (total_.allocs == 0) && (total_.alloc_size == 0); + } + + private: + friend class HeapProfileTable; + + // Total count/size are stored in a Bucket so we can reuse UnparseBucket + Bucket total_; + + // We share the Buckets managed by the parent table, but have our + // own object->bucket map. + AllocationMap map_; + + Snapshot(Allocator alloc, DeAllocator dealloc) : map_(alloc, dealloc) { + memset(&total_, 0, sizeof(total_)); + } + + // Callback used to populate a Snapshot object with entries found + // in another allocation map. 
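+  // (Populated from the parent table's AddToSnapshot / AddIfNonLive
+  // helpers declared above.)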
+ inline void Add(const void* ptr, const AllocValue& v) { + map_.Insert(ptr, v); + total_.allocs++; + total_.alloc_size += v.bytes; + } + + // Helpers for sorting and generating leak reports + struct Entry; + struct ReportState; + static void ReportCallback(const void* ptr, AllocValue* v, ReportState*); + static void ReportObject(const void* ptr, AllocValue* v, char*); + + DISALLOW_COPY_AND_ASSIGN(Snapshot); +}; + +#endif // BASE_HEAP_PROFILE_TABLE_H_ diff --git a/third_party/tcmalloc/chromium/src/heap-profiler.cc b/third_party/tcmalloc/chromium/src/heap-profiler.cc new file mode 100644 index 0000000..a1c643a9 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/heap-profiler.cc @@ -0,0 +1,621 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// TODO: Log large allocations + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> // for open() +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <errno.h> +#include <assert.h> +#include <sys/types.h> + +#include <algorithm> +#include <string> + +#include <google/heap-profiler.h> + +#include "base/logging.h" +#include "base/basictypes.h" // for PRId64, among other things +#include "base/googleinit.h" +#include "base/commandlineflags.h" +#include "malloc_hook-inl.h" +#include "tcmalloc_guard.h" +#include <google/malloc_hook.h> +#include <google/malloc_extension.h> +#include "base/spinlock.h" +#include "base/low_level_alloc.h" +#include "base/sysinfo.h" // for GetUniquePathFromEnv() +#include "heap-profile-table.h" +#include "memory_region_map.h" + + +#ifndef PATH_MAX +#ifdef MAXPATHLEN +#define PATH_MAX MAXPATHLEN +#else +#define PATH_MAX 4096 // seems conservative for max filename len! 
+#endif
+#endif
+
+using STL_NAMESPACE::string;
+using STL_NAMESPACE::sort;
+
+//----------------------------------------------------------------------
+// Flags that control heap-profiling
+//
+// The thread-safety of the profiler depends on these being immutable
+// after main starts, so don't change them.
+//----------------------------------------------------------------------
+
+DEFINE_int64(heap_profile_allocation_interval,
+             EnvToInt64("HEAP_PROFILE_ALLOCATION_INTERVAL", 1 << 30 /*1GB*/),
+             "If non-zero, dump heap profiling information once every "
+             "specified number of bytes allocated by the program since "
+             "the last dump.");
+DEFINE_int64(heap_profile_deallocation_interval,
+             EnvToInt64("HEAP_PROFILE_DEALLOCATION_INTERVAL", 0),
+             "If non-zero, dump heap profiling information once every "
+             "specified number of bytes deallocated by the program "
+             "since the last dump.");
+// We could also add flags that report whenever inuse_bytes changes by
+// X or -X, but there hasn't been a need for that yet, so we haven't.
+DEFINE_int64(heap_profile_inuse_interval,
+             EnvToInt64("HEAP_PROFILE_INUSE_INTERVAL", 100 << 20 /*100MB*/),
+             "If non-zero, dump heap profiling information whenever "
+             "the high-water memory usage mark increases by the specified "
+             "number of bytes.");
+DEFINE_bool(mmap_log,
+            EnvToBool("HEAP_PROFILE_MMAP_LOG", false),
+            "Should mmap/munmap calls be logged?");
+DEFINE_bool(mmap_profile,
+            EnvToBool("HEAP_PROFILE_MMAP", false),
+            "If heap-profiling is on, also profile mmap, mremap, and sbrk");
+DEFINE_bool(only_mmap_profile,
+            EnvToBool("HEAP_PROFILE_ONLY_MMAP", false),
+            "If heap-profiling is on, only profile mmap, mremap, and sbrk; "
+            "do not profile malloc/new/etc");
+
+
+//----------------------------------------------------------------------
+// Locking
+//----------------------------------------------------------------------
+
+// A pthread_mutex has way too much lock contention to be used here.
+//
+// I would like to use Mutex, but it can call malloc(),
+// which can cause us to fall into an infinite recursion.
+//
+// So we use a simple spinlock.
+static SpinLock heap_lock(SpinLock::LINKER_INITIALIZED);
+
+//----------------------------------------------------------------------
+// Simple allocator for heap profiler's internal memory
+//----------------------------------------------------------------------
+
+static LowLevelAlloc::Arena *heap_profiler_memory;
+
+static void* ProfilerMalloc(size_t bytes) {
+  return LowLevelAlloc::AllocWithArena(bytes, heap_profiler_memory);
+}
+static void ProfilerFree(void* p) {
+  LowLevelAlloc::Free(p);
+}
+
+// We use buffers of this size in DoGetHeapProfile.
+static const int kProfileBufferSize = 1 << 20;
+
+// This is a last-ditch buffer we use in DumpProfileLocked in case we
+// can't allocate more memory from ProfilerMalloc.  We expect this
+// will be used by HeapProfileEndWriter when the application has to
+// exit due to out-of-memory.  This buffer is allocated in
+// HeapProfilerStart.  Access to this must be protected by heap_lock.
+static char* global_profiler_buffer = NULL;
+
+
+//----------------------------------------------------------------------
+// Profiling control/state data
+//----------------------------------------------------------------------
+
+// Access to all of these is protected by heap_lock.
+static bool is_on = false;           // If we are on as a subsystem.
+static bool dumping = false; // Dumping status to prevent recursion +static char* filename_prefix = NULL; // Prefix used for profile file names + // (NULL if no need for dumping yet) +static int dump_count = 0; // How many dumps so far +static int64 last_dump_alloc = 0; // alloc_size when did we last dump +static int64 last_dump_free = 0; // free_size when did we last dump +static int64 high_water_mark = 0; // In-use-bytes at last high-water dump + +static HeapProfileTable* heap_profile = NULL; // the heap profile table + +//---------------------------------------------------------------------- +// Profile generation +//---------------------------------------------------------------------- + +enum AddOrRemove { ADD, REMOVE }; + +// Add or remove all MMap-allocated regions to/from *heap_profile. +// Assumes heap_lock is held. +static void AddRemoveMMapDataLocked(AddOrRemove mode) { + RAW_DCHECK(heap_lock.IsHeld(), ""); + if (!FLAGS_mmap_profile || !is_on) return; + if (!FLAGS_mmap_log) MemoryRegionMap::CheckMallocHooks(); + // MemoryRegionMap maintained all the data we need for all + // mmap-like allocations, so we just use it here: + MemoryRegionMap::LockHolder l; + for (MemoryRegionMap::RegionIterator r = MemoryRegionMap::BeginRegionLocked(); + r != MemoryRegionMap::EndRegionLocked(); ++r) { + if (mode == ADD) { + heap_profile->RecordAllocWithStack( + reinterpret_cast<const void*>(r->start_addr), + r->end_addr - r->start_addr, + r->call_stack_depth, r->call_stack); + } else { + heap_profile->RecordFree(reinterpret_cast<void*>(r->start_addr)); + } + } +} + +// Input must be a buffer of size at least 1MB. +static char* DoGetHeapProfileLocked(char* buf, int buflen) { + // We used to be smarter about estimating the required memory and + // then capping it to 1MB and generating the profile into that. + if (buf == NULL || buflen < 1) + return NULL; + + RAW_DCHECK(heap_lock.IsHeld(), ""); + int bytes_written = 0; + if (is_on) { + HeapProfileTable::Stats const stats = heap_profile->total(); + AddRemoveMMapDataLocked(ADD); + bytes_written = heap_profile->FillOrderedProfile(buf, buflen - 1); + // FillOrderedProfile should not reduce the set of active mmap-ed regions, + // hence MemoryRegionMap will let us remove everything we've added above: + AddRemoveMMapDataLocked(REMOVE); + RAW_DCHECK(stats.Equivalent(heap_profile->total()), ""); + // if this fails, we somehow removed by AddRemoveMMapDataLocked + // more than we have added. + } + buf[bytes_written] = '\0'; + RAW_DCHECK(bytes_written == strlen(buf), ""); + + return buf; +} + +extern "C" char* GetHeapProfile() { + // Use normal malloc: we return the profile to the user to free it: + char* buffer = reinterpret_cast<char*>(malloc(kProfileBufferSize)); + SpinLockHolder l(&heap_lock); + return DoGetHeapProfileLocked(buffer, kProfileBufferSize); +} + +// defined below +static void NewHook(const void* ptr, size_t size); +static void DeleteHook(const void* ptr); + +// Helper for HeapProfilerDump. +static void DumpProfileLocked(const char* reason) { + RAW_DCHECK(heap_lock.IsHeld(), ""); + RAW_DCHECK(is_on, ""); + RAW_DCHECK(!dumping, ""); + + if (filename_prefix == NULL) return; // we do not yet need dumping + + if (FLAGS_only_mmap_profile == false) { + if (MallocHook::GetNewHook() != NewHook || + MallocHook::GetDeleteHook() != DeleteHook) { + RAW_LOG(FATAL, "Had our new/delete MallocHook-s replaced. " + "Are you using another MallocHook client? " + "Do not use --heap_profile=... 
to avoid this conflict."); + } + } + + dumping = true; + + // Make file name + char file_name[1000]; + dump_count++; + snprintf(file_name, sizeof(file_name), "%s.%04d%s", + filename_prefix, dump_count, HeapProfileTable::kFileExt); + + // Dump the profile + RAW_VLOG(0, "Dumping heap profile to %s (%s)", file_name, reason); + // We must use file routines that don't access memory, since we hold + // a memory lock now. + RawFD fd = RawOpenForWriting(file_name); + if (fd == kIllegalRawFD) { + RAW_LOG(ERROR, "Failed dumping heap profile to %s", file_name); + dumping = false; + return; + } + + // This case may be impossible, but it's best to be safe. + // It's safe to use the global buffer: we're protected by heap_lock. + if (global_profiler_buffer == NULL) { + global_profiler_buffer = + reinterpret_cast<char*>(ProfilerMalloc(kProfileBufferSize)); + } + + char* profile = DoGetHeapProfileLocked(global_profiler_buffer, + kProfileBufferSize); + RawWrite(fd, profile, strlen(profile)); + RawClose(fd); + + dumping = false; +} + +//---------------------------------------------------------------------- +// Profile collection +//---------------------------------------------------------------------- + +// Dump a profile after either an allocation or deallocation, if +// the memory use has changed enough since the last dump. +static void MaybeDumpProfileLocked() { + if (!dumping) { + const HeapProfileTable::Stats& total = heap_profile->total(); + const int64 inuse_bytes = total.alloc_size - total.free_size; + bool need_to_dump = false; + char buf[128]; + if (FLAGS_heap_profile_allocation_interval > 0 && + total.alloc_size >= + last_dump_alloc + FLAGS_heap_profile_allocation_interval) { + snprintf(buf, sizeof(buf), ("%"PRId64" MB allocated cumulatively, " + "%"PRId64" MB currently in use"), + total.alloc_size >> 20, inuse_bytes >> 20); + need_to_dump = true; + } else if (FLAGS_heap_profile_deallocation_interval > 0 && + total.free_size >= + last_dump_free + FLAGS_heap_profile_deallocation_interval) { + snprintf(buf, sizeof(buf), ("%"PRId64" MB freed cumulatively, " + "%"PRId64" MB currently in use"), + total.free_size >> 20, inuse_bytes >> 20); + need_to_dump = true; + } else if (FLAGS_heap_profile_inuse_interval > 0 && + inuse_bytes > + high_water_mark + FLAGS_heap_profile_inuse_interval) { + snprintf(buf, sizeof(buf), "%"PRId64" MB currently in use", + inuse_bytes >> 20); + need_to_dump = true; + } + if (need_to_dump) { + DumpProfileLocked(buf); + + last_dump_alloc = total.alloc_size; + last_dump_free = total.free_size; + if (inuse_bytes > high_water_mark) + high_water_mark = inuse_bytes; + } + } +} + +// Record an allocation in the profile. +static void RecordAlloc(const void* ptr, size_t bytes, int skip_count) { + SpinLockHolder l(&heap_lock); + if (is_on) { + heap_profile->RecordAlloc(ptr, bytes, skip_count + 1); + MaybeDumpProfileLocked(); + } +} + +// Record a deallocation in the profile. 
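+// (Like RecordAlloc above, this is only reached from the MallocHook
+// callbacks below while the profiler is on.)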
+static void RecordFree(const void* ptr) { + SpinLockHolder l(&heap_lock); + if (is_on) { + heap_profile->RecordFree(ptr); + MaybeDumpProfileLocked(); + } +} + +//---------------------------------------------------------------------- +// Allocation/deallocation hooks for MallocHook +//---------------------------------------------------------------------- + +// static +void NewHook(const void* ptr, size_t size) { + if (ptr != NULL) RecordAlloc(ptr, size, 0); +} + +// static +void DeleteHook(const void* ptr) { + if (ptr != NULL) RecordFree(ptr); +} + +// TODO(jandrews): Re-enable stack tracing +#ifdef TODO_REENABLE_STACK_TRACING +static void RawInfoStackDumper(const char* message, void*) { + RAW_LOG(INFO, "%.*s", static_cast<int>(strlen(message) - 1), message); + // -1 is to chop the \n which will be added by RAW_LOG +} +#endif + +// Saved MemoryRegionMap's hooks to daisy-chain calls to. +MallocHook::MmapHook saved_mmap_hook = NULL; +MallocHook::MremapHook saved_mremap_hook = NULL; +MallocHook::MunmapHook saved_munmap_hook = NULL; +MallocHook::SbrkHook saved_sbrk_hook = NULL; + +static void MmapHook(const void* result, const void* start, size_t size, + int prot, int flags, int fd, off_t offset) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". + // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, + "mmap(start=0x%"PRIxPTR", len=%"PRIuS", prot=0x%x, flags=0x%x, " + "fd=%d, offset=0x%x) = 0x%"PRIxPTR"", + (uintptr_t) start, size, prot, flags, fd, (unsigned int) offset, + (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } + if (saved_mmap_hook) { + // Call MemoryRegionMap's hook: it will record needed info about the mmap + // for us w/o deadlocks: + (*saved_mmap_hook)(result, start, size, prot, flags, fd, offset); + } +} + +static void MremapHook(const void* result, const void* old_addr, + size_t old_size, size_t new_size, + int flags, const void* new_addr) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". + // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, + "mremap(old_addr=0x%"PRIxPTR", old_size=%"PRIuS", " + "new_size=%"PRIuS", flags=0x%x, new_addr=0x%"PRIxPTR") = " + "0x%"PRIxPTR"", + (uintptr_t) old_addr, old_size, new_size, flags, + (uintptr_t) new_addr, (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } + if (saved_mremap_hook) { // call MemoryRegionMap's hook + (*saved_mremap_hook)(result, old_addr, old_size, new_size, flags, new_addr); + } +} + +static void MunmapHook(const void* ptr, size_t size) { + if (FLAGS_mmap_log) { // log it + // We use PRIxS not just '%p' to avoid deadlocks + // in pretty-printing of NULL as "nil". 
+ // TODO(maxim): instead should use a safe snprintf reimplementation + RAW_LOG(INFO, "munmap(start=0x%"PRIxPTR", len=%"PRIuS")", + (uintptr_t) ptr, size); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } + if (saved_munmap_hook) { // call MemoryRegionMap's hook + (*saved_munmap_hook)(ptr, size); + } +} + +static void SbrkHook(const void* result, ptrdiff_t increment) { + if (FLAGS_mmap_log) { // log it + RAW_LOG(INFO, "sbrk(inc=%"PRIdS") = 0x%"PRIxPTR"", + increment, (uintptr_t) result); +#ifdef TODO_REENABLE_STACK_TRACING + DumpStackTrace(1, RawInfoStackDumper, NULL); +#endif + } + if (saved_sbrk_hook) { // call MemoryRegionMap's hook + (*saved_sbrk_hook)(result, increment); + } +} + +//---------------------------------------------------------------------- +// Starting/stopping/dumping +//---------------------------------------------------------------------- + +extern "C" void HeapProfilerStart(const char* prefix) { + SpinLockHolder l(&heap_lock); + + if (is_on) return; + + is_on = true; + + RAW_VLOG(0, "Starting tracking the heap"); + + // This should be done before the hooks are set up, since it should + // call new, and we want that to be accounted for correctly. + MallocExtension::Initialize(); + + if (FLAGS_only_mmap_profile) { + FLAGS_mmap_profile = true; + } + + if (FLAGS_mmap_profile) { + // Ask MemoryRegionMap to record all mmap, mremap, and sbrk + // call stack traces of at least size kMaxStackDepth: + MemoryRegionMap::Init(HeapProfileTable::kMaxStackDepth); + } + + if (FLAGS_mmap_log) { + // Install our hooks to do the logging + // and maybe save MemoryRegionMap's hooks to call: + saved_mmap_hook = MallocHook::SetMmapHook(MmapHook); + saved_mremap_hook = MallocHook::SetMremapHook(MremapHook); + saved_munmap_hook = MallocHook::SetMunmapHook(MunmapHook); + saved_sbrk_hook = MallocHook::SetSbrkHook(SbrkHook); + } + + heap_profiler_memory = + LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena()); + + // Reserve space now for the heap profiler, so we can still write a + // heap profile even if the application runs out of memory. + global_profiler_buffer = + reinterpret_cast<char*>(ProfilerMalloc(kProfileBufferSize)); + + heap_profile = new(ProfilerMalloc(sizeof(HeapProfileTable))) + HeapProfileTable(ProfilerMalloc, ProfilerFree); + + last_dump_alloc = 0; + last_dump_free = 0; + high_water_mark = 0; + + // We do not reset dump_count so if the user does a sequence of + // HeapProfilerStart/HeapProfileStop, we will get a continuous + // sequence of profiles. + + if (FLAGS_only_mmap_profile == false) { + // Now set the hooks that capture new/delete and malloc/free + // and check that these are the only hooks: + if (MallocHook::SetNewHook(NewHook) != NULL || + MallocHook::SetDeleteHook(DeleteHook) != NULL) { + RAW_LOG(FATAL, "Had other new/delete MallocHook-s set. " + "Are you using the heap leak checker? 
" + "Use --heap_check=\"\" to avoid this conflict."); + } + } + + // Copy filename prefix + RAW_DCHECK(filename_prefix == NULL, ""); + const int prefix_length = strlen(prefix); + filename_prefix = reinterpret_cast<char*>(ProfilerMalloc(prefix_length + 1)); + memcpy(filename_prefix, prefix, prefix_length); + filename_prefix[prefix_length] = '\0'; +} + +extern "C" bool IsHeapProfilerRunning() { + SpinLockHolder l(&heap_lock); + return is_on; +} + +extern "C" void HeapProfilerStop() { + SpinLockHolder l(&heap_lock); + + if (!is_on) return; + + if (FLAGS_only_mmap_profile == false) { + // Unset our new/delete hooks, checking they were the ones set: + if (MallocHook::SetNewHook(NULL) != NewHook || + MallocHook::SetDeleteHook(NULL) != DeleteHook) { + RAW_LOG(FATAL, "Had our new/delete MallocHook-s replaced. " + "Are you using another MallocHook client? " + "Do not use --heap_profile=... to avoid this conflict."); + } + } + if (FLAGS_mmap_log) { + // Restore mmap/sbrk hooks, checking that our hooks were the ones set: + if (MallocHook::SetMmapHook(saved_mmap_hook) != MmapHook || + MallocHook::SetMremapHook(saved_mremap_hook) != MremapHook || + MallocHook::SetMunmapHook(saved_munmap_hook) != MunmapHook || + MallocHook::SetSbrkHook(saved_sbrk_hook) != SbrkHook) { + RAW_LOG(FATAL, "Had our mmap/mremap/munmap/sbrk MallocHook-s replaced. " + "Are you using another MallocHook client? " + "Do not use --heap_profile=... to avoid this conflict."); + } + } + + // free profile + heap_profile->~HeapProfileTable(); + ProfilerFree(heap_profile); + heap_profile = NULL; + + // free output-buffer memory + ProfilerFree(global_profiler_buffer); + + // free prefix + ProfilerFree(filename_prefix); + filename_prefix = NULL; + + if (!LowLevelAlloc::DeleteArena(heap_profiler_memory)) { + RAW_LOG(FATAL, "Memory leak in HeapProfiler:"); + } + + if (FLAGS_mmap_profile) { + MemoryRegionMap::Shutdown(); + } + + is_on = false; +} + +extern "C" void HeapProfilerDump(const char *reason) { + SpinLockHolder l(&heap_lock); + if (is_on && !dumping) { + DumpProfileLocked(reason); + } +} + +//---------------------------------------------------------------------- +// Initialization/finalization code +//---------------------------------------------------------------------- + +// Initialization code +static void HeapProfilerInit() { + // Everything after this point is for setting up the profiler based on envvar + char fname[PATH_MAX]; + if (!GetUniquePathFromEnv("HEAPPROFILE", fname)) { + return; + } + // We do a uid check so we don't write out files in a setuid executable. +#ifdef HAVE_GETEUID + if (getuid() != geteuid()) { + RAW_LOG(WARNING, ("HeapProfiler: ignoring HEAPPROFILE because " + "program seems to be setuid\n")); + return; + } +#endif + + HeapProfileTable::CleanupOldProfiles(fname); + + HeapProfilerStart(fname); +} + +// class used for finalization -- dumps the heap-profile at program exit +struct HeapProfileEndWriter { + ~HeapProfileEndWriter() { HeapProfilerDump("Exiting"); } +}; + +// We want to make sure tcmalloc is up and running before starting the profiler +static const TCMallocGuard tcmalloc_initializer; +REGISTER_MODULE_INITIALIZER(heapprofiler, HeapProfilerInit()); +static HeapProfileEndWriter heap_profile_end_writer; diff --git a/third_party/tcmalloc/chromium/src/internal_logging.cc b/third_party/tcmalloc/chromium/src/internal_logging.cc new file mode 100644 index 0000000..ea8e56f1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/internal_logging.cc @@ -0,0 +1,115 @@ +// Copyright (c) 2005, Google Inc. 
+// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include <stdio.h> +#include <stdarg.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#include <string.h> +#include <google/malloc_extension.h> +#include "internal_logging.h" + +static const int kLogBufSize = 800; + +void TCMalloc_MESSAGE(const char* filename, + int line_number, + const char* format, ...) { + char buf[kLogBufSize]; + const int n = snprintf(buf, sizeof(buf), "%s:%d] ", filename, line_number); + if (n < kLogBufSize) { + va_list ap; + va_start(ap, format); + vsnprintf(buf + n, kLogBufSize - n, format, ap); + va_end(ap); + } + write(STDERR_FILENO, buf, strlen(buf)); +} + +static const int kStatsBufferSize = 16 << 10; +static char stats_buffer[kStatsBufferSize] = { 0 }; + +static void TCMalloc_CRASH_internal(bool dump_stats, + const char* filename, + int line_number, + const char* format, va_list ap) { + char buf[kLogBufSize]; + const int n = snprintf(buf, sizeof(buf), "%s:%d] ", filename, line_number); + if (n < kLogBufSize) { + vsnprintf(buf + n, kLogBufSize - n, format, ap); + } + write(STDERR_FILENO, buf, strlen(buf)); + if (dump_stats) { + MallocExtension::instance()->GetStats(stats_buffer, kStatsBufferSize); + write(STDERR_FILENO, stats_buffer, strlen(stats_buffer)); + } + + abort(); +} + +void TCMalloc_CRASH(bool dump_stats, + const char* filename, + int line_number, + const char* format, ...) { + va_list ap; + va_start(ap, format); + TCMalloc_CRASH_internal(dump_stats, filename, line_number, format, ap); + va_end(ap); +} + + +void TCMalloc_CrashReporter::PrintfAndDie(const char* format, ...) { + va_list ap; + va_start(ap, format); + TCMalloc_CRASH_internal(dump_stats_, file_, line_, format, ap); + va_end(ap); +} + +void TCMalloc_Printer::printf(const char* format, ...) { + if (left_ > 0) { + va_list ap; + va_start(ap, format); + const int r = vsnprintf(buf_, left_, format, ap); + va_end(ap); + if (r < 0) { + // Perhaps an old glibc that returns -1 on truncation? 
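+      // (C99-conforming vsnprintf instead returns the full length that
+      // would have been written; that case is treated as truncation in
+      // the branch below.)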
+ left_ = 0; + } else if (r > left_) { + // Truncation + left_ = 0; + } else { + left_ -= r; + buf_ += r; + } + } +} diff --git a/third_party/tcmalloc/chromium/src/internal_logging.h b/third_party/tcmalloc/chromium/src/internal_logging.h new file mode 100644 index 0000000..0cb9ba2 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/internal_logging.h @@ -0,0 +1,144 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Internal logging and related utility routines. + +#ifndef TCMALLOC_INTERNAL_LOGGING_H_ +#define TCMALLOC_INTERNAL_LOGGING_H_ + +#include <config.h> +#include <stdlib.h> // for abort() +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif + +//------------------------------------------------------------------- +// Utility routines +//------------------------------------------------------------------- + +// Safe debugging routine: we write directly to the stderr file +// descriptor and avoid FILE buffering because that may invoke +// malloc() +extern void TCMalloc_MESSAGE(const char* filename, + int line_number, + const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 3, 4))) +#endif +; + +// Right now, the only non-fatal messages we want to report are when +// an allocation fails (we'll return NULL eventually, but sometimes +// want more prominent notice to help debug). message should be +// a literal string with no %<whatever> format directives. +#ifdef TCMALLOC_WARNINGS +#define MESSAGE(message, num_bytes) \ + TCMalloc_MESSAGE(__FILE__, __LINE__, message " (%"PRIuS" bytes)\n", \ + static_cast<size_t>(num_bytes)) +#else +#define MESSAGE(message, num_bytes) +#endif + +// Dumps the specified message and then calls abort(). If +// "dump_stats" is specified, the first call will also dump the +// tcmalloc stats. +extern void TCMalloc_CRASH(bool dump_stats, + const char* filename, + int line_number, + const char* format, ...) 
+#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 4, 5))) +#endif +; + +// This is a class that makes using the macro easier. With this class, +// CRASH("%d", i) expands to TCMalloc_CrashReporter.PrintfAndDie("%d", i). +class PERFTOOLS_DLL_DECL TCMalloc_CrashReporter { + public: + TCMalloc_CrashReporter(bool dump_stats, const char* file, int line) + : dump_stats_(dump_stats), file_(file), line_(line) { + } + void PrintfAndDie(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) // 2,3 due to "this" +#endif +; + + private: + bool dump_stats_; + const char* file_; + int line_; +}; + +#define CRASH \ + TCMalloc_CrashReporter(false, __FILE__, __LINE__).PrintfAndDie + +#define CRASH_WITH_STATS \ + TCMalloc_CrashReporter(true, __FILE__, __LINE__).PrintfAndDie + +// Like assert(), but executed even in NDEBUG mode +#undef CHECK_CONDITION +#define CHECK_CONDITION(cond) \ +do { \ + if (!(cond)) { \ + CRASH("assertion failed: %s\n", #cond); \ + } \ +} while (0) + +// Our own version of assert() so we can avoid hanging by trying to do +// all kinds of goofy printing while holding the malloc lock. +#ifndef NDEBUG +#define ASSERT(cond) CHECK_CONDITION(cond) +#else +#define ASSERT(cond) ((void) 0) +#endif + +// Print into buffer +class TCMalloc_Printer { + private: + char* buf_; // Where should we write next + int left_; // Space left in buffer (including space for \0) + + public: + // REQUIRES: "length > 0" + TCMalloc_Printer(char* buf, int length) : buf_(buf), left_(length) { + buf[0] = '\0'; + } + + void printf(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) +#endif +; +}; + +#endif // TCMALLOC_INTERNAL_LOGGING_H_ diff --git a/third_party/tcmalloc/chromium/src/linked_list.h b/third_party/tcmalloc/chromium/src/linked_list.h new file mode 100644 index 0000000..638174b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/linked_list.h @@ -0,0 +1,100 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// Some very basic linked list functions for dealing with using void * as +// storage. + +#ifndef TCMALLOC_LINKED_LIST_H_ +#define TCMALLOC_LINKED_LIST_H_ + +namespace tcmalloc { + +inline void *SLL_Next(void *t) { + return *(reinterpret_cast<void**>(t)); +} + +inline void SLL_SetNext(void *t, void *n) { + *(reinterpret_cast<void**>(t)) = n; +} + +inline void SLL_Push(void **list, void *element) { + SLL_SetNext(element, *list); + *list = element; +} + +inline void *SLL_Pop(void **list) { + void *result = *list; + *list = SLL_Next(*list); + return result; +} + +// Remove N elements from a linked list to which head points. head will be +// modified to point to the new head. start and end will point to the first +// and last nodes of the range. Note that end will point to NULL after this +// function is called. +inline void SLL_PopRange(void **head, int N, void **start, void **end) { + if (N == 0) { + *start = NULL; + *end = NULL; + return; + } + + void *tmp = *head; + for (int i = 1; i < N; ++i) { + tmp = SLL_Next(tmp); + } + + *start = *head; + *end = tmp; + *head = SLL_Next(tmp); + // Unlink range from list. + SLL_SetNext(tmp, NULL); +} + +inline void SLL_PushRange(void **head, void *start, void *end) { + if (!start) return; + SLL_SetNext(end, *head); + *head = start; +} + +inline size_t SLL_Size(void *head) { + int count = 0; + while (head) { + count++; + head = SLL_Next(head); + } + return count; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_LINKED_LIST_H_ diff --git a/third_party/tcmalloc/chromium/src/malloc_extension.cc b/third_party/tcmalloc/chromium/src/malloc_extension.cc new file mode 100644 index 0000000..95fd1c1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/malloc_extension.cc @@ -0,0 +1,338 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include <stdio.h> +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include <string> +#include "base/dynamic_annotations.h" +#include "base/sysinfo.h" // for FillProcSelfMaps +#ifndef NO_HEAP_CHECK +#include "google/heap-checker.h" +#endif +#include "google/malloc_extension.h" +#include "maybe_threads.h" + +using STL_NAMESPACE::string; + +static void DumpAddressMap(string* result) { + *result += "\nMAPPED_LIBRARIES:\n"; + // We keep doubling until we get a fit + const size_t old_resultlen = result->size(); + for (int amap_size = 10240; amap_size < 10000000; amap_size *= 2) { + result->resize(old_resultlen + amap_size); + const int bytes_written = + tcmalloc::FillProcSelfMaps(&((*result)[old_resultlen]), amap_size); + if (bytes_written < amap_size - 1) { // we fit! + (*result)[old_resultlen + bytes_written] = '\0'; + result->resize(old_resultlen + bytes_written); + return; + } + } + result->reserve(old_resultlen); // just don't print anything +} + +// Note: this routine is meant to be called before threads are spawned. +void MallocExtension::Initialize() { + static bool initialize_called = false; + + if (initialize_called) return; + initialize_called = true; + +#ifdef __GLIBC__ + // GNU libc++ versions 3.3 and 3.4 obey the environment variables + // GLIBCPP_FORCE_NEW and GLIBCXX_FORCE_NEW respectively. Setting + // one of these variables forces the STL default allocator to call + // new() or delete() for each allocation or deletion. Otherwise + // the STL allocator tries to avoid the high cost of doing + // allocations by pooling memory internally. However, tcmalloc + // does allocations really fast, especially for the types of small + // items one sees in STL, so it's better off just using us. + // TODO: control whether we do this via an environment variable? + setenv("GLIBCPP_FORCE_NEW", "1", false /* no overwrite*/); + setenv("GLIBCXX_FORCE_NEW", "1", false /* no overwrite*/); + + // Now we need to make the setenv 'stick', which it may not do since + // the env is flakey before main() is called. But luckily stl only + // looks at this env var the first time it tries to do an alloc, and + // caches what it finds. So we just cause an stl alloc here. 
+ string dummy("I need to be allocated"); + dummy += "!"; // so the definition of dummy isn't optimized out +#endif /* __GLIBC__ */ +} + +// Default implementation -- does nothing +MallocExtension::~MallocExtension() { } +bool MallocExtension::VerifyAllMemory() { return true; } +bool MallocExtension::VerifyNewMemory(void* p) { return true; } +bool MallocExtension::VerifyArrayNewMemory(void* p) { return true; } +bool MallocExtension::VerifyMallocMemory(void* p) { return true; } + +bool MallocExtension::GetNumericProperty(const char* property, size_t* value) { + return false; +} + +bool MallocExtension::SetNumericProperty(const char* property, size_t value) { + return false; +} + +void MallocExtension::GetStats(char* buffer, int length) { + assert(length > 0); + buffer[0] = '\0'; +} + +bool MallocExtension::MallocMemoryStats(int* blocks, size_t* total, + int histogram[kMallocHistogramSize]) { + *blocks = 0; + *total = 0; + memset(histogram, 0, sizeof(*histogram) * kMallocHistogramSize); + return true; +} + +void** MallocExtension::ReadStackTraces(int* sample_period) { + return NULL; +} + +void** MallocExtension::ReadHeapGrowthStackTraces() { + return NULL; +} + +void MallocExtension::MarkThreadIdle() { + // Default implementation does nothing +} + +void MallocExtension::MarkThreadBusy() { + // Default implementation does nothing +} + +void MallocExtension::ReleaseToSystem(ssize_t num_bytes) { + // Default implementation does nothing +} + +void MallocExtension::ReleaseFreeMemory() { + ReleaseToSystem(LONG_MAX); +} + +void MallocExtension::SetMemoryReleaseRate(double rate) { + // Default implementation does nothing +} + +double MallocExtension::GetMemoryReleaseRate() { + return -1.0; +} + +size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) { + return size; +} + +size_t MallocExtension::GetAllocatedSize(void* p) { + return 0; +} + +// The current malloc extension object. + +static pthread_once_t module_init = PTHREAD_ONCE_INIT; +static MallocExtension* current_instance = NULL; + +static void InitModule() { + current_instance = new MallocExtension; +#ifndef NO_HEAP_CHECK + HeapLeakChecker::IgnoreObject(current_instance); +#endif +} + +MallocExtension* MallocExtension::instance() { + perftools_pthread_once(&module_init, InitModule); + return current_instance; +} + +void MallocExtension::Register(MallocExtension* implementation) { + perftools_pthread_once(&module_init, InitModule); + // When running under valgrind, our custom malloc is replaced with + // valgrind's one and malloc extensions will not work. 
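+  // (In that case the registration below is skipped and instance() keeps
+  // returning the default MallocExtension.)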
+ if (!RunningOnValgrind()) { + current_instance = implementation; + } +} + +// ----------------------------------------------------------------------- +// Heap sampling support +// ----------------------------------------------------------------------- + +namespace { + +// Accessors +uintptr_t Count(void** entry) { + return reinterpret_cast<uintptr_t>(entry[0]); +} +uintptr_t Size(void** entry) { + return reinterpret_cast<uintptr_t>(entry[1]); +} +uintptr_t Depth(void** entry) { + return reinterpret_cast<uintptr_t>(entry[2]); +} +void* PC(void** entry, int i) { + return entry[3+i]; +} + +void PrintCountAndSize(MallocExtensionWriter* writer, + uintptr_t count, uintptr_t size) { + char buf[100]; + snprintf(buf, sizeof(buf), + "%6lld: %8lld [%6lld: %8lld] @", + static_cast<long long>(count), + static_cast<long long>(size), + static_cast<long long>(count), + static_cast<long long>(size)); + writer->append(buf, strlen(buf)); +} + +void PrintHeader(MallocExtensionWriter* writer, + const char* label, void** entries) { + // Compute the total count and total size + uintptr_t total_count = 0; + uintptr_t total_size = 0; + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + total_count += Count(entry); + total_size += Size(entry); + } + + const char* const kTitle = "heap profile: "; + writer->append(kTitle, strlen(kTitle)); + PrintCountAndSize(writer, total_count, total_size); + writer->append(" ", 1); + writer->append(label, strlen(label)); + writer->append("\n", 1); +} + +void PrintStackEntry(MallocExtensionWriter* writer, void** entry) { + PrintCountAndSize(writer, Count(entry), Size(entry)); + + for (int i = 0; i < Depth(entry); i++) { + char buf[32]; + snprintf(buf, sizeof(buf), " %p", PC(entry, i)); + writer->append(buf, strlen(buf)); + } + writer->append("\n", 1); +} + +} + +void MallocExtension::GetHeapSample(MallocExtensionWriter* writer) { + int sample_period = 0; + void** entries = ReadStackTraces(&sample_period); + if (entries == NULL) { + const char* const kErrorMsg = + "This malloc implementation does not support sampling.\n" + "As of 2005/01/26, only tcmalloc supports sampling, and\n" + "you are probably running a binary that does not use\n" + "tcmalloc.\n"; + writer->append(kErrorMsg, strlen(kErrorMsg)); + return; + } + + char label[32]; + sprintf(label, "heap_v2/%d", sample_period); + PrintHeader(writer, label, entries); + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + PrintStackEntry(writer, entry); + } + delete[] entries; + + DumpAddressMap(writer); +} + +void MallocExtension::GetHeapGrowthStacks(MallocExtensionWriter* writer) { + void** entries = ReadHeapGrowthStackTraces(); + if (entries == NULL) { + const char* const kErrorMsg = + "This malloc implementation does not support " + "ReadHeapGrowthStackTraces().\n" + "As of 2005/09/27, only tcmalloc supports this, and you\n" + "are probably running a binary that does not use tcmalloc.\n"; + writer->append(kErrorMsg, strlen(kErrorMsg)); + return; + } + + // Do not canonicalize the stack entries, so that we get a + // time-ordered list of stack traces, which may be useful if the + // client wants to focus on the latest stack traces. 
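+  // (Each entry in the array is a flat record {count, size, depth,
+  // pc_1 ... pc_depth}, terminated by an entry whose count is zero --
+  // see the Count/Size/Depth/PC accessors above.)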
+ PrintHeader(writer, "growth", entries); + for (void** entry = entries; Count(entry) != 0; entry += 3 + Depth(entry)) { + PrintStackEntry(writer, entry); + } + delete[] entries; + + DumpAddressMap(writer); +} + +void MallocExtension::Ranges(void* arg, RangeFunction func) { + // No callbacks by default +} + +// These are C shims that work on the current instance. + +#define C_SHIM(fn, retval, paramlist, arglist) \ + extern "C" PERFTOOLS_DLL_DECL retval MallocExtension_##fn paramlist { \ + return MallocExtension::instance()->fn arglist; \ + } + +C_SHIM(VerifyAllMemory, int, (void), ()); +C_SHIM(VerifyNewMemory, int, (void* p), (p)); +C_SHIM(VerifyArrayNewMemory, int, (void* p), (p)); +C_SHIM(VerifyMallocMemory, int, (void* p), (p)); +C_SHIM(MallocMemoryStats, int, + (int* blocks, size_t* total, int histogram[kMallocHistogramSize]), + (blocks, total, histogram)); + +C_SHIM(GetStats, void, + (char* buffer, int buffer_length), (buffer, buffer_length)); +C_SHIM(GetNumericProperty, int, + (const char* property, size_t* value), (property, value)); +C_SHIM(SetNumericProperty, int, + (const char* property, size_t value), (property, value)); + +C_SHIM(MarkThreadIdle, void, (void), ()); +C_SHIM(MarkThreadBusy, void, (void), ()); +C_SHIM(ReleaseFreeMemory, void, (void), ()); +C_SHIM(ReleaseToSystem, void, (ssize_t num_bytes), (num_bytes)); +C_SHIM(GetEstimatedAllocatedSize, size_t, (size_t size), (size)); +C_SHIM(GetAllocatedSize, size_t, (void* p), (p)); diff --git a/third_party/tcmalloc/chromium/src/malloc_hook-inl.h b/third_party/tcmalloc/chromium/src/malloc_hook-inl.h new file mode 100644 index 0000000..a629691 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/malloc_hook-inl.h @@ -0,0 +1,188 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// This has the implementation details of malloc_hook that are needed +// to use malloc-hook inside the tcmalloc system. It does not hold +// any of the client-facing calls that are used to add new hooks. 
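+//
+// Rough sketch of how an allocator uses the Invoke* helpers defined below
+// (illustrative only; the real call sites live in tcmalloc's allocation
+// paths, and do_malloc here is just a stand-in name):
+//
+//   void* my_malloc(size_t size) {
+//     void* result = do_malloc(size);
+//     MallocHook::InvokeNewHook(result, size);
+//     return result;
+//   }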
+ +#ifndef _MALLOC_HOOK_INL_H_ +#define _MALLOC_HOOK_INL_H_ + +#include <stddef.h> +#include <sys/types.h> +#include "base/atomicops.h" +#include "base/basictypes.h" +#include <google/malloc_hook.h> + +namespace base { namespace internal { + +// A simple atomic pointer class that can be initialized by the linker +// when you define a namespace-scope variable as: +// +// AtomicPtr<Foo*> my_global = { &initial_value }; +// +// This isn't suitable for a general atomic<> class because of the +// public access to data_. +template<typename PtrT> +class AtomicPtr { + public: + COMPILE_ASSERT(sizeof(PtrT) <= sizeof(AtomicWord), + PtrT_should_fit_in_AtomicWord); + + PtrT Get() const { + // Depending on the system, Acquire_Load(AtomicWord*) may have + // been defined to return an AtomicWord, Atomic32, or Atomic64. + // We hide that implementation detail here with an explicit cast. + // This prevents MSVC 2005, at least, from complaining (it has to + // do with __wp64; AtomicWord is __wp64, but Atomic32/64 aren't). + return reinterpret_cast<PtrT>(static_cast<AtomicWord>( + base::subtle::Acquire_Load(&data_))); + } + + // Sets the contained value to new_val and returns the old value, + // atomically, with acquire and release semantics. + PtrT Exchange(PtrT new_val); + + // Not private so that the class is an aggregate and can be + // initialized by the linker. Don't access this directly. + AtomicWord data_; +}; + +// These are initialized in malloc_hook.cc +extern AtomicPtr<MallocHook::NewHook> new_hook_; +extern AtomicPtr<MallocHook::DeleteHook> delete_hook_; +extern AtomicPtr<MallocHook::PreMmapHook> premmap_hook_; +extern AtomicPtr<MallocHook::MmapHook> mmap_hook_; +extern AtomicPtr<MallocHook::MunmapHook> munmap_hook_; +extern AtomicPtr<MallocHook::MremapHook> mremap_hook_; +extern AtomicPtr<MallocHook::PreSbrkHook> presbrk_hook_; +extern AtomicPtr<MallocHook::SbrkHook> sbrk_hook_; + +} } // namespace base::internal + +inline MallocHook::NewHook MallocHook::GetNewHook() { + return base::internal::new_hook_.Get(); +} + +inline void MallocHook::InvokeNewHook(const void* p, size_t s) { + MallocHook::NewHook hook = MallocHook::GetNewHook(); + if (hook != NULL) (*hook)(p, s); +} + +inline MallocHook::DeleteHook MallocHook::GetDeleteHook() { + return base::internal::delete_hook_.Get(); +} + +inline void MallocHook::InvokeDeleteHook(const void* p) { + MallocHook::DeleteHook hook = MallocHook::GetDeleteHook(); + if (hook != NULL) (*hook)(p); +} + +inline MallocHook::PreMmapHook MallocHook::GetPreMmapHook() { + return base::internal::premmap_hook_.Get(); +} + +inline void MallocHook::InvokePreMmapHook(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + MallocHook::PreMmapHook hook = MallocHook::GetPreMmapHook(); + if (hook != NULL) (*hook)(start, size, + protection, flags, + fd, offset); +} + +inline MallocHook::MmapHook MallocHook::GetMmapHook() { + return base::internal::mmap_hook_.Get(); +} + +inline void MallocHook::InvokeMmapHook(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + MallocHook::MmapHook hook = MallocHook::GetMmapHook(); + if (hook != NULL) (*hook)(result, + start, size, + protection, flags, + fd, offset); +} + +inline MallocHook::MunmapHook MallocHook::GetMunmapHook() { + return base::internal::munmap_hook_.Get(); +} + +inline void MallocHook::InvokeMunmapHook(const void* p, size_t size) { + MallocHook::MunmapHook hook = MallocHook::GetMunmapHook(); + if (hook != NULL) (*hook)(p, 
size); +} + +inline MallocHook::MremapHook MallocHook::GetMremapHook() { + return base::internal::mremap_hook_.Get(); +} + +inline void MallocHook::InvokeMremapHook(const void* result, + const void* old_addr, + size_t old_size, + size_t new_size, + int flags, + const void* new_addr) { + MallocHook::MremapHook hook = MallocHook::GetMremapHook(); + if (hook != NULL) (*hook)(result, + old_addr, old_size, + new_size, flags, new_addr); +} + +inline MallocHook::PreSbrkHook MallocHook::GetPreSbrkHook() { + return base::internal::presbrk_hook_.Get(); +} + +inline void MallocHook::InvokePreSbrkHook(ptrdiff_t increment) { + MallocHook::PreSbrkHook hook = MallocHook::GetPreSbrkHook(); + if (hook != NULL && increment != 0) (*hook)(increment); +} + +inline MallocHook::SbrkHook MallocHook::GetSbrkHook() { + return base::internal::sbrk_hook_.Get(); +} + +inline void MallocHook::InvokeSbrkHook(const void* result, + ptrdiff_t increment) { + MallocHook::SbrkHook hook = MallocHook::GetSbrkHook(); + if (hook != NULL && increment != 0) (*hook)(result, increment); +} + +#endif /* _MALLOC_HOOK_INL_H_ */ diff --git a/third_party/tcmalloc/chromium/src/malloc_hook.cc b/third_party/tcmalloc/chromium/src/malloc_hook.cc new file mode 100644 index 0000000..d1ad12a --- /dev/null +++ b/third_party/tcmalloc/chromium/src/malloc_hook.cc @@ -0,0 +1,504 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> + +// Disable the glibc prototype of mremap(), as older versions of the +// system headers define this function with only four arguments, +// whereas newer versions allow an optional fifth argument: +#ifdef HAVE_MMAP +# define mremap glibc_mremap +# include <sys/mman.h> +# undef mremap +#endif + +#include <algorithm> +#include "base/basictypes.h" +#include "base/logging.h" +#include "malloc_hook-inl.h" +#include <google/malloc_hook.h> + +// This #ifdef should almost never be set. 
Set NO_TCMALLOC_SAMPLES if +// you're porting to a system where you really can't get a stacktrace. +#ifdef NO_TCMALLOC_SAMPLES + // We use #define so code compiles even if you #include stacktrace.h somehow. +# define GetStackTrace(stack, depth, skip) (0) +#else +# include <google/stacktrace.h> +#endif + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +using std::copy; + + +// Declarations of three default weak hook functions, that can be overridden by +// linking-in a strong definition (as heap-checker.cc does) +// +// These default hooks let some other library we link in +// to define strong versions of InitialMallocHook_New, InitialMallocHook_MMap, +// InitialMallocHook_PreMMap, InitialMallocHook_PreSbrk, and +// InitialMallocHook_Sbrk to have a chance to hook into the very first +// invocation of an allocation function call, mmap, or sbrk. +// +// These functions are declared here as weak, and defined later, rather than a +// more straightforward simple weak definition, as a workround for an icc +// compiler issue ((Intel reference 290819). This issue causes icc to resolve +// weak symbols too early, at compile rather than link time. By declaring it +// (weak) here, then defining it below after its use, we can avoid the problem. +// +ATTRIBUTE_WEAK +extern void InitialMallocHook_New(const void* ptr, size_t size); + +ATTRIBUTE_WEAK +extern void InitialMallocHook_PreMMap(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + +ATTRIBUTE_WEAK +extern void InitialMallocHook_MMap(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset); + +ATTRIBUTE_WEAK +extern void InitialMallocHook_PreSbrk(ptrdiff_t increment); + +ATTRIBUTE_WEAK +extern void InitialMallocHook_Sbrk(const void* result, ptrdiff_t increment); + +namespace base { namespace internal { +template<typename PtrT> +PtrT AtomicPtr<PtrT>::Exchange(PtrT new_val) { + base::subtle::MemoryBarrier(); // Release semantics. + // Depending on the system, NoBarrier_AtomicExchange(AtomicWord*) + // may have been defined to return an AtomicWord, Atomic32, or + // Atomic64. We hide that implementation detail here with an + // explicit cast. This prevents MSVC 2005, at least, from complaining. + PtrT old_val = reinterpret_cast<PtrT>(static_cast<AtomicWord>( + base::subtle::NoBarrier_AtomicExchange( + &data_, + reinterpret_cast<AtomicWord>(new_val)))); + base::subtle::MemoryBarrier(); // And acquire semantics. 
+ return old_val; +} + +AtomicPtr<MallocHook::NewHook> new_hook_ = { + reinterpret_cast<AtomicWord>(InitialMallocHook_New) }; +AtomicPtr<MallocHook::DeleteHook> delete_hook_ = { 0 }; +AtomicPtr<MallocHook::PreMmapHook> premmap_hook_ = { + reinterpret_cast<AtomicWord>(InitialMallocHook_PreMMap) }; +AtomicPtr<MallocHook::MmapHook> mmap_hook_ = { + reinterpret_cast<AtomicWord>(InitialMallocHook_MMap) }; +AtomicPtr<MallocHook::MunmapHook> munmap_hook_ = { 0 }; +AtomicPtr<MallocHook::MremapHook> mremap_hook_ = { 0 }; +AtomicPtr<MallocHook::PreSbrkHook> presbrk_hook_ = { + reinterpret_cast<AtomicWord>(InitialMallocHook_PreSbrk) }; +AtomicPtr<MallocHook::SbrkHook> sbrk_hook_ = { + reinterpret_cast<AtomicWord>(InitialMallocHook_Sbrk) }; + +} } // namespace base::internal + +using base::internal::new_hook_; +using base::internal::delete_hook_; +using base::internal::premmap_hook_; +using base::internal::mmap_hook_; +using base::internal::munmap_hook_; +using base::internal::mremap_hook_; +using base::internal::presbrk_hook_; +using base::internal::sbrk_hook_; + + +// These are available as C bindings as well as C++, hence their +// definition outside the MallocHook class. +extern "C" +MallocHook_NewHook MallocHook_SetNewHook(MallocHook_NewHook hook) { + return new_hook_.Exchange(hook); +} + +extern "C" +MallocHook_DeleteHook MallocHook_SetDeleteHook(MallocHook_DeleteHook hook) { + return delete_hook_.Exchange(hook); +} + +extern "C" +MallocHook_PreMmapHook MallocHook_SetPreMmapHook(MallocHook_PreMmapHook hook) { + return premmap_hook_.Exchange(hook); +} + +extern "C" +MallocHook_MmapHook MallocHook_SetMmapHook(MallocHook_MmapHook hook) { + return mmap_hook_.Exchange(hook); +} + +extern "C" +MallocHook_MunmapHook MallocHook_SetMunmapHook(MallocHook_MunmapHook hook) { + return munmap_hook_.Exchange(hook); +} + +extern "C" +MallocHook_MremapHook MallocHook_SetMremapHook(MallocHook_MremapHook hook) { + return mremap_hook_.Exchange(hook); +} + +extern "C" +MallocHook_PreSbrkHook MallocHook_SetPreSbrkHook(MallocHook_PreSbrkHook hook) { + return presbrk_hook_.Exchange(hook); +} + +extern "C" +MallocHook_SbrkHook MallocHook_SetSbrkHook(MallocHook_SbrkHook hook) { + return sbrk_hook_.Exchange(hook); +} + + +// The definitions of weak default malloc hooks (New, MMap, and Sbrk) +// that self deinstall on their first call. This is entirely for +// efficiency: the default version of these functions will be called a +// maximum of one time. If these functions were a no-op instead, they'd +// be called every time, costing an extra function call per malloc. +// +// However, this 'delete self' isn't safe in general -- it's possible +// that this function will be called via a daisy chain. That is, +// someone else might do +// old_hook = MallocHook::SetNewHook(&myhook); +// void myhook(void* ptr, size_t size) { +// do_my_stuff(); +// old_hook(ptr, size); // daisy-chain the hooks +// } +// If old_hook is InitialMallocHook_New(), then this is broken code! -- +// after the first run it'll deregister not only InitialMallocHook_New() +// but also myhook. To protect against that, InitialMallocHook_New() +// makes sure it's the 'top-level' hook before doing the deregistration. +// This means the daisy-chain case will be less efficient because the +// hook will be called, and do an if check, for every new. Alas. +// TODO(csilvers): add support for removing a hook from the middle of a chain. 
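As a concrete illustration of the daisy-chaining pattern described in the comment above, here is a minimal, self-contained sketch. CountingNewHook, g_old_new_hook and g_new_calls are illustrative names and not part of this change; only MallocHook::NewHook and MallocHook::SetNewHook/GetNewHook come from <google/malloc_hook.h>.

#include <stddef.h>
#include <google/malloc_hook.h>

static MallocHook::NewHook g_old_new_hook = NULL;  // previously installed hook, if any
static size_t g_new_calls = 0;                     // "do_my_stuff()": count allocations

static void CountingNewHook(const void* ptr, size_t size) {
  ++g_new_calls;
  if (g_old_new_hook != NULL)
    (*g_old_new_hook)(ptr, size);  // daisy-chain to the previous hook
}

void InstallCountingNewHook() {
  // Note: if the previous hook is one of the self-deinstalling defaults above,
  // chaining keeps it installed, so it re-runs its "am I still the top-level
  // hook?" check on every new -- exactly the inefficiency the comment describes.
  g_old_new_hook = MallocHook::SetNewHook(&CountingNewHook);
}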
+ +void InitialMallocHook_New(const void* ptr, size_t size) { + if (MallocHook::GetNewHook() == &InitialMallocHook_New) + MallocHook::SetNewHook(NULL); +} + +void InitialMallocHook_PreMMap(const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + if (MallocHook::GetPreMmapHook() == &InitialMallocHook_PreMMap) + MallocHook::SetPreMmapHook(NULL); +} + +void InitialMallocHook_MMap(const void* result, + const void* start, + size_t size, + int protection, + int flags, + int fd, + off_t offset) { + if (MallocHook::GetMmapHook() == &InitialMallocHook_MMap) + MallocHook::SetMmapHook(NULL); +} + +void InitialMallocHook_PreSbrk(ptrdiff_t increment) { + if (MallocHook::GetPreSbrkHook() == &InitialMallocHook_PreSbrk) + MallocHook::SetPreSbrkHook(NULL); +} + +void InitialMallocHook_Sbrk(const void* result, ptrdiff_t increment) { + if (MallocHook::GetSbrkHook() == &InitialMallocHook_Sbrk) + MallocHook::SetSbrkHook(NULL); +} + +DEFINE_ATTRIBUTE_SECTION_VARS(google_malloc); +DECLARE_ATTRIBUTE_SECTION_VARS(google_malloc); + // actual functions are in debugallocation.cc or tcmalloc.cc +DEFINE_ATTRIBUTE_SECTION_VARS(malloc_hook); +DECLARE_ATTRIBUTE_SECTION_VARS(malloc_hook); + // actual functions are in this file, malloc_hook.cc, and low_level_alloc.cc + +#define ADDR_IN_ATTRIBUTE_SECTION(addr, name) \ + (reinterpret_cast<uintptr_t>(ATTRIBUTE_SECTION_START(name)) <= \ + reinterpret_cast<uintptr_t>(addr) && \ + reinterpret_cast<uintptr_t>(addr) < \ + reinterpret_cast<uintptr_t>(ATTRIBUTE_SECTION_STOP(name))) + +// Return true iff 'caller' is a return address within a function +// that calls one of our hooks via MallocHook:Invoke*. +// A helper for GetCallerStackTrace. +static inline bool InHookCaller(const void* caller) { + return ADDR_IN_ATTRIBUTE_SECTION(caller, google_malloc) || + ADDR_IN_ATTRIBUTE_SECTION(caller, malloc_hook); + // We can use one section for everything except tcmalloc_or_debug + // due to its special linkage mode, which prevents merging of the sections. +} + +#undef ADDR_IN_ATTRIBUTE_SECTION + +static bool checked_sections = false; + +static inline void CheckInHookCaller() { + if (!checked_sections) { + INIT_ATTRIBUTE_SECTION_VARS(google_malloc); + if (ATTRIBUTE_SECTION_START(google_malloc) == + ATTRIBUTE_SECTION_STOP(google_malloc)) { + RAW_LOG(ERROR, "google_malloc section is missing, " + "thus InHookCaller is broken!"); + } + INIT_ATTRIBUTE_SECTION_VARS(malloc_hook); + if (ATTRIBUTE_SECTION_START(malloc_hook) == + ATTRIBUTE_SECTION_STOP(malloc_hook)) { + RAW_LOG(ERROR, "malloc_hook section is missing, " + "thus InHookCaller is broken!"); + } + checked_sections = true; + } +} + +// We can improve behavior/compactness of this function +// if we pass a generic test function (with a generic arg) +// into the implementations for GetStackTrace instead of the skip_count. +extern "C" int MallocHook_GetCallerStackTrace(void** result, int max_depth, + int skip_count) { +#if defined(NO_TCMALLOC_SAMPLES) + return 0; +#elif !defined(HAVE_ATTRIBUTE_SECTION_START) + // Fall back to GetStackTrace and good old but fragile frame skip counts. + // Note: this path is inaccurate when a hook is not called directly by an + // allocation function but is daisy-chained through another hook, + // search for MallocHook::(Get|Set|Invoke)* to find such cases. 
+ return GetStackTrace(result, max_depth, skip_count + int(DEBUG_MODE)); + // due to -foptimize-sibling-calls in opt mode + // there's no need for extra frame skip here then +#else + CheckInHookCaller(); + // MallocHook caller determination via InHookCaller works, use it: + static const int kMaxSkip = 32 + 6 + 3; + // Constant tuned to do just one GetStackTrace call below in practice + // and not get many frames that we don't actually need: + // currently max passsed max_depth is 32, + // max passed/needed skip_count is 6 + // and 3 is to account for some hook daisy chaining. + static const int kStackSize = kMaxSkip + 1; + void* stack[kStackSize]; + int depth = GetStackTrace(stack, kStackSize, 1); // skip this function frame + if (depth == 0) // silenty propagate cases when GetStackTrace does not work + return 0; + for (int i = 0; i < depth; ++i) { // stack[0] is our immediate caller + if (InHookCaller(stack[i])) { + RAW_VLOG(4, "Found hooked allocator at %d: %p <- %p", + i, stack[i], stack[i+1]); + i += 1; // skip hook caller frame + depth -= i; // correct depth + if (depth > max_depth) depth = max_depth; + copy(stack + i, stack + i + depth, result); + if (depth < max_depth && depth + i == kStackSize) { + // get frames for the missing depth + depth += + GetStackTrace(result + depth, max_depth - depth, 1 + kStackSize); + } + return depth; + } + } + RAW_LOG(WARNING, "Hooked allocator frame not found, returning empty trace"); + // If this happens try increasing kMaxSkip + // or else something must be wrong with InHookCaller, + // e.g. for every section used in InHookCaller + // all functions in that section must be inside the same library. + return 0; +#endif +} + +// On Linux/x86, we override mmap/munmap/mremap/sbrk +// and provide support for calling the related hooks. +// +// We define mmap() and mmap64(), which somewhat reimplements libc's mmap +// syscall stubs. Unfortunately libc only exports the stubs via weak symbols +// (which we're overriding with our mmap64() and mmap() wrappers) so we can't +// just call through to them. + + +#if defined(__linux) && \ + (defined(__i386__) || defined(__x86_64__) || defined(__PPC__)) +#include <unistd.h> +#include <syscall.h> +#include <sys/mman.h> +#include <errno.h> +#include "base/linux_syscall_support.h" + +// The x86-32 case and the x86-64 case differ: +// 32b has a mmap2() syscall, 64b does not. +// 64b and 32b have different calling conventions for mmap(). 
+#if defined(__x86_64__) || defined(__PPC64__) + +static inline void* do_mmap64(void *start, size_t length, + int prot, int flags, + int fd, __off64_t offset) __THROW { + return (void *)syscall(SYS_mmap, start, length, prot, flags, fd, offset); +} + +#elif defined(__i386__) || defined(__PPC__) + +static inline void* do_mmap64(void *start, size_t length, + int prot, int flags, + int fd, __off64_t offset) __THROW { + void *result; + + // Try mmap2() unless it's not supported + static bool have_mmap2 = true; + if (have_mmap2) { + static int pagesize = 0; + if (!pagesize) pagesize = getpagesize(); + + // Check that the offset is page aligned + if (offset & (pagesize - 1)) { + result = MAP_FAILED; + errno = EINVAL; + goto out; + } + + result = (void *)syscall(SYS_mmap2, + start, length, prot, flags, fd, offset / pagesize); + if (result != MAP_FAILED || errno != ENOSYS) goto out; + + // We don't have mmap2() after all - don't bother trying it in future + have_mmap2 = false; + } + + if (((off_t)offset) != offset) { + // If we're trying to map a 64-bit offset, fail now since we don't + // have 64-bit mmap() support. + result = MAP_FAILED; + errno = EINVAL; + goto out; + } + + { + // Fall back to old 32-bit offset mmap() call + // Old syscall interface cannot handle six args, so pass in an array + int32 args[6] = { (int32) start, length, prot, flags, fd, (off_t) offset }; + result = (void *)syscall(SYS_mmap, args); + } + out: + return result; +} + +# endif + +// We use do_mmap64 abstraction to put MallocHook::InvokeMmapHook +// calls right into mmap and mmap64, so that the stack frames in the caller's +// stack are at the same offsets for all the calls of memory allocating +// functions. + +// Put all callers of MallocHook::Invoke* in this module into +// malloc_hook section, +// so that MallocHook::GetCallerStackTrace can function accurately: + +// Make sure mmap doesn't get #define'd away by <sys/mman.h> +#undef mmap + +extern "C" { + void* mmap64(void *start, size_t length, int prot, int flags, + int fd, __off64_t offset ) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* mmap(void *start, size_t length,int prot, int flags, + int fd, off_t offset) __THROW + ATTRIBUTE_SECTION(malloc_hook); + int munmap(void* start, size_t length) __THROW + ATTRIBUTE_SECTION(malloc_hook); + void* mremap(void* old_addr, size_t old_size, size_t new_size, + int flags, ...) 
__THROW + ATTRIBUTE_SECTION(malloc_hook); + void* sbrk(ptrdiff_t increment) __THROW + ATTRIBUTE_SECTION(malloc_hook); +} + +extern "C" void* mmap64(void *start, size_t length, int prot, int flags, + int fd, __off64_t offset) __THROW { + MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); + void *result = do_mmap64(start, length, prot, flags, fd, offset); + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +#if !defined(__USE_FILE_OFFSET64) || !defined(__REDIRECT_NTH) + +extern "C" void* mmap(void *start, size_t length, int prot, int flags, + int fd, off_t offset) __THROW { + MallocHook::InvokePreMmapHook(start, length, prot, flags, fd, offset); + void *result = do_mmap64(start, length, prot, flags, fd, + static_cast<size_t>(offset)); // avoid sign extension + MallocHook::InvokeMmapHook(result, start, length, prot, flags, fd, offset); + return result; +} + +#endif + +extern "C" int munmap(void* start, size_t length) __THROW { + MallocHook::InvokeMunmapHook(start, length); + return syscall(SYS_munmap, start, length); +} + +extern "C" void* mremap(void* old_addr, size_t old_size, size_t new_size, + int flags, ...) __THROW { + va_list ap; + va_start(ap, flags); + void *new_address = va_arg(ap, void *); + va_end(ap); + void* result = sys_mremap(old_addr, old_size, new_size, flags, new_address); + MallocHook::InvokeMremapHook(result, old_addr, old_size, new_size, flags, + new_address); + return result; +} + +// libc's version: +extern "C" void* __sbrk(ptrdiff_t increment); + +extern "C" void* sbrk(ptrdiff_t increment) __THROW { + MallocHook::InvokePreSbrkHook(increment); + void *result = __sbrk(increment); + MallocHook::InvokeSbrkHook(result, increment); + return result; +} + +#endif diff --git a/third_party/tcmalloc/chromium/src/maybe_threads.cc b/third_party/tcmalloc/chromium/src/maybe_threads.cc new file mode 100644 index 0000000..6b7d7b9 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/maybe_threads.cc @@ -0,0 +1,113 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Menage <opensource@google.com> +// +// Some wrappers for pthread functions so that we can be LD_PRELOADed +// against non-pthreads apps. +// +// This module will behave very strangely if some pthreads functions +// exist and others don't. + +#include "config.h" +#include <assert.h> +#include <string.h> // for memcmp +// We don't actually need strings. But including this header seems to +// stop the compiler trying to short-circuit our pthreads existence +// tests and claiming that the address of a function is always +// non-zero. I have no idea why ... +#include <string> +#include "maybe_threads.h" +#include "base/basictypes.h" + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +// These are the methods we're going to conditionally include. +extern "C" { + int pthread_key_create (pthread_key_t*, void (*)(void*)) + __THROW ATTRIBUTE_WEAK; + void *pthread_getspecific(pthread_key_t) + __THROW ATTRIBUTE_WEAK; + int pthread_setspecific(pthread_key_t, const void*) + __THROW ATTRIBUTE_WEAK; + int pthread_once(pthread_once_t *, void (*)(void)) + __THROW ATTRIBUTE_WEAK; +} + +#define MAX_PERTHREAD_VALS 16 +static void *perftools_pthread_specific_vals[MAX_PERTHREAD_VALS]; +static int next_key; + +int perftools_pthread_key_create(pthread_key_t *key, + void (*destr_function) (void *)) { + if (pthread_key_create) { + return pthread_key_create(key, destr_function); + } else { + assert(next_key < MAX_PERTHREAD_VALS); + *key = (pthread_key_t)(next_key++); + return 0; + } +} + +void *perftools_pthread_getspecific(pthread_key_t key) { + if (pthread_getspecific) { + return pthread_getspecific(key); + } else { + return perftools_pthread_specific_vals[(int)key]; + } +} + +int perftools_pthread_setspecific(pthread_key_t key, void *val) { + if (pthread_setspecific) { + return pthread_setspecific(key, val); + } else { + perftools_pthread_specific_vals[(int)key] = val; + return 0; + } +} + +static pthread_once_t pthread_once_init = PTHREAD_ONCE_INIT; +int perftools_pthread_once(pthread_once_t *ctl, + void (*init_routine) (void)) { + if (pthread_once) { + return pthread_once(ctl, init_routine); + } else { + if (memcmp(ctl, &pthread_once_init, sizeof(*ctl)) == 0) { + init_routine(); + ++*(char*)(ctl); // make it so it's no longer equal to init + } + return 0; + } +} diff --git a/third_party/tcmalloc/chromium/src/maybe_threads.h b/third_party/tcmalloc/chromium/src/maybe_threads.h new file mode 100644 index 0000000..5f35e00 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/maybe_threads.h @@ -0,0 +1,52 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Menage <opensource@google.com> + +//------------------------------------------------------------------- +// Some wrappers for pthread functions so that we can be LD_PRELOADed +// against non-pthreads apps. +//------------------------------------------------------------------- + +#ifndef GOOGLE_MAYBE_THREADS_H_ +#define GOOGLE_MAYBE_THREADS_H_ + +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif + +int perftools_pthread_key_create(pthread_key_t *key, + void (*destr_function) (void *)); +void *perftools_pthread_getspecific(pthread_key_t key); +int perftools_pthread_setspecific(pthread_key_t key, void *val); +int perftools_pthread_once(pthread_once_t *ctl, + void (*init_routine) (void)); + +#endif /* GOOGLE_MAYBE_THREADS_H_ */ diff --git a/third_party/tcmalloc/chromium/src/memfs_malloc.cc b/third_party/tcmalloc/chromium/src/memfs_malloc.cc new file mode 100644 index 0000000..bef2d3c --- /dev/null +++ b/third_party/tcmalloc/chromium/src/memfs_malloc.cc @@ -0,0 +1,229 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Arun Sharma +// +// A tcmalloc system allocator that uses a memory based filesystem such as +// tmpfs or hugetlbfs +// +// Since these only exist on linux, we only register this allocator there. + +#ifdef __linux + +#include <config.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/vfs.h> // for statfs +#include <string> + +#include "base/basictypes.h" +#include "base/googleinit.h" +#include "base/sysinfo.h" +#include "system-alloc.h" +#include "internal_logging.h" + +using std::string; + +DEFINE_string(memfs_malloc_path, EnvToString("TCMALLOC_MEMFS_MALLOC_PATH", ""), + "Path where hugetlbfs or tmpfs is mounted. The caller is " + "responsible for ensuring that the path is unique and does " + "not conflict with another process"); +DEFINE_int64(memfs_malloc_limit_mb, + EnvToInt("TCMALLOC_MEMFS_LIMIT_MB", 0), + "Limit total allocation size to the " + "specified number of MiB. 0 == no limit."); +DEFINE_bool(memfs_malloc_abort_on_fail, + EnvToBool("TCMALLOC_MEMFS_ABORT_ON_FAIL", false), + "abort() whenever memfs_malloc fails to satisfy an allocation " + "for any reason."); +DEFINE_bool(memfs_malloc_ignore_mmap_fail, + EnvToBool("TCMALLOC_MEMFS_IGNORE_MMAP_FAIL", false), + "Ignore failures from mmap"); + +// Hugetlbfs based allocator for tcmalloc +class HugetlbSysAllocator: public SysAllocator { +public: + HugetlbSysAllocator(int fd, int page_size) + : big_page_size_(page_size), + hugetlb_fd_(fd), + hugetlb_base_(0) { + } + + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + + void DumpStats(TCMalloc_Printer* printer); + +private: + int64 big_page_size_; + int hugetlb_fd_; // file descriptor for hugetlb + off_t hugetlb_base_; +}; + +void HugetlbSysAllocator::DumpStats(TCMalloc_Printer* printer) { + printer->printf("HugetlbSysAllocator: failed_=%d allocated=%"PRId64"\n", + failed_, static_cast<int64_t>(hugetlb_base_)); +} + +// No locking needed here since we assume that tcmalloc calls +// us with an internal lock held (see tcmalloc/system-alloc.cc). +void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { + + // We don't respond to allocation requests smaller than big_page_size_ unless + // the caller is willing to take more than they asked for. + if (actual_size == NULL && size < big_page_size_) { + return NULL; + } + + // Enforce huge page alignment. Be careful to deal with overflow. 
+ if (alignment < big_page_size_) alignment = big_page_size_; + size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + if (aligned_size < size) { + return NULL; + } + size = aligned_size; + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > big_page_size_) { + extra = alignment - big_page_size_; + } + + // Test if this allocation would put us over the limit. + off_t limit = FLAGS_memfs_malloc_limit_mb*1024*1024; + if (limit > 0 && hugetlb_base_ + size + extra > limit) { + // Disable the allocator when there's less than one page left. + if (limit - hugetlb_base_ < big_page_size_) { + TCMalloc_MESSAGE(__FILE__, __LINE__, "reached memfs_malloc_limit_mb\n"); + failed_ = true; + } + else { + TCMalloc_MESSAGE(__FILE__, __LINE__, "alloc size=%"PRIuS + " too large while %"PRId64" bytes remain\n", + size, static_cast<int64_t>(limit - hugetlb_base_)); + } + if (FLAGS_memfs_malloc_abort_on_fail) { + CRASH("memfs_malloc_abort_on_fail is set\n"); + } + return NULL; + } + + // This is not needed for hugetlbfs, but needed for tmpfs. Annoyingly + // hugetlbfs returns EINVAL for ftruncate. + int ret = ftruncate(hugetlb_fd_, hugetlb_base_ + size + extra); + if (ret != 0 && errno != EINVAL) { + TCMalloc_MESSAGE(__FILE__, __LINE__, "ftruncate failed: %s\n", + strerror(errno)); + failed_ = true; + if (FLAGS_memfs_malloc_abort_on_fail) { + CRASH("memfs_malloc_abort_on_fail is set\n"); + } + return NULL; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS). + // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, + MAP_SHARED, hugetlb_fd_, hugetlb_base_); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + if (!FLAGS_memfs_malloc_ignore_mmap_fail) { + TCMalloc_MESSAGE(__FILE__, __LINE__, "mmap failed: %s\n", + strerror(errno)); + failed_ = true; + if (FLAGS_memfs_malloc_abort_on_fail) { + CRASH("memfs_malloc_abort_on_fail is set\n"); + } + } + return NULL; + } + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + + // Adjust the return memory so it is aligned + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + ptr += adjust; + hugetlb_base_ += (size + extra); + + if (actual_size) { + *actual_size = size + extra - adjust; + } + + return reinterpret_cast<void*>(ptr); +} + +static void InitSystemAllocator() { + if (FLAGS_memfs_malloc_path.length()) { + char path[PATH_MAX]; + int rc = snprintf(path, sizeof(path), "%s.XXXXXX", + FLAGS_memfs_malloc_path.c_str()); + if (rc < 0 || rc >= sizeof(path)) { + CRASH("XX fatal: memfs_malloc_path too long\n"); + } + + int hugetlb_fd = mkstemp(path); + if (hugetlb_fd == -1) { + TCMalloc_MESSAGE(__FILE__, __LINE__, + "warning: unable to create memfs_malloc_path %s: %s\n", + path, strerror(errno)); + return; + } + + // Cleanup memory on process exit + if (unlink(path) == -1) { + CRASH("fatal: error unlinking memfs_malloc_path %s: %s\n", + path, strerror(errno)); + } + + // Use fstatfs to figure out the default page size for memfs + struct statfs sfs; + if (fstatfs(hugetlb_fd, &sfs) == -1) { + CRASH("fatal: error fstatfs of memfs_malloc_path: %s\n", + strerror(errno)); + } + int64 page_size = sfs.f_bsize; + + SysAllocator *alloc = new HugetlbSysAllocator(hugetlb_fd, page_size); + // Register ourselves with tcmalloc + RegisterSystemAllocator(alloc, 0); + } +} + +REGISTER_MODULE_INITIALIZER(memfs_malloc, { InitSystemAllocator(); }); + +#endif /* ifdef 
__linux */ diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.cc b/third_party/tcmalloc/chromium/src/memory_region_map.cc new file mode 100644 index 0000000..05fdc06 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/memory_region_map.cc @@ -0,0 +1,662 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Maxim Lifantsev + */ + +// +// Background and key design points of MemoryRegionMap. +// +// MemoryRegionMap is a low-level module with quite atypical requirements that +// result in some degree of non-triviality of the implementation and design. +// +// MemoryRegionMap collects info about *all* memory regions created with +// mmap, munmap, mremap, sbrk. +// They key word above is 'all': all that are happening in a process +// during its lifetime frequently starting even before global object +// constructor execution. +// +// This is needed by the primary client of MemoryRegionMap: +// HeapLeakChecker uses the regions and the associated stack traces +// to figure out what part of the memory is the heap: +// if MemoryRegionMap were to miss some (early) regions, leak checking would +// stop working correctly. +// +// To accomplish the goal of functioning before/during global object +// constructor execution MemoryRegionMap is done as a singleton service +// that relies on own on-demand initialized static constructor-less data, +// and only relies on other low-level modules that can also function properly +// even before global object constructors run. +// +// Accomplishing the goal of collecting data about all mmap, munmap, mremap, +// sbrk occurrences is a more involved: conceptually to do this one needs to +// record some bits of data in particular about any mmap or sbrk call, +// but to do that one needs to allocate memory for that data at some point, +// but all memory allocations in the end themselves come from an mmap +// or sbrk call (that's how the address space of the process grows). 
+// +// Also note that we need to do all the above recording from +// within an mmap/sbrk hook which is sometimes/frequently is made by a memory +// allocator, including the allocator MemoryRegionMap itself must rely on. +// In the case of heap-checker usage this includes even the very first +// mmap/sbrk call happening in the program: heap-checker gets activated due to +// a link-time installed mmap/sbrk hook and it initializes MemoryRegionMap +// and asks it to record info about this very first call right from that +// very first hook invocation. +// +// MemoryRegionMap is doing its memory allocations via LowLevelAlloc: +// unlike more complex standard memory allocator, LowLevelAlloc cooperates with +// MemoryRegionMap by not holding any of its own locks while it calls mmap +// to get memory, thus we are able to call LowLevelAlloc from +// our mmap/sbrk hooks without causing a deadlock in it. +// For the same reason of deadlock prevention the locking in MemoryRegionMap +// itself is write-recursive which is an exception to Google's mutex usage. +// +// We still need to break the infinite cycle of mmap calling our hook, +// which asks LowLevelAlloc for memory to record this mmap, +// which (sometimes) causes mmap, which calls our hook, and so on. +// We do this as follows: on a recursive call of MemoryRegionMap's +// mmap/sbrk/mremap hook we record the data about the allocation in a +// static fixed-sized stack (saved_regions), when the recursion unwinds +// but before returning from the outer hook call we unwind this stack and +// move the data from saved_regions to its permanent place in the RegionSet, +// which can cause more allocations and mmap-s and recursion and unwinding, +// but the whole process ends eventually due to the fact that for the small +// allocations we are doing LowLevelAlloc reuses one mmap call and parcels out +// the memory it created to satisfy several of our allocation requests. +// + +// ========================================================================= // + +#include <config.h> + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#elif !defined(MAP_FAILED) +#define MAP_FAILED -1 // the only thing we need from mman.h +#endif +#ifdef HAVE_PTHREAD +#include <pthread.h> // for pthread_t, pthread_self() +#endif + +#include <algorithm> +#include <set> + +#include "memory_region_map.h" + +#include "base/linux_syscall_support.h" +#include "base/logging.h" +#include "base/low_level_alloc.h" +#include "malloc_hook-inl.h" + +#include <google/stacktrace.h> +#include <google/malloc_hook.h> + +// MREMAP_FIXED is a linux extension. How it's used in this file, +// setting it to 0 is equivalent to saying, "This feature isn't +// supported", which is right. 
+#ifndef MREMAP_FIXED +# define MREMAP_FIXED 0 +#endif + +using std::max; + +// ========================================================================= // + +int MemoryRegionMap::client_count_ = 0; +int MemoryRegionMap::max_stack_depth_ = 0; +MemoryRegionMap::RegionSet* MemoryRegionMap::regions_ = NULL; +LowLevelAlloc::Arena* MemoryRegionMap::arena_ = NULL; +SpinLock MemoryRegionMap::lock_(SpinLock::LINKER_INITIALIZED); +SpinLock MemoryRegionMap::owner_lock_( // ACQUIRED_AFTER(lock_) + SpinLock::LINKER_INITIALIZED); +int MemoryRegionMap::recursion_count_ = 0; // GUARDED_BY(owner_lock_) +pthread_t MemoryRegionMap::lock_owner_tid_; // GUARDED_BY(owner_lock_) + +// ========================================================================= // + +// Simple hook into execution of global object constructors, +// so that we do not call pthread_self() when it does not yet work. +static bool libpthread_initialized = false; +static bool initializer = (libpthread_initialized = true, true); + +static inline bool current_thread_is(pthread_t should_be) { + // Before main() runs, there's only one thread, so we're always that thread + if (!libpthread_initialized) return true; + // this starts working only sometime well into global constructor execution: + return pthread_equal(pthread_self(), should_be); +} + +// ========================================================================= // + +// Constructor-less place-holder to store a RegionSet in. +union MemoryRegionMap::RegionSetRep { + char rep[sizeof(RegionSet)]; + void* align_it; // do not need a better alignment for 'rep' than this + RegionSet* region_set() { return reinterpret_cast<RegionSet*>(rep); } +}; + +// The bytes where MemoryRegionMap::regions_ will point to. +// We use RegionSetRep with noop c-tor so that global construction +// does not interfere. +static MemoryRegionMap::RegionSetRep regions_rep; + +// ========================================================================= // + +// Has InsertRegionLocked been called recursively +// (or rather should we *not* use regions_ to record a hooked mmap). +static bool recursive_insert = false; + +void MemoryRegionMap::Init(int max_stack_depth) { + RAW_VLOG(2, "MemoryRegionMap Init"); + RAW_CHECK(max_stack_depth >= 0, ""); + // Make sure we don't overflow the memory in region stacks: + RAW_CHECK(max_stack_depth <= kMaxStackDepth, + "need to increase kMaxStackDepth?"); + Lock(); + client_count_ += 1; + max_stack_depth_ = max(max_stack_depth_, max_stack_depth); + if (client_count_ > 1) { + // not first client: already did initialization-proper + Unlock(); + RAW_VLOG(2, "MemoryRegionMap Init increment done"); + return; + } + // Set our hooks and make sure no other hooks existed: + if (MallocHook::SetMmapHook(MmapHook) != NULL || + MallocHook::SetMremapHook(MremapHook) != NULL || + MallocHook::SetSbrkHook(SbrkHook) != NULL || + MallocHook::SetMunmapHook(MunmapHook) != NULL) { + RAW_LOG(FATAL, "Had other mmap/mremap/munmap/sbrk MallocHook-s set. " + "Make sure only one of MemoryRegionMap and the other " + "client is active."); + } + // We need to set recursive_insert since the NewArena call itself + // will already do some allocations with mmap which our hooks will catch + // recursive_insert allows us to buffer info about these mmap calls. + // Note that Init() can be (and is) sometimes called + // already from within an mmap/sbrk hook. 
+ recursive_insert = true; + arena_ = LowLevelAlloc::NewArena(0, LowLevelAlloc::DefaultArena()); + recursive_insert = false; + HandleSavedRegionsLocked(&InsertRegionLocked); // flush the buffered ones + // Can't instead use HandleSavedRegionsLocked(&DoInsertRegionLocked) before + // recursive_insert = false; as InsertRegionLocked will also construct + // regions_ on demand for us. + Unlock(); + RAW_VLOG(2, "MemoryRegionMap Init done"); +} + +bool MemoryRegionMap::Shutdown() { + RAW_VLOG(2, "MemoryRegionMap Shutdown"); + Lock(); + RAW_CHECK(client_count_ > 0, ""); + client_count_ -= 1; + if (client_count_ != 0) { // not last client; need not really shutdown + Unlock(); + RAW_VLOG(2, "MemoryRegionMap Shutdown decrement done"); + return true; + } + CheckMallocHooks(); // we assume no other hooks + MallocHook::SetMmapHook(NULL); + MallocHook::SetMremapHook(NULL); + MallocHook::SetSbrkHook(NULL); + MallocHook::SetMunmapHook(NULL); + if (regions_) regions_->~RegionSet(); + regions_ = NULL; + bool deleted_arena = LowLevelAlloc::DeleteArena(arena_); + if (deleted_arena) { + arena_ = 0; + } else { + RAW_LOG(WARNING, "Can't delete LowLevelAlloc arena: it's being used"); + } + Unlock(); + RAW_VLOG(2, "MemoryRegionMap Shutdown done"); + return deleted_arena; +} + +void MemoryRegionMap::CheckMallocHooks() { + if (MallocHook::GetMmapHook() != MmapHook || + MallocHook::GetMunmapHook() != MunmapHook || + MallocHook::GetMremapHook() != MremapHook || + MallocHook::GetSbrkHook() != SbrkHook) { + RAW_LOG(FATAL, "Our mmap/mremap/munmap/sbrk MallocHook-s got changed."); + } +} + +// Invariants (once libpthread_initialized is true): +// * While lock_ is not held, recursion_count_ is 0 (and +// lock_owner_tid_ is the previous owner, but we don't rely on +// that). +// * recursion_count_ and lock_owner_tid_ are only written while +// both lock_ and owner_lock_ are held. They may be read under +// just owner_lock_. +// * At entry and exit of Lock() and Unlock(), the current thread +// owns lock_ iff pthread_equal(lock_owner_tid_, pthread_self()) +// && recursion_count_ > 0. 
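Before the Lock()/Unlock() definitions below, a hypothetical client-side sketch of the locking discipline these invariants imply. DumpRegions is an illustrative name; the sketch assumes the Lock/Unlock and BeginRegionLocked/EndRegionLocked interface declared in memory_region_map.h and that Init() has already run.

#include "memory_region_map.h"
#include "base/logging.h"

static void DumpRegions() {
  // Hold the write-recursive lock across the whole walk so the iterators
  // stay valid even if an mmap/sbrk hook fires on this thread meanwhile.
  MemoryRegionMap::Lock();
  for (MemoryRegionMap::RegionIterator r = MemoryRegionMap::BeginRegionLocked();
       r != MemoryRegionMap::EndRegionLocked(); ++r) {
    RAW_LOG(INFO, "region %p..%p",
            reinterpret_cast<void*>(r->start_addr),
            reinterpret_cast<void*>(r->end_addr));
  }
  MemoryRegionMap::Unlock();
}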
+void MemoryRegionMap::Lock() { + { + SpinLockHolder l(&owner_lock_); + if (recursion_count_ > 0 && current_thread_is(lock_owner_tid_)) { + RAW_CHECK(lock_.IsHeld(), "Invariants violated"); + recursion_count_++; + RAW_CHECK(recursion_count_ <= 5, + "recursive lock nesting unexpectedly deep"); + return; + } + } + lock_.Lock(); + { + SpinLockHolder l(&owner_lock_); + RAW_CHECK(recursion_count_ == 0, + "Last Unlock didn't reset recursion_count_"); + if (libpthread_initialized) + lock_owner_tid_ = pthread_self(); + recursion_count_ = 1; + } +} + +void MemoryRegionMap::Unlock() { + SpinLockHolder l(&owner_lock_); + RAW_CHECK(recursion_count_ > 0, "unlock when not held"); + RAW_CHECK(lock_.IsHeld(), + "unlock when not held, and recursion_count_ is wrong"); + RAW_CHECK(current_thread_is(lock_owner_tid_), "unlock by non-holder"); + recursion_count_--; + if (recursion_count_ == 0) { + lock_.Unlock(); + } +} + +bool MemoryRegionMap::LockIsHeld() { + SpinLockHolder l(&owner_lock_); + return lock_.IsHeld() && current_thread_is(lock_owner_tid_); +} + +const MemoryRegionMap::Region* +MemoryRegionMap::DoFindRegionLocked(uintptr_t addr) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + if (regions_ != NULL) { + Region sample; + sample.SetRegionSetKey(addr); + RegionSet::iterator region = regions_->lower_bound(sample); + if (region != regions_->end()) { + RAW_CHECK(addr <= region->end_addr, ""); + if (region->start_addr <= addr && addr < region->end_addr) { + return &(*region); + } + } + } + return NULL; +} + +bool MemoryRegionMap::FindRegion(uintptr_t addr, Region* result) { + Lock(); + const Region* region = DoFindRegionLocked(addr); + if (region != NULL) *result = *region; // create it as an independent copy + Unlock(); + return region != NULL; +} + +bool MemoryRegionMap::FindAndMarkStackRegion(uintptr_t stack_top, + Region* result) { + Lock(); + const Region* region = DoFindRegionLocked(stack_top); + if (region != NULL) { + RAW_VLOG(2, "Stack at %p is inside region %p..%p", + reinterpret_cast<void*>(stack_top), + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + const_cast<Region*>(region)->set_is_stack(); // now we know + // cast is safe (set_is_stack does not change the set ordering key) + *result = *region; // create *result as an independent copy + } + Unlock(); + return region != NULL; +} + +MemoryRegionMap::RegionIterator MemoryRegionMap::BeginRegionLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_CHECK(regions_ != NULL, ""); + return regions_->begin(); +} + +MemoryRegionMap::RegionIterator MemoryRegionMap::EndRegionLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_CHECK(regions_ != NULL, ""); + return regions_->end(); +} + +inline void MemoryRegionMap::DoInsertRegionLocked(const Region& region) { + RAW_VLOG(4, "Inserting region %p..%p from %p", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr), + reinterpret_cast<void*>(region.caller())); + RegionSet::const_iterator i = regions_->lower_bound(region); + if (i != regions_->end() && i->start_addr <= region.start_addr) { + RAW_DCHECK(region.end_addr <= i->end_addr, ""); // lower_bound ensures this + return; // 'region' is a subset of an already recorded region; do nothing + // We can be stricter and allow this only when *i has been created via + // an mmap with MAP_NORESERVE flag set. 
+ } + if (DEBUG_MODE) { + RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), + "Wow, overlapping memory regions"); + Region sample; + sample.SetRegionSetKey(region.start_addr); + i = regions_->lower_bound(sample); + RAW_CHECK(i == regions_->end() || !region.Overlaps(*i), + "Wow, overlapping memory regions"); + } + region.AssertIsConsistent(); // just making sure + // This inserts and allocates permanent storage for region + // and its call stack data: it's safe to do it now: + regions_->insert(region); + RAW_VLOG(4, "Inserted region %p..%p :", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr)); + if (VLOG_IS_ON(4)) LogAllLocked(); +} + +// These variables are local to MemoryRegionMap::InsertRegionLocked() +// and MemoryRegionMap::HandleSavedRegionsLocked() +// and are file-level to ensure that they are initialized at load time. + +// Number of unprocessed region inserts. +static int saved_regions_count = 0; + +// Unprocessed inserts (must be big enough to hold all allocations that can +// be caused by a InsertRegionLocked call). +// Region has no constructor, so that c-tor execution does not interfere +// with the any-time use of the static memory behind saved_regions. +static MemoryRegionMap::Region saved_regions[20]; + +inline void MemoryRegionMap::HandleSavedRegionsLocked( + void (*insert_func)(const Region& region)) { + while (saved_regions_count > 0) { + // Making a local-var copy of the region argument to insert_func + // including its stack (w/o doing any memory allocations) is important: + // in many cases the memory in saved_regions + // will get written-to during the (*insert_func)(r) call below. + Region r = saved_regions[--saved_regions_count]; + (*insert_func)(r); + } +} + +inline void MemoryRegionMap::InsertRegionLocked(const Region& region) { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + // We can be called recursively, because RegionSet constructor + // and DoInsertRegionLocked() (called below) can call the allocator. + // recursive_insert tells us if that's the case. When this happens, + // region insertion information is recorded in saved_regions[], + // and taken into account when the recursion unwinds. + // Do the insert: + if (recursive_insert) { // recursion: save in saved_regions + RAW_VLOG(4, "Saving recursive insert of region %p..%p from %p", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr), + reinterpret_cast<void*>(region.caller())); + RAW_CHECK(saved_regions_count < arraysize(saved_regions), ""); + // Copy 'region' to saved_regions[saved_regions_count] + // together with the contents of its call_stack, + // then increment saved_regions_count. 
+ saved_regions[saved_regions_count++] = region; + } else { // not a recusrive call + if (regions_ == NULL) { // init regions_ + RAW_VLOG(4, "Initializing region set"); + regions_ = regions_rep.region_set(); + recursive_insert = true; + new(regions_) RegionSet(); + HandleSavedRegionsLocked(&DoInsertRegionLocked); + recursive_insert = false; + } + recursive_insert = true; + // Do the actual insertion work to put new regions into regions_: + DoInsertRegionLocked(region); + HandleSavedRegionsLocked(&DoInsertRegionLocked); + recursive_insert = false; + } +} + +// We strip out different number of stack frames in debug mode +// because less inlining happens in that case +#ifdef NDEBUG +static const int kStripFrames = 1; +#else +static const int kStripFrames = 3; +#endif + +void MemoryRegionMap::RecordRegionAddition(const void* start, size_t size) { + // Record start/end info about this memory acquisition call in a new region: + Region region; + region.Create(start, size); + // First get the call stack info into the local varible 'region': + const int depth = + max_stack_depth_ > 0 + ? MallocHook::GetCallerStackTrace(const_cast<void**>(region.call_stack), + max_stack_depth_, kStripFrames + 1) + : 0; + region.set_call_stack_depth(depth); // record stack info fully + RAW_VLOG(2, "New global region %p..%p from %p", + reinterpret_cast<void*>(region.start_addr), + reinterpret_cast<void*>(region.end_addr), + reinterpret_cast<void*>(region.caller())); + // Note: none of the above allocates memory. + Lock(); // recursively lock + InsertRegionLocked(region); + // This will (eventually) allocate storage for and copy over the stack data + // from region.call_stack_data_ that is pointed by region.call_stack(). + Unlock(); +} + +void MemoryRegionMap::RecordRegionRemoval(const void* start, size_t size) { + Lock(); + if (recursive_insert) { + // First remove the removed region from saved_regions, if it's + // there, to prevent overrunning saved_regions in recursive + // map/unmap call sequences, and also from later inserting regions + // which have already been unmapped. + uintptr_t start_addr = reinterpret_cast<uintptr_t>(start); + uintptr_t end_addr = start_addr + size; + int put_pos = 0; + int old_count = saved_regions_count; + for (int i = 0; i < old_count; ++i, ++put_pos) { + Region& r = saved_regions[i]; + if (r.start_addr == start_addr && r.end_addr == end_addr) { + // An exact match, so it's safe to remove. + --saved_regions_count; + --put_pos; + RAW_VLOG(2, ("Insta-Removing saved region %p..%p; " + "now have %d saved regions"), + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + saved_regions_count); + } else { + if (put_pos < i) { + saved_regions[put_pos] = saved_regions[i]; + } + } + } + } + if (regions_ == NULL) { // We must have just unset the hooks, + // but this thread was already inside the hook. 
+ Unlock(); + return; + } + if (!recursive_insert) { + HandleSavedRegionsLocked(&InsertRegionLocked); + } + // first handle adding saved regions if any + uintptr_t start_addr = reinterpret_cast<uintptr_t>(start); + uintptr_t end_addr = start_addr + size; + // subtract start_addr, end_addr from all the regions + RAW_VLOG(2, "Removing global region %p..%p; have %"PRIuS" regions", + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + regions_->size()); + Region sample; + sample.SetRegionSetKey(start_addr); + // Only iterate over the regions that might overlap start_addr..end_addr: + for (RegionSet::iterator region = regions_->lower_bound(sample); + region != regions_->end() && region->start_addr < end_addr; + /*noop*/) { + RAW_VLOG(5, "Looking at region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + if (start_addr <= region->start_addr && + region->end_addr <= end_addr) { // full deletion + RAW_VLOG(4, "Deleting region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + RegionSet::iterator d = region; + ++region; + regions_->erase(d); + continue; + } else if (region->start_addr < start_addr && + end_addr < region->end_addr) { // cutting-out split + RAW_VLOG(4, "Splitting region %p..%p in two", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + // Make another region for the start portion: + // The new region has to be the start portion because we can't + // just modify region->end_addr as it's the sorting key. + Region r = *region; + r.set_end_addr(start_addr); + InsertRegionLocked(r); + // cut *region from start: + const_cast<Region&>(*region).set_start_addr(end_addr); + } else if (end_addr > region->start_addr && + start_addr <= region->start_addr) { // cut from start + RAW_VLOG(4, "Start-chopping region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + const_cast<Region&>(*region).set_start_addr(end_addr); + } else if (start_addr > region->start_addr && + start_addr < region->end_addr) { // cut from end + RAW_VLOG(4, "End-chopping region %p..%p", + reinterpret_cast<void*>(region->start_addr), + reinterpret_cast<void*>(region->end_addr)); + // Can't just modify region->end_addr (it's the sorting key): + Region r = *region; + r.set_end_addr(start_addr); + RegionSet::iterator d = region; + ++region; + // It's safe to erase before inserting since r is independent of *d: + // r contains an own copy of the call stack: + regions_->erase(d); + InsertRegionLocked(r); + continue; + } + ++region; + } + RAW_VLOG(4, "Removed region %p..%p; have %"PRIuS" regions", + reinterpret_cast<void*>(start_addr), + reinterpret_cast<void*>(end_addr), + regions_->size()); + if (VLOG_IS_ON(4)) LogAllLocked(); + Unlock(); +} + +void MemoryRegionMap::MmapHook(const void* result, + const void* start, size_t size, + int prot, int flags, + int fd, off_t offset) { + // TODO(maxim): replace all 0x%"PRIxS" by %p when RAW_VLOG uses a safe + // snprintf reimplementation that does not malloc to pretty-print NULL + RAW_VLOG(2, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %llu " + "prot %d flags %d fd %d offs %lld", + reinterpret_cast<uintptr_t>(result), size, + reinterpret_cast<uint64>(start), prot, flags, fd, + static_cast<int64>(offset)); + if (result != reinterpret_cast<void*>(MAP_FAILED) && size != 0) { + RecordRegionAddition(result, size); + } +} + +void MemoryRegionMap::MunmapHook(const void* ptr, size_t 
size) { + RAW_VLOG(2, "MUnmap of %p %"PRIuS"", ptr, size); + if (size != 0) { + RecordRegionRemoval(ptr, size); + } +} + +void MemoryRegionMap::MremapHook(const void* result, + const void* old_addr, size_t old_size, + size_t new_size, int flags, + const void* new_addr) { + RAW_VLOG(2, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" " + "to %"PRIuS" flags %d new_addr=0x%"PRIxPTR, + (uintptr_t)result, (uintptr_t)old_addr, + old_size, new_size, flags, + flags & MREMAP_FIXED ? (uintptr_t)new_addr : 0); + if (result != reinterpret_cast<void*>(-1)) { + RecordRegionRemoval(old_addr, old_size); + RecordRegionAddition(result, new_size); + } +} + +extern "C" void* __sbrk(ptrdiff_t increment); // defined in libc + +void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) { + RAW_VLOG(2, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment); + if (result != reinterpret_cast<void*>(-1)) { + if (increment > 0) { + void* new_end = sbrk(0); + RecordRegionAddition(result, reinterpret_cast<uintptr_t>(new_end) - + reinterpret_cast<uintptr_t>(result)); + } else if (increment < 0) { + void* new_end = sbrk(0); + RecordRegionRemoval(new_end, reinterpret_cast<uintptr_t>(result) - + reinterpret_cast<uintptr_t>(new_end)); + } + } +} + +void MemoryRegionMap::LogAllLocked() { + RAW_CHECK(LockIsHeld(), "should be held (by this thread)"); + RAW_LOG(INFO, "List of regions:"); + uintptr_t previous = 0; + for (RegionSet::const_iterator r = regions_->begin(); + r != regions_->end(); ++r) { + RAW_LOG(INFO, "Memory region 0x%"PRIxPTR"..0x%"PRIxPTR" " + "from 0x%"PRIxPTR" stack=%d", + r->start_addr, r->end_addr, r->caller(), r->is_stack); + RAW_CHECK(previous < r->end_addr, "wow, we messed up the set order"); + // this must be caused by uncontrolled recursive operations on regions_ + previous = r->end_addr; + } + RAW_LOG(INFO, "End of regions list"); +} diff --git a/third_party/tcmalloc/chromium/src/memory_region_map.h b/third_party/tcmalloc/chromium/src/memory_region_map.h new file mode 100644 index 0000000..1289764 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/memory_region_map.h @@ -0,0 +1,335 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Maxim Lifantsev + */ + +#ifndef BASE_MEMORY_REGION_MAP_H_ +#define BASE_MEMORY_REGION_MAP_H_ + +#include <config.h> + +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif +#include <set> +#include "base/stl_allocator.h" +#include "base/spinlock.h" +#include "base/thread_annotations.h" +#include "base/low_level_alloc.h" + +// TODO(maxim): add a unittest: +// execute a bunch of mmaps and compare memory map what strace logs +// execute a bunch of mmap/munmup and compare memory map with +// own accounting of what those mmaps generated + +// Thread-safe class to collect and query the map of all memory regions +// in a process that have been created with mmap, munmap, mremap, sbrk. +// For each memory region, we keep track of (and provide to users) +// the stack trace that allocated that memory region. +// The recorded stack trace depth is bounded by +// a user-supplied max_stack_depth parameter of Init(). +// After initialization with Init() +// (which can happened even before global object constructor execution) +// we collect the map by installing and monitoring MallocHook-s +// to mmap, munmap, mremap, sbrk. +// At any time one can query this map via provided interface. +// For more details on the design of MemoryRegionMap +// see the comment at the top of our .cc file. +class MemoryRegionMap { + private: + // Max call stack recording depth supported by Init(). Set it to be + // high enough for all our clients. Note: we do not define storage + // for this (doing that requires special handling in windows), so + // don't take the address of it! + static const int kMaxStackDepth = 32; + + public: + // interface ================================================================ + + // Every client of MemoryRegionMap must call Init() before first use, + // and Shutdown() after last use. This allows us to reference count + // this (singleton) class properly. MemoryRegionMap assumes it's the + // only client of MallocHooks, so a client can only register other + // MallocHooks after calling Init() and must unregister them before + // calling Shutdown(). + + // Initialize this module to record memory allocation stack traces. + // Stack traces that have more than "max_stack_depth" frames + // are automatically shrunk to "max_stack_depth" when they are recorded. + // Init() can be called more than once w/o harm, largest max_stack_depth + // will be the effective one. + // It will install mmap, munmap, mremap, sbrk hooks + // and initialize arena_ and our hook and locks, hence one can use + // MemoryRegionMap::Lock()/Unlock() to manage the locks. + // Uses Lock/Unlock inside. + static void Init(int max_stack_depth); + + // Try to shutdown this module undoing what Init() did. + // Returns true iff could do full shutdown (or it was not attempted). + // Full shutdown is attempted when the number of Shutdown() calls equals + // the number of Init() calls. + static bool Shutdown(); + + // Check that our hooks are still in place and crash if not. 
+ // No need for locking. + static void CheckMallocHooks(); + + // Locks to protect our internal data structures. + // These also protect use of arena_ if our Init() has been done. + // The lock is recursive. + static void Lock() EXCLUSIVE_LOCK_FUNCTION(lock_); + static void Unlock() UNLOCK_FUNCTION(lock_); + + // Returns true when the lock is held by this thread (for use in RAW_CHECK-s). + static bool LockIsHeld(); + + // Locker object that acquires the MemoryRegionMap::Lock + // for the duration of its lifetime (a C++ scope). + class LockHolder { + public: + LockHolder() { Lock(); } + ~LockHolder() { Unlock(); } + private: + DISALLOW_EVIL_CONSTRUCTORS(LockHolder); + }; + + // A memory region that we know about through malloc_hook-s. + // This is essentially an interface through which MemoryRegionMap + // exports the collected data to its clients. Thread-compatible. + struct Region { + uintptr_t start_addr; // region start address + uintptr_t end_addr; // region end address + int call_stack_depth; // number of caller stack frames that we saved + const void* call_stack[kMaxStackDepth]; // caller address stack array + // filled to call_stack_depth size + bool is_stack; // does this region contain a thread's stack: + // a user of MemoryRegionMap supplies this info + + // Convenience accessor for call_stack[0], + // i.e. (the program counter of) the immediate caller + // of this region's allocation function, + // but it also returns NULL when call_stack_depth is 0, + // i.e whe we weren't able to get the call stack. + // This usually happens in recursive calls, when the stack-unwinder + // calls mmap() which in turn calls the stack-unwinder. + uintptr_t caller() const { + return reinterpret_cast<uintptr_t>(call_stack_depth >= 1 + ? call_stack[0] : NULL); + } + + // Return true iff this region overlaps region x. + bool Overlaps(const Region& x) const { + return start_addr < x.end_addr && end_addr > x.start_addr; + } + + private: // helpers for MemoryRegionMap + friend class MemoryRegionMap; + + // The ways we create Region-s: + void Create(const void* start, size_t size) { + start_addr = reinterpret_cast<uintptr_t>(start); + end_addr = start_addr + size; + is_stack = false; // not a stack till marked such + call_stack_depth = 0; + AssertIsConsistent(); + } + void set_call_stack_depth(int depth) { + RAW_DCHECK(call_stack_depth == 0, ""); // only one such set is allowed + call_stack_depth = depth; + AssertIsConsistent(); + } + + // The ways we modify Region-s: + void set_is_stack() { is_stack = true; } + void set_start_addr(uintptr_t addr) { + start_addr = addr; + AssertIsConsistent(); + } + void set_end_addr(uintptr_t addr) { + end_addr = addr; + AssertIsConsistent(); + } + + // Verifies that *this contains consistent data, crashes if not the case. + void AssertIsConsistent() const { + RAW_DCHECK(start_addr < end_addr, ""); + RAW_DCHECK(call_stack_depth >= 0 && + call_stack_depth <= kMaxStackDepth, ""); + } + + // Post-default construction helper to make a Region suitable + // for searching in RegionSet regions_. + void SetRegionSetKey(uintptr_t addr) { + // make sure *this has no usable data: + if (DEBUG_MODE) memset(this, 0xFF, sizeof(*this)); + end_addr = addr; + } + + // Note: call_stack[kMaxStackDepth] as a member lets us make Region + // a simple self-contained struct with correctly behaving bit-vise copying. 
+ // This simplifies the code of this module but wastes some memory: + // in most-often use case of this module (leak checking) + // only one call_stack element out of kMaxStackDepth is actually needed. + // Making the storage for call_stack variable-sized, + // substantially complicates memory management for the Region-s: + // as they need to be created and manipulated for some time + // w/o any memory allocations, yet are also given out to the users. + }; + + // Find the region that covers addr and write its data into *result if found, + // in which case *result gets filled so that it stays fully functional + // even when the underlying region gets removed from MemoryRegionMap. + // Returns success. Uses Lock/Unlock inside. + static bool FindRegion(uintptr_t addr, Region* result); + + // Find the region that contains stack_top, mark that region as + // a stack region, and write its data into *result if found, + // in which case *result gets filled so that it stays fully functional + // even when the underlying region gets removed from MemoryRegionMap. + // Returns success. Uses Lock/Unlock inside. + static bool FindAndMarkStackRegion(uintptr_t stack_top, Region* result); + + private: // our internal types ============================================== + + // Region comparator for sorting with STL + struct RegionCmp { + bool operator()(const Region& x, const Region& y) const { + return x.end_addr < y.end_addr; + } + }; + + // We allocate STL objects in our own arena. + struct MyAllocator { + static void *Allocate(size_t n) { + return LowLevelAlloc::AllocWithArena(n, arena_); + } + static void Free(const void *p) { + LowLevelAlloc::Free(const_cast<void*>(p)); + } + }; + + // Set of the memory regions + typedef std::set<Region, RegionCmp, + STL_Allocator<Region, MyAllocator> > RegionSet; + + public: // more in-depth interface ========================================== + + // STL iterator with values of Region + typedef RegionSet::const_iterator RegionIterator; + + // Return the begin/end iterators to all the regions. + // These need Lock/Unlock protection around their whole usage (loop). + // Even when the same thread causes modifications during such a loop + // (which are permitted due to recursive locking) + // the loop iterator will still be valid as long as its region + // has not been deleted, but EndRegionLocked should be + // re-evaluated whenever the set of regions has changed. + static RegionIterator BeginRegionLocked(); + static RegionIterator EndRegionLocked(); + + // Effectively private type from our .cc ================================= + // public to let us declare global objects: + union RegionSetRep; + + private: + + // representation =========================================================== + + // Counter of clients of this module that have called Init(). + static int client_count_; + + // Maximal number of caller stack frames to save (>= 0). + static int max_stack_depth_; + + // Arena used for our allocations in regions_. + static LowLevelAlloc::Arena* arena_; + + // Set of the mmap/sbrk/mremap-ed memory regions + // To be accessed *only* when Lock() is held. + // Hence we protect the non-recursive lock used inside of arena_ + // with our recursive Lock(). This lets a user prevent deadlocks + // when threads are stopped by ListAllProcessThreads at random spots + // simply by acquiring our recursive Lock() before that. + static RegionSet* regions_; + + // Lock to protect regions_ variable and the data behind. 
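A minimal sketch of the client-side pattern the interface comments above describe, shown for illustration only (it is not part of this change; the helper name DumpRegions and the stack depth of 10 are made up):

    #include "memory_region_map.h"

    static void DumpRegions() {
      MemoryRegionMap::LockHolder holder;  // hold Lock() around the whole loop
      for (MemoryRegionMap::RegionIterator r = MemoryRegionMap::BeginRegionLocked();
           r != MemoryRegionMap::EndRegionLocked();
           ++r) {
        // r->start_addr, r->end_addr and r->caller() may be read safely here.
      }
    }

    // Early in the program:  MemoryRegionMap::Init(10);   // record up to 10 frames
    // ... calls to DumpRegions() ...
    // At teardown:           MemoryRegionMap::Shutdown();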
+ static SpinLock lock_; + // Lock to protect the recursive lock itself. + static SpinLock owner_lock_; + + // Recursion count for the recursive lock. + static int recursion_count_; + // The thread id of the thread that's inside the recursive lock. + static pthread_t lock_owner_tid_; + + // helpers ================================================================== + + // Helper for FindRegion and FindAndMarkStackRegion: + // returns the region covering 'addr' or NULL; assumes our lock_ is held. + static const Region* DoFindRegionLocked(uintptr_t addr); + + // Verifying wrapper around regions_->insert(region) + // To be called to do InsertRegionLocked's work only! + inline static void DoInsertRegionLocked(const Region& region); + // Handle regions saved by InsertRegionLocked into a tmp static array + // by calling insert_func on them. + inline static void HandleSavedRegionsLocked( + void (*insert_func)(const Region& region)); + // Wrapper around DoInsertRegionLocked + // that handles the case of recursive allocator calls. + inline static void InsertRegionLocked(const Region& region); + + // Record addition of a memory region at address "start" of size "size" + // (called from our mmap/mremap/sbrk hooks). + static void RecordRegionAddition(const void* start, size_t size); + // Record deletion of a memory region at address "start" of size "size" + // (called from our munmap/mremap/sbrk hooks). + static void RecordRegionRemoval(const void* start, size_t size); + + // Hooks for MallocHook + static void MmapHook(const void* result, + const void* start, size_t size, + int prot, int flags, + int fd, off_t offset); + static void MunmapHook(const void* ptr, size_t size); + static void MremapHook(const void* result, const void* old_addr, + size_t old_size, size_t new_size, int flags, + const void* new_addr); + static void SbrkHook(const void* result, ptrdiff_t increment); + + // Log all memory regions; Useful for debugging only. + // Assumes Lock() is held + static void LogAllLocked(); + + DISALLOW_EVIL_CONSTRUCTORS(MemoryRegionMap); +}; + +#endif // BASE_MEMORY_REGION_MAP_H_ diff --git a/third_party/tcmalloc/chromium/src/packed-cache-inl.h b/third_party/tcmalloc/chromium/src/packed-cache-inl.h new file mode 100644 index 0000000..77d6313 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/packed-cache-inl.h @@ -0,0 +1,228 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Geoff Pike +// +// This file provides a minimal cache that can hold a <key, value> pair +// with little if any wasted space. The types of the key and value +// must be unsigned integral types or at least have unsigned semantics +// for >>, casting, and similar operations. +// +// Synchronization is not provided. However, the cache is implemented +// as an array of cache entries whose type is chosen at compile time. +// If a[i] is atomic on your hardware for the chosen array type then +// raciness will not necessarily lead to bugginess. The cache entries +// must be large enough to hold a partial key and a value packed +// together. The partial keys are bit strings of length +// kKeybits - kHashbits, and the values are bit strings of length kValuebits. +// +// In an effort to use minimal space, every cache entry represents +// some <key, value> pair; the class provides no way to mark a cache +// entry as empty or uninitialized. In practice, you may want to have +// reserved keys or values to get around this limitation. For example, in +// tcmalloc's PageID-to-sizeclass cache, a value of 0 is used as +// "unknown sizeclass." +// +// Usage Considerations +// -------------------- +// +// kHashbits controls the size of the cache. The best value for +// kHashbits will of course depend on the application. Perhaps try +// tuning the value of kHashbits by measuring different values on your +// favorite benchmark. Also remember not to be a pig; other +// programs that need resources may suffer if you are. +// +// The main uses for this class will be when performance is +// critical and there's a convenient type to hold the cache's +// entries. As described above, the number of bits required +// for a cache entry is (kKeybits - kHashbits) + kValuebits. Suppose +// kKeybits + kValuebits is 43. Then it probably makes sense to +// chose kHashbits >= 11 so that cache entries fit in a uint32. +// +// On the other hand, suppose kKeybits = kValuebits = 64. Then +// using this class may be less worthwhile. You'll probably +// be using 128 bits for each entry anyway, so maybe just pick +// a hash function, H, and use an array indexed by H(key): +// void Put(K key, V value) { a_[H(key)] = pair<K, V>(key, value); } +// V GetOrDefault(K key, V default) { const pair<K, V> &p = a_[H(key)]; ... } +// etc. +// +// Further Details +// --------------- +// +// For caches used only by one thread, the following is true: +// 1. For a cache c, +// (c.Put(key, value), c.GetOrDefault(key, 0)) == value +// and +// (c.Put(key, value), <...>, c.GetOrDefault(key, 0)) == value +// if the elided code contains no c.Put calls. +// +// 2. Has(key) will return false if no <key, value> pair with that key +// has ever been Put. However, a newly initialized cache will have +// some <key, value> pairs already present. When you create a new +// cache, you must specify an "initial value." 
The initialization +// procedure is equivalent to Clear(initial_value), which is +// equivalent to Put(k, initial_value) for all keys k from 0 to +// 2^kHashbits - 1. +// +// 3. If key and key' differ then the only way Put(key, value) may +// cause Has(key') to change is that Has(key') may change from true to +// false. Furthermore, a Put() call that doesn't change Has(key') +// doesn't change GetOrDefault(key', ...) either. +// +// Implementation details: +// +// This is a direct-mapped cache with 2^kHashbits entries; the hash +// function simply takes the low bits of the key. We store whole keys +// if a whole key plus a whole value fits in an entry. Otherwise, an +// entry is the high bits of a key and a value, packed together. +// E.g., a 20 bit key and a 7 bit value only require a uint16 for each +// entry if kHashbits >= 11. +// +// Alternatives to this scheme will be added as needed. + +#ifndef TCMALLOC_PACKED_CACHE_INL_H_ +#define TCMALLOC_PACKED_CACHE_INL_H_ + +#include "base/basictypes.h" // for COMPILE_ASSERT +#include "internal_logging.h" + +// A safe way of doing "(1 << n) - 1" -- without worrying about overflow +// Note this will all be resolved to a constant expression at compile-time +#define N_ONES_(IntType, N) \ + ( (N) == 0 ? 0 : ((static_cast<IntType>(1) << ((N)-1))-1 + \ + (static_cast<IntType>(1) << ((N)-1))) ) + +// The types K and V provide upper bounds on the number of valid keys +// and values, but we explicitly require the keys to be less than +// 2^kKeybits and the values to be less than 2^kValuebits. The size of +// the table is controlled by kHashbits, and the type of each entry in +// the cache is T. See also the big comment at the top of the file. +template <int kKeybits, typename T> +class PackedCache { + public: + typedef uintptr_t K; + typedef size_t V; + static const int kHashbits = 12; + static const int kValuebits = 7; + static const bool kUseWholeKeys = kKeybits + kValuebits <= 8 * sizeof(T); + + explicit PackedCache(V initial_value) { + COMPILE_ASSERT(kKeybits <= sizeof(K) * 8, key_size); + COMPILE_ASSERT(kValuebits <= sizeof(V) * 8, value_size); + COMPILE_ASSERT(kHashbits <= kKeybits, hash_function); + COMPILE_ASSERT(kKeybits - kHashbits + kValuebits <= kTbits, + entry_size_must_be_big_enough); + Clear(initial_value); + } + + void Put(K key, V value) { + ASSERT(key == (key & kKeyMask)); + ASSERT(value == (value & kValueMask)); + array_[Hash(key)] = KeyToUpper(key) | value; + } + + bool Has(K key) const { + ASSERT(key == (key & kKeyMask)); + return KeyMatch(array_[Hash(key)], key); + } + + V GetOrDefault(K key, V default_value) const { + // As with other code in this class, we touch array_ as few times + // as we can. Assuming entries are read atomically (e.g., their + // type is uintptr_t on most hardware) then certain races are + // harmless. + ASSERT(key == (key & kKeyMask)); + T entry = array_[Hash(key)]; + return KeyMatch(entry, key) ? EntryToValue(entry) : default_value; + } + + void Clear(V value) { + ASSERT(value == (value & kValueMask)); + for (int i = 0; i < 1 << kHashbits; i++) { + ASSERT(kUseWholeKeys || KeyToUpper(i) == 0); + array_[i] = kUseWholeKeys ? (value | KeyToUpper(i)) : value; + } + } + + private: + // We are going to pack a value and the upper part of a key (or a + // whole key) into an entry of type T. The UPPER type is for the + // upper part of a key, after the key has been masked and shifted + // for inclusion in an entry. 
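To make the packing concrete, here is the arithmetic for the example configuration the comments above mention (kKeybits = 20 with the default kHashbits = 12 and kValuebits = 7, and a uint16 entry type); this only restates the constants and helpers defined in this class:

    // kUseWholeKeys = (20 + 7 <= 16) = false, so entries hold partial keys.
    // kUpperbits            = 20 - 12 = 8    key bits kept per entry
    // bits needed per entry = 8 + 7   = 15   (fits in a uint16 entry)
    // KeyToUpper(k) = (k >> (12 - 7)) & kUpperMask   // key bits 12..19 -> entry bits 7..14
    // Put():  array_[Hash(key)] = KeyToUpper(key) | value   // value in entry bits 0..6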
+ typedef T UPPER; + + static V EntryToValue(T t) { return t & kValueMask; } + + // If we have space for a whole key, we just shift it left. + // Otherwise kHashbits determines where in a K to find the upper + // part of the key, and kValuebits determines where in the entry to + // put it. + static UPPER KeyToUpper(K k) { + if (kUseWholeKeys) { + return static_cast<T>(k) << kValuebits; + } else { + const int shift = kHashbits - kValuebits; + // Assume kHashbits >= kValuebits. It'd be easy to lift this assumption. + return static_cast<T>(k >> shift) & kUpperMask; + } + } + + static size_t Hash(K key) { + return static_cast<size_t>(key) & N_ONES_(size_t, kHashbits); + } + + // Does the entry match the relevant part of the given key? + static bool KeyMatch(T entry, K key) { + return kUseWholeKeys ? + (entry >> kValuebits == key) : + ((KeyToUpper(key) ^ entry) & kUpperMask) == 0; + } + + static const int kTbits = 8 * sizeof(T); + static const int kUpperbits = kUseWholeKeys ? kKeybits : kKeybits - kHashbits; + + // For masking a K. + static const K kKeyMask = N_ONES_(K, kKeybits); + + // For masking a T. + static const T kUpperMask = N_ONES_(T, kUpperbits) << kValuebits; + + // For masking a V or a T. + static const V kValueMask = N_ONES_(V, kValuebits); + + // array_ is the cache. Its elements are volatile because any + // thread can write any array element at any time. + volatile T array_[1 << kHashbits]; +}; + +#undef N_ONES_ + +#endif // TCMALLOC_PACKED_CACHE_INL_H_ diff --git a/third_party/tcmalloc/chromium/src/page_heap.cc b/third_party/tcmalloc/chromium/src/page_heap.cc new file mode 100644 index 0000000..1e63cb9 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/page_heap.cc @@ -0,0 +1,519 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
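Before the page_heap.cc sources below, a minimal usage sketch of the PackedCache declared in packed-cache-inl.h above (illustrative only; 20 key bits is what a 32-bit address space with 4K pages needs for page IDs, and 0 plays the "no information" role described in that header):

    #include "packed-cache-inl.h"

    // 20-bit keys, 16-bit entries (default kHashbits = 12, kValuebits = 7).
    static PackedCache<20, uint16_t> cache(0);   // every slot starts out holding 0

    void Example() {
      cache.Put(0x12345, 42);                       // key < 2^20, value < 2^7
      size_t cl = cache.GetOrDefault(0x12345, 0);   // 42 until the slot is overwritten
      if (!cache.Has(0x54321)) {
        // miss: fall back to an authoritative (slower) lookup
      }
    }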
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include "page_heap.h" + +#include "static_vars.h" +#include "system-alloc.h" + +DEFINE_double(tcmalloc_release_rate, + EnvToDouble("TCMALLOC_RELEASE_RATE", 1.0), + "Rate at which we release unused memory to the system. " + "Zero means we never release memory back to the system. " + "Increase this flag to return memory faster; decrease it " + "to return memory slower. Reasonable rates are in the " + "range [0,10]"); + +namespace tcmalloc { + +PageHeap::PageHeap() + : pagemap_(MetaDataAlloc), + pagemap_cache_(0), + scavenge_counter_(0), + // Start scavenging at kMaxPages list + release_index_(kMaxPages) { + COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); + DLL_Init(&large_.normal); + DLL_Init(&large_.returned); + for (int i = 0; i < kMaxPages; i++) { + DLL_Init(&free_[i].normal); + DLL_Init(&free_[i].returned); + } +} + +Span* PageHeap::New(Length n) { + ASSERT(Check()); + ASSERT(n > 0); + + // Find first size >= n that has a non-empty list + for (Length s = n; s < kMaxPages; s++) { + Span* ll = &free_[s].normal; + // If we're lucky, ll is non-empty, meaning it has a suitable span. + if (!DLL_IsEmpty(ll)) { + ASSERT(ll->next->location == Span::ON_NORMAL_FREELIST); + return Carve(ll->next, n); + } + // Alternatively, maybe there's a usable returned span. + ll = &free_[s].returned; + if (!DLL_IsEmpty(ll)) { + ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST); + return Carve(ll->next, n); + } + // Still no luck, so keep looking in larger classes. + } + + Span* result = AllocLarge(n); + if (result != NULL) return result; + + // Grow the heap and try again + if (!GrowHeap(n)) { + ASSERT(Check()); + return NULL; + } + + return AllocLarge(n); +} + +Span* PageHeap::AllocLarge(Length n) { + // find the best span (closest to n in size). + // The following loops implements address-ordered best-fit. + Span *best = NULL; + + // Search through normal list + for (Span* span = large_.normal.next; + span != &large_.normal; + span = span->next) { + if (span->length >= n) { + if ((best == NULL) + || (span->length < best->length) + || ((span->length == best->length) && (span->start < best->start))) { + best = span; + ASSERT(best->location == Span::ON_NORMAL_FREELIST); + } + } + } + + // Search through released list in case it has a better fit + for (Span* span = large_.returned.next; + span != &large_.returned; + span = span->next) { + if (span->length >= n) { + if ((best == NULL) + || (span->length < best->length) + || ((span->length == best->length) && (span->start < best->start))) { + best = span; + ASSERT(best->location == Span::ON_RETURNED_FREELIST); + } + } + } + + return best == NULL ? 
NULL : Carve(best, n); +} + +Span* PageHeap::Split(Span* span, Length n) { + ASSERT(0 < n); + ASSERT(n < span->length); + ASSERT(span->location == Span::IN_USE); + ASSERT(span->sizeclass == 0); + Event(span, 'T', n); + + const int extra = span->length - n; + Span* leftover = NewSpan(span->start + n, extra); + ASSERT(leftover->location == Span::IN_USE); + Event(leftover, 'U', extra); + RecordSpan(leftover); + pagemap_.set(span->start + n - 1, span); // Update map from pageid to span + span->length = n; + + return leftover; +} + +Span* PageHeap::Carve(Span* span, Length n) { + ASSERT(n > 0); + ASSERT(span->location != Span::IN_USE); + const int old_location = span->location; + RemoveFromFreeList(span); + span->location = Span::IN_USE; + Event(span, 'A', n); + + const int extra = span->length - n; + ASSERT(extra >= 0); + if (extra > 0) { + Span* leftover = NewSpan(span->start + n, extra); + leftover->location = old_location; + Event(leftover, 'S', extra); + RecordSpan(leftover); + PrependToFreeList(leftover); // Skip coalescing - no candidates possible + span->length = n; + pagemap_.set(span->start + n - 1, span); + } + ASSERT(Check()); + return span; +} + +void PageHeap::Delete(Span* span) { + ASSERT(Check()); + ASSERT(span->location == Span::IN_USE); + ASSERT(span->length > 0); + ASSERT(GetDescriptor(span->start) == span); + ASSERT(GetDescriptor(span->start + span->length - 1) == span); + const Length n = span->length; + span->sizeclass = 0; + span->sample = 0; + span->location = Span::ON_NORMAL_FREELIST; + Event(span, 'D', span->length); + MergeIntoFreeList(span); // Coalesces if possible + IncrementalScavenge(n); + ASSERT(Check()); +} + +void PageHeap::MergeIntoFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + + // Coalesce -- we guarantee that "p" != 0, so no bounds checking + // necessary. We do not bother resetting the stale pagemap + // entries for the pieces we are merging together because we only + // care about the pagemap entries for the boundaries. + // + // Note that only similar spans are merged together. For example, + // we do not coalesce "returned" spans with "normal" spans. + const PageID p = span->start; + const Length n = span->length; + Span* prev = GetDescriptor(p-1); + if (prev != NULL && prev->location == span->location) { + // Merge preceding span into this span + ASSERT(prev->start + prev->length == p); + const Length len = prev->length; + RemoveFromFreeList(prev); + DeleteSpan(prev); + span->start -= len; + span->length += len; + pagemap_.set(span->start, span); + Event(span, 'L', len); + } + Span* next = GetDescriptor(p+n); + if (next != NULL && next->location == span->location) { + // Merge next span into this span + ASSERT(next->start == p+n); + const Length len = next->length; + RemoveFromFreeList(next); + DeleteSpan(next); + span->length += len; + pagemap_.set(span->start + span->length - 1, span); + Event(span, 'R', len); + } + + PrependToFreeList(span); +} + +void PageHeap::PrependToFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + SpanList* list = (span->length < kMaxPages) ? 
&free_[span->length] : &large_; + if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes += (span->length << kPageShift); + DLL_Prepend(&list->normal, span); + } else { + stats_.unmapped_bytes += (span->length << kPageShift); + DLL_Prepend(&list->returned, span); + } +} + +void PageHeap::RemoveFromFreeList(Span* span) { + ASSERT(span->location != Span::IN_USE); + if (span->location == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes -= (span->length << kPageShift); + } else { + stats_.unmapped_bytes -= (span->length << kPageShift); + } + DLL_Remove(span); +} + +void PageHeap::IncrementalScavenge(Length n) { + // Fast path; not yet time to release memory + scavenge_counter_ -= n; + if (scavenge_counter_ >= 0) return; // Not yet time to scavenge + + const double rate = FLAGS_tcmalloc_release_rate; + if (rate <= 1e-6) { + // Tiny release rate means that releasing is disabled. + scavenge_counter_ = kDefaultReleaseDelay; + return; + } + + Length released_pages = ReleaseAtLeastNPages(1); + + if (released_pages == 0) { + // Nothing to scavenge, delay for a while. + scavenge_counter_ = kDefaultReleaseDelay; + } else { + // Compute how long to wait until we return memory. + // FLAGS_tcmalloc_release_rate==1 means wait for 1000 pages + // after releasing one page. + const double mult = 1000.0 / rate; + double wait = mult * static_cast<double>(released_pages); + if (wait > kMaxReleaseDelay) { + // Avoid overflow and bound to reasonable range. + wait = kMaxReleaseDelay; + } + scavenge_counter_ = static_cast<int64_t>(wait); + } +} + +Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) { + Span* s = slist->normal.prev; + ASSERT(s->location == Span::ON_NORMAL_FREELIST); + RemoveFromFreeList(s); + const Length n = s->length; + TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), + static_cast<size_t>(s->length << kPageShift)); + s->location = Span::ON_RETURNED_FREELIST; + MergeIntoFreeList(s); // Coalesces if possible. + return n; +} + +Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { + Length released_pages = 0; + Length prev_released_pages = -1; + + // Round robin through the lists of free spans, releasing the last + // span in each list. Stop after releasing at least num_pages. + while (released_pages < num_pages) { + if (released_pages == prev_released_pages) { + // Last iteration of while loop made no progress. + break; + } + prev_released_pages = released_pages; + + for (int i = 0; i < kMaxPages+1 && released_pages < num_pages; + i++, release_index_++) { + if (release_index_ > kMaxPages) release_index_ = 0; + SpanList* slist = (release_index_ == kMaxPages) ? 
+ &large_ : &free_[release_index_]; + if (!DLL_IsEmpty(&slist->normal)) { + Length released_len = ReleaseLastNormalSpan(slist); + released_pages += released_len; + } + } + } + return released_pages; +} + +void PageHeap::RegisterSizeClass(Span* span, size_t sc) { + // Associate span object with all interior pages as well + ASSERT(span->location == Span::IN_USE); + ASSERT(GetDescriptor(span->start) == span); + ASSERT(GetDescriptor(span->start+span->length-1) == span); + Event(span, 'C', sc); + span->sizeclass = sc; + for (Length i = 1; i < span->length-1; i++) { + pagemap_.set(span->start+i, span); + } +} + +static double MB(uint64_t bytes) { + return bytes / 1048576.0; +} + +static double PagesToMB(uint64_t pages) { + return (pages << kPageShift) / 1048576.0; +} + +void PageHeap::Dump(TCMalloc_Printer* out) { + int nonempty_sizes = 0; + for (int s = 0; s < kMaxPages; s++) { + if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) { + nonempty_sizes++; + } + } + out->printf("------------------------------------------------\n"); + out->printf("PageHeap: %d sizes; %6.1f MB free; %6.1f MB unmapped\n", + nonempty_sizes, MB(stats_.free_bytes), MB(stats_.unmapped_bytes)); + out->printf("------------------------------------------------\n"); + uint64_t total_normal = 0; + uint64_t total_returned = 0; + for (int s = 0; s < kMaxPages; s++) { + const int n_length = DLL_Length(&free_[s].normal); + const int r_length = DLL_Length(&free_[s].returned); + if (n_length + r_length > 0) { + uint64_t n_pages = s * n_length; + uint64_t r_pages = s * r_length; + total_normal += n_pages; + total_returned += r_pages; + out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum" + "; unmapped: %6.1f MB; %6.1f MB cum\n", + s, + (n_length + r_length), + PagesToMB(n_pages + r_pages), + PagesToMB(total_normal + total_returned), + PagesToMB(r_pages), + PagesToMB(total_returned)); + } + } + + uint64_t n_pages = 0; + uint64_t r_pages = 0; + int n_spans = 0; + int r_spans = 0; + out->printf("Normal large spans:\n"); + for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { + out->printf(" [ %6" PRIuPTR " pages ] %6.1f MB\n", + s->length, PagesToMB(s->length)); + n_pages += s->length; + n_spans++; + } + out->printf("Unmapped large spans:\n"); + for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { + out->printf(" [ %6" PRIuPTR " pages ] %6.1f MB\n", + s->length, PagesToMB(s->length)); + r_pages += s->length; + r_spans++; + } + total_normal += n_pages; + total_returned += r_pages; + out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum" + "; unmapped: %6.1f MB; %6.1f MB cum\n", + (n_spans + r_spans), + PagesToMB(n_pages + r_pages), + PagesToMB(total_normal + total_returned), + PagesToMB(r_pages), + PagesToMB(total_returned)); +} + +bool PageHeap::GetNextRange(PageID start, base::MallocRange* r) { + Span* span = reinterpret_cast<Span*>(pagemap_.Next(start)); + if (span == NULL) { + return false; + } + r->address = span->start << kPageShift; + r->length = span->length << kPageShift; + r->fraction = 0; + switch (span->location) { + case Span::IN_USE: + r->type = base::MallocRange::INUSE; + r->fraction = 1; + if (span->sizeclass > 0) { + // Only some of the objects in this span may be in use. 
+ const size_t osize = Static::sizemap()->class_to_size(span->sizeclass); + r->fraction = (1.0 * osize * span->refcount) / r->length; + } + break; + case Span::ON_NORMAL_FREELIST: + r->type = base::MallocRange::FREE; + break; + case Span::ON_RETURNED_FREELIST: + r->type = base::MallocRange::UNMAPPED; + break; + default: + r->type = base::MallocRange::UNKNOWN; + break; + } + return true; +} + +static void RecordGrowth(size_t growth) { + StackTrace* t = Static::stacktrace_allocator()->New(); + t->depth = GetStackTrace(t->stack, kMaxStackDepth-1, 3); + t->size = growth; + t->stack[kMaxStackDepth-1] = reinterpret_cast<void*>(Static::growth_stacks()); + Static::set_growth_stacks(t); +} + +bool PageHeap::GrowHeap(Length n) { + ASSERT(kMaxPages >= kMinSystemAlloc); + if (n > kMaxValidPages) return false; + Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc); + size_t actual_size; + void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); + if (ptr == NULL) { + if (n < ask) { + // Try growing just "n" pages + ask = n; + ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); + } + if (ptr == NULL) return false; + } + ask = actual_size >> kPageShift; + RecordGrowth(ask << kPageShift); + + uint64_t old_system_bytes = stats_.system_bytes; + stats_.system_bytes += (ask << kPageShift); + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + ASSERT(p > 0); + + // If we have already a lot of pages allocated, just pre allocate a bunch of + // memory for the page map. This prevents fragmentation by pagemap metadata + // when a program keeps allocating and freeing large blocks. + + if (old_system_bytes < kPageMapBigAllocationThreshold + && stats_.system_bytes >= kPageMapBigAllocationThreshold) { + pagemap_.PreallocateMoreMemory(); + } + + // Make sure pagemap_ has entries for all of the new pages. + // Plus ensure one before and one after so coalescing code + // does not need bounds-checking. + if (pagemap_.Ensure(p-1, ask+2)) { + // Pretend the new area is allocated and then Delete() it to cause + // any necessary coalescing to occur. 
+ Span* span = NewSpan(p, ask); + RecordSpan(span); + Delete(span); + ASSERT(Check()); + return true; + } else { + // We could not allocate memory within "pagemap_" + // TODO: Once we can return memory to the system, return the new span + return false; + } +} + +bool PageHeap::Check() { + ASSERT(free_[0].normal.next == &free_[0].normal); + ASSERT(free_[0].returned.next == &free_[0].returned); + return true; +} + +bool PageHeap::CheckExpensive() { + bool result = Check(); + CheckList(&large_.normal, kMaxPages, 1000000000, Span::ON_NORMAL_FREELIST); + CheckList(&large_.returned, kMaxPages, 1000000000, Span::ON_RETURNED_FREELIST); + for (Length s = 1; s < kMaxPages; s++) { + CheckList(&free_[s].normal, s, s, Span::ON_NORMAL_FREELIST); + CheckList(&free_[s].returned, s, s, Span::ON_RETURNED_FREELIST); + } + return result; +} + +bool PageHeap::CheckList(Span* list, Length min_pages, Length max_pages, + int freelist) { + for (Span* s = list->next; s != list; s = s->next) { + CHECK_CONDITION(s->location == freelist); // NORMAL or RETURNED + CHECK_CONDITION(s->length >= min_pages); + CHECK_CONDITION(s->length <= max_pages); + CHECK_CONDITION(GetDescriptor(s->start) == s); + CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); + } + return true; +} + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/page_heap.h b/third_party/tcmalloc/chromium/src/page_heap.h new file mode 100644 index 0000000..5ab0d04 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/page_heap.h @@ -0,0 +1,259 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_PAGE_HEAP_H_ +#define TCMALLOC_PAGE_HEAP_H_ + +#include <config.h> +#include <google/malloc_extension.h> +#include "common.h" +#include "packed-cache-inl.h" +#include "pagemap.h" +#include "span.h" + +// This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if +// you're porting to a system where you really can't get a stacktrace. 
+#ifdef NO_TCMALLOC_SAMPLES + // We use #define so code compiles even if you #include stacktrace.h somehow. +# define GetStackTrace(stack, depth, skip) (0) +#else +# include <google/stacktrace.h> +#endif + +namespace tcmalloc { + +// ------------------------------------------------------------------------- +// Map from page-id to per-page data +// ------------------------------------------------------------------------- + +// We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines. +// We also use a simple one-level cache for hot PageID-to-sizeclass mappings, +// because sometimes the sizeclass is all the information we need. + +// Selector class -- general selector uses 3-level map +template <int BITS> class MapSelector { + public: + typedef TCMalloc_PageMap3<BITS-kPageShift> Type; + typedef PackedCache<BITS-kPageShift, uint64_t> CacheType; +}; + +// A two-level map for 32-bit machines +template <> class MapSelector<32> { + public: + typedef TCMalloc_PageMap2<32-kPageShift> Type; + typedef PackedCache<32-kPageShift, uint16_t> CacheType; +}; + +// ------------------------------------------------------------------------- +// Page-level allocator +// * Eager coalescing +// +// Heap for page-level allocation. We allow allocating and freeing a +// contiguous runs of pages (called a "span"). +// ------------------------------------------------------------------------- + +class PageHeap { + public: + PageHeap(); + + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + Span* New(Length n); + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. + void Delete(Span* span); + + // Mark an allocated span as being used for small objects of the + // specified size-class. + // REQUIRES: span was returned by an earlier call to New() + // and has not yet been deleted. + void RegisterSizeClass(Span* span, size_t sc); + + // Split an allocated span into two spans: one of length "n" pages + // followed by another span of length "span->length - n" pages. + // Modifies "*span" to point to the first span of length "n" pages. + // Returns a pointer to the second span. + // + // REQUIRES: "0 < n < span->length" + // REQUIRES: span->location == IN_USE + // REQUIRES: span->sizeclass == 0 + Span* Split(Span* span, Length n); + + // Return the descriptor for the specified page. Returns NULL if + // this PageID was not allocated previously. + inline Span* GetDescriptor(PageID p) const { + return reinterpret_cast<Span*>(pagemap_.get(p)); + } + + // Dump state to stderr + void Dump(TCMalloc_Printer* out); + + // If this page heap is managing a range with starting page # >= start, + // store info about the range in *r and return true. Else return false. + bool GetNextRange(PageID start, base::MallocRange* r); + + // Page heap statistics + struct Stats { + Stats() : system_bytes(0), free_bytes(0), unmapped_bytes(0) {} + uint64_t system_bytes; // Total bytes allocated from system + uint64_t free_bytes; // Total bytes on normal freelists + uint64_t unmapped_bytes; // Total bytes on returned freelists + }; + inline Stats stats() const { return stats_; } + + bool Check(); + // Like Check() but does some more comprehensive checking. + bool CheckExpensive(); + bool CheckList(Span* list, Length min_pages, Length max_pages, + int freelist); // ON_NORMAL_FREELIST or ON_RETURNED_FREELIST + + // Try to release at least num_pages for reuse by the OS. 
Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages); + + // Return 0 if we have no information, or else the correct sizeclass for p. + // Reads and writes to pagemap_cache_ do not require locking. + // The entries are 64 bits on 64-bit hardware and 16 bits on + // 32-bit hardware, and we don't mind raciness as long as each read of + // an entry yields a valid entry, not a partially updated entry. + size_t GetSizeClassIfCached(PageID p) const { + return pagemap_cache_.GetOrDefault(p, 0); + } + void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } + + private: + // Allocates a big block of memory for the pagemap once we reach more than + // 128MB + static const size_t kPageMapBigAllocationThreshold = 128 << 20; + + // Minimum number of pages to fetch from system at a time. Must be + // significantly bigger than kBlockSize to amortize system-call + // overhead, and also to reduce external fragementation. Also, we + // should keep this value big because various incarnations of Linux + // have small limits on the number of mmap() regions per + // address-space. + static const int kMinSystemAlloc = 1 << (20 - kPageShift); + + // For all span-lengths < kMaxPages we keep an exact-size list. + // REQUIRED: kMaxPages >= kMinSystemAlloc; + static const size_t kMaxPages = kMinSystemAlloc; + + // Never delay scavenging for more than the following number of + // deallocated pages. With 4K pages, this comes to 4GB of + // deallocation. + static const int kMaxReleaseDelay = 1 << 20; + + // If there is nothing to release, wait for so many pages before + // scavenging again. With 4K pages, this comes to 1GB of memory. + static const int kDefaultReleaseDelay = 1 << 18; + + // Pick the appropriate map and cache types based on pointer size + typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; + typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; + PageMap pagemap_; + mutable PageMapCache pagemap_cache_; + + // We segregate spans of a given size into two circular linked + // lists: one for normal spans, and one for spans whose memory + // has been returned to the system. + struct SpanList { + Span normal; + Span returned; + }; + + // List of free spans of length >= kMaxPages + SpanList large_; + + // Array mapping from span length to a doubly linked list of free spans + SpanList free_[kMaxPages]; + + // Statistics on system, free, and unmapped bytes + Stats stats_; + + bool GrowHeap(Length n); + + // REQUIRES: span->length >= n + // REQUIRES: span->location != IN_USE + // Remove span from its free list, and move any leftover part of + // span into appropriate free lists. Also update "span" to have + // length exactly "n" and mark it as non-free so it can be returned + // to the client. After all that, decrease free_pages_ by n and + // return span. + Span* Carve(Span* span, Length n); + + void RecordSpan(Span* span) { + pagemap_.set(span->start, span); + if (span->length > 1) { + pagemap_.set(span->start + span->length - 1, span); + } + } + + // Allocate a large span of length == n. If successful, returns a + // span of exactly the specified length. Else, returns NULL. 
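Returning to GetSizeClassIfCached()/CacheSizeClass() above, a sketch of the lock-free fast path they are meant to enable (illustrative only; the free function SizeClassOf is not part of this change and assumes ptr is known to come from this heap):

    size_t SizeClassOf(const PageHeap* heap, const void* ptr) {
      const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
      size_t cl = heap->GetSizeClassIfCached(p);   // racy, but always a whole entry
      if (cl == 0) {                               // 0 == no cached information
        Span* span = heap->GetDescriptor(p);       // authoritative lookup
        cl = span->sizeclass;                      // still 0 for page-level allocations
        if (cl != 0) heap->CacheSizeClass(p, cl);
      }
      return cl;
    }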
+ Span* AllocLarge(Length n); + + // Coalesce span with neighboring spans if possible, prepend to + // appropriate free list, and adjust stats. + void MergeIntoFreeList(Span* span); + + // Prepends span to appropriate free list, and adjusts stats. + void PrependToFreeList(Span* span); + + // Removes span from its free list, and adjust stats. + void RemoveFromFreeList(Span* span); + + // Incrementally release some memory to the system. + // IncrementalScavenge(n) is called whenever n pages are freed. + void IncrementalScavenge(Length n); + + // Release the last span on the normal portion of this list. + // Return the length of that span. + Length ReleaseLastNormalSpan(SpanList* slist); + + + // Number of pages to deallocate before doing more scavenging + int64_t scavenge_counter_; + + // Index of last free list where we released memory to the OS. + int release_index_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_PAGE_HEAP_H_ diff --git a/third_party/tcmalloc/chromium/src/page_heap_allocator.h b/third_party/tcmalloc/chromium/src/page_heap_allocator.h new file mode 100644 index 0000000..20e1ab1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/page_heap_allocator.h @@ -0,0 +1,110 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ +#define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ + +namespace tcmalloc { + +// Simple allocator for objects of a specified type. External locking +// is required before accessing one of these objects. +template <class T> +class PageHeapAllocator { + public: + // We use an explicit Init function because these variables are statically + // allocated and their constructors might not have run by the time some + // other static variable tries to allocate memory. + void Init() { + ASSERT(kAlignedSize <= kAllocIncrement); + inuse_ = 0; + free_area_ = NULL; + free_avail_ = 0; + free_list_ = NULL; + // Reserve some space at the beginning to avoid fragmentation. 
+ Delete(New()); + } + + T* New() { + // Consult free list + void* result; + if (free_list_ != NULL) { + result = free_list_; + free_list_ = *(reinterpret_cast<void**>(result)); + } else { + if (free_avail_ < kAlignedSize) { + // Need more room + free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); + if (free_area_ == NULL) { + CRASH("FATAL ERROR: Out of memory trying to allocate internal " + "tcmalloc data (%d bytes, object-size %d)\n", + kAllocIncrement, static_cast<int>(sizeof(T))); + } + free_avail_ = kAllocIncrement; + } + result = free_area_; + free_area_ += kAlignedSize; + free_avail_ -= kAlignedSize; + } + inuse_++; + return reinterpret_cast<T*>(result); + } + + void Delete(T* p) { + *(reinterpret_cast<void**>(p)) = free_list_; + free_list_ = p; + inuse_--; + } + + int inuse() const { return inuse_; } + + private: + // How much to allocate from system at a time + static const int kAllocIncrement = 128 << 10; + + // Aligned size of T + static const size_t kAlignedSize + = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); + + // Free area from which to carve new objects + char* free_area_; + size_t free_avail_; + + // Free list of already carved objects + void* free_list_; + + // Number of allocated but unfreed objects + int inuse_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ diff --git a/third_party/tcmalloc/chromium/src/pagemap.h b/third_party/tcmalloc/chromium/src/pagemap.h new file mode 100644 index 0000000..1786e68 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/pagemap.h @@ -0,0 +1,320 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A data structure used by the caching malloc. It maps from page# to +// a pointer that contains info about that page. We use two +// representations: one for 32-bit addresses, and another for 64 bit +// addresses. Both representations provide the same interface. 
The +// first representation is implemented as a flat array, the seconds as +// a three-level radix tree that strips away approximately 1/3rd of +// the bits every time. +// +// The BITS parameter should be the number of bits required to hold +// a page number. E.g., with 32 bit pointers and 4K pages (i.e., +// page offset fits in lower 12 bits), BITS == 20. + +#ifndef TCMALLOC_PAGEMAP_H_ +#define TCMALLOC_PAGEMAP_H_ + +#include "config.h" +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#include "internal_logging.h" + +// Single-level array +template <int BITS> +class TCMalloc_PageMap1 { + private: + static const int LENGTH = 1 << BITS; + + void** array_; + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap1(void* (*allocator)(size_t)) { + array_ = reinterpret_cast<void**>((*allocator)(sizeof(void*) << BITS)); + memset(array_, 0, sizeof(void*) << BITS); + } + + // Ensure that the map contains initialized entries "x .. x+n-1". + // Returns true if successful, false if we could not allocate memory. + bool Ensure(Number x, size_t n) { + // Nothing to do since flat array was allocated at start. All + // that's left is to check for overflow (that is, we don't want to + // ensure a number y where array_[y] would be an out-of-bounds + // access). + return n <= LENGTH - x; // an overflow-free way to do "x + n <= LENGTH" + } + + void PreallocateMoreMemory() {} + + // Return the current value for KEY. Returns NULL if not yet set, + // or if k is out of range. + void* get(Number k) const { + if ((k >> BITS) > 0) { + return NULL; + } + return array_[k]; + } + + // REQUIRES "k" is in range "[0,2^BITS-1]". + // REQUIRES "k" has been ensured before. + // + // Sets the value 'v' for key 'k'. + void set(Number k, void* v) { + array_[k] = v; + } + + // Return the first non-NULL pointer found in this map for + // a page number >= k. Returns NULL if no such number is found. + void* Next(Number k) const { + while (k < (1 << BITS)) { + if (array_[k] != NULL) return array_[k]; + k++; + } + return NULL; + } +}; + +// Two-level radix tree +template <int BITS> +class TCMalloc_PageMap2 { + private: + // Put 32 entries in the root and (2^BITS)/32 entries in each leaf. 
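+  // (With BITS == 20, for example, that is ROOT_LENGTH == 32 root slots and
+  // LEAF_BITS == 15, i.e. 32768 entries per leaf.)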
+ static const int ROOT_BITS = 5; + static const int ROOT_LENGTH = 1 << ROOT_BITS; + + static const int LEAF_BITS = BITS - ROOT_BITS; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Leaf* root_[ROOT_LENGTH]; // Pointers to 32 child nodes + void* (*allocator_)(size_t); // Memory allocator + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap2(void* (*allocator)(size_t)) { + allocator_ = allocator; + memset(root_, 0, sizeof(root_)); + } + + void* get(Number k) const { + const Number i1 = k >> LEAF_BITS; + const Number i2 = k & (LEAF_LENGTH-1); + if ((k >> BITS) > 0 || root_[i1] == NULL) { + return NULL; + } + return root_[i1]->values[i2]; + } + + void set(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> LEAF_BITS; + const Number i2 = k & (LEAF_LENGTH-1); + root_[i1]->values[i2] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1; ) { + const Number i1 = key >> LEAF_BITS; + + // Check for overflow + if (i1 >= ROOT_LENGTH) + return false; + + // Make 2nd level node if necessary + if (root_[i1] == NULL) { + Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); + if (leaf == NULL) return false; + memset(leaf, 0, sizeof(*leaf)); + root_[i1] = leaf; + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } + + void PreallocateMoreMemory() { + // Allocate enough to keep track of all possible pages + Ensure(0, 1 << BITS); + } + + void* Next(Number k) const { + while (k < (1 << BITS)) { + const Number i1 = k >> LEAF_BITS; + Leaf* leaf = root_[i1]; + if (leaf != NULL) { + // Scan forward in leaf + for (Number i2 = k & (LEAF_LENGTH - 1); i2 < LEAF_LENGTH; i2++) { + if (leaf->values[i2] != NULL) { + return leaf->values[i2]; + } + } + } + // Skip to next top-level entry + k = (i1 + 1) << LEAF_BITS; + } + return NULL; + } +}; + +// Three-level radix tree +template <int BITS> +class TCMalloc_PageMap3 { + private: + // How many bits should we consume at each interior level + static const int INTERIOR_BITS = (BITS + 2) / 3; // Round-up + static const int INTERIOR_LENGTH = 1 << INTERIOR_BITS; + + // How many bits should we consume at leaf level + static const int LEAF_BITS = BITS - 2*INTERIOR_BITS; + static const int LEAF_LENGTH = 1 << LEAF_BITS; + + // Interior node + struct Node { + Node* ptrs[INTERIOR_LENGTH]; + }; + + // Leaf node + struct Leaf { + void* values[LEAF_LENGTH]; + }; + + Node* root_; // Root of radix tree + void* (*allocator_)(size_t); // Memory allocator + + Node* NewNode() { + Node* result = reinterpret_cast<Node*>((*allocator_)(sizeof(Node))); + if (result != NULL) { + memset(result, 0, sizeof(*result)); + } + return result; + } + + public: + typedef uintptr_t Number; + + explicit TCMalloc_PageMap3(void* (*allocator)(size_t)) { + allocator_ = allocator; + root_ = NewNode(); + } + + void* get(Number k) const { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + const Number i3 = k & (LEAF_LENGTH-1); + if ((k >> BITS) > 0 || + root_->ptrs[i1] == NULL || root_->ptrs[i1]->ptrs[i2] == NULL) { + return NULL; + } + return reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3]; + } + + void set(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + const Number i3 = k & 
(LEAF_LENGTH-1); + reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2])->values[i3] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1; ) { + const Number i1 = key >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (key >> LEAF_BITS) & (INTERIOR_LENGTH-1); + + // Check for overflow + if (i1 >= INTERIOR_LENGTH || i2 >= INTERIOR_LENGTH) + return false; + + // Make 2nd level node if necessary + if (root_->ptrs[i1] == NULL) { + Node* n = NewNode(); + if (n == NULL) return false; + root_->ptrs[i1] = n; + } + + // Make leaf node if necessary + if (root_->ptrs[i1]->ptrs[i2] == NULL) { + Leaf* leaf = reinterpret_cast<Leaf*>((*allocator_)(sizeof(Leaf))); + if (leaf == NULL) return false; + memset(leaf, 0, sizeof(*leaf)); + root_->ptrs[i1]->ptrs[i2] = reinterpret_cast<Node*>(leaf); + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> LEAF_BITS) + 1) << LEAF_BITS; + } + return true; + } + + void PreallocateMoreMemory() { + } + + void* Next(Number k) const { + while (k < (Number(1) << BITS)) { + const Number i1 = k >> (LEAF_BITS + INTERIOR_BITS); + const Number i2 = (k >> LEAF_BITS) & (INTERIOR_LENGTH-1); + if (root_->ptrs[i1] == NULL) { + // Advance to next top-level entry + k = (i1 + 1) << (LEAF_BITS + INTERIOR_BITS); + } else { + Leaf* leaf = reinterpret_cast<Leaf*>(root_->ptrs[i1]->ptrs[i2]); + if (leaf != NULL) { + for (Number i3 = (k & (LEAF_LENGTH-1)); i3 < LEAF_LENGTH; i3++) { + if (leaf->values[i3] != NULL) { + return leaf->values[i3]; + } + } + } + // Advance to next interior entry + k = ((k >> LEAF_BITS) + 1) << LEAF_BITS; + } + } + return NULL; + } +}; + +#endif // TCMALLOC_PAGEMAP_H_ diff --git a/third_party/tcmalloc/chromium/src/pprof b/third_party/tcmalloc/chromium/src/pprof new file mode 100644 index 0000000..62cce12 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/pprof @@ -0,0 +1,4260 @@ +#! /usr/bin/env perl + +# Copyright (c) 1998-2007, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
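As a quick illustration of the pagemap classes above, here is a minimal, self-contained C++ sketch of how TCMalloc_PageMap3 splits a page number into its three radix-tree indices. BITS == 35 is an assumed example value (roughly 48-bit addresses with 8KB pages) and is not a value taken from this patch.

#include <cstdint>
#include <cstdio>

int main() {
  const int BITS = 35;                             // assumed example value
  const int INTERIOR_BITS = (BITS + 2) / 3;        // 12, same round-up as the header
  const int LEAF_BITS = BITS - 2 * INTERIOR_BITS;  // 11

  const uint64_t page = 0x123456789ULL;            // some page number < 2^BITS
  const uint64_t i1 = page >> (LEAF_BITS + INTERIOR_BITS);
  const uint64_t i2 = (page >> LEAF_BITS) & ((1ULL << INTERIOR_BITS) - 1);
  const uint64_t i3 = page & ((1ULL << LEAF_BITS) - 1);

  // root_->ptrs[i1]->ptrs[i2] is the leaf; values[i3] is the slot for 'page'.
  printf("i1=%llu i2=%llu i3=%llu\n",
         (unsigned long long)i1, (unsigned long long)i2, (unsigned long long)i3);
  return 0;
}

The get()/set() methods in the header do exactly this arithmetic, with Ensure() allocating the i1/i2 nodes on demand.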
+ +# --- +# Program for printing the profile generated by common/profiler.cc, +# or by the heap profiler (common/debugallocation.cc) +# +# The profile contains a sequence of entries of the form: +# <count> <stack trace> +# This program parses the profile, and generates user-readable +# output. +# +# Examples: +# +# % tools/pprof "program" "profile" +# Enters "interactive" mode +# +# % tools/pprof --text "program" "profile" +# Generates one line per procedure +# +# % tools/pprof --gv "program" "profile" +# Generates annotated call-graph and displays via "gv" +# +# % tools/pprof --gv --focus=Mutex "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# +# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# and does not match "string" +# +# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --list=<regexp> pattern. The listing is +# annotated with the flat and cumulative sample counts at each line. +# +# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --disasm=<regexp> pattern. The listing is +# annotated with the flat and cumulative sample counts at each PC value. +# +# TODO: Use color to indicate files? + +use strict; +use warnings; +use Getopt::Long; + +my $PPROF_VERSION = "1.4"; + +# These are the object tools we use which can come from a +# user-specified location using --tools, from the PPROF_TOOLS +# environment variable, or from the environment. +my %obj_tool_map = ( + "objdump" => "objdump", + "nm" => "nm", + "addr2line" => "addr2line", + "c++filt" => "c++filt", + ## ConfigureObjTools may add architecture-specific entries: + #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables + #"addr2line_pdb" => "addr2line-pdb", # ditto + #"otool" => "otool", # equivalent of objdump on OS X +); +my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local +my $GV = "gv"; +my $PS2PDF = "ps2pdf"; +# These are used for dynamic profiles +my $WGET = "wget"; +my $WGET_FLAGS = "--no-http-keep-alive"; # only supported by some wgets +my $CURL = "curl"; + +# These are the web pages that servers need to support for dynamic profiles +my $HEAP_PAGE = "/pprof/heap"; +my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" +my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param + # ?seconds=#&event=x&period=n +my $GROWTH_PAGE = "/pprof/growth"; +my $CONTENTION_PAGE = "/pprof/contention"; +my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter +my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; +my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST +my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; + +# default binary name +my $UNKNOWN_BINARY = "(unknown)"; + +# There is a pervasive dependency on the length (in hex characters, +# i.e., nibbles) of an address, distinguishing between 32-bit and +# 64-bit profiles. To err on the safe size, default to 64-bit here: +my $address_length = 16; + +# A list of paths to search for shared object files +my @prefix_list = (); + +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. 
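+# (Presumably an address for this symbol is interleaved between the real
+# addresses fed to "addr2line -i", so that the variable-length inline-frame
+# output for consecutive addresses can be split apart again.)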
+my $sep_symbol = '_fini'; +my $sep_address = undef; + +##### Argument parsing ##### + +sub usage_string { + return <<EOF; +Usage: +pprof [options] <program> <profiles> + <profiles> is a space separated list of profile names. +pprof [options] <symbolized-profiles> + <symbolized-profiles> is a list of profile files where each file contains + the necessary symbol mappings as well as profile data (likely generated + with --raw). +pprof [options] <profile> + <profile> is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE + + Each name can be: + /path/to/profile - a path to a profile file + host:port[/<service>] - a location of a service to get profile from + + The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, + $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, + or /pprof/filteredprofile. + For instance: "pprof http://myserver.com:80$HEAP_PAGE". + If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). +pprof --symbols <program> + Maps addresses to symbol names. In this mode, stdin should be a + list of library mappings, in the same format as is found in the heap- + and cpu-profile files (this loosely matches that of /proc/self/maps + on linux), followed by a list of hex addresses to map, one per line. + + For more help with querying remote servers, including how to add the + necessary server-side support code, see this filename (or one like it): + + /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html + +Options: + --cum Sort by cumulative data + --base=<base> Subtract <base> from <profile> before display + --interactive Run in interactive mode (interactive "help" gives help) [default] + --seconds=<n> Length of time for dynamic profiles [default=30 secs] + --add_lib=<file> Read additional symbols and line info from the given library + --lib_prefix=<dir> Comma separated list of library path prefixes + +Reporting Granularity: + --addresses Report at address level + --lines Report at source line level + --functions Report at function level [default] + --files Report at source file level + +Output type: + --text Generate text report + --callgrind Generate callgrind format to stdout + --gv Generate Postscript and display + --list=<regexp> Generate source listing of matching routines + --disasm=<regexp> Generate disassembly of matching routines + --symbols Print demangled symbol names found at given addresses + --dot Generate DOT file to stdout + --ps Generate Postcript to stdout + --pdf Generate PDF to stdout + --gif Generate GIF to stdout + --raw Generate symbolized pprof data (useful with remote fetch) + +Heap-Profile Options: + --inuse_space Display in-use (mega)bytes [default] + --inuse_objects Display in-use objects + --alloc_space Display allocated (mega)bytes + --alloc_objects Display allocated objects + --show_bytes Display space in bytes + --drop_negative Ignore negative differences + +Contention-profile options: + --total_delay Display total delay at each region [default] + --contentions Display number of delays at each region + --mean_delay Display mean delay at each region + +Call-graph Options: + --nodecount=<n> Show at most so many nodes [default=80] + --nodefraction=<f> Hide nodes below <f>*total [default=.005] + --edgefraction=<f> Hide edges below <f>*total [default=.001] + --focus=<regexp> Focus on nodes matching <regexp> + --ignore=<regexp> Ignore nodes matching <regexp> + --scale=<n> Set GV scaling [default=0] + --heapcheck Make nodes with non-0 object counts + (i.e. 
direct leak generators) more visible + +Miscellaneous: + --tools=<prefix> Prefix for object tool pathnames + --test Run unit tests + --help This message + --version Version information + +Environment Variables: + PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof + PPROF_TOOLS Prefix for object tools pathnames + +Examples: + +pprof /bin/ls ls.prof + Enters "interactive" mode +pprof --text /bin/ls ls.prof + Outputs one line per procedure +pprof --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +pprof --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +pprof --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +pprof --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() +pprof --text localhost:1234 + Outputs one line per procedure for localhost:1234 +pprof --raw localhost:1234 > ./local.raw +pprof --text ./local.raw + Fetches a remote profile for later analysis and then + analyzes it in text mode. +EOF +} + +sub version_string { + return <<EOF +pprof (part of google-perftools $PPROF_VERSION) + +Copyright 1998-2007 Google Inc. + +This is BSD licensed software; see the source for copying conditions +and license information. +There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. +EOF +} + +sub usage { + my $msg = shift; + print STDERR "$msg\n\n"; + print STDERR usage_string(); + print STDERR "\nFATAL ERROR: $msg\n"; # just as a reminder + exit(1); +} + +sub Init() { + # Setup tmp-file name and handler to clean it up. + # We do this in the very beginning so that we can use + # error() and cleanup() function anytime here after. + $main::tmpfile_sym = "/tmp/pprof$$.sym"; + $main::tmpfile_ps = "/tmp/pprof$$"; + $main::next_tmpfile = 0; + $SIG{'INT'} = \&sighandler; + + # Cache from filename/linenumber to source code + $main::source_cache = (); + + $main::opt_help = 0; + $main::opt_version = 0; + + $main::opt_cum = 0; + $main::opt_base = ''; + $main::opt_addresses = 0; + $main::opt_lines = 0; + $main::opt_functions = 0; + $main::opt_files = 0; + $main::opt_lib_prefix = ""; + + $main::opt_text = 0; + $main::opt_callgrind = 0; + $main::opt_list = ""; + $main::opt_disasm = ""; + $main::opt_symbols = 0; + $main::opt_gv = 0; + $main::opt_dot = 0; + $main::opt_ps = 0; + $main::opt_pdf = 0; + $main::opt_gif = 0; + $main::opt_raw = 0; + + $main::opt_nodecount = 80; + $main::opt_nodefraction = 0.005; + $main::opt_edgefraction = 0.001; + $main::opt_focus = ''; + $main::opt_ignore = ''; + $main::opt_scale = 0; + $main::opt_heapcheck = 0; + $main::opt_seconds = 30; + $main::opt_lib = ""; + + $main::opt_inuse_space = 0; + $main::opt_inuse_objects = 0; + $main::opt_alloc_space = 0; + $main::opt_alloc_objects = 0; + $main::opt_show_bytes = 0; + $main::opt_drop_negative = 0; + $main::opt_interactive = 0; + + $main::opt_total_delay = 0; + $main::opt_contentions = 0; + $main::opt_mean_delay = 0; + + $main::opt_tools = ""; + $main::opt_debug = 0; + $main::opt_test = 0; + + # These are undocumented flags used only by unittests. + $main::opt_test_stride = 0; + + # Are we using $SYMBOL_PAGE? + $main::use_symbol_page = 0; + + # Type of profile we are dealing with + # Supported types: + # cpu + # heap + # growth + # contention + $main::profile_type = ''; # Empty type means "unknown" + + GetOptions("help!" => \$main::opt_help, + "version!" => \$main::opt_version, + "cum!" 
=> \$main::opt_cum, + "base=s" => \$main::opt_base, + "seconds=i" => \$main::opt_seconds, + "add_lib=s" => \$main::opt_lib, + "lib_prefix=s" => \$main::opt_lib_prefix, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" => \$main::opt_text, + "callgrind!" => \$main::opt_callgrind, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "symbols!" => \$main::opt_symbols, + "gv!" => \$main::opt_gv, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "gif!" => \$main::opt_gif, + "raw!" => \$main::opt_raw, + "interactive!" => \$main::opt_interactive, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "focus=s" => \$main::opt_focus, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "heapcheck" => \$main::opt_heapcheck, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" => \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + "total_delay!" => \$main::opt_total_delay, + "contentions!" => \$main::opt_contentions, + "mean_delay!" => \$main::opt_mean_delay, + "tools=s" => \$main::opt_tools, + "test!" => \$main::opt_test, + "debug!" => \$main::opt_debug, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, + ) || usage("Invalid option(s)"); + + # Deal with the standard --help and --version + if ($main::opt_help) { + print usage_string(); + exit(0); + } + + if ($main::opt_version) { + print version_string(); + exit(0); + } + + # Disassembly/listing/symbols mode requires address-level info + if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; + } + + # Check heap-profiling flags + if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + usage("Specify at most on of --inuse/--alloc options"); + } + + # Check output granularities + my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; + if ($grains > 1) { + usage("Only specify one output granularity option"); + } + if ($grains == 0) { + $main::opt_functions = 1; + } + + # Check output modes + my $modes = + $main::opt_text + + $main::opt_callgrind + + ($main::opt_list eq '' ? 0 : 1) + + ($main::opt_disasm eq '' ? 0 : 1) + + ($main::opt_symbols == 0 ? 
0 : 1) + + $main::opt_gv + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_gif + + $main::opt_raw + + $main::opt_interactive + + 0; + if ($modes > 1) { + usage("Only specify one output mode"); + } + if ($modes == 0) { + if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode + $main::opt_interactive = 1; + } else { + $main::opt_text = 1; + } + } + + if ($main::opt_test) { + RunUnitTests(); + # Should not return + exit(1); + } + + # Binary name and profile arguments list + $main::prog = ""; + @main::pfile_args = (); + + # Remote profiling without a binary (using $SYMBOL_PAGE instead) + if (IsProfileURL($ARGV[0])) { + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } + + if ($main::use_symbol_page || $main::use_symbolized_profile) { + # We don't need a binary! + my %disabled = ('--lines' => $main::opt_lines, + '--disasm' => $main::opt_disasm); + for my $option (keys %disabled) { + usage("$option cannot be used without a binary") if $disabled{$option}; + } + # Set $main::prog later... + scalar(@ARGV) || usage("Did not specify profile file"); + } elsif ($main::opt_symbols) { + # --symbols needs a binary-name (to run nm on, etc) but not profiles + $main::prog = shift(@ARGV) || usage("Did not specify program"); + } else { + $main::prog = shift(@ARGV) || usage("Did not specify program"); + scalar(@ARGV) || usage("Did not specify profile file"); + } + + # Parse profile file/location arguments + foreach my $farg (@ARGV) { + if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { + my $machine = $1; + my $num_machines = $2; + my $path = $3; + for (my $i = 0; $i < $num_machines; $i++) { + unshift(@main::pfile_args, "$i.$machine$path"); + } + } else { + unshift(@main::pfile_args, $farg); + } + } + + if ($main::use_symbol_page) { + unless (IsProfileURL($main::pfile_args[0])) { + error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); + } + CheckSymbolPage(); + $main::prog = FetchProgramName(); + } elsif (!$main::use_symbolized_profile) { # may not need objtools! + ConfigureObjTools($main::prog) + } + + # Check what flags our commandline utilities support + if (open(TFILE, "$WGET $WGET_FLAGS -V 2>&1 |")) { + my @lines = <TFILE>; + if (grep(/unrecognized/, @lines) > 0) { + # grep found 'unrecognized' token from WGET, clear WGET flags + $WGET_FLAGS = ""; + } + close(TFILE); + } + # TODO(csilvers): check all the other binaries and objtools to see + # if they are installed and what flags they support, and store that + # in a data structure here, rather than scattering these tests about. + # Then, ideally, rewrite code to use wget OR curl OR GET or ... + + # Break the opt_list_prefix into the prefix_list array + @prefix_list = split (',', $main::opt_lib_prefix); + + # Remove trailing / from the prefixes, in the list to prevent + # searching things like /my/path//lib/mylib.so + foreach (@prefix_list) { + s|/+$||; + } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info that most. 
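+  # (In --symbols mode stdin carries /proc/maps-style mappings followed by
+  # hex addresses, one per line; see PrintSymbols further below.)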
+ if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. + if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Get total data in profile + my $total = TotalProfile($profile); + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + $symbols = ExtractSymbols($libs, $pcs); + } + + # Remove uniniteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? + if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total); + } elsif ($main::opt_list) { + PrintListing($libs, $flat, $cumulative, $main::opt_list); + } elsif ($main::opt_text) { + # Make sure the output is empty when have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total: %s %s\n", Unparse($total), Units()); + } + PrintText($symbols, $flat, $cumulative, $total, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(PsTempName($main::next_tmpfile), ""); + } + } else { + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } + + cleanup(); + exit(0); +} + +##### Entry Point ##### + +Main(); + +# Temporary code to detect if we're running on a Goobuntu system. 
+# These systems don't have the right stuff installed for the special +# Readline libraries to work, so as a temporary workaround, we default +# to using the normal stdio code, rather than the fancier readline-based +# code +sub ReadlineMightFail { + if (-e '/lib/libtermcap.so.2') { + return 0; # libtermcap exists, so readline should be okay + } else { + return 1; + } +} + +sub RunGV { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + if (!system("$GV --version >/dev/null 2>&1")) { + # Options using double dash are supported by this gv version. + # Also, turn on noantialias to better handle bug in gv for + # postscript files with large dimensions. + # TODO: Maybe we should not pass the --noantialias flag + # if the gv version is known to work properly without the flag. + system("$GV --scale=$main::opt_scale --noantialias " . $fname . $bg); + } else { + # Old gv version - only supports options that use single dash. + print STDERR "$GV -scale $main::opt_scale\n"; + system("$GV -scale $main::opt_scale " . $fname . $bg); + } +} + + +##### Interactive helper routines ##### + +sub InteractiveMode { + $| = 1; # Make output unbuffered for interactive mode + my ($orig_profile, $symbols, $libs, $total) = @_; + + print "Welcome to pprof! For help, type 'help'.\n"; + + # Use ReadLine if it's installed. + if ( !ReadlineMightFail() && + defined(eval {require Term::ReadLine}) ) { + my $term = new Term::ReadLine 'pprof'; + while ( defined ($_ = $term->readline('(pprof) '))) { + $term->addhistory($_) if /\S/; + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + } + } else { # don't have readline + while (1) { + print "(pprof) "; + $_ = <STDIN>; + last if ! defined $_ ; + s/\r//g; # turn windows-looking lines into unix-looking lines + + # Save some flags that might be reset by InteractiveCommand() + my $save_opt_lines = $main::opt_lines; + + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + + # Restore flags + $main::opt_lines = $save_opt_lines; + } + } +} + +# Takes two args: orig profile, and command to run. +# Returns 1 if we should keep going, or 0 if we were asked to quit +sub InteractiveCommand { + my($orig_profile, $symbols, $libs, $total, $command) = @_; + $_ = $command; # just to make future m//'s easier + if (!defined($_)) { + print "\n"; + return 0; + } + if (m/^ *quit/) { + return 0; + } + if (m/^ *help/) { + InteractiveHelpMessage(); + return 1; + } + # Clear all the mode options -- mode is controlled by "$command" + $main::opt_text = 0; + $main::opt_callgrind = 0; + $main::opt_disasm = 0; + $main::opt_list = 0; + $main::opt_gv = 0; + $main::opt_cum = 0; + + if (m/^ *(text|top)(\d*) *(.*)/) { + $main::opt_text = 1; + + my $line_limit = ($2 ne "") ? 
int($2) : 10; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($3); + + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintText($symbols, $flat, $cumulative, $total, $line_limit); + return 1; + } + if (m/^ *list *(.+)/) { + $main::opt_list = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintListing($libs, $flat, $cumulative, $routine); + return 1; + } + if (m/^ *disasm *(.+)/) { + $main::opt_disasm = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + # Process current profile to account for various settings + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintDisassembly($libs, $flat, $cumulative, $routine, $total); + return 1; + } + if (m/^ *gv *(.*)/) { + $main::opt_gv = 1; + + my $focus; + my $ignore; + ($focus, $ignore) = ParseInteractiveArgs($1); + + # Process current profile to account for various settings + my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + RunGV(PsTempName($main::next_tmpfile), " &"); + $main::next_tmpfile++; + } + return 1; + } + return 1; +} + + +sub ProcessProfile { + my $orig_profile = shift; + my $symbols = shift; + my $focus = shift; + my $ignore = shift; + + # Process current profile to account for various settings + my $profile = $orig_profile; + my $total_count = TotalProfile($profile); + printf("Total: %s %s\n", Unparse($total_count), Units()); + if ($focus ne '') { + $profile = FocusProfile($symbols, $profile, $focus); + my $focus_count = TotalProfile($profile); + printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", + $focus, + Unparse($focus_count), Units(), + Unparse($total_count), ($focus_count*100.0) / $total_count); + } + if ($ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $ignore); + my $ignore_count = TotalProfile($profile); + printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", + $ignore, + Unparse($ignore_count), Units(), + Unparse($total_count), + ($ignore_count*100.0) / $total_count); + } + + return $profile; +} + +sub InteractiveHelpMessage { + print <<ENDOFHELP; +Interactive pprof mode + +Commands: + gv + gv [focus] [-ignore1] [-ignore2] + Show graphical hierarchical display of current profile. Without + any arguments, shows all samples in the profile. With the optional + "focus" argument, restricts the samples shown to just those where + the "focus" regular expression matches a routine name on the stack + trace. 
+ + list [routine_regexp] [-ignore1] [-ignore2] + Show source listing of routines whose names match "routine_regexp" + + top [--cum] [-ignore1] [-ignore2] + top20 [--cum] [-ignore1] [-ignore2] + top37 [--cum] [-ignore1] [-ignore2] + Show top lines ordered by flat profile count, or cumulative count + if --cum is specified. If a number is present after 'top', the + top K routines will be shown (defaults to showing the top 10) + + disasm [routine_regexp] [-ignore1] [-ignore2] + Show disassembly of routines whose names match "routine_regexp", + annotated with sample counts. + + help - This listing + quit or ^D - End pprof + +For commands that accept optional -ignore tags, samples where any routine in +the stack trace matches the regular expression in any of the -ignore +parameters will be ignored. + +Further pprof details are available at this location (or one similar): + + /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html + /usr/doc/google-perftools-$PPROF_VERSION/heap_profiler.html + +ENDOFHELP +} +sub ParseInteractiveArgs { + my $args = shift; + my $focus = ""; + my $ignore = ""; + my @x = split(/ +/, $args); + foreach $a (@x) { + if ($a =~ m/^(--|-)lines$/) { + $main::opt_lines = 1; + } elsif ($a =~ m/^(--|-)cum$/) { + $main::opt_cum = 1; + } elsif ($a =~ m/^-(.*)/) { + $ignore .= (($ignore ne "") ? "|" : "" ) . $1; + } else { + $focus .= (($focus ne "") ? "|" : "" ) . $a; + } + } + if ($ignore ne "") { + print "Ignoring samples in call stacks that match '$ignore'\n"; + } + return ($focus, $ignore); +} + +##### Output code ##### + +sub PsTempName { + my $fnum = shift; + return "$main::tmpfile_ps" . "." . "$fnum" . ".ps"; +} + +# Print profile data in packed binary format (64-bit) to standard out +sub PrintProfileData { + my $profile = shift; + + # print header (64-bit style) + # (zero) (header-size) (version) (sample-period) (zero) + print pack('L*', 0, 0, 3, 0, 0, 0, 1, 0, 0, 0); + + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + my $depth = $#addrs + 1; + # int(foo / 2**32) is the only reliable way to get rid of bottom + # 32 bits on both 32- and 64-bit systems. + print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); + print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); + + foreach my $full_addr (@addrs) { + my $addr = $full_addr; + $addr =~ s/0x0*//; # strip off leading 0x, zeroes + if (length($addr) > 16) { + print STDERR "Invalid address in profile: $full_addr\n"; + next; + } + my $low_addr = substr($addr, -8); # get last 8 hex chars + my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars + print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); + } + } + } +} + +# Print symbols and profile data +sub PrintSymbolizedProfile { + my $symbols = shift; + my $profile = shift; + my $prog = shift; + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + + print '--- ', $symbol_marker, "\n"; + if (defined($prog)) { + print 'binary=', $prog, "\n"; + } + while (my ($pc, $name) = each(%{$symbols})) { + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. 
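+    # ($name is a flat list of (short name, file:line, full name) triples --
+    # the same layout TranslateStack walks below -- so indices 2, 5, 8, ...
+    # are the full names printed here.)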
+ for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; + } + print '---', "\n"; + + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + print '--- ', $profile_marker, "\n"; + if (defined($main::collected_profile)) { + # if used with remote fetch, simply dump the collected profile to output. + open(SRC, "<$main::collected_profile"); + while (<SRC>) { + print $_; + } + close(SRC); + } else { + # dump a cpu-format profile to standard out + PrintProfileData($profile); + } +} + +# Print text output +sub PrintText { + my $symbols = shift; + my $flat = shift; + my $cumulative = shift; + my $total = shift; + my $line_limit = shift; + + # Which profile to sort by? + my $s = $main::opt_cum ? $cumulative : $flat; + + my $running_sum = 0; + my $lines = 0; + foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } + keys(%{$cumulative})) { + my $f = GetEntry($flat, $k); + my $c = GetEntry($cumulative, $k); + $running_sum += $f; + + my $sym = $k; + if (exists($symbols->{$k})) { + $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; + if ($main::opt_addresses) { + $sym = $k . " " . $sym; + } + } + + if ($f != 0 || $c != 0) { + printf("%8s %6s %6s %8s %6s %s\n", + Unparse($f), + Percent($f, $total), + Percent($running_sum, $total), + Unparse($c), + Percent($c, $total), + $sym); + } + $lines++; + last if ($line_limit >= 0 && $lines > $line_limit); + } +} + +# Print the call graph in a way that's suiteable for callgrind. +sub PrintCallgrind { + my $calls = shift; + printf("events: Hits\n\n"); + foreach my $call ( map { $_->[0] } + sort { $a->[1] cmp $b ->[1] || + $a->[2] <=> $b->[2] } + map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + [$_, $1, $2] } + keys %$calls ) { + my $count = int($calls->{$call}); + $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + my ( $caller_file, $caller_line, $caller_function, + $callee_file, $callee_line, $callee_function ) = + ( $1, $2, $3, $5, $6, $7 ); + printf("fl=$caller_file\nfn=$caller_function\n"); + if (defined $6) { + printf("cfl=$callee_file\n"); + printf("cfn=$callee_function\n"); + printf("calls=$count $callee_line\n"); + } + printf("$caller_line $count\n\n"); + } +} + +# Print disassembly for all all routines that match $main::opt_disasm +sub PrintDisassembly { + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $disasm_opts = shift; + my $total = shift; + + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + # See if there are any samples in this routine + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + PrintDisassembledFunction($lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, $total); + last; + } + $addr = AddressInc($addr); + } + } + } +} + +# Return reference to array of tuples of the form: +# [start_address, filename, linenumber, instruction, limit_address] +# E.g., +# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] +sub Disassemble { + my $prog = shift; + my $offset = shift; + my $start_addr = shift; + my $end_addr = shift; + + my $objdump = $obj_tool_map{"objdump"}; + my $cmd = 
sprintf("$objdump -C -d -l --no-show-raw-insn " . + "--start-address=0x$start_addr " . + "--stop-address=0x$end_addr $prog"); + open(OBJDUMP, "$cmd |") || error("$objdump: $!\n"); + my @result = (); + my $filename = ""; + my $linenumber = -1; + my $last = ["", "", "", ""]; + while (<OBJDUMP>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + chop; + if (m|\s*([^:\s]+):(\d+)\s*$|) { + # Location line of the form: + # <filename>:<linenumber> + $filename = $1; + $linenumber = $2; + } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { + # Disassembly line -- zero-extend address to full length + my $addr = HexExtend($1); + my $k = AddressAdd($addr, $offset); + $last->[4] = $k; # Store ending address for previous instruction + $last = [$k, $filename, $linenumber, $2, $end_addr]; + push(@result, $last); + } + } + close(OBJDUMP); + return @result; +} + +# The input file should contain lines of the form /proc/maps-like +# output (same format as expected from the profiles) or that looks +# like hex addresses (like "0xDEADBEEF"). We will parse all +# /proc/maps output, and for all the hex addresses, we will output +# "short" symbol names, one per line, in the same order as the input. +sub PrintSymbols { + my $maps_and_symbols_file = shift; + + # ParseLibraries expects pcs to be in a set. Fine by us... + my @pclist = (); # pcs in sorted order + my $pcs = {}; + my $map = ""; + foreach my $line (<$maps_and_symbols_file>) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /\b(0x[0-9a-f]+)\b/i) { + push(@pclist, HexExtend($1)); + $pcs->{$pclist[-1]} = 1; + } else { + $map .= $line; + } + } + + my $libs = ParseLibraries($main::prog, $map, $pcs); + my $symbols = ExtractSymbols($libs, $pcs); + + foreach my $pc (@pclist) { + # ->[0] is the shortname, ->[2] is the full name + print(($symbols->{$pc}->[0] || "??") . "\n"); + } +} + + +# For sorting functions by name +sub ByName { + return ShortFunctionName($a) cmp ShortFunctionName($b); +} + +# Print source-listing for all all routines that match $main::opt_list +sub PrintListing { + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $list_opts = shift; + + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + # Print if there are any samples in this routine + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + PrintSource($lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr); + last; + } + $addr = AddressInc($addr); + } + } + } +} + +# Returns the indentation of the line, if it has any non-whitespace +# characters. Otherwise, returns -1. 
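+# For example, a line of "    foo();" has indentation 4, while an empty or
+# all-whitespace line yields -1.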
+sub Indentation { + my $line = shift; + if (m/^(\s*)\S/) { + return length($1); + } else { + return -1; + } +} + +# Print source-listing for one routine +sub PrintSource { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + + # Disassemble all instructions (just to get line numbers) + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Hack 1: assume that the first source file encountered in the + # disassembly contains the routine + my $filename = undef; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[2] >= 0) { + $filename = $instructions[$i]->[1]; + last; + } + } + if (!defined($filename)) { + print STDERR "no filename found in $routine\n"; + return; + } + + # Hack 2: assume that the largest line number from $filename is the + # end of the procedure. This is typically safe since if P1 contains + # an inlined call to P2, then P2 usually occurs earlier in the + # source file. If this does not work, we might have to compute a + # density profile or just print all regions we find. + my $lastline = 0; + for (my $i = 0; $i <= $#instructions; $i++) { + my $f = $instructions[$i]->[1]; + my $l = $instructions[$i]->[2]; + if (($f eq $filename) && ($l > $lastline)) { + $lastline = $l; + } + } + + # Hack 3: assume the first source location from "filename" is the start of + # the source code. + my $firstline = 1; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[1] eq $filename) { + $firstline = $instructions[$i]->[2]; + last; + } + } + + # Hack 4: Extend last line forward until its indentation is less than + # the indentation we saw on $firstline + my $oldlastline = $lastline; + { + if (!open(FILE, "<$filename")) { + print STDERR "$filename: $!\n"; + return; + } + my $l = 0; + my $first_indentation = -1; + while (<FILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $l++; + my $indent = Indentation($_); + if ($l >= $firstline) { + if ($first_indentation < 0 && $indent >= 0) { + $first_indentation = $indent; + last if ($first_indentation == 0); + } + } + if ($l >= $lastline && $indent >= 0) { + if ($indent >= $first_indentation) { + $lastline = $l+1; + } else { + last; + } + } + } + close(FILE); + } + + # Assign all samples to the range $firstline,$lastline, + # Hack 4: If an instruction does not occur in the range, its samples + # are moved to the next instruction that occurs in the range. 
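+  # (Concretely: counts accumulate in $running1/$running2 and are only flushed
+  # into $samples1/$samples2 when an instruction's file/line falls inside
+  # [$firstline, $lastline]; anything still pending at the end is charged to
+  # $lastline.)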
+ my $samples1 = {}; + my $samples2 = {}; + my $running1 = 0; # Unassigned flat counts + my $running2 = 0; # Unassigned cumulative counts + my $total1 = 0; # Total flat counts + my $total2 = 0; # Total cumulative counts + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + $running1 += $c1; + $running2 += $c2; + $total1 += $c1; + $total2 += $c2; + my $file = $e->[1]; + my $line = $e->[2]; + if (($file eq $filename) && + ($line >= $firstline) && + ($line <= $lastline)) { + # Assign all accumulated samples to this line + AddEntry($samples1, $line, $running1); + AddEntry($samples2, $line, $running2); + $running1 = 0; + $running2 = 0; + } + } + + # Assign any leftover samples to $lastline + AddEntry($samples1, $lastline, $running1); + AddEntry($samples2, $lastline, $running2); + + printf("ROUTINE ====================== %s in %s\n" . + "%6s %6s Total %s (flat / cumulative)\n", + ShortFunctionName($routine), + $filename, + Units(), + Unparse($total1), + Unparse($total2)); + if (!open(FILE, "<$filename")) { + print STDERR "$filename: $!\n"; + return; + } + my $l = 0; + while (<FILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $l++; + if ($l >= $firstline - 5 && + (($l <= $oldlastline + 5) || ($l <= $lastline))) { + chop; + my $text = $_; + if ($l == $firstline) { printf("---\n"); } + printf("%6s %6s %4d: %s\n", + UnparseAlt(GetEntry($samples1, $l)), + UnparseAlt(GetEntry($samples2, $l)), + $l, + $text); + if ($l == $lastline) { printf("---\n"); } + }; + } + close(FILE); +} + +# Return the source line for the specified file/linenumber. +# Returns undef if not found. 
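+# (Lines are cached per file in %main::source_cache -- the whole cache is
+# dropped once it holds more than 100 files -- and line numbers are 1-based.)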
+sub SourceLine { + my $file = shift; + my $line = shift; + + # Look in cache + if (!defined($main::source_cache{$file})) { + if (100 < scalar keys(%main::source_cache)) { + # Clear the cache when it gets too big + $main::source_cache = (); + } + + # Read all lines from the file + if (!open(FILE, "<$file")) { + print STDERR "$file: $!\n"; + $main::source_cache{$file} = []; # Cache the negative result + return undef; + } + my $lines = []; + push(@{$lines}, ""); # So we can use 1-based line numbers as indices + while (<FILE>) { + push(@{$lines}, $_); + } + close(FILE); + + # Save the lines in the cache + $main::source_cache{$file} = $lines; + } + + my $lines = $main::source_cache{$file}; + if (($line < 0) || ($line > $#{$lines})) { + return undef; + } else { + return $lines->[$line]; + } +} + +# Print disassembly for one routine with interspersed source if available +sub PrintDisassembledFunction { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $total = shift; + + # Disassemble all instructions + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Make array of counts per instruction + my @flat_count = (); + my @cum_count = (); + my $flat_total = 0; + my $cum_total = 0; + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + push(@flat_count, $c1); + push(@cum_count, $c2); + $flat_total += $c1; + $cum_total += $c2; + } + + # Print header with total counts + printf("ROUTINE ====================== %s\n" . + "%6s %6s %s (flat, cumulative) %.1f%% of total\n", + ShortFunctionName($routine), + Unparse($flat_total), + Unparse($cum_total), + Units(), + ($cum_total * 100.0) / $total); + + # Process instructions in order + my $current_file = ""; + for (my $i = 0; $i <= $#instructions; ) { + my $e = $instructions[$i]; + + # Print the new file name whenever we switch files + if ($e->[1] ne $current_file) { + $current_file = $e->[1]; + my $fname = $current_file; + $fname =~ s|^\./||; # Trim leading "./" + + # Shorten long file names + if (length($fname) >= 58) { + $fname = "..." . substr($fname, -55); + } + printf("-------------------- %s\n", $fname); + } + + # TODO: Compute range of lines to print together to deal with + # small reorderings. 
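+    # (At the moment the range is a single source line: $last_line is never
+    # advanced beyond $first_line, so each run below covers one line.)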
+ my $first_line = $e->[2]; + my $last_line = $first_line; + my %flat_sum = (); + my %cum_sum = (); + for (my $l = $first_line; $l <= $last_line; $l++) { + $flat_sum{$l} = 0; + $cum_sum{$l} = 0; + } + + # Find run of instructions for this range of source lines + my $first_inst = $i; + while (($i <= $#instructions) && + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { + $e = $instructions[$i]; + $flat_sum{$e->[2]} += $flat_count[$i]; + $cum_sum{$e->[2]} += $cum_count[$i]; + $i++; + } + my $last_inst = $i - 1; + + # Print source lines + for (my $l = $first_line; $l <= $last_line; $l++) { + my $line = SourceLine($current_file, $l); + if (!defined($line)) { + $line = "?\n"; + next; + } else { + $line =~ s/^\s+//; + } + printf("%6s %6s %5d: %s", + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); + } + + # Print disassembly + for (my $x = $first_inst; $x <= $last_inst; $x++) { + my $e = $instructions[$x]; + my $address = $e->[0]; + $address = AddressSub($address, $offset); # Make relative to section + $address =~ s/^0x//; + $address =~ s/^0*//; + + # Trim symbols + my $d = $e->[3]; + while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) + while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments + + printf("%6s %6s %8s: %6s\n", + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + $address, + $d); + } + } +} + +# Print DOT graph +sub PrintDot { + my $prog = shift; + my $symbols = shift; + my $raw = shift; + my $flat = shift; + my $cumulative = shift; + my $overall_total = shift; + + # Get total + my $local_total = TotalProfile($flat); + my $nodelimit = int($main::opt_nodefraction * $local_total); + my $edgelimit = int($main::opt_edgefraction * $local_total); + my $nodecount = $main::opt_nodecount; + + # Find nodes to include + my @list = (sort { abs(GetEntry($cumulative, $b)) <=> + abs(GetEntry($cumulative, $a)) + || $a cmp $b } + keys(%{$cumulative})); + my $last = $nodecount - 1; + if ($last > $#list) { + $last = $#list; + } + while (($last >= 0) && + (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { + $last--; + } + if ($last < 0) { + print STDERR "No nodes to print\n"; + cleanup(); + return 0; + } + + if ($nodelimit > 0 || $edgelimit > 0) { + printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", + Unparse($nodelimit), Units(), + Unparse($edgelimit), Units()); + } + + # Open DOT output file + my $output; + if ($main::opt_gv) { + $output = "| $DOT -Tps2 >" . PsTempName($main::next_tmpfile); + } elsif ($main::opt_ps) { + $output = "| $DOT -Tps2"; + } elsif ($main::opt_pdf) { + $output = "| $DOT -Tps2 | $PS2PDF - -"; + } elsif ($main::opt_gif) { + $output = "| $DOT -Tgif"; + } else { + $output = ">&STDOUT"; + } + open(DOT, $output) || error("$output: $!\n"); + + # Title + printf DOT ("digraph \"%s; %s %s\" {\n", + $prog, + Unparse($overall_total), + Units()); + if ($main::opt_pdf) { + # The output is more printable if we set the page size for dot. + printf DOT ("size=\"8,11\"\n"); + } + printf DOT ("node [width=0.375,height=0.25];\n"); + + # Print legend + printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . 
+ "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", + $prog, + sprintf("Total %s: %s", Units(), Unparse($overall_total)), + sprintf("Focusing on: %s", Unparse($local_total)), + sprintf("Dropped nodes with <= %s abs(%s)", + Unparse($nodelimit), Units()), + sprintf("Dropped edges with <= %s %s", + Unparse($edgelimit), Units()) + ); + + # Print nodes + my %node = (); + my $nextnode = 1; + foreach my $a (@list[0..$last]) { + # Pick font size + my $f = GetEntry($flat, $a); + my $c = GetEntry($cumulative, $a); + + my $fs = 8; + if ($local_total > 0) { + $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); + } + + $node{$a} = $nextnode++; + my $sym = $a; + $sym =~ s/\s+/\\n/g; + $sym =~ s/::/\\n/g; + + # Extra cumulative info to print for non-leaves + my $extra = ""; + if ($f != $c) { + $extra = sprintf("\\rof %s (%s)", + Unparse($c), + Percent($c, $overall_total)); + } + my $style = ""; + if ($main::opt_heapcheck) { + if ($f > 0) { + # make leak-causing nodes more visible (add a background) + $style = ",style=filled,fillcolor=gray" + } elsif ($f < 0) { + # make anti-leak-causing nodes (which almost never occur) + # stand out as well (triple border) + $style = ",peripheries=3" + } + } + + printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . + "\",shape=box,fontsize=%.1f%s];\n", + $node{$a}, + $sym, + Unparse($f), + Percent($f, $overall_total), + $extra, + $fs, + $style, + ); + } + + # Get edges and counts per edge + my %edge = (); + my $n; + foreach my $k (keys(%{$raw})) { + # TODO: omit low %age edges + $n = $raw->{$k}; + my @translated = TranslateStack($symbols, $k); + for (my $i = 1; $i <= $#translated; $i++) { + my $src = $translated[$i]; + my $dst = $translated[$i-1]; + #next if ($src eq $dst); # Avoid self-edges? + if (exists($node{$src}) && exists($node{$dst})) { + my $edge_label = "$src\001$dst"; + if (!exists($edge{$edge_label})) { + $edge{$edge_label} = 0; + } + $edge{$edge_label} += $n; + } + } + } + + # Print edges + foreach my $e (keys(%edge)) { + my @x = split(/\001/, $e); + $n = $edge{$e}; + + if (abs($n) > $edgelimit) { + # Compute line width based on edge count + my $fraction = abs($local_total ? 
(3 * ($n / $local_total)) : 0); + if ($fraction > 1) { $fraction = 1; } + my $w = $fraction * 2; + #if ($w < 1) { $w = 1; } + + # Dot sometimes segfaults if given edge weights that are too large, so + # we cap the weights at a large value + my $edgeweight = abs($n) ** 0.7; + if ($edgeweight > 100000) { $edgeweight = 100000; } + $edgeweight = int($edgeweight); + + my $style = sprintf("setlinewidth(%f)", $w); + if ($x[1] =~ m/\(inline\)/) { + $style .= ",dashed"; + } + + # Use a slightly squashed function of the edge count as the weight + printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", + $node{$x[0]}, + $node{$x[1]}, + Unparse($n), + $edgeweight, + $style); + } + } + + print DOT ("}\n"); + + close(DOT); + return 1; +} + +# Translate a stack of addresses into a stack of symbols +sub TranslateStack { + my $symbols = shift; + my $k = shift; + + my @addrs = split(/\n/, $k); + my @result = (); + for (my $i = 0; $i <= $#addrs; $i++) { + my $a = $addrs[$i]; + + # Skip large addresses since they sometimes show up as fake entries on RH9 + if (length($a) > 8 && $a gt "7fffffffffffffff") { + next; + } + + if ($main::opt_disasm || $main::opt_list) { + # We want just the address for the key + push(@result, $a); + next; + } + + my $symlist = $symbols->{$a}; + if (!defined($symlist)) { + $symlist = [$a, "", $a]; + } + + # We can have a sequence of symbols for a particular entry + # (more than one symbol in the case of inlining). Callers + # come before callees in symlist, so walk backwards since + # the translated stack should contain callees before callers. + for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { + my $func = $symlist->[$j-2]; + my $fileline = $symlist->[$j-1]; + my $fullfunc = $symlist->[$j]; + if ($j > 2) { + $func = "$func (inline)"; + } + if ($main::opt_addresses) { + push(@result, "$a $func $fileline"); + } elsif ($main::opt_lines) { + if ($func eq '??' && $fileline eq '??:0') { + push(@result, "$a"); + } else { + push(@result, "$func $fileline"); + } + } elsif ($main::opt_functions) { + if ($func eq '??') { + push(@result, "$a"); + } else { + push(@result, $func); + } + } elsif ($main::opt_files) { + if ($fileline eq '??:0' || $fileline eq '') { + push(@result, "$a"); + } else { + my $f = $fileline; + $f =~ s/:\d+$//; + push(@result, $f); + } + } else { + push(@result, $a); + last; # Do not print inlined info + } + } + } + + # print join(",", @addrs), " => ", join(",", @result), "\n"; + return @result; +} + +# Generate percent string for a number and a total +sub Percent { + my $num = shift; + my $tot = shift; + if ($tot != 0) { + return sprintf("%.1f%%", $num * 100.0 / $tot); + } else { + return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf"); + } +} + +# Generate pretty-printed form of number +sub Unparse { + my $num = shift; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return sprintf("%d", $num); + } else { + if ($main::opt_show_bytes) { + return sprintf("%d", $num); + } else { + return sprintf("%.1f", $num / 1048576.0); + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + } else { + return sprintf("%d", $num); + } +} + +# Alternate pretty-printed form: 0 maps to "." 
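+# (Otherwise identical to Unparse above.  Illustrative example: with a heap
+# profile in the default MB units, a byte count of 3145728 prints as "3.0".)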
+sub UnparseAlt { + my $num = shift; + if ($num == 0) { + return "."; + } else { + return Unparse($num); + } +} + +# Return output units +sub Units { + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return "objects"; + } else { + if ($main::opt_show_bytes) { + return "B"; + } else { + return "MB"; + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return "seconds"; + } else { + return "samples"; + } +} + +##### Profile manipulation code ##### + +# Generate flattened profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a] +sub FlatProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + AddEntry($result, $addrs[0], $count); + } + } + return $result; +} + +# Generate cumulative profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a], [b], [c], [d] +sub CumulativeProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + AddEntry($result, $a, $count); + } + } + return $result; +} + +# If the second-youngest PC on the stack is always the same, returns +# that pc. Otherwise, returns undef. +sub IsSecondPcAlwaysTheSame { + my $profile = shift; + + my $second_pc = undef; + foreach my $k (keys(%{$profile})) { + my @addrs = split(/\n/, $k); + if ($#addrs < 1) { + return undef; + } + if (not defined $second_pc) { + $second_pc = $addrs[1]; + } else { + if ($second_pc ne $addrs[1]) { + return undef; + } + } + } + return $second_pc; +} + +sub ExtractSymbolLocation { + my $symbols = shift; + my $address = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. + my $location = "??:0:unknown"; + if (exists $symbols->{$address}) { + my $file = $symbols->{$address}->[1]; + if ($file eq "?") { + $file = "??:0" + } + $location = $file . ":" . $symbols->{$address}->[0]; + } + return $location; +} + +# Extracts a graph of calls. 
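+# The returned map is keyed by symbol locations as produced by
+# ExtractSymbolLocation above ("file:line:function", or "??:0:unknown" when
+# unresolved): each stack's leaf location gets a plain entry, and every
+# caller/callee pair gets an entry of the form "source -> destination";
+# values are the accumulated counts.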
+sub ExtractCalls { + my $symbols = shift; + my $profile = shift; + + my $calls = {}; + while( my ($stack_trace, $count) = each %$profile ) { + my @address = split(/\n/, $stack_trace); + my $destination = ExtractSymbolLocation($symbols, $address[0]); + AddEntry($calls, $destination, $count); + for (my $i = 1; $i <= $#address; $i++) { + my $source = ExtractSymbolLocation($symbols, $address[$i]); + my $call = "$source -> $destination"; + AddEntry($calls, $call, $count); + $destination = $source; + } + } + + return $calls; +} + +sub RemoveUninterestingFrames { + my $symbols = shift; + my $profile = shift; + + # List of function names to skip + my %skip = (); + my $skip_regexp = 'NOMATCH'; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + foreach my $name ('calloc', + 'cfree', + 'malloc', + 'free', + 'memalign', + 'posix_memalign', + 'pvalloc', + 'valloc', + 'realloc', + 'tc_calloc', + 'tc_cfree', + 'tc_malloc', + 'tc_free', + 'tc_memalign', + 'tc_posix_memalign', + 'tc_pvalloc', + 'tc_valloc', + 'tc_realloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', + '::do_malloc', # new name -- got moved to an unnamed ns + 'DoSampledAllocation', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', + '__builtin_delete', + '__builtin_new', + '__builtin_vec_delete', + '__builtin_vec_new', + 'operator new', + 'operator new[]') { + $skip{$name} = 1; + $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything + } + # TODO: Remove TCMalloc once everything has been + # moved into the tcmalloc:: namespace and we have flushed + # old code out of the system. + $skip_regexp = "TCMalloc|^tcmalloc::"; + } elsif ($main::profile_type eq 'contention') { + foreach my $vname ('Mutex::Unlock', 'Mutex::UnlockSlow') { + $skip{$vname} = 1; + } + } elsif ($main::profile_type eq 'cpu') { + # Drop signal handlers used for CPU profile collection + # TODO(dpeng): this should not be necessary; it's taken + # care of by the general 2nd-pc mechanism below. + foreach my $name ('ProfileData::Add', # historical + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', + '__FRAME_END__', + '__pthread_sighandler', + '__restore') { + $skip{$name} = 1; + } + } else { + # Nothing skipped for unknown types + } + + if ($main::profile_type eq 'cpu') { + # If all the second-youngest program counters are the same, + # this STRONGLY suggests that it is an artifact of measurement, + # i.e., stack frames pushed by the CPU profiler signal handler. + # Hence, we delete them. + # (The topmost PC is read from the signal structure, not from + # the stack, so it does not get involved.) 
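+    # Each pass of the loop below splices the entry at stack index 1 out of
+    # every trace and rebuilds the profile; the while-loop repeats in case
+    # more than one such handler frame was pushed.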
+ while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { + my $result = {}; + my $func = ''; + if (exists($symbols->{$second_pc})) { + $second_pc = $symbols->{$second_pc}->[0]; + } + print STDERR "Removing $second_pc from all stack traces.\n"; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + splice @addrs, 1, 1; + my $reduced_path = join("\n", @addrs); + AddEntry($result, $reduced_path, $count); + } + $profile = $result; + } + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + if (exists($symbols->{$a})) { + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + next; + } + } + push(@path, $a); + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Reduce profile to granularity given by user +sub ReduceProfile { + my $symbols = shift; + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @translated = TranslateStack($symbols, $k); + my @path = (); + my %seen = (); + $seen{''} = 1; # So that empty keys are skipped + foreach my $e (@translated) { + # To avoid double-counting due to recursion, skip a stack-trace + # entry if it has already been seen + if (!$seen{$e}) { + $seen{$e} = 1; + push(@path, $e); + } + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Does the specified symbol array match the regexp? +sub SymbolMatches { + my $sym = shift; + my $re = shift; + if (defined($sym)) { + for (my $i = 0; $i < $#{$sym}; $i += 3) { + if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { + return 1; + } + } + } + return 0; +} + +# Focus only on paths involving specified regexps +sub FocusProfile { + my $symbols = shift; + my $profile = shift; + my $focus = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { + AddEntry($result, $k, $count); + last; + } + } + } + return $result; +} + +# Focus only on paths not involving specified regexps +sub IgnoreProfile { + my $symbols = shift; + my $profile = shift; + my $ignore = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $matched = 0; + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { + $matched = 1; + last; + } + } + if (!$matched) { + AddEntry($result, $k, $count); + } + } + return $result; +} + +# Get total count in profile +sub TotalProfile { + my $profile = shift; + my $result = 0; + foreach my $k (keys(%{$profile})) { + $result += $profile->{$k}; + } + return $result; +} + +# Add A to B +sub AddProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + my $v = $A->{$k}; + AddEntry($R, $k, $v); + } + # add all keys in B + foreach my $k (keys(%{$B})) { + my $v = $B->{$k}; + AddEntry($R, $k, $v); + } + return $R; +} + +# Merges symbol maps +sub MergeSymbols { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + $R->{$k} = $A->{$k}; + } + if (defined($B)) { + 
foreach my $k (keys(%{$B})) { + $R->{$k} = $B->{$k}; + } + } + return $R; +} + + +# Add A to B +sub AddPcs { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + $R->{$k} = 1 + } + # add all keys in B + foreach my $k (keys(%{$B})) { + $R->{$k} = 1 + } + return $R; +} + +# Subtract B from A +sub SubtractProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + my $v = $A->{$k} - GetEntry($B, $k); + if ($v < 0 && $main::opt_drop_negative) { + $v = 0; + } + AddEntry($R, $k, $v); + } + if (!$main::opt_drop_negative) { + # Take care of when subtracted profile has more entries + foreach my $k (keys(%{$B})) { + if (!exists($A->{$k})) { + AddEntry($R, $k, 0 - $B->{$k}); + } + } + } + return $R; +} + +# Get entry from profile; zero if not present +sub GetEntry { + my $profile = shift; + my $k = shift; + if (exists($profile->{$k})) { + return $profile->{$k}; + } else { + return 0; + } +} + +# Add entry to specified profile +sub AddEntry { + my $profile = shift; + my $k = shift; + my $n = shift; + if (!exists($profile->{$k})) { + $profile->{$k} = 0; + } + $profile->{$k} += $n; +} + +# Add a stack of entries to specified profile, and add them to the $pcs +# list. +sub AddEntries { + my $profile = shift; + my $pcs = shift; + my $stack = shift; + my $count = shift; + my @k = (); + + foreach my $e (split(/\s+/, $stack)) { + my $pc = HexExtend($e); + $pcs->{$pc} = 1; + push @k, $pc; + } + AddEntry($profile, (join "\n", @k), $count); +} + +sub IsSymbolizedProfileFile { + my $file_name = shift; + + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + # Check if the file contains a symbol-section marker. + open(TFILE, "<$file_name"); + my @lines = <TFILE>; + my $result = grep(/^--- *$symbol_marker/, @lines); + close(TFILE); + return $result > 0; +} + +##### Code to profile a server dynamically ##### + +sub CheckSymbolPage { + my $url = SymbolPageURL(); + open(SYMBOL, "$WGET $WGET_FLAGS -qO- '$url' |"); + my $line = <SYMBOL>; + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + close(SYMBOL); + unless (defined($line)) { + error("$url doesn't exist\n"); + } + + if ($line =~ /^num_symbols:\s+(\d+)$/) { + if ($1 == 0) { + error("Stripped binary. No symbols available.\n"); + } + } else { + error("Failed to get the number of symbols from $url\n"); + } +} + +sub IsProfileURL { + my $profile_name = shift; + my ($host, $port, $path) = ParseProfileURL($profile_name); + return defined($host) and defined($port) and defined($path); +} + +sub ParseProfileURL { + my $profile_name = shift; + if (defined($profile_name) && + $profile_name =~ m,^(http://|)([^/:]+):(\d+)(|\@\d+)(|/|.*($PROFILE_PAGE|$PMUPROFILE_PAGE|$HEAP_PAGE|$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|$FILTEREDPROFILE_PAGE))$,o) { + # $6 is $PROFILE_PAGE/$HEAP_PAGE/etc. $5 is *everything* after + # the hostname, as long as that everything is the empty string, + # a slash, or something ending in $PROFILE_PAGE/$HEAP_PAGE/etc. + # So "$6 || $5" is $PROFILE_PAGE/etc if there, or else it's "/" or "". + return ($2, $3, $6 || $5); + } + return (); +} + +# We fetch symbols from the first profile argument. 
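+# (SymbolPageURL below simply reuses the host:port parsed from
+# $main::pfile_args[0] and appends $SYMBOL_PAGE to it.)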
+sub SymbolPageURL { + my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); + return "http://$host:$port$SYMBOL_PAGE"; +} + +sub FetchProgramName() { + my ($host, $port, $path) = ParseProfileURL($main::pfile_args[0]); + my $url = "http://$host:$port$PROGRAM_NAME_PAGE"; + my $command_line = "$WGET $WGET_FLAGS -qO- '$url'"; + open(CMDLINE, "$command_line |") or error($command_line); + my $cmdline = <CMDLINE>; + $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines + close(CMDLINE); + error("Failed to get program name from $url\n") unless defined($cmdline); + $cmdline =~ s/\x00.+//; # Remove argv[1] and latters. + $cmdline =~ s!\n!!g; # Remove LFs. + return $cmdline; +} + +# Gee, curl's -L (--location) option isn't reliable at least +# with its 7.12.3 version. Curl will forget to post data if +# there is a redirection. This function is a workaround for +# curl. Redirection happens on borg hosts. +sub ResolveRedirectionForCurl { + my $url = shift; + my $command_line = "$CURL -s --head '$url'"; + open(CMDLINE, "$command_line |") or error($command_line); + while (<CMDLINE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^Location: (.*)/) { + $url = $1; + } + } + close(CMDLINE); + return $url; +} + +# Reads a symbol map from the file handle name given as $1, returning +# the resulting symbol map. Also processes variables relating to symbols. +# Currently, the only variable processed is 'binary=<value>' which updates +# $main::prog to have the correct program name. +sub ReadSymbols { + my $in = shift; + my $map = {}; + while (<$in>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Removes all the leading zeroes from the symbols, see comment below. + if (m/^0x0*([0-9a-f]+)\s+(.+)/) { + $map->{$1} = $2; + } elsif (m/^---/) { + last; + } elsif (m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1, $2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "binary") { + if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { + printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", + $main::prog, $value); + } + $main::prog = $value; + } else { + printf STDERR ("Ignoring unknown variable in symbols list: " . + "'%s' = '%s'\n", $variable, $value); + } + } + } + return $map; +} + +# Fetches and processes symbols to prepare them for use in the profile output +# code. If the optional 'symbol_map' arg is not given, fetches symbols from +# $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols +# are assumed to have already been fetched into 'symbol_map' and are simply +# extracted and processed. +sub FetchSymbols { + my $pcset = shift; + my $symbol_map = shift; + + + my %seen = (); + my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq + + if (!defined($symbol_map)) { + my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); + + open(POSTFILE, ">$main::tmpfile_sym"); + print POSTFILE $post_data; + close(POSTFILE); + + my $url = SymbolPageURL(); + # Here we use curl for sending data via POST since old + # wget doesn't have --post-file option. + $url = ResolveRedirectionForCurl($url); + my $command_line = "$CURL -sd '\@$main::tmpfile_sym' '$url'"; + # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. 
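+    # (c++filt passes names it cannot demangle through unchanged, so the
+    # extra pipe is harmless when the server already returns demangled names.)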
+ my $cppfilt = $obj_tool_map{"c++filt"}; + open(SYMBOL, "$command_line | $cppfilt |") or error($command_line); + $symbol_map = ReadSymbols(*SYMBOL{IO}); + close(SYMBOL); + } + + my $symbols = {}; + foreach my $pc (@pcs) { + my $fullname; + # For 64 bits binaries, symbols are extracted with 8 leading zeroes. + # Then /symbol reads the long symbols in as uint64, and outputs + # the result with a "0x%08llx" format which get rid of the zeroes. + # By removing all the leading zeroes in both $pc and the symbols from + # /symbol, the symbols match and are retrievable from the map. + my $shortpc = $pc; + $shortpc =~ s/^0*//; + # Each line may have a list of names, which includes the function + # and also other functions it has inlined. They are separated + # (in PrintSymbolizedFile), by --, which is illegal in function names. + my $fullnames; + if (defined($symbol_map->{$shortpc})) { + $fullnames = $symbol_map->{$shortpc}; + } else { + $fullnames = "0x" . $pc; # Just use addresses + } + my $sym = []; + $symbols->{$pc} = $sym; + foreach my $fullname (split("--", $fullnames)) { + my $name = ShortFunctionName($fullname); + push(@{$sym}, $name, "?", $fullname); + } + } + return $symbols; +} + +sub BaseName { + my $file_name = shift; + $file_name =~ s!^.*/!!; # Remove directory name + return $file_name; +} + +sub MakeProfileBaseName { + my ($binary_name, $profile_name) = @_; + my ($host, $port, $path) = ParseProfileURL($profile_name); + my $binary_shortname = BaseName($binary_name); + return sprintf("%s.%s.%s-port%s", + $binary_shortname, $main::op_time, $host, $port); +} + +sub FetchDynamicProfile { + my $binary_name = shift; + my $profile_name = shift; + my $fetch_name_only = shift; + my $encourage_patience = shift; + + if (!IsProfileURL($profile_name)) { + return $profile_name; + } else { + my ($host, $port, $path) = ParseProfileURL($profile_name); + if ($path eq "" || $path eq "/") { + # Missing type specifier defaults to cpu-profile + $path = $PROFILE_PAGE; + } + + my $profile_file = MakeProfileBaseName($binary_name, $profile_name); + + my $url; + my $wget_timeout; + if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)) { + if ($path =~ m/$PROFILE_PAGE/) { + $url = sprintf("http://$host:$port$PROFILE_PAGE?seconds=%d", + $main::opt_seconds); + } else { + if ($profile_name =~ m/[?]/) { + $profile_name .= "&" + } else { + $profile_name .= "?" + } + $url = sprintf("http://$profile_name" . "seconds=%d", + $main::opt_seconds); + } + $wget_timeout = sprintf("--timeout=%d", + int($main::opt_seconds * 1.01 + 60)); + } else { + # For non-CPU profiles, we add a type-extension to + # the target profile file name. + my $suffix = $path; + $suffix =~ s,/,.,g; + $profile_file .= "$suffix"; + $url = "http://$host:$port$path"; + $wget_timeout = ""; + } + + my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . 
"/pprof"); + if (!(-d $profile_dir)) { + mkdir($profile_dir) + || die("Unable to create profile directory $profile_dir: $!\n"); + } + my $tmp_profile = "$profile_dir/.tmp.$profile_file"; + my $real_profile = "$profile_dir/$profile_file"; + + if ($fetch_name_only > 0) { + return $real_profile; + } + + my $cmd = "$WGET $WGET_FLAGS $wget_timeout -q -O $tmp_profile '$url'"; + if (($path =~ m/$PROFILE_PAGE/) || ($path =~ m/$PMUPROFILE_PAGE/)){ + print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; + if ($encourage_patience) { + print STDERR "Be patient...\n"; + } + } else { + print STDERR "Fetching $path profile from $host:$port to\n ${real_profile}\n"; + } + + (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); + (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n"); + print STDERR "Wrote profile to $real_profile\n"; + $main::collected_profile = $real_profile; + return $main::collected_profile; + } +} + +# Collect profiles in parallel +sub FetchDynamicProfiles { + my $items = scalar(@main::pfile_args); + my $levels = log($items) / log(2); + + if ($items == 1) { + $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); + } else { + # math rounding issues + if ((2 ** $levels) < $items) { + $levels++; + } + my $count = scalar(@main::pfile_args); + for (my $i = 0; $i < $count; $i++) { + $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); + } + print STDERR "Fetching $count profiles, Be patient...\n"; + FetchDynamicProfilesRecurse($levels, 0, 0); + $main::collected_profile = join(" \\\n ", @main::profile_files); + } +} + +# Recursively fork a process to get enough processes +# collecting profiles +sub FetchDynamicProfilesRecurse { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if (my $pid = fork()) { + $position = 0 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + wait; + } else { + $position = 1 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + exit(0); + } +} + +# Collect a single profile +sub TryCollectProfile { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if ($level >= ($maxlevel - 1)) { + if ($position < scalar(@main::pfile_args)) { + FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); + } + } else { + FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); + } +} + +##### Parsing code ##### + +# Provide a small streaming-read module to handle very large +# cpu-profile files. Stream in chunks along a sliding window. +BEGIN { + package CpuProfileStream; + + sub new { + my ($class, $file) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of |long| + slots => [] + }; + bless $self, $class; + # Let unittests adjust the stride + if ($main::opt_test_stride > 0) { + $self->{stride} = $main::opt_test_stride; + } + $self->overflow(); + return $self; + } + + # Load more data when we access slots->get(X) which is not yet in memory. + sub overflow { + my ($self) = @_; + my $slots = $self->{slots}; + $self->{base} += $#$slots + 1; # skip over data we're replacing + my $str; + read($self->{file}, $str, $self->{stride}); + @$slots = unpack("L*", $str); + } + + # Access the i-th long in the file (logically), or -1 at EOF. 
+ sub get { + my ($self, $idx) = @_; + my $slots = $self->{slots}; + while ($#$slots >= 0) { + if ($idx < $self->{base}) { + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return + } elsif ($idx > $self->{base} + $#$slots) { + $self->overflow(); + } else { + return $slots->[$idx - $self->{base}]; + } + } + # If we get here, $slots is [], which means we've reached EOF + return -1; # unique since slots is supposed to hold unsigned numbers + } +} + +# Parse profile generated by common/profiler.cc and return a reference +# to a map: +# $result->{version} Version number of profile file +# $result->{period} Sampling period (in microseconds) +# $result->{profile} Profile object +# $result->{map} Memory map info from profile +# $result->{pcs} Hash of all PC values seen, key is hex address +sub ReadProfile { + my $prog = shift; + my $fname = shift; + + if (IsSymbolizedProfileFile($fname) && !$main::use_symbolized_profile) { + # we have both a binary and symbolized profiles, abort + usage("Symbolized profile '$fname' cannot be used with a binary arg. " . + "Try again without passing '$prog'."); + } + + $main::profile_type = ''; + + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $contention_marker = $&; + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $growth_marker = $&; + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + + # Look at first line to see if it is a heap or a CPU profile. + # CPU profile may start with no header at all, and just binary data + # (starting with \0\0\0\0) -- in that case, don't try to read the + # whole firstline, since it may be gigabytes(!) of data. + open(PROFILE, "<$fname") || error("$fname: $!\n"); + binmode PROFILE; # New perls do UTF-8 processing + my $firstchar = ""; + my $header = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar ne "\0") { + $header = <PROFILE>; + $header =~ s/\r//g; # turn windows-looking lines into unix-looking lines + } + + my $symbols; + if ($header =~ m/^--- *$symbol_marker/o) { + # read the symbol section of the symbolized profile file + $symbols = ReadSymbols(*PROFILE{IO}); + + # read the next line to get the header for the remaining profile + $header = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar ne "\0") { + $header = <PROFILE>; + $header =~ s/\r//g; + } + } + + my $result; + + if ($header =~ m/^heap profile:.*$growth_marker/o) { + $main::profile_type = 'growth'; + $result = ReadHeapProfile($prog, $fname, $header); + } elsif ($header =~ m/^heap profile:/) { + $main::profile_type = 'heap'; + $result = ReadHeapProfile($prog, $fname, $header); + } elsif ($header =~ m/^--- *$contention_marker/o) { + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, $fname); + } elsif ($header =~ m/^--- *Stacks:/) { + print STDERR + "Old format contention profile: mistakenly reports " . 
+ "condition variable signals as lock contentions.\n"; + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, $fname); + } elsif ($header =~ m/^--- *$profile_marker/) { + # the binary cpu profile data starts immediately after this line + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname); + } else { + if (defined($symbols)) { + # a symbolized profile contains a format we don't recognize, bail out + error("$fname: Cannot recognize profile section after symbols.\n"); + } + # no ascii header present -- must be a CPU profile + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname); + } + + # if we got symbols along with the profile, return those as well + if (defined($symbols)) { + $result->{symbols} = $symbols; + } + + return $result; +} + +# CPU profile reader +sub ReadCPUProfile { + my $prog = shift; + my $fname = shift; + my $version; + my $period; + my $i; + my $profile = {}; + my $pcs = {}; + + # Parse string into array of slots. + # L! cannot be used because with a native 64-bit build, it will cause + # 1) a valid 64-bit profile to use the 32-bit codepath, and + # 2) a valid 32-bit profile to be unrecognized. + my $slots = CpuProfileStream->new(*PROFILE); + + # Read header. The current header version is a 5-element structure + # containing: + # 0: header count (always 0) + # 1: header "words" (after this one: 3) + # 2: format version (0) + # 3: sampling period (usec) + # 4: unused padding (always 0) + # The header words are 32-bit or 64-bit depending on the ABI of the program + # that generated the profile. In the 64-bit case, since our x86-architecture + # machines are little-endian, the actual value of each of these elements is + # in the first 32-bit word, and the second is always zero. The @slots array + # above was read as a sequence of 32-bit words in both cases, so we need to + # explicitly check for both cases. A typical slot sequence for each is: + # 32-bit: 0 3 0 100 0 + # 64-bit: 0 0 3 0 0 0 100 0 0 0 + # + if ($slots->get(0) != 0 ) { + error("$fname: not a profile file, or old format profile file\n"); + } + if ($slots->get(1) >= 3) { + # Normal 32-bit header: + $version = $slots->get(2); + $period = $slots->get(3); + $i = 2 + $slots->get(1); + $address_length = 8; + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($slots->get($i) == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = sprintf("%08x", $slots->get($i+$j)); + $pcs->{$pc} = 1; + push @k, $pc; + } + + AddEntry($profile, (join "\n", @k), $n); + $i += $d; + } + + # Normal 64-bit header: All entries are doubled in size. The first + # word (little-endian) should contain the real value, the second should + # be zero. 
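+  # The elsif below validates that layout: the high half of each of the
+  # first four doubled header words (slots 1, 3, 5 and 7) must be zero and
+  # the header word count (slot 2) must be at least 3; anything else is
+  # rejected as "not a profile file, or old format".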
+ } elsif ($slots->get(1) != 0 || + $slots->get(2) < 3 || + $slots->get(3) != 0 || + $slots->get(5) != 0 || + $slots->get(7) != 0) { + error("$fname: not a profile file, or old format profile file\n"); + } else { + $version = $slots->get(4); + $period = $slots->get(6); + $i = 4 + 2 * $slots->get(2); + $address_length = 16; + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $nhi = $slots->get($i++); + # Huge counts may coerce to floating point, keeping scale, not precision + if ($nhi != 0) { $n += $nhi*(2**32); } + my $d = $slots->get($i++); + if ($slots->get($i++) != 0) { + my $addr = sprintf("%o", 4 * $i); + print STDERR "At index $i ($addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); + } + if ($slots->get($i) == 0 && $slots->get($i+1) == 0) { + # End of profile data marker + $i += 2 * $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pclo = $slots->get($i++); + my $pchi = $slots->get($i++); + if ($pclo == -1 || $pchi == -1) { + error("$fname: Unexpected EOF when reading stack of depth $d\n"); + } + + # Subtract one from caller pc so we map back to call instr. + # However, don't do this if we're reading a symbolized profile + # file, in which case the subtract-one was done when the file + # was written. + if ($j > 0 && !$main::use_symbolized_profile) { + if ($pclo == 0) { + $pchi--; + $pclo = 0xffffffff; + } else { + $pclo--; + } + } + + my $pc = sprintf("%08x%08x", $pchi, $pclo); + $pcs->{$pc} = 1; + push @k, $pc; + } + AddEntry($profile, (join "\n", @k), $n); + } + } + + # Parse map + my $map = ''; + seek(PROFILE, $i * 4, 0); + read(PROFILE, $map, (stat PROFILE)[7]); + close(PROFILE); + + my $r = {}; + $r->{version} = $version; + $r->{period} = $period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + + return $r; +} + +sub ReadHeapProfile { + my $prog = shift; + my $fname = shift; + my $header = shift; + + my $index = 1; + if ($main::opt_inuse_space) { + $index = 1; + } elsif ($main::opt_inuse_objects) { + $index = 0; + } elsif ($main::opt_alloc_space) { + $index = 3; + } elsif ($main::opt_alloc_objects) { + $index = 2; + } + + # Find the type of this profile. The header line looks like: + # heap profile: 1246: 8800744 [ 1246: 8800744] @ <heap-url>/266053 + # There are two pairs <count: size>, the first inuse objects/space, and the + # second allocated objects/space. This is followed optionally by a profile + # type, and if that is present, optionally by a sampling frequency. + # For remote heap profiles (v1): + # The interpretation of the sampling frequency is that the profiler, for + # each sample, calculates a uniformly distributed random integer less than + # the given value, and records the next sample after that many bytes have + # been allocated. Therefore, the expected sample interval is half of the + # given frequency. By default, if not specified, the expected sample + # interval is 128KB. Only remote-heap-page profiles are adjusted for + # sample size. + # For remote heap profiles (v2): + # The sampling frequency is the rate of a Poisson process. This means that + # the probability of sampling an allocation of size X with sampling rate Y + # is 1 - exp(-X/Y) + # For version 2, a typical header line might look like this: + # heap profile: 1922: 127792360 [ 1922: 127792360] @ <heap-url>_v2/524288 + # the trailing number (524288) is the sampling rate. 
(Version 1 showed + # double the 'rate' here) + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { + if (defined($6) && ($6 ne '')) { + $type = $6; + my $sample_period = $8; + # $type is "heapprofile" for profiles generated by the + # heap-profiler, and either "heap" or "heap_v2" for profiles + # generated by sampling directly within tcmalloc. It can also + # be "growth" for heap-growth profiles. The first is typically + # found for profiles generated locally, and the others for + # remote profiles. + if (($type eq "heapprofile") || ($type !~ /heap/) ) { + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; + } elsif ($type =~ /_v2/) { + $sampling_algorithm = 2; # version 2 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period); + } + } else { + $sampling_algorithm = 1; # version 1 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period)/2; + } + } + } else { + # We detect whether or not this is a remote-heap profile by checking + # that the total-allocated stats ($n2,$s2) are exactly the + # same as the in-use stats ($n1,$s1). It is remotely conceivable + # that a non-remote-heap profile may pass this check, but it is hard + # to imagine how that could happen. + # In this case it's so old it's guaranteed to be remote-heap version 1. + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + if (($n1 == $n2) && ($s1 == $s2)) { + # This is likely to be a remote-heap based sample profile + $sampling_algorithm = 1; + } + } + } + + if ($sampling_algorithm > 0) { + # For remote-heap generated profiles, adjust the counts and sizes to + # account for the sample rate (we sample once every 128KB by default). + if ($sample_adjustment == 0) { + # Turn on profile adjustment. + $sample_adjustment = 128*1024; + print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; + } else { + printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", + $sample_adjustment); + } + if ($sampling_algorithm > 1) { + # We don't bother printing anything for the original version (version 1) + printf STDERR "Heap version $sampling_algorithm\n"; + } + } + + my $profile = {}; + my $pcs = {}; + my $map = ""; + + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^MAPPED_LIBRARIES:/) { + # Read the /proc/self/maps data + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $map .= $_; + } + last; + } + + if (/^--- Memory map:/) { + # Read /proc/self/maps data as formatted by DumpAddressMap() + my $buildvar = ""; + while (<PROFILE>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Parse "build=<dir>" specification if supplied + if (m/^\s*build=(.*)\n/) { + $buildvar = $1; + } + + # Expand "$build" variable if available + $_ =~ s/\$build\b/$buildvar/g; + + $map .= $_; + } + last; + } + + # Read entry of the form: + # <count1>: <bytes1> [<count2>: <bytes2>] @ a1 a2 a3 ... an + s/^\s*//; + s/\s*$//; + if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { + my $stack = $5; + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + + if ($sample_adjustment) { + if ($sampling_algorithm == 2) { + # Remote-heap version 2 + # The sampling frequency is the rate of a Poisson process. 
+ # This means that the probability of sampling an allocation of + # size X with sampling rate Y is 1 - exp(-X/Y) + my $ratio; + $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor; + $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } else { + # Remote-heap version 1 + my $ratio; + $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + if ($ratio < 1) { + $n1 /= $ratio; + $s1 /= $ratio; + } + $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + if ($ratio < 1) { + $n2 /= $ratio; + $s2 /= $ratio; + } + } + } + + my @counts = ($n1, $s1, $n2, $s2); + AddEntries($profile, $pcs, $stack, $counts[$index]); + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +sub ReadSynchProfile { + my ($prog, $fname, $header) = @_; + + my $map = ''; + my $profile = {}; + my $pcs = {}; + my $sampling_period = 1; + my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $seen_clockrate = 0; + my $line; + + my $index = 0; + if ($main::opt_total_delay) { + $index = 0; + } elsif ($main::opt_contentions) { + $index = 1; + } elsif ($main::opt_mean_delay) { + $index = 2; + } + + while ( $line = <PROFILE> ) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $count, $stack) = ($1, $2, $3); + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + $count *= $sampling_period; + + my @values = ($cycles, $count, $cycles / $count); + AddEntries($profile, $pcs, $stack, $values[$index]); + + } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || + $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $stack) = ($1, $2); + if ($cycles !~ /^\d+$/) { + next; + } + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + + AddEntries($profile, $pcs, $stack, $cycles); + + } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1,$2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "cycles/second") { + $cyclespernanosec = $value / 1e9; + $seen_clockrate = 1; + } elsif ($variable eq "sampling period") { + $sampling_period = $value; + } elsif ($variable eq "ms since reset") { + # Currently nothing is done with this value in pprof + # So we just silently ignore it for now + } elsif ($variable eq "discarded samples") { + # Currently nothing is done with this value in pprof + # So we just silently ignore it for now + } else { + printf STDERR ("Ignoring unnknown variable in /contention output: " . + "'%s' = '%s'\n",$variable,$value); + } + } else { + # Memory map entry + $map .= $line; + } + } + close PROFILE; + + if (!$seen_clockrate) { + printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", + $cyclespernanosec); + } + + my $r = {}; + $r->{version} = 0; + $r->{period} = $sampling_period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +# Given a hex value in the form "0x1abcd" return "0001abcd" or +# "000000000001abcd", depending on the current address length. 
+# There's probably a more idiomatic (or faster) way to do this... +sub HexExtend { + my $addr = shift; + + $addr =~ s/^0x//; + + if (length $addr > $address_length) { + printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + } + + return substr("000000000000000".$addr, -$address_length); +} + +##### Symbol extraction ##### + +# Aggressively search the lib_prefix values for the given library +# If all else fails, just return the name of the library unmodified. +# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" +# it will search the following locations in this order, until it finds a file: +# /my/path/lib/dir/mylib.so +# /other/path/lib/dir/mylib.so +# /my/path/dir/mylib.so +# /other/path/dir/mylib.so +# /my/path/mylib.so +# /other/path/mylib.so +# /lib/dir/mylib.so (returned as last resort) +sub FindLibrary { + my $file = shift; + my $suffix = $file; + + # Search for the library as described above + do { + foreach my $prefix (@prefix_list) { + my $fullpath = $prefix . $suffix; + if (-e $fullpath) { + return $fullpath; + } + } + } while ($suffix =~ s|^/[^/]+/|/|); + return $file; +} + +# Return path to library with debugging symbols. +# For libc libraries, the copy in /usr/lib/debug contains debugging symbols +sub DebuggingLibrary { + my $file = shift; + if ($file =~ m|^/| && -f "/usr/lib/debug$file") { + return "/usr/lib/debug$file"; + } + return undef; +} + +# Parse text section header of a library using objdump +sub ParseTextSectionHeaderFromObjdump { + my $lib = shift; + + my $size = undef; + my $vma; + my $file_offset; + # Get objdump output from the library file to figure out how to + # map between mapped addresses and addresses in the library. + my $objdump = $obj_tool_map{"objdump"}; + open(OBJDUMP, "$objdump -h $lib |") + || error("$objdump $lib: $!\n"); + while (<OBJDUMP>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Idx Name Size VMA LMA File off Algn + # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 + # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file + # offset may still be 8. But AddressSub below will still handle that. + my @x = split; + if (($#x >= 6) && ($x[1] eq '.text')) { + $size = $x[2]; + $vma = $x[3]; + $file_offset = $x[5]; + last; + } + } + close(OBJDUMP); + + if (!defined($size)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +# Parse text section header of a library using otool (on OS X) +sub ParseTextSectionHeaderFromOtool { + my $lib = shift; + + my $size = undef; + my $vma = undef; + my $file_offset = undef; + # Get otool output from the library file to figure out how to + # map between mapped addresses and addresses in the library. + my $otool = $obj_tool_map{"otool"}; + open(OTOOL, "$otool -l $lib |") + || error("$otool $lib: $!\n"); + my $cmd = ""; + my $sectname = ""; + my $segname = ""; + foreach my $line (<OTOOL>) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + # Load command <#> + # cmd LC_SEGMENT + # [...] + # Section + # sectname __text + # segname __TEXT + # addr 0x000009f8 + # size 0x00018b9e + # offset 2552 + # align 2^2 (4) + # We will need to strip off the leading 0x from the hex addresses, + # and convert the offset into hex. 
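+    # The if/elsif chain below is a small state machine: it tracks the
+    # current load command, section and segment names, skips lines until we
+    # are inside the __TEXT,__text section, and then captures addr, size and
+    # the (decimal) offset.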
+ if ($line =~ /Load command/) { + $cmd = ""; + $sectname = ""; + $segname = ""; + } elsif ($line =~ /Section/) { + $sectname = ""; + $segname = ""; + } elsif ($line =~ /cmd (\w+)/) { + $cmd = $1; + } elsif ($line =~ /sectname (\w+)/) { + $sectname = $1; + } elsif ($line =~ /segname (\w+)/) { + $segname = $1; + } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && + $sectname eq "__text" && + $segname eq "__TEXT")) { + next; + } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { + $vma = $1; + } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { + $size = $1; + } elsif ($line =~ /\boffset ([0-9]+)/) { + $file_offset = sprintf("%016x", $1); + } + if (defined($vma) && defined($size) && defined($file_offset)) { + last; + } + } + close(OTOOL); + + if (!defined($vma) || !defined($size) || !defined($file_offset)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +sub ParseTextSectionHeader { + # obj_tool_map("otool") is only defined if we're in a Mach-O environment + if (defined($obj_tool_map{"otool"})) { + my $r = ParseTextSectionHeaderFromOtool(@_); + if (defined($r)){ + return $r; + } + } + # If otool doesn't work, or we don't have it, fall back to objdump + return ParseTextSectionHeaderFromObjdump(@_); +} + +# Split /proc/pid/maps dump into a list of libraries +sub ParseLibraries { + return if $main::use_symbol_page; # We don't need libraries info. + my $prog = shift; + my $map = shift; + my $pcs = shift; + + my $result = []; + my $h = "[a-f0-9]+"; + my $zero_offset = HexExtend("0"); + + my $buildvar = ""; + foreach my $l (split("\n", $map)) { + if ($l =~ m/^\s*build=(.*)$/) { + $buildvar = $1; + } + + my $start; + my $finish; + my $offset; + my $lib; + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib)((\.\d+)+\w*)?)$/i) { + # Full line from /proc/self/maps. Example: + # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + # Cooked line from DumpAddressMap. Example: + # 40000000-40015000: /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = $3; + } else { + next; + } + + # Expand "$build" variable if available + $lib =~ s/\$build\b/$buildvar/g; + + $lib = FindLibrary($lib); + + # Check for pre-relocated libraries, which use pre-relocated symbol tables + # and thus require adjusting the offset that we'll use to translate + # VM addresses into symbol table addresses. + # Only do this if we're not going to fetch the symbol table from a + # debugging copy of the library. + if (!DebuggingLibrary($lib)) { + my $text = ParseTextSectionHeader($lib); + if (defined($text)) { + my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); + $offset = AddressAdd($offset, $vma_offset); + } + } + + push(@{$result}, [$lib, $start, $finish, $offset]); + } + + # Append special entry for additional library (not relocated) + if ($main::opt_lib ne "") { + my $text = ParseTextSectionHeader($main::opt_lib); + if (defined($text)) { + my $start = $text->{vma}; + my $finish = AddressAdd($start, $text->{size}); + + push(@{$result}, [$main::opt_lib, $start, $finish, $start]); + } + } + + # Append special entry for the main program. 
This covers + # 0..max_pc_value_seen, so that we assume pc values not found in one + # of the library ranges will be treated as coming from the main + # program binary. + my $min_pc = HexExtend("0"); + my $max_pc = $min_pc; # find the maximal PC value in any sample + foreach my $pc (keys(%{$pcs})) { + if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } + } + push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); + + return $result; +} + +# Add two hex addresses of length $address_length. +# Run pprof --test for unit test if this is changed. +sub AddressAdd { + my $addr1 = shift; + my $addr2 = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. + + if ($main::opt_debug and $main::opt_test) { + print STDERR "AddressAdd $addr1 + $addr2 = "; + } + + my $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + my $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2); + my $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + my $r = sprintf("%07x", $sum); + + $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2) + $c; + $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + $r = sprintf("%07x", $sum) . $r; + + $sum = hex($addr1) + hex($addr2) + $c; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } + + return $r; + } +} + + +# Subtract two hex addresses of length $address_length. +# Run pprof --test for unit test if this is changed. +sub AddressSub { + my $addr1 = shift; + my $addr2 = shift; + my $diff; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $diff); + + } else { + # Do the addition in 7-nibble chunks to trivialize borrow handling. + # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; } + + my $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + my $a2 = hex(substr($addr2,-7)); + $addr2 = substr($addr2,0,-7); + my $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + my $r = sprintf("%07x", $diff); + + $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + $a2 = hex(substr($addr2,-7)) + $b; + $addr2 = substr($addr2,0,-7); + $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + $r = sprintf("%07x", $diff) . $r; + + $a1 = hex($addr1); + $a2 = hex($addr2) + $b; + if ($a2 > $a1) { $a1 += 0x100; } + $diff = $a1 - $a2; + $r = sprintf("%02x", $diff) . $r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + + return $r; + } +} + +# Increment a hex addresses of length $address_length. +# Run pprof --test for unit test if this is changed. +sub AddressInc { + my $addr = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr)+1) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. 
+ # We are always doing this to step through the addresses in a function, + # and will almost never overflow the first chunk, so we check for this + # case and exit early. + + # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; } + + my $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + my $r = sprintf("%07x", $sum); + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "0000000"; + } + + $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + $r = sprintf("%07x", $sum) . $r; + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "00000000000000"; + } + + $sum = hex($addr) + 1; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + return $r; + } +} + +# Extract symbols for all PC values found in profile +sub ExtractSymbols { + my $libs = shift; + my $pcset = shift; + + my $symbols = {}; + + # Map each PC value to the containing library + my %seen = (); + foreach my $lib (@{$libs}) { + my $libname = $lib->[0]; + my $start = $lib->[1]; + my $finish = $lib->[2]; + my $offset = $lib->[3]; + + # Get list of pcs that belong in this library. + my $contained = []; + foreach my $pc (keys(%{$pcset})) { + if (!$seen{$pc} && ($pc ge $start) && ($pc le $finish)) { + $seen{$pc} = 1; + push(@{$contained}, $pc); + } + } + # Map to symbols + MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); + } + + return $symbols; +} + +# Map list of PC values to symbols for a given image +sub MapToSymbols { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + my $debug = 0; + + # Ignore empty binaries + if ($#{$pclist} < 0) { return; } + + # Figure out the addr2line command to use + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = "$addr2line -f -C -e $image"; + if (exists $obj_tool_map{"addr2line_pdb"}) { + $addr2line = $obj_tool_map{"addr2line_pdb"}; + $cmd = "$addr2line --demangle -f -C -e $image"; + } + + # If "addr2line" isn't installed on the system at all, just use + # nm to get what info we can (function names, but not line numbers). + if (system("$addr2line --help >/dev/null 2>&1") != 0) { + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + return; + } + + # "addr2line -i" can produce a variable number of lines per input + # address, with no separator that allows us to tell when data for + # the next address starts. So we find the address for a special + # symbol (_fini) and interleave this address between all real + # addresses passed to addr2line. The name of this special symbol + # can then be used as a separator. + $sep_address = undef; # May be filled in by MapSymbolsWithNM() + my $nm_symbols = {}; + MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); + # TODO(csilvers): only add '-i' if addr2line supports it. + if (defined($sep_address)) { + # Only add " -i" to addr2line if the binary supports it. + # addr2line --help returns 0, but not if it sees an unknown flag first. 
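+    # That is why '-i' is placed before '--help' below: an addr2line that
+    # does not understand '-i' exits non-zero on the unknown flag, and we
+    # then fall back to running without it.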
+ if (system("$cmd -i --help >/dev/null 2>&1") == 0) { + $cmd .= " -i"; + } else { + $sep_address = undef; # no need for sep_address if we don't support -i + } + } + + # Make file with all PC values with intervening 'sep_address' so + # that we can reliably detect the end of inlined function list + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + if ($debug) { print("---- $image ---\n"); } + for (my $i = 0; $i <= $#{$pclist}; $i++) { + # addr2line always reads hex addresses, and does not need '0x' prefix. + if ($debug) { printf("%s\n", $pclist->[$i]); } + printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); + if (defined($sep_address)) { + printf ADDRESSES ("%s\n", $sep_address); + } + } + close(ADDRESSES); + if ($debug) { + print("----\n"); + system("cat $main::tmpfile_sym"); + print("----\n"); + system("$cmd <$main::tmpfile_sym"); + print("----\n"); + } + + open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n"); + my $count = 0; # Index in pclist + while (<SYMBOLS>) { + # Read fullfunction and filelineinfo from next pair of lines + s/\r?\n$//g; + my $fullfunction = $_; + $_ = <SYMBOLS>; + s/\r?\n$//g; + my $filelinenum = $_; + + if (defined($sep_address) && $fullfunction eq $sep_symbol) { + # Terminating marker for data for this address + $count++; + next; + } + + $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths + + my $pcstr = $pclist->[$count]; + my $function = ShortFunctionName($fullfunction); + if ($fullfunction eq '??') { + # See if nm found a symbol + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } + } + + # Prepend to accumulated symbols for pcstr + # (so that caller comes before callee) + my $sym = $symbols->{$pcstr}; + if (!defined($sym)) { + $sym = []; + $symbols->{$pcstr} = $sym; + } + unshift(@{$sym}, $function, $filelinenum, $fullfunction); + if ($debug) { printf("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if (!defined($sep_address)) { + # Inlining is off, se this entry ends immediately + $count++; + } + } + close(SYMBOLS); +} + +# Use nm to map the list of referenced PCs to symbols. Return true iff we +# are able to read procedure information via nm. +sub MapSymbolsWithNM { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + # Get nm output sorted by increasing address + my $symbol_table = GetProcedureBoundaries($image, "."); + if (!%{$symbol_table}) { + return 0; + } + # Start addresses are already the right length (8 or 16 hex digits). + my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] } + keys(%{$symbol_table}); + + if ($#names < 0) { + # No symbols: just use addresses + foreach my $pc (@{$pclist}) { + my $pcstr = "0x" . $pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + return 0; + } + + # Sort addresses so we can do a join against nm output + my $index = 0; + my $fullname = $names[0]; + my $name = ShortFunctionName($fullname); + foreach my $pc (sort { $a cmp $b } @{$pclist}) { + # Adjust for mapped offset + my $mpc = AddressSub($pc, $offset); + while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){ + $index++; + $fullname = $names[$index]; + $name = ShortFunctionName($fullname); + } + if ($mpc lt $symbol_table->{$fullname}->[1]) { + $symbols->{$pc} = [$name, "?", $fullname]; + } else { + my $pcstr = "0x" . 
$pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + } + return 1; +} + +sub ShortFunctionName { + my $function = shift; + while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types + while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments + $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type + return $function; +} + +##### Miscellaneous ##### + +# Find the right versions of the above object tools to use. The +# argument is the program file being analyzed, and should be an ELF +# 32-bit or ELF 64-bit executable file. The location of the tools +# is determined by considering the following options in this order: +# 1) --tools option, if set +# 2) PPROF_TOOLS environment variable, if set +# 3) the environment +sub ConfigureObjTools { + my $prog_file = shift; + + # Check for the existence of $prog_file because /usr/bin/file does not + # predictably return error status in prod. + (-e $prog_file) || error("$prog_file does not exist.\n"); + + # Follow symlinks (at least for systems where "file" supports that) + my $file_type = `/usr/bin/file -L $prog_file 2>/dev/null || /usr/bin/file $prog_file`; + if ($file_type =~ /64-bit/) { + # Change $address_length to 16 if the program file is ELF 64-bit. + # We can't detect this from many (most?) heap or lock contention + # profiles, since the actual addresses referenced are generally in low + # memory even for 64-bit programs. + $address_length = 16; + } + + if ($file_type =~ /MS Windows/) { + # For windows, we provide a version of nm and addr2line as part of + # the opensource release, which is capable of parsing + # Windows-style PDB executables. It should live in the path, or + # in the same directory as pprof. + $obj_tool_map{"nm_pdb"} = "nm-pdb"; + $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb"; + } + + if ($file_type =~ /Mach-O/) { + # OS X uses otool to examine Mach-O files, rather than objdump. + $obj_tool_map{"otool"} = "otool"; + } + + # Go fill in %obj_tool_map with the pathnames to use: + foreach my $tool (keys %obj_tool_map) { + $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}); + } +} + +# Returns the path of a caller-specified object tool. If --tools or +# PPROF_TOOLS are specified, then returns the full path to the tool +# with that prefix. Otherwise, returns the path unmodified (which +# means we will look for it on PATH). +sub ConfigureTool { + my $tool = shift; + my $path; + + if ($main::opt_tools ne "") { + # Use a prefix specified by the --tools option... + $path = $main::opt_tools . $tool; + if (!-x $path) { + error("No '$tool' found with prefix specified by --tools $main::opt_tools\n"); + } + } elsif (exists $ENV{"PPROF_TOOLS"} && + $ENV{"PPROF_TOOLS"} ne "") { + #... or specified with the PPROF_TOOLS environment variable... + $path = $ENV{"PPROF_TOOLS"} . $tool; + if (!-x $path) { + error("No '$tool' found with prefix specified by PPROF_TOOLS=$ENV{PPROF_TOOLS}\n"); + } + } else { + # ... otherwise use the version that exists in the same directory as + # pprof. If there's nothing there, use $PATH. 
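A worked example of the directory fallback described just above (illustrative values only, not part of the diff):

    # e.g. if pprof was invoked as "/usr/local/bin/pprof", the match below
    # leaves $` = "/usr/local/bin/", so for $tool = "nm" we first look for
    # "/usr/local/bin/nm" and fall back to plain "nm" on $PATH if that file
    # is not executable.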
+ $0 =~ m,[^/]*$,; # this is everything after the last slash + my $dirname = $`; # this is everything up to and including the last slash + if (-x "$dirname$tool") { + $path = "$dirname$tool"; + } else { + $path = $tool; + } + } + if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; } + return $path; +} + +sub cleanup { + unlink($main::tmpfile_sym); + for (my $i = 0; $i < $main::next_tmpfile; $i++) { + unlink(PsTempName($i)); + } + # We leave any collected profiles in $HOME/pprof in case the user wants + # to look at them later. We print a message informing them of this. + if ((scalar(@main::profile_files) > 0) && + defined($main::collected_profile)) { + if (scalar(@main::profile_files) == 1) { + print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; + } + print STDERR "If you want to investigate this profile further, you can do:\n"; + print STDERR "\n"; + print STDERR " pprof \\\n"; + print STDERR " $main::prog \\\n"; + print STDERR " $main::collected_profile\n"; + print STDERR "\n"; + } +} + +sub sighandler { + cleanup(); + exit(1); +} + +sub error { + my $msg = shift; + print STDERR $msg; + cleanup(); + exit(1); +} + + +# Run $nm_command and get all the resulting procedure boundaries whose +# names match "$regexp" and returns them in a hashtable mapping from +# procedure name to a two-element vector of [start address, end address] +sub GetProcedureBoundariesViaNm { + my $nm_command = shift; + my $regexp = shift; + + my $symbol_table = {}; + open(NM, "$nm_command |") || error("$nm_command: $!\n"); + my $last_start = "0"; + my $routine = ""; + while (<NM>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (m/^([0-9a-f]+) (.) (..*)/) { + my $start_val = $1; + my $type = $2; + my $this_routine = $3; + + # It's possible for two symbols to share the same address, if + # one is a zero-length variable (like __start_google_malloc) or + # one symbol is a weak alias to another (like __libc_malloc). + # In such cases, we want to ignore all values except for the + # actual symbol, which in nm-speak has type "T". The logic + # below does this, though it's a bit tricky: what happens when + # we have a series of lines with the same address, is the first + # one gets queued up to be processed. However, it won't + # *actually* be processed until later, when we read a line with + # a different address. That means that as long as we're reading + # lines with the same address, we have a chance to replace that + # item in the queue, which we do whenever we see a 'T' entry -- + # that is, a line with type 'T'. If we never see a 'T' entry, + # we'll just go ahead and process the first entry (which never + # got touched in the queue), and ignore the others. + if ($start_val eq $last_start && $type =~ /t/i) { + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; + } elsif ($start_val eq $last_start) { + # We're not the 'T' symbol at this address, so ignore us. + next; + } + + if ($this_routine eq $sep_symbol) { + $sep_address = HexExtend($start_val); + } + + # Tag this routine with the starting address in case the image + # has multiple occurrences of this routine. 
We use a syntax + # that resembles template parameters that are automatically + # stripped out by ShortFunctionName() + $this_routine .= "<$start_val>"; + + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($start_val)]; + } + $last_start = $start_val; + $routine = $this_routine; + } elsif (m/^Loaded image name: (.+)/) { + # The win32 nm workalike emits information about the binary it is using. + if ($main::opt_debug) { print STDERR "Using Image $1\n"; } + } elsif (m/^PDB file name: (.+)/) { + # The win32 nm workalike emits information about the pdb it is using. + if ($main::opt_debug) { print STDERR "Using PDB $1\n"; } + } + } + close(NM); + # Handle the last line in the nm output. Unfortunately, we don't know + # how big this last symbol is, because we don't know how big the file + # is. For now, we just give it a size of 0. + # TODO(csilvers): do better here. + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($last_start)]; + } + + return $symbol_table; +} + +# Gets the procedure boundaries for all routines in "$image" whose names +# match "$regexp" and returns them in a hashtable mapping from procedure +# name to a two-element vector of [start address, end address]. +# Will return an empty map if nm is not installed or not working properly. +sub GetProcedureBoundaries { + my $image = shift; + my $regexp = shift; + + # For libc libraries, the copy in /usr/lib/debug contains debugging symbols + my $debugging = DebuggingLibrary($image); + if ($debugging) { + $image = $debugging; + } + + my $nm = $obj_tool_map{"nm"}; + my $cppfilt = $obj_tool_map{"c++filt"}; + + # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm + # binary doesn't support --demangle. In addition, for OS X we need + # to use the -f flag to get 'flat' nm output (otherwise we don't sort + # properly and get incorrect results). Unfortunately, GNU nm uses -f + # in an incompatible way. So first we test whether our nm supports + # --demangle and -f. + my $demangle_flag = ""; + my $cppfilt_flag = ""; + if (system("$nm --demangle $image >/dev/null 2>&1") == 0) { + # In this mode, we do "nm --demangle <foo>" + $demangle_flag = "--demangle"; + $cppfilt_flag = ""; + } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) { + # In this mode, we do "nm <foo> | c++filt" + $cppfilt_flag = " | $cppfilt"; + }; + my $flatten_flag = ""; + if (system("$nm -f $image >/dev/null 2>&1") == 0) { + $flatten_flag = "-f"; + } + + # Finally, in the case $image isn't a debug library, we try again with + # -D to at least get *exported* symbols. If we can't use --demangle, + # we use c++filt instead, if it exists on this system. + my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag", + "$nm -D -n $flatten_flag $demangle_flag" . + " $image 2>/dev/null $cppfilt_flag"); + # If the executable is an MS Windows PDB-format executable, we'll + # have set up obj_tool_map("nm_pdb"). In this case, we actually + # want to use both unix nm and windows-specific nm_pdb, since + # PDB-format executables can apparently include dwarf .o files. 
+ if (exists $obj_tool_map{"nm_pdb"}) { + my $nm_pdb = $obj_tool_map{"nm_pdb"}; + push(@nm_commands, "$nm_pdb --demangle $image 2>/dev/null"); + } + + foreach my $nm_command (@nm_commands) { + my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp); + return $symbol_table if (%{$symbol_table}); + } + my $symbol_table = {}; + return $symbol_table; +} + + +# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings. +# To make them more readable, we add underscores at interesting places. +# This routine removes the underscores, producing the canonical representation +# used by pprof to represent addresses, particularly in the tested routines. +sub CanonicalHex { + my $arg = shift; + return join '', (split '_',$arg); +} + + +# Unit test for AddressAdd: +sub AddressAddUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd ($row->[0], $row->[1]); + if ($sum ne $row->[2]) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + my $expected = join '', (split '_',$row->[2]); + if ($sum ne CanonicalHex($row->[2])) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressSub: +sub AddressSubUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub ($row->[0], $row->[1]); + if ($sum ne $row->[3]) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. 
+ $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + if ($sum ne CanonicalHex($row->[3])) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressInc: +sub AddressIncUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc ($row->[0]); + if ($sum ne $row->[4]) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc (CanonicalHex($row->[0])); + if ($sum ne CanonicalHex($row->[4])) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Driver for unit tests. +# Currently just the address add/subtract/increment routines for 64-bit. +sub RunUnitTests { + my $error_count = 0; + + # This is a list of tuples [a, b, a+b, a-b, a+1] + my $unit_test_data_8 = [ + [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], + [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], + [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], + [qw(00000001 ffffffff 00000000 00000002 00000002)], + [qw(00000001 fffffff0 fffffff1 00000011 00000002)], + ]; + my $unit_test_data_16 = [ + # The implementation handles data in 7-nibble chunks, so those are the + # interesting boundaries. 
+ [qw(aaaaaaaa 50505050 + 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)], + [qw(50505050 aaaaaaaa + 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)], + [qw(ffffffff aaaaaaaa + 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)], + [qw(00000001 ffffffff + 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)], + [qw(00000001 fffffff0 + 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)], + + [qw(00_a00000a_aaaaaaa 50505050 + 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)], + [qw(0f_fff0005_0505050 aaaaaaaa + 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)], + [qw(00_000000f_fffffff 01_800000a_aaaaaaa + 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)], + [qw(00_0000000_0000001 ff_fffffff_fffffff + 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)], + [qw(00_0000000_0000001 ff_fffffff_ffffff0 + ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)], + ]; + + $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16); + if ($error_count > 0) { + print STDERR $error_count, " errors: FAILED\n"; + } else { + print STDERR "PASS\n"; + } + exit ($error_count); +} + diff --git a/third_party/tcmalloc/chromium/src/profile-handler.cc b/third_party/tcmalloc/chromium/src/profile-handler.cc new file mode 100644 index 0000000..e658d30 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/profile-handler.cc @@ -0,0 +1,499 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Nabeel Mian +// +// Implements management of profile timers and the corresponding signal handler. 
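As an orientation for the profile-handler file that follows, the sketch below shows the registration flow from a client's point of view, using the C API declared in profile-handler.h later in this change. It is an illustrative example only (the tick counter and function names are hypothetical, not part of the diff), and the callback sticks to async-signal-safe operations as the header requires.

#include <signal.h>
#include <stddef.h>
#include "profile-handler.h"

static volatile sig_atomic_t g_ticks = 0;  // hypothetical tick counter

// Runs inside the SIGPROF handler, so it must stay async-signal-safe.
static void CountTick(int sig, siginfo_t* info, void* ucontext, void* arg) {
  ++g_ticks;
}

static void CountTicksDuringWork() {
  // Registering the first callback enables the SIGPROF handler.
  ProfileHandlerToken* token = ProfileHandlerRegisterCallback(CountTick, NULL);
  // ... profiled work happens here; g_ticks advances at the timer frequency ...
  // Unregistering the last callback disables the handler again.
  ProfileHandlerUnregisterCallback(token);
}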
+ +#include "config.h" +#include "profile-handler.h" + +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + +#include <stdio.h> +#include <errno.h> +#include <sys/time.h> + +#include <list> +#include <string> + +#include "base/dynamic_annotations.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "maybe_threads.h" + +using std::list; +using std::string; + +// This structure is used by ProfileHandlerRegisterCallback and +// ProfileHandlerUnregisterCallback as a handle to a registered callback. +struct ProfileHandlerToken { + // Sets the callback and associated arg. + ProfileHandlerToken(ProfileHandlerCallback cb, void* cb_arg) + : callback(cb), + callback_arg(cb_arg) { + } + + // Callback function to be invoked on receiving a profile timer interrupt. + ProfileHandlerCallback callback; + // Argument for the callback function. + void* callback_arg; +}; + +// This class manages profile timers and associated signal handler. This is a +// a singleton. +class ProfileHandler { + public: + // Registers the current thread with the profile handler. On systems which + // have a separate interval timer for each thread, this function starts the + // timer for the current thread. + // + // The function also attempts to determine whether or not timers are shared by + // all threads in the process. (With LinuxThreads, and with NPTL on some + // Linux kernel versions, each thread has separate timers.) + // + // Prior to determining whether timers are shared, this function will + // unconditionally start the timer. However, if this function determines + // that timers are shared, then it will stop the timer if no callbacks are + // currently registered. + void RegisterThread(); + + // Registers a callback routine to receive profile timer ticks. The returned + // token is to be used when unregistering this callback and must not be + // deleted by the caller. Registration of the first callback enables the + // SIGPROF handler. + ProfileHandlerToken* RegisterCallback(ProfileHandlerCallback callback, + void* callback_arg); + + // Unregisters a previously registered callback. Expects the token returned + // by the corresponding RegisterCallback routine. Unregistering the last + // callback disables the SIGPROF handler. + void UnregisterCallback(ProfileHandlerToken* token) + NO_THREAD_SAFETY_ANALYSIS; + + // Unregisters all the callbacks, stops the timer if shared, disables the + // SIGPROF handler and clears the timer_sharing_ state. + void Reset(); + + // Gets the current state of profile handler. + void GetState(ProfileHandlerState* state); + + // Initializes and returns the ProfileHandler singleton. + static ProfileHandler* Instance(); + + private: + ProfileHandler(); + ~ProfileHandler(); + + // Largest allowed frequency. + static const int32 kMaxFrequency = 4000; + // Default frequency. + static const int32 kDefaultFrequency = 100; + + // ProfileHandler singleton. + static ProfileHandler* instance_; + + // pthread_once_t for one time initialization of ProfileHandler singleton. + static pthread_once_t once_; + + // Initializes the ProfileHandler singleton via GoogleOnceInit. + static void Init(); + + // Counts the number of SIGPROF interrupts received. + int64 interrupts_ GUARDED_BY(signal_lock_); + + // SIGPROF interrupt frequency, read-only after construction. + int32 frequency_; + + // Counts the number of callbacks registered. 
+ int32 callback_count_ GUARDED_BY(control_lock_); + + // Whether or not the threading system provides interval timers that are + // shared by all threads in a process. + enum { + // No timer initialization attempted yet. + TIMERS_UNTOUCHED, + // First thread has registered and set timer. + TIMERS_ONE_SET, + // Timers are shared by all threads. + TIMERS_SHARED, + // Timers are separate in each thread. + TIMERS_SEPARATE + } timer_sharing_ GUARDED_BY(control_lock_); + + // This lock serializes the registration of threads and protects the + // callbacks_ list below. + // Locking order: + // In the context of a signal handler, acquire signal_lock_ to walk the + // callback list. Otherwise, acquire control_lock_, disable the signal + // handler and then acquire signal_lock_. + SpinLock control_lock_ ACQUIRED_BEFORE(signal_lock_); + SpinLock signal_lock_; + + // Holds the list of registered callbacks. We expect the list to be pretty + // small. Currently, the cpu profiler (base/profiler) and thread module + // (base/thread.h) are the only two components registering callbacks. + // Following are the locking requirements for callbacks_: + // For read-write access outside the SIGPROF handler: + // - Acquire control_lock_ + // - Disable SIGPROF handler. + // - Acquire signal_lock_ + // For read-only access in the context of SIGPROF handler + // (Read-write access is *not allowed* in the SIGPROF handler) + // - Acquire signal_lock_ + // For read-only access outside SIGPROF handler: + // - Acquire control_lock_ + typedef list<ProfileHandlerToken*> CallbackList; + typedef CallbackList::iterator CallbackIterator; + CallbackList callbacks_ GUARDED_BY(signal_lock_); + + // Starts the interval timer. If the thread library shares timers between + // threads, this function starts the shared timer. Otherwise, this will start + // the timer in the current thread. + void StartTimer() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Stops the interval timer. If the thread library shares timers between + // threads, this function stops the shared timer. Otherwise, this will stop + // the timer in the current thread. + void StopTimer() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Returns true if the profile interval timer is enabled in the current + // thread. This actually checks the kernel's interval timer setting. (It is + // used to detect whether timers are shared or separate.) + bool IsTimerRunning() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Sets the timer interrupt signal handler. + void EnableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // Disables (ignores) the timer interrupt signal. + void DisableHandler() EXCLUSIVE_LOCKS_REQUIRED(control_lock_); + + // SIGPROF handler. Iterate over and call all the registered callbacks. + static void SignalHandler(int sig, siginfo_t* sinfo, void* ucontext); + + DISALLOW_EVIL_CONSTRUCTORS(ProfileHandler); +}; + +ProfileHandler* ProfileHandler::instance_ = NULL; +pthread_once_t ProfileHandler::once_ = PTHREAD_ONCE_INIT; + +const int32 ProfileHandler::kMaxFrequency; +const int32 ProfileHandler::kDefaultFrequency; + +// If we are LD_PRELOAD-ed against a non-pthreads app, then +// pthread_once won't be defined. We declare it here, for that +// case (with weak linkage) which will cause the non-definition to +// resolve to NULL. We can then check for NULL or not in Instance. 
+#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif +extern "C" int pthread_once(pthread_once_t *, void (*)(void)) + __THROW ATTRIBUTE_WEAK; + +void ProfileHandler::Init() { + instance_ = new ProfileHandler(); +} + +ProfileHandler* ProfileHandler::Instance() { + if (pthread_once) { + pthread_once(&once_, Init); + } + if (instance_ == NULL) { + // This will be true on systems that don't link in pthreads, + // including on FreeBSD where pthread_once has a non-zero address + // (but doesn't do anything) even when pthreads isn't linked in. + Init(); + assert(instance_ != NULL); + } + return instance_; +} + +ProfileHandler::ProfileHandler() + : interrupts_(0), + callback_count_(0), + timer_sharing_(TIMERS_UNTOUCHED) { + SpinLockHolder cl(&control_lock_); + // Get frequency of interrupts (if specified) + char junk; + const char* fr = getenv("CPUPROFILE_FREQUENCY"); + if (fr != NULL && (sscanf(fr, "%u%c", &frequency_, &junk) == 1) && + (frequency_ > 0)) { + // Limit to kMaxFrequency + frequency_ = (frequency_ > kMaxFrequency) ? kMaxFrequency : frequency_; + } else { + frequency_ = kDefaultFrequency; + } + + // Ignore signals until we decide to turn profiling on. (Paranoia; + // should already be ignored.) + DisableHandler(); +} + +ProfileHandler::~ProfileHandler() { + Reset(); +} + +void ProfileHandler::RegisterThread() { + SpinLockHolder cl(&control_lock_); + + // We try to detect whether timers are being shared by setting a + // timer in the first call to this function, then checking whether + // it's set in the second call. + // + // Note that this detection method requires that the first two calls + // to RegisterThread must be made from different threads. (Subsequent + // calls will see timer_sharing_ set to either TIMERS_SEPARATE or + // TIMERS_SHARED, and won't try to detect the timer sharing type.) + // + // Also note that if timer settings were inherited across new thread + // creation but *not* shared, this approach wouldn't work. That's + // not an issue for any Linux threading implementation, and should + // not be a problem for a POSIX-compliant threads implementation. + switch (timer_sharing_) { + case TIMERS_UNTOUCHED: + StartTimer(); + timer_sharing_ = TIMERS_ONE_SET; + break; + case TIMERS_ONE_SET: + // If the timer is running, that means that the main thread's + // timer setup is seen in this (second) thread -- and therefore + // that timers are shared. + if (IsTimerRunning()) { + timer_sharing_ = TIMERS_SHARED; + // If callback is already registered, we have to keep the timer + // running. If not, we disable the timer here. + if (callback_count_ == 0) { + StopTimer(); + } + } else { + timer_sharing_ = TIMERS_SEPARATE; + StartTimer(); + } + break; + case TIMERS_SHARED: + // Nothing needed. + break; + case TIMERS_SEPARATE: + StartTimer(); + break; + } +} + +ProfileHandlerToken* ProfileHandler::RegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + ProfileHandlerToken* token = new ProfileHandlerToken(callback, callback_arg); + + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + callbacks_.push_back(token); + } + // Start the timer if timer is shared and this is a first callback. 
+ if ((callback_count_ == 0) && (timer_sharing_ == TIMERS_SHARED)) { + StartTimer(); + } + ++callback_count_; + EnableHandler(); + return token; +} + +void ProfileHandler::UnregisterCallback(ProfileHandlerToken* token) { + SpinLockHolder cl(&control_lock_); + for (CallbackIterator it = callbacks_.begin(); it != callbacks_.end(); + ++it) { + if ((*it) == token) { + RAW_CHECK(callback_count_ > 0, "Invalid callback count"); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + delete *it; + callbacks_.erase(it); + } + --callback_count_; + if (callback_count_ > 0) { + EnableHandler(); + } else if (timer_sharing_ == TIMERS_SHARED) { + StopTimer(); + } + return; + } + } + // Unknown token. + RAW_LOG(FATAL, "Invalid token"); +} + +void ProfileHandler::Reset() { + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); + CallbackIterator it = callbacks_.begin(); + while (it != callbacks_.end()) { + CallbackIterator tmp = it; + ++it; + delete *tmp; + callbacks_.erase(tmp); + } + } + callback_count_ = 0; + if (timer_sharing_ == TIMERS_SHARED) { + StopTimer(); + } + timer_sharing_ = TIMERS_UNTOUCHED; +} + +void ProfileHandler::GetState(ProfileHandlerState* state) { + SpinLockHolder cl(&control_lock_); + DisableHandler(); + { + SpinLockHolder sl(&signal_lock_); // Protects interrupts_. + state->interrupts = interrupts_; + } + if (callback_count_ > 0) { + EnableHandler(); + } + state->frequency = frequency_; + state->callback_count = callback_count_; +} + +void ProfileHandler::StartTimer() { + struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 1000000 / frequency_; + timer.it_value = timer.it_interval; + setitimer(ITIMER_PROF, &timer, 0); +} + +void ProfileHandler::StopTimer() { + struct itimerval timer; + memset(&timer, 0, sizeof timer); + setitimer(ITIMER_PROF, &timer, 0); +} + +bool ProfileHandler::IsTimerRunning() { + struct itimerval current_timer; + RAW_CHECK(0 == getitimer(ITIMER_PROF, &current_timer), "getitimer"); + return (current_timer.it_value.tv_sec != 0 || + current_timer.it_value.tv_usec != 0); +} + +void ProfileHandler::EnableHandler() { + struct sigaction sa; + sa.sa_sigaction = SignalHandler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (enable)"); +} + +void ProfileHandler::DisableHandler() { + struct sigaction sa; + sa.sa_handler = SIG_IGN; + sa.sa_flags = SA_RESTART; + sigemptyset(&sa.sa_mask); + RAW_CHECK(sigaction(SIGPROF, &sa, NULL) == 0, "sigprof (disable)"); +} + +void ProfileHandler::SignalHandler(int sig, siginfo_t* sinfo, void* ucontext) { + int saved_errno = errno; + RAW_CHECK(instance_ != NULL, "ProfileHandler is not initialized"); + { + SpinLockHolder sl(&instance_->signal_lock_); + ++instance_->interrupts_; + for (CallbackIterator it = instance_->callbacks_.begin(); + it != instance_->callbacks_.end(); + ++it) { + (*it)->callback(sig, sinfo, ucontext, (*it)->callback_arg); + } + } + errno = saved_errno; +} + +// The sole purpose of this class is to initialize the ProfileHandler singleton +// when the global static objects are created. Note that the main thread will +// be registered at this time. 
+class ProfileHandlerInitializer { + public: + ProfileHandlerInitializer() { + ProfileHandler::Instance()->RegisterThread(); + } + + private: + DISALLOW_EVIL_CONSTRUCTORS(ProfileHandlerInitializer); +}; +// ProfileHandlerInitializer singleton +static ProfileHandlerInitializer profile_handler_initializer; + +extern "C" void ProfileHandlerRegisterThread() { + ProfileHandler::Instance()->RegisterThread(); +} + +extern "C" ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return ProfileHandler::Instance()->RegisterCallback(callback, callback_arg); +} + +extern "C" void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { + ProfileHandler::Instance()->UnregisterCallback(token); +} + +extern "C" void ProfileHandlerReset() { + return ProfileHandler::Instance()->Reset(); +} + +extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) { + ProfileHandler::Instance()->GetState(state); +} + +#else // OS_CYGWIN + +// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +extern "C" void ProfileHandlerRegisterThread() { +} + +extern "C" ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg) { + return NULL; +} + +extern "C" void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token) { +} + +extern "C" void ProfileHandlerReset() { +} + +extern "C" void ProfileHandlerGetState(ProfileHandlerState* state) { +} + +#endif // OS_CYGWIN diff --git a/third_party/tcmalloc/chromium/src/profile-handler.h b/third_party/tcmalloc/chromium/src/profile-handler.h new file mode 100644 index 0000000..1cbe253 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/profile-handler.h @@ -0,0 +1,147 @@ +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Nabeel Mian + * + * This module manages the cpu profile timers and the associated interrupt + * handler. When enabled, all registered threads in the program are profiled. + * (Note: if using linux 2.4 or earlier, you must use the Thread class, in + * google3/thread, to ensure all threads are profiled.) + * + * Any component interested in receiving a profile timer interrupt can do so by + * registering a callback. All registered callbacks must be async-signal-safe. + * + * Note: This module requires the sole ownership of ITIMER_PROF timer and the + * SIGPROF signal. + */ + +#ifndef BASE_PROFILE_HANDLER_H_ +#define BASE_PROFILE_HANDLER_H_ + +#include "config.h" +#include <signal.h> +#ifdef COMPILER_MSVC +#include "conflict-signal.h" +#endif +#include "base/basictypes.h" + +/* All this code should be usable from within C apps. */ +#ifdef __cplusplus +extern "C" { +#endif + +/* Forward declaration. */ +struct ProfileHandlerToken; + +/* + * Callback function to be used with ProfileHandlerRegisterCallback. This + * function will be called in the context of SIGPROF signal handler and must + * be async-signal-safe. The first three arguments are the values provided by + * the SIGPROF signal handler. We use void* to avoid using ucontext_t on + * non-POSIX systems. + * + * Requirements: + * - Callback must be async-signal-safe. + * - None of the functions in ProfileHandler are async-signal-safe. Therefore, + * callback function *must* not call any of the ProfileHandler functions. + * - Callback is not required to be re-entrant. At most one instance of + * callback can run at a time. + * + * Notes: + * - The SIGPROF signal handler saves and restores errno, so the callback + * doesn't need to. + * - Callback code *must* not acquire lock(s) to serialize access to data shared + * with the code outside the signal handler (callback must be + * async-signal-safe). If such a serialization is needed, follow the model + * used by profiler.cc: + * + * When code other than the signal handler modifies the shared data it must: + * - Acquire lock. + * - Unregister the callback with the ProfileHandler. + * - Modify shared data. + * - Re-register the callback. + * - Release lock. + * and the callback code gets a lockless, read-write access to the data. + */ +typedef void (*ProfileHandlerCallback)(int sig, siginfo_t* sig_info, + void* ucontext, void* callback_arg); + +/* + * Registers a new thread with profile handler and should be called only once + * per thread. The main thread is registered at program startup. This routine + * is called by the Thread module in google3/thread whenever a new thread is + * created. This function is not async-signal-safe. + */ +void ProfileHandlerRegisterThread(); + +/* + * Registers a callback routine. This callback function will be called in the + * context of SIGPROF handler, so must be async-signal-safe. The returned token + * is to be used when unregistering this callback via + * ProfileHandlerUnregisterCallback. Registering the first callback enables + * the SIGPROF signal handler. Caller must not free the returned token. This + * function is not async-signal-safe. + */ +ProfileHandlerToken* ProfileHandlerRegisterCallback( + ProfileHandlerCallback callback, void* callback_arg); + +/* + * Unregisters a previously registered callback. Expects the token returned + * by the corresponding ProfileHandlerRegisterCallback and asserts that the + * passed token is valid. Unregistering the last callback disables the SIGPROF + * signal handler. 
It waits for the currently running callback to + * complete before returning. This function is not async-signal-safe. + */ +void ProfileHandlerUnregisterCallback(ProfileHandlerToken* token); + +/* + * FOR TESTING ONLY + * Unregisters all the callbacks, stops the timers (if shared) and disables the + * SIGPROF handler. All the threads, including the main thread, need to be + * re-registered after this call. This function is not async-signal-safe. + */ +void ProfileHandlerReset(); + +/* + * Stores profile handler's current state. This function is not + * async-signal-safe. + */ +struct ProfileHandlerState { + int32 frequency; /* Profiling frequency */ + int32 callback_count; /* Number of callbacks registered */ + int64 interrupts; /* Number of interrupts received */ +}; +void ProfileHandlerGetState(struct ProfileHandlerState* state); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* BASE_PROFILE_HANDLER_H_ */ diff --git a/third_party/tcmalloc/chromium/src/profiledata.cc b/third_party/tcmalloc/chromium/src/profiledata.cc new file mode 100644 index 0000000..5f2531b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/profiledata.cc @@ -0,0 +1,331 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Sanjay Ghemawat +// Chris Demetriou (refactoring) +// +// Collect profiling data. + +#include <config.h> +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <sys/time.h> +#include <string.h> +#include <fcntl.h> + +#include "profiledata.h" + +#include "base/logging.h" +#include "base/sysinfo.h" + +// All of these are initialized in profiledata.h. +const int ProfileData::kMaxStackDepth; +const int ProfileData::kAssociativity; +const int ProfileData::kBuckets; +const int ProfileData::kBufferLength; + +ProfileData::Options::Options() + : frequency_(1) { +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). 
However, that's not part of its public interface. +void ProfileData::Evict(const Entry& entry) { + const int d = entry.depth; + const int nslots = d + 2; // Number of slots needed in eviction buffer + if (num_evicted_ + nslots > kBufferLength) { + FlushEvicted(); + assert(num_evicted_ == 0); + assert(nslots <= kBufferLength); + } + evict_[num_evicted_++] = entry.count; + evict_[num_evicted_++] = d; + memcpy(&evict_[num_evicted_], entry.stack, d * sizeof(Slot)); + num_evicted_ += d; +} + +ProfileData::ProfileData() + : hash_(0), + evict_(0), + num_evicted_(0), + out_(-1), + count_(0), + evictions_(0), + total_bytes_(0), + fname_(0), + start_time_(0) { +} + +bool ProfileData::Start(const char* fname, + const ProfileData::Options& options) { + if (enabled()) { + return false; + } + + // Open output file and initialize various data structures + int fd = open(fname, O_CREAT | O_WRONLY | O_TRUNC, 0666); + if (fd < 0) { + // Can't open outfile for write + return false; + } + + start_time_ = time(NULL); + fname_ = strdup(fname); + + // Reset counters + num_evicted_ = 0; + count_ = 0; + evictions_ = 0; + total_bytes_ = 0; + + hash_ = new Bucket[kBuckets]; + evict_ = new Slot[kBufferLength]; + memset(hash_, 0, sizeof(hash_[0]) * kBuckets); + + // Record special entries + evict_[num_evicted_++] = 0; // count for header + evict_[num_evicted_++] = 3; // depth for header + evict_[num_evicted_++] = 0; // Version number + CHECK_NE(0, options.frequency()); + int period = 1000000 / options.frequency(); + evict_[num_evicted_++] = period; // Period (microseconds) + evict_[num_evicted_++] = 0; // Padding + + out_ = fd; + + return true; +} + +ProfileData::~ProfileData() { + Stop(); +} + +// Dump /proc/maps data to fd. Copied from heap-profile-table.cc. +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +static void FDWrite(int fd, const char* buf, size_t len) { + while (len > 0) { + ssize_t r; + NO_INTR(r = write(fd, buf, len)); + RAW_CHECK(r >= 0, "write failed"); + buf += r; + len -= r; + } +} + +static void DumpProcSelfMaps(int fd) { + ProcMapsIterator::Buffer iterbuf; + ProcMapsIterator it(0, &iterbuf); // 0 means "current pid" + + uint64 start, end, offset; + int64 inode; + char *flags, *filename; + ProcMapsIterator::Buffer linebuf; + while (it.Next(&start, &end, &flags, &offset, &inode, &filename)) { + int written = it.FormatLine(linebuf.buf_, sizeof(linebuf.buf_), + start, end, flags, offset, inode, filename, + 0); + FDWrite(fd, linebuf.buf_, written); + } +} + +void ProfileData::Stop() { + if (!enabled()) { + return; + } + + // Move data from hash table to eviction buffer + for (int b = 0; b < kBuckets; b++) { + Bucket* bucket = &hash_[b]; + for (int a = 0; a < kAssociativity; a++) { + if (bucket->entry[a].count > 0) { + Evict(bucket->entry[a]); + } + } + } + + if (num_evicted_ + 3 > kBufferLength) { + // Ensure there is enough room for end of data marker + FlushEvicted(); + } + + // Write end of data marker + evict_[num_evicted_++] = 0; // count + evict_[num_evicted_++] = 1; // depth + evict_[num_evicted_++] = 0; // end of data marker + FlushEvicted(); + + // Dump "/proc/self/maps" so we get list of mapped shared libraries + DumpProcSelfMaps(out_); + + Reset(); + fprintf(stderr, "PROFILE: interrupts/evictions/bytes = %d/%d/%" PRIuS "\n", + count_, evictions_, total_bytes_); +} + +void ProfileData::Reset() { + if (!enabled()) { + return; + } + + // Don't reset count_, evictions_, or total_bytes_ here. 
They're used + // by Stop to print information about the profile after reset, and are + // cleared by Start when starting a new profile. + close(out_); + delete[] hash_; + hash_ = 0; + delete[] evict_; + evict_ = 0; + num_evicted_ = 0; + free(fname_); + fname_ = 0; + start_time_ = 0; + + out_ = -1; +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::GetCurrentState(State* state) const { + if (enabled()) { + state->enabled = true; + state->start_time = start_time_; + state->samples_gathered = count_; + int buf_size = sizeof(state->profile_name); + strncpy(state->profile_name, fname_, buf_size); + state->profile_name[buf_size-1] = '\0'; + } else { + state->enabled = false; + state->start_time = 0; + state->samples_gathered = 0; + state->profile_name[0] = '\0'; + } +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::FlushTable() { + if (!enabled()) { + return; + } + + // Move data from hash table to eviction buffer + for (int b = 0; b < kBuckets; b++) { + Bucket* bucket = &hash_[b]; + for (int a = 0; a < kAssociativity; a++) { + if (bucket->entry[a].count > 0) { + Evict(bucket->entry[a]); + bucket->entry[a].depth = 0; + bucket->entry[a].count = 0; + } + } + } + + // Write out all pending data + FlushEvicted(); +} + +void ProfileData::Add(int depth, const void* const* stack) { + if (!enabled()) { + return; + } + + if (depth > kMaxStackDepth) depth = kMaxStackDepth; + RAW_CHECK(depth > 0, "ProfileData::Add depth <= 0"); + + // Make hash-value + Slot h = 0; + for (int i = 0; i < depth; i++) { + Slot slot = reinterpret_cast<Slot>(stack[i]); + h = (h << 8) | (h >> (8*(sizeof(h)-1))); + h += (slot * 31) + (slot * 7) + (slot * 3); + } + + count_++; + + // See if table already has an entry for this trace + bool done = false; + Bucket* bucket = &hash_[h % kBuckets]; + for (int a = 0; a < kAssociativity; a++) { + Entry* e = &bucket->entry[a]; + if (e->depth == depth) { + bool match = true; + for (int i = 0; i < depth; i++) { + if (e->stack[i] != reinterpret_cast<Slot>(stack[i])) { + match = false; + break; + } + } + if (match) { + e->count++; + done = true; + break; + } + } + } + + if (!done) { + // Evict entry with smallest count + Entry* e = &bucket->entry[0]; + for (int a = 1; a < kAssociativity; a++) { + if (bucket->entry[a].count < e->count) { + e = &bucket->entry[a]; + } + } + if (e->count > 0) { + evictions_++; + Evict(*e); + } + + // Use the newly evicted entry + e->depth = depth; + e->count = 1; + for (int i = 0; i < depth; i++) { + e->stack[i] = reinterpret_cast<Slot>(stack[i]); + } + } +} + +// This function is safe to call from asynchronous signals (but is not +// re-entrant). However, that's not part of its public interface. +void ProfileData::FlushEvicted() { + if (num_evicted_ > 0) { + const char* buf = reinterpret_cast<char*>(evict_); + size_t bytes = sizeof(evict_[0]) * num_evicted_; + total_bytes_ += bytes; + FDWrite(out_, buf, bytes); + } + num_evicted_ = 0; +} diff --git a/third_party/tcmalloc/chromium/src/profiledata.h b/third_party/tcmalloc/chromium/src/profiledata.h new file mode 100644 index 0000000..da7ea9e --- /dev/null +++ b/third_party/tcmalloc/chromium/src/profiledata.h @@ -0,0 +1,183 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Sanjay Ghemawat +// Chris Demetriou (refactoring) +// +// Collect profiling data. +// +// The profile data file format is documented in +// doc/cpuprofile-fileformat.html + + +#ifndef BASE_PROFILEDATA_H_ +#define BASE_PROFILEDATA_H_ + +#include <config.h> +#include <time.h> // for time_t +#include <stdint.h> +#include "base/basictypes.h" + +// A class that accumulates profile samples and writes them to a file. +// +// Each sample contains a stack trace and a count. Memory usage is +// reduced by combining profile samples that have the same stack trace +// by adding up the associated counts. +// +// Profile data is accumulated in a bounded amount of memory, and will +// be flushed to a file as necessary to stay within the memory limit. +// +// Use of this class assumes external synchronization. The exact +// requirements of that synchronization are that: +// +// - 'Add' may be called from asynchronous signals, but is not +// re-entrant. +// +// - None of 'Start', 'Stop', 'Reset', 'Flush', and 'Add' may be +// called at the same time. +// +// - 'Start', 'Stop', or 'Reset' should not be called while 'Enabled' +// or 'GetCurrent' are running, and vice versa. +// +// A profiler which uses asynchronous signals to add samples will +// typically use two locks to protect this data structure: +// +// - A SpinLock which is held over all calls except for the 'Add' +// call made from the signal handler. +// +// - A SpinLock which is held over calls to 'Start', 'Stop', 'Reset', +// 'Flush', and 'Add'. (This SpinLock should be acquired after +// the first SpinLock in all cases where both are needed.) +class ProfileData { + public: + struct State { + bool enabled; // Is profiling currently enabled? + time_t start_time; // If enabled, when was profiling started? + char profile_name[1024]; // Name of file being written, or '\0' + int samples_gathered; // Number of samples gathered so far (or 0) + }; + + class Options { + public: + Options(); + + // Get and set the sample frequency. 
+ int frequency() const { + return frequency_; + } + void set_frequency(int frequency) { + frequency_ = frequency; + } + + private: + int frequency_; // Sample frequency. + }; + + static const int kMaxStackDepth = 64; // Max stack depth stored in profile + + ProfileData(); + ~ProfileData(); + + // If data collection is not already enabled start to collect data + // into fname. Parameters related to this profiling run are specified + // by 'options'. + // + // Returns true if data collection could be started, otherwise (if an + // error occurred or if data collection was already enabled) returns + // false. + bool Start(const char *fname, const Options& options); + + // If data collection is enabled, stop data collection and write the + // data to disk. + void Stop(); + + // Stop data collection without writing anything else to disk, and + // discard any collected data. + void Reset(); + + // If data collection is enabled, record a sample with 'depth' + // entries from 'stack'. (depth must be > 0.) At most + // kMaxStackDepth stack entries will be recorded, starting with + // stack[0]. + // + // This function is safe to call from asynchronous signals (but is + // not re-entrant). + void Add(int depth, const void* const* stack); + + // If data collection is enabled, write the data to disk (and leave + // the collector enabled). + void FlushTable(); + + // Is data collection currently enabled? + bool enabled() const { return out_ >= 0; } + + // Get the current state of the data collector. + void GetCurrentState(State* state) const; + + private: + static const int kAssociativity = 4; // For hashtable + static const int kBuckets = 1 << 10; // For hashtable + static const int kBufferLength = 1 << 18; // For eviction buffer + + // Type of slots: each slot can be either a count, or a PC value + typedef uintptr_t Slot; + + // Hash-table/eviction-buffer entry (a.k.a. a sample) + struct Entry { + Slot count; // Number of hits + Slot depth; // Stack depth + Slot stack[kMaxStackDepth]; // Stack contents + }; + + // Hash table bucket + struct Bucket { + Entry entry[kAssociativity]; + }; + + Bucket* hash_; // hash table + Slot* evict_; // evicted entries + int num_evicted_; // how many evicted entries? + int out_; // fd for output file. + int count_; // How many samples recorded + int evictions_; // How many evictions + size_t total_bytes_; // How much output + char* fname_; // Profile file name + time_t start_time_; // Start time, or 0 + + // Move 'entry' to the eviction buffer. + void Evict(const Entry& entry); + + // Write contents of eviction buffer to disk. + void FlushEvicted(); + + DISALLOW_EVIL_CONSTRUCTORS(ProfileData); +}; + +#endif // BASE_PROFILEDATA_H_ diff --git a/third_party/tcmalloc/chromium/src/profiler.cc b/third_party/tcmalloc/chromium/src/profiler.cc new file mode 100644 index 0000000..3ac51d4 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/profiler.cc @@ -0,0 +1,342 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// Chris Demetriou (refactoring) +// +// Profile current program by sampling stack-trace every so often + +#include "config.h" +#include "getpc.h" // should be first to get the _GNU_SOURCE dfn +#include <signal.h> +#include <assert.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for getpid() +#endif +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> +#elif defined(HAVE_CYGWIN_SIGNAL_H) +#include <cygwin/signal.h> +typedef ucontext ucontext_t; +#else +typedef int ucontext_t; // just to quiet the compiler, mostly +#endif +#include <sys/time.h> +#include <string> +#include <google/profiler.h> +#include <google/stacktrace.h> +#include "base/commandlineflags.h" +#include "base/logging.h" +#include "base/googleinit.h" +#include "base/spinlock.h" +#include "base/sysinfo.h" /* for GetUniquePathFromEnv, etc */ +#include "profiledata.h" +#include "profile-handler.h" +#ifdef HAVE_CONFLICT_SIGNAL_H +#include "conflict-signal.h" /* used on msvc machines */ +#endif + +using std::string; + +// Collects up all profile data. This is a singleton, which is +// initialized by a constructor at startup. +class CpuProfiler { + public: + CpuProfiler(); + ~CpuProfiler(); + + // Start profiler to write profile info into fname + bool Start(const char* fname, const ProfilerOptions* options); + + // Stop profiling and write the data to disk. + void Stop(); + + // Write the data to disk (and continue profiling). + void FlushTable(); + + bool Enabled(); + + void GetCurrentState(ProfilerState* state); + + static CpuProfiler instance_; + + private: + // This lock implements the locking requirements described in the ProfileData + // documentation, specifically: + // + // lock_ is held all over all collector_ method calls except for the 'Add' + // call made from the signal handler, to protect against concurrent use of + // collector_'s control routines. Code other than signal handler must + // unregister the signal handler before calling any collector_ method. + // 'Add' method in the collector is protected by a guarantee from + // ProfileHandle that only one instance of prof_handler can run at a time. + SpinLock lock_; + ProfileData collector_; + + // Filter function and its argument, if any. (NULL means include all + // samples). Set at start, read-only while running. Written while holding + // lock_, read and executed in the context of SIGPROF interrupt. 
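  // For illustration (hypothetical client code, not part of this file): a
  // filter that only keeps samples from threads that have opted in could
  // look like
  //
  //   static int InProfiledThread(void* arg) {   // runs inside SIGPROF
  //     return *static_cast<int*>(arg);          // nonzero => keep the sample
  //   }
  //
  // and would be supplied through ProfilerOptions::filter_in_thread and
  // filter_in_thread_arg when profiling is started (see Start() below).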
+ int (*filter_)(void*); + void* filter_arg_; + + // Opague token returned by the profile handler. To be used when calling + // ProfileHandlerUnregisterCallback. + ProfileHandlerToken* prof_handler_token_; + + // Sets up a callback to receive SIGPROF interrupt. + void EnableHandler(); + + // Disables receiving SIGPROF interrupt. + void DisableHandler(); + + // Signal handler that records the interrupted pc in the profile data. + static void prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler); +}; + +// Profile data structure singleton: Constructor will check to see if +// profiling should be enabled. Destructor will write profile data +// out to disk. +CpuProfiler CpuProfiler::instance_; + +// Initialize profiling: activated if getenv("CPUPROFILE") exists. +CpuProfiler::CpuProfiler() + : prof_handler_token_(NULL) { + // TODO(cgd) Move this code *out* of the CpuProfile constructor into a + // separate object responsible for initialization. With ProfileHandler there + // is no need to limit the number of profilers. + char fname[PATH_MAX]; + if (!GetUniquePathFromEnv("CPUPROFILE", fname)) { + return; + } + // We don't enable profiling if setuid -- it's a security risk +#ifdef HAVE_GETEUID + if (getuid() != geteuid()) + return; +#endif + + if (!Start(fname, NULL)) { + RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n", + fname, strerror(errno)); + } +} + +bool CpuProfiler::Start(const char* fname, const ProfilerOptions* options) { + SpinLockHolder cl(&lock_); + + if (collector_.enabled()) { + return false; + } + + ProfileHandlerState prof_handler_state; + ProfileHandlerGetState(&prof_handler_state); + + ProfileData::Options collector_options; + collector_options.set_frequency(prof_handler_state.frequency); + if (!collector_.Start(fname, collector_options)) { + return false; + } + + filter_ = NULL; + if (options != NULL && options->filter_in_thread != NULL) { + filter_ = options->filter_in_thread; + filter_arg_ = options->filter_in_thread_arg; + } + + // Setup handler for SIGPROF interrupts + EnableHandler(); + + return true; +} + +CpuProfiler::~CpuProfiler() { + Stop(); +} + +// Stop profiling and write out any collected profile data +void CpuProfiler::Stop() { + SpinLockHolder cl(&lock_); + + if (!collector_.enabled()) { + return; + } + + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // stopping the collector. + DisableHandler(); + + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to stop the collector. + collector_.Stop(); +} + +void CpuProfiler::FlushTable() { + SpinLockHolder cl(&lock_); + + if (!collector_.enabled()) { + return; + } + + // Unregister prof_handler to stop receiving SIGPROF interrupts before + // flushing the profile data. + DisableHandler(); + + // DisableHandler waits for the currently running callback to complete and + // guarantees no future invocations. It is safe to flush the profile data. 
+ collector_.FlushTable(); + + EnableHandler(); +} + +bool CpuProfiler::Enabled() { + SpinLockHolder cl(&lock_); + return collector_.enabled(); +} + +void CpuProfiler::GetCurrentState(ProfilerState* state) { + ProfileData::State collector_state; + { + SpinLockHolder cl(&lock_); + collector_.GetCurrentState(&collector_state); + } + + state->enabled = collector_state.enabled; + state->start_time = static_cast<time_t>(collector_state.start_time); + state->samples_gathered = collector_state.samples_gathered; + int buf_size = sizeof(state->profile_name); + strncpy(state->profile_name, collector_state.profile_name, buf_size); + state->profile_name[buf_size-1] = '\0'; +} + +void CpuProfiler::EnableHandler() { + RAW_CHECK(prof_handler_token_ == NULL, "SIGPROF handler already registered"); + prof_handler_token_ = ProfileHandlerRegisterCallback(prof_handler, this); + RAW_CHECK(prof_handler_token_ != NULL, "Failed to set up SIGPROF handler"); +} + +void CpuProfiler::DisableHandler() { + RAW_CHECK(prof_handler_token_ != NULL, "SIGPROF handler is not registered"); + ProfileHandlerUnregisterCallback(prof_handler_token_); + prof_handler_token_ = NULL; +} + +// Signal handler that records the pc in the profile-data structure. We do no +// synchronization here. profile-handler.cc guarantees that at most one +// instance of prof_handler() will run at a time. All other routines that +// access the data touched by prof_handler() disable this signal handler before +// accessing the data and therefore cannot execute concurrently with +// prof_handler(). +void CpuProfiler::prof_handler(int sig, siginfo_t*, void* signal_ucontext, + void* cpu_profiler) { + CpuProfiler* instance = static_cast<CpuProfiler*>(cpu_profiler); + + if (instance->filter_ == NULL || + (*instance->filter_)(instance->filter_arg_)) { + void* stack[ProfileData::kMaxStackDepth]; + + // The top-most active routine doesn't show up as a normal + // frame, but as the "pc" value in the signal handler context. + stack[0] = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext)); + + // We skip the top two stack trace entries (this function and one + // signal handler frame) since they are artifacts of profiling and + // should not be measured. Other profiling related frames may be + // removed by "pprof" at analysis time. Instead of skipping the top + // frames, we could skip nothing, but that would increase the + // profile size unnecessarily. 
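    // (Hence the "stack + 1" and "arraysize(stack) - 1" below: the unwound
    // frames land in stack[1] onward, and the subsequent depth++ accounts
    // for the PC already stored in stack[0].)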
+ int depth = GetStackTraceWithContext(stack + 1, arraysize(stack) - 1, + 2, signal_ucontext); + depth++; // To account for pc value in stack[0]; + + instance->collector_.Add(depth, stack); + } +} + +#if !(defined(__CYGWIN__) || defined(__CYGWIN32__)) + +extern "C" PERFTOOLS_DLL_DECL void ProfilerRegisterThread() { + ProfileHandlerRegisterThread(); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerFlush() { + CpuProfiler::instance_.FlushTable(); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilingIsEnabledForAllThreads() { + return CpuProfiler::instance_.Enabled(); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilerStart(const char* fname) { + return CpuProfiler::instance_.Start(fname, NULL); +} + +extern "C" PERFTOOLS_DLL_DECL int ProfilerStartWithOptions( + const char *fname, const ProfilerOptions *options) { + return CpuProfiler::instance_.Start(fname, options); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerStop() { + CpuProfiler::instance_.Stop(); +} + +extern "C" PERFTOOLS_DLL_DECL void ProfilerGetCurrentState( + ProfilerState* state) { + CpuProfiler::instance_.GetCurrentState(state); +} + +#else // OS_CYGWIN + +// ITIMER_PROF doesn't work under cygwin. ITIMER_REAL is available, but doesn't +// work as well for profiling, and also interferes with alarm(). Because of +// these issues, unless a specific need is identified, profiler support is +// disabled under Cygwin. +extern "C" void ProfilerRegisterThread() { } +extern "C" void ProfilerFlush() { } +extern "C" int ProfilingIsEnabledForAllThreads() { return 0; } +extern "C" int ProfilerStart(const char* fname) { return 0; } +extern "C" int ProfilerStartWithOptions(const char *fname, + const ProfilerOptions *options) { + return 0; +} +extern "C" void ProfilerStop() { } +extern "C" void ProfilerGetCurrentState(ProfilerState* state) { + memset(state, 0, sizeof(*state)); +} + +#endif // OS_CYGWIN + +// DEPRECATED routines +extern "C" PERFTOOLS_DLL_DECL void ProfilerEnable() { } +extern "C" PERFTOOLS_DLL_DECL void ProfilerDisable() { } diff --git a/third_party/tcmalloc/chromium/src/raw_printer.cc b/third_party/tcmalloc/chromium/src/raw_printer.cc new file mode 100644 index 0000000..019555a --- /dev/null +++ b/third_party/tcmalloc/chromium/src/raw_printer.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: sanjay@google.com (Sanjay Ghemawat) + +#include <config.h> +#include <stdarg.h> +#include <stdio.h> +#include "raw_printer.h" +#include "base/logging.h" + +namespace base { + +RawPrinter::RawPrinter(char* buf, int length) + : base_(buf), + ptr_(buf), + limit_(buf + length - 1) { + RAW_DCHECK(length > 0, ""); + *ptr_ = '\0'; + *limit_ = '\0'; +} + +void RawPrinter::Printf(const char* format, ...) { + if (limit_ > ptr_) { + va_list ap; + va_start(ap, format); + int avail = limit_ - ptr_; + // We pass avail+1 to vsnprintf() since that routine needs room + // to store the trailing \0. + const int r = vsnprintf(ptr_, avail+1, format, ap); + va_end(ap); + if (r < 0) { + // Perhaps an old glibc that returns -1 on truncation? + ptr_ = limit_; + } else if (r > avail) { + // Truncation + ptr_ = limit_; + } else { + ptr_ += r; + } + } +} + +} diff --git a/third_party/tcmalloc/chromium/src/raw_printer.h b/third_party/tcmalloc/chromium/src/raw_printer.h new file mode 100644 index 0000000..62340bb --- /dev/null +++ b/third_party/tcmalloc/chromium/src/raw_printer.h @@ -0,0 +1,89 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// A printf() wrapper that writes into a fixed length buffer. +// Useful in low-level code that does not want to use allocating +// routines like StringPrintf(). +// +// The implementation currently uses vsnprintf(). 
This seems to +// be fine for use in many low-level contexts, but we may need to +// rethink this decision if we hit a problem with it calling +// down into malloc() etc. + +#ifndef BASE_RAW_PRINTER_H_ +#define BASE_RAW_PRINTER_H_ + +#include <config.h> +#include "base/basictypes.h" + +namespace base { + +class RawPrinter { + public: + // REQUIRES: "length > 0" + // Will printf any data added to this into "buf[0,length-1]" and + // will arrange to always keep buf[] null-terminated. + RawPrinter(char* buf, int length); + + // Return the number of bytes that have been appended to the string + // so far. Does not count any bytes that were dropped due to overflow. + int length() const { return (ptr_ - base_); } + + // Return the number of bytes that can be added to this. + int space_left() const { return (limit_ - ptr_); } + + // Format the supplied arguments according to the "format" string + // and append to this. Will silently truncate the output if it does + // not fit. + void Printf(const char* format, ...) +#ifdef HAVE___ATTRIBUTE__ + __attribute__ ((__format__ (__printf__, 2, 3))) +#endif +; + + private: + // We can write into [ptr_ .. limit_-1]. + // *limit_ is also writable, but reserved for a terminating \0 + // in case we overflow. + // + // Invariants: *ptr_ == \0 + // Invariants: *limit_ == \0 + char* base_; // Initial pointer + char* ptr_; // Where should we write next + char* limit_; // One past last non-\0 char we can write + + DISALLOW_COPY_AND_ASSIGN(RawPrinter); +}; + +} + +#endif // BASE_RAW_PRINTER_H_ diff --git a/third_party/tcmalloc/chromium/src/sampler.cc b/third_party/tcmalloc/chromium/src/sampler.cc new file mode 100644 index 0000000..cbc6ab4 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/sampler.cc @@ -0,0 +1,130 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. 
+// +// Author: Daniel Ford + +#include "sampler.h" + +#include <algorithm> // For min() +#include <cmath> + +using std::min; + +// The approximate gap in bytes between sampling actions. +// I.e., we take one sample approximately once every +// tcmalloc_sample_parameter bytes of allocation +// i.e. about once every 512KB. +#ifdef NO_TCMALLOC_SAMPLES +DEFINE_int64(tcmalloc_sample_parameter, 0, + "Unused: code is compiled with NO_TCMALLOC_SAMPLES"); +#else +DEFINE_int64(tcmalloc_sample_parameter, + EnvToInt64("TCMALLOC_SAMPLE_PARAMETER", 1<<19), + "The approximate gap in bytes between sampling actions. " + "This must be between 1 and 1<<58."); +// Note: there are other places in this file where the number 19 occurs. +#endif + +namespace tcmalloc { + +// Statics for Sampler +double Sampler::log_table_[1<<kFastlogNumBits]; + +// Populate the lookup table for FastLog2. +// This approximates the log2 curve with a step function. +// Steps have height equal to log2 of the mid-point of the step. +void Sampler::PopulateFastLog2Table() { + for (int i = 0; i < (1<<kFastlogNumBits); i++) { + log_table_[i] = (log(1.0 + static_cast<double>(i+0.5)/(1<<kFastlogNumBits)) + / log(2.0)); + } +} + +int Sampler::GetSamplePeriod() { + return FLAGS_tcmalloc_sample_parameter; +} + +// Run this before using your sampler +void Sampler::Init(uint32_t seed) { + // Initialize PRNG + if (seed != 0) { + rnd_ = seed; + } else { + rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); + if (rnd_ == 0) { + rnd_ = 1; + } + } + // Step it forward 20 times for good measure + for (int i = 0; i < 20; i++) { + rnd_ = NextRandom(rnd_); + } + // Initialize counter + bytes_until_sample_ = PickNextSamplingPoint(); +} + +// Initialize the Statics for the Sampler class +void Sampler::InitStatics() { + PopulateFastLog2Table(); +} + +// Generates a geometric variable with the specified mean (512K by default). +// This is done by generating a random number between 0 and 1 and applying +// the inverse cumulative distribution function for an exponential. +// Specifically: Let m be the inverse of the sample period, then +// the probability distribution function is m*exp(-mx) so the CDF is +// p = 1 - exp(-mx), so +// q = 1 - p = exp(-mx) +// log_e(q) = -mx +// -log_e(q)/m = x +// log_2(q) * (-log_e(2) * 1/m) = x +// In the code, q is actually in the range 1 to 2**26, hence the -26 below +size_t Sampler::PickNextSamplingPoint() { + rnd_ = NextRandom(rnd_); + // Take the top 26 bits as the random number + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) + const uint64_t prng_mod_power = 48; // Number of bits in prng + // The uint32_t cast is to prevent a (hard-to-reproduce) NAN + // under piii debug for some binaries. + double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0; + // Put the computed p-value through the CDF of a geometric. + // For faster performance (save ~1/20th exec time), replace + // min(0.0, FastLog2(q) - 26) by (Fastlog2(q) - 26.000705) + // The value 26.000705 is used rather than 26 to compensate + // for inaccuracies in FastLog2 which otherwise result in a + // negative answer. 
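  // As a concrete check of the formula below: q/2^26 is approximately
  // uniform on (0, 1], so FastLog2(q) - 26 behaves like log2(U) for
  // U ~ Uniform(0, 1]. Multiplying by -log(2.0) * FLAGS_tcmalloc_sample_parameter
  // turns that into -ln(U) * FLAGS_tcmalloc_sample_parameter, an exponentially
  // distributed step whose mean is the sample parameter; the trailing "+ 1"
  // keeps the step at least one byte.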
+ return static_cast<size_t>(min(0.0, (FastLog2(q) - 26)) * (-log(2.0) + * FLAGS_tcmalloc_sample_parameter) + 1); +} + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/sampler.h b/third_party/tcmalloc/chromium/src/sampler.h new file mode 100644 index 0000000..fa9e554 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/sampler.h @@ -0,0 +1,174 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Daniel Ford + +#ifndef TCMALLOC_SAMPLER_H_ +#define TCMALLOC_SAMPLER_H_ + +#include "config.h" +#include "common.h" +#include "static_vars.h" + +namespace tcmalloc { + +//------------------------------------------------------------------- +// Sampler to decide when to create a sample trace for an allocation +// Not thread safe: Each thread should have it's own sampler object. +// Caller must use external synchronization if used +// from multiple threads. +// +// With 512K average sample step (the default): +// the probability of sampling a 4K allocation is about 0.00778 +// the probability of sampling a 1MB allocation is about 0.865 +// the probability of sampling a 1GB allocation is about 1.00000 +// In general, the probablity of sampling is an allocation of size X +// given a flag value of Y (default 1M) is: +// 1 - e^(-X/Y) +// +// With 128K average sample step: +// the probability of sampling a 1MB allocation is about 0.99966 +// the probability of sampling a 1GB allocation is about 1.0 +// (about 1 - 2**(-26)) +// With 1M average sample step: +// the probability of sampling a 4K allocation is about 0.00390 +// the probability of sampling a 1MB allocation is about 0.632 +// the probability of sampling a 1GB allocation is about 1.0 +// +// The sampler works by representing memory as a long stream from +// which allocations are taken. Some of the bytes in this stream are +// marked and if an allocation includes a marked byte then it is +// sampled. 
Bytes are marked according to a Poisson point process +// with each byte being marked independently with probability +// p = 1/tcmalloc_sample_parameter. This makes the probability +// of sampling an allocation of X bytes equal to the CDF of +// a geometric with mean tcmalloc_sample_parameter. (ie. the +// probability that at least one byte in the range is marked). This +// is accurately given by the CDF of the corresponding exponential +// distribution : 1 - e^(X/tcmalloc_sample_parameter_) +// Independence of the byte marking ensures independence of +// the sampling of each allocation. +// +// This scheme is implemented by noting that, starting from any +// fixed place, the number of bytes until the next marked byte +// is geometrically distributed. This number is recorded as +// bytes_until_sample_. Every allocation subtracts from this +// number until it is less than 0. When this happens the current +// allocation is sampled. +// +// When an allocation occurs, bytes_until_sample_ is reset to +// a new independtly sampled geometric number of bytes. The +// memoryless property of the point process means that this may +// be taken as the number of bytes after the end of the current +// allocation until the next marked byte. This ensures that +// very large allocations which would intersect many marked bytes +// only result in a single call to PickNextSamplingPoint. +//------------------------------------------------------------------- + +class PERFTOOLS_DLL_DECL Sampler { + public: + // Initialize this sampler. + // Passing a seed of 0 gives a non-deterministic + // seed value given by casting the object ("this") + void Init(uint32_t seed); + void Cleanup(); + + // Record allocation of "k" bytes. Return true iff allocation + // should be sampled + bool SampleAllocation(size_t k); + + // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter) + size_t PickNextSamplingPoint(); + + // Initialize the statics for the Sampler class + static void InitStatics(); + + // Returns the current sample period + int GetSamplePeriod(); + + // The following are public for the purposes of testing + static uint64_t NextRandom(uint64_t rnd_); // Returns the next prng value + static double FastLog2(const double & d); // Computes Log2(x) quickly + static void PopulateFastLog2Table(); // Populate the lookup table + + private: + size_t bytes_until_sample_; // Bytes until we sample next + uint64_t rnd_; // Cheap random number generator + + // Statics for the fast log + // Note that this code may not depend on anything in //util + // hence the duplication of functionality here + static const int kFastlogNumBits = 10; + static const int kFastlogMask = (1 << kFastlogNumBits) - 1; + static double log_table_[1<<kFastlogNumBits]; // Constant +}; + +inline bool Sampler::SampleAllocation(size_t k) { + if (bytes_until_sample_ < k) { + bytes_until_sample_ = PickNextSamplingPoint(); + return true; + } else { + bytes_until_sample_ -= k; + return false; + } +} + +// Inline functions which are public for testing purposes + +// Returns the next prng value. +// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48 +// This is the lrand64 generator. 
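// (These are the multiplier, increment, and modulus used by the POSIX
// drand48()/lrand48() family of generators.)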
+inline uint64_t Sampler::NextRandom(uint64_t rnd) { + const uint64_t prng_mult = 0x5DEECE66DLL; + const uint64_t prng_add = 0xB; + const uint64_t prng_mod_power = 48; + const uint64_t prng_mod_mask = + ~((~static_cast<uint64_t>(0)) << prng_mod_power); + return (prng_mult * rnd + prng_add) & prng_mod_mask; +} + +// Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer +// This mimics the VeryFastLog2 code in those files +inline double Sampler::FastLog2(const double & d) { + ASSERT(d>0); + COMPILE_ASSERT(sizeof(d) == sizeof(uint64_t), DoubleMustBe64Bits); + uint64_t x; + memcpy(&x, &d, sizeof(x)); // we depend on the compiler inlining this + const uint32_t x_high = x >> 32; + const uint32_t y = x_high >> (20 - kFastlogNumBits) & kFastlogMask; + const int32_t exponent = ((x_high >> 20) & 0x7FF) - 1023; + return exponent + log_table_[y]; +} + +} // namespace tcmalloc + +#endif // TCMALLOC_SAMPLER_H_ diff --git a/third_party/tcmalloc/chromium/src/span.cc b/third_party/tcmalloc/chromium/src/span.cc new file mode 100644 index 0000000..ca0bab3 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/span.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
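Stepping back to the Sampler interface above: a minimal sketch of how a per-thread sampler is meant to be driven from an allocation path. RecordAllocationSample() is a hypothetical reporting hook, not part of tcmalloc, and Sampler::InitStatics() is assumed to have run once at startup.

#include <stddef.h>
#include "sampler.h"

// Hypothetical hook: capture a stack trace, bump counters, etc.
static void RecordAllocationSample(size_t /*size*/) { }

// One Sampler per thread, initialized with sampler->Init(seed).
void OnAllocation(tcmalloc::Sampler* sampler, size_t size) {
  // SampleAllocation() returns true roughly once per
  // tcmalloc_sample_parameter bytes of allocation.
  if (sampler->SampleAllocation(size)) {
    RecordAllocationSample(size);
  }
}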
+ +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#include <config.h> +#include "span.h" + +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif + +#include "static_vars.h" + +namespace tcmalloc { + +#ifdef SPAN_HISTORY +void Event(Span* span, char op, int v = 0) { + span->history[span->nexthistory] = op; + span->value[span->nexthistory] = v; + span->nexthistory++; + if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0; +} +#endif + +Span* NewSpan(PageID p, Length len) { + Span* result = Static::span_allocator()->New(); + memset(result, 0, sizeof(*result)); + result->start = p; + result->length = len; +#ifdef SPAN_HISTORY + result->nexthistory = 0; +#endif + return result; +} + +void DeleteSpan(Span* span) { +#ifndef NDEBUG + // In debug mode, trash the contents of deleted Spans + memset(span, 0x3f, sizeof(*span)); +#endif + Static::span_allocator()->Delete(span); +} + +void DLL_Init(Span* list) { + list->next = list; + list->prev = list; +} + +void DLL_Remove(Span* span) { + span->prev->next = span->next; + span->next->prev = span->prev; + span->prev = NULL; + span->next = NULL; +} + +int DLL_Length(const Span* list) { + int result = 0; + for (Span* s = list->next; s != list; s = s->next) { + result++; + } + return result; +} + +#if 0 // This isn't used. If that changes, rewrite to use TCMalloc_Printer. +void DLL_Print(const char* label, const Span* list) { + MESSAGE("%-10s %p:", label, list); + for (const Span* s = list->next; s != list; s = s->next) { + MESSAGE(" <%p,%"PRIuPTR",%"PRIuPTR">", s, s->start, s->length); + } + MESSAGE("%s\n", ""); // %s is to get around a compiler error. +} +#endif + +void DLL_Prepend(Span* list, Span* span) { + ASSERT(span->next == NULL); + ASSERT(span->prev == NULL); + span->next = list->next; + span->prev = list; + list->next->prev = span; + list->next = span; +} + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/span.h b/third_party/tcmalloc/chromium/src/span.h new file mode 100644 index 0000000..ab9a796 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/span.h @@ -0,0 +1,106 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A Span is a contiguous run of pages. + +#ifndef TCMALLOC_SPAN_H_ +#define TCMALLOC_SPAN_H_ + +#include <config.h> +#include "common.h" + +namespace tcmalloc { + +// Information kept for a span (a contiguous run of pages). +struct Span { + PageID start; // Starting page number + Length length; // Number of pages in span + Span* next; // Used when in link list + Span* prev; // Used when in link list + void* objects; // Linked list of free objects + unsigned int refcount : 16; // Number of non-free objects + unsigned int sizeclass : 8; // Size-class for small objects (or 0) + unsigned int location : 2; // Is the span on a freelist, and if so, which? + unsigned int sample : 1; // Sampled object? + +#undef SPAN_HISTORY +#ifdef SPAN_HISTORY + // For debugging, we can keep a log events per span + int nexthistory; + char history[64]; + int value[64]; +#endif + + // What freelist the span is on: IN_USE if on none, or normal or returned + enum { IN_USE, ON_NORMAL_FREELIST, ON_RETURNED_FREELIST }; +}; + +#ifdef SPAN_HISTORY +void Event(Span* span, char op, int v = 0); +#else +#define Event(s,o,v) ((void) 0) +#endif + +// Allocator/deallocator for spans +Span* NewSpan(PageID p, Length len); +void DeleteSpan(Span* span); + +// ------------------------------------------------------------------------- +// Doubly linked list of spans. +// ------------------------------------------------------------------------- + +// Initialize *list to an empty list. +void DLL_Init(Span* list); + +// Remove 'span' from the linked list in which it resides, updating the +// pointers of adjacent Spans and setting span's next and prev to NULL. +void DLL_Remove(Span* span); + +// Return true iff "list" is empty. +inline bool DLL_IsEmpty(const Span* list) { + return list->next == list; +} + +// Add span to the front of list. +void DLL_Prepend(Span* list, Span* span); + +// Return the length of the linked list. O(n) +int DLL_Length(const Span* list); + +// Print the contents of the list to stderr. +#if 0 // This isn't used. +void DLL_Print(const char* label, const Span* list); +#endif + +} // namespace tcmalloc + +#endif // TCMALLOC_SPAN_H_ diff --git a/third_party/tcmalloc/chromium/src/stack_trace_table.cc b/third_party/tcmalloc/chromium/src/stack_trace_table.cc new file mode 100644 index 0000000..6672af9 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stack_trace_table.cc @@ -0,0 +1,154 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Andrew Fikes + +#include <config.h> +#include "base/spinlock.h" +#include "common.h" +#include "static_vars.h" +#include "stack_trace_table.h" + +namespace tcmalloc { + +bool StackTraceTable::Bucket::KeyEqual(uintptr_t h, + const StackTrace& t) const { + const bool eq = (this->hash == h && this->trace.depth == t.depth); + for (int i = 0; eq && i < t.depth; ++i) { + if (this->trace.stack[i] != t.stack[i]) { + return false; + } + } + return eq; +} + +StackTraceTable::StackTraceTable() + : error_(false), + depth_total_(0), + bucket_total_(0), + table_(new Bucket*[kHashTableSize]()) { + memset(table_, 0, kHashTableSize * sizeof(Bucket*)); +} + +StackTraceTable::~StackTraceTable() { + delete[] table_; +} + +void StackTraceTable::AddTrace(const StackTrace& t) { + if (error_) { + return; + } + + // Hash function borrowed from base/heap-profile-table.cc + uintptr_t h = 0; + for (int i = 0; i < t.depth; ++i) { + h += reinterpret_cast<uintptr_t>(t.stack[i]); + h += h << 10; + h ^= h >> 6; + } + h += h << 3; + h ^= h >> 11; + + const int idx = h % kHashTableSize; + + Bucket* b = table_[idx]; + while (b != NULL && !b->KeyEqual(h, t)) { + b = b->next; + } + if (b != NULL) { + b->count++; + b->trace.size += t.size; // keep cumulative size + } else { + depth_total_ += t.depth; + bucket_total_++; + b = Static::bucket_allocator()->New(); + if (b == NULL) { + MESSAGE("tcmalloc: could not allocate bucket", sizeof(*b)); + error_ = true; + } else { + b->hash = h; + b->trace = t; + b->count = 1; + b->next = table_[idx]; + table_[idx] = b; + } + } +} + +void** StackTraceTable::ReadStackTracesAndClear() { + if (error_) { + return NULL; + } + + // Allocate output array + const int out_len = bucket_total_ * 3 + depth_total_ + 1; + void** out = new void*[out_len]; + if (out == NULL) { + MESSAGE("tcmalloc: allocation failed for stack traces\n", + out_len * sizeof(*out)); + return NULL; + } + + // Fill output array + int idx = 0; + for (int i = 0; i < kHashTableSize; ++i) { + Bucket* b = table_[i]; + while (b != NULL) { + out[idx++] = reinterpret_cast<void*>(static_cast<uintptr_t>(b->count)); + out[idx++] = reinterpret_cast<void*>(b->trace.size); // cumulative size + out[idx++] = reinterpret_cast<void*>(b->trace.depth); + for (int d = 0; d < b->trace.depth; ++d) { + out[idx++] = b->trace.stack[d]; + } + 
b = b->next; + } + } + out[idx++] = static_cast<uintptr_t>(0); + ASSERT(idx == out_len); + + // Clear state + error_ = false; + depth_total_ = 0; + bucket_total_ = 0; + SpinLockHolder h(Static::pageheap_lock()); + for (int i = 0; i < kHashTableSize; ++i) { + Bucket* b = table_[i]; + while (b != NULL) { + Bucket* next = b->next; + Static::bucket_allocator()->Delete(b); + b = next; + } + table_[i] = NULL; + } + + return out; +} + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/stack_trace_table.h b/third_party/tcmalloc/chromium/src/stack_trace_table.h new file mode 100644 index 0000000..799571a --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stack_trace_table.h @@ -0,0 +1,88 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Andrew Fikes +// +// Utility class for coalescing sampled stack traces. Not thread-safe. + +#ifndef TCMALLOC_STACK_TRACE_TABLE_H_ +#define TCMALLOC_STACK_TRACE_TABLE_H_ + +#include <config.h> +#include "common.h" + +namespace tcmalloc { + +class PERFTOOLS_DLL_DECL StackTraceTable { + public: + // REQUIRES: L < pageheap_lock + StackTraceTable(); + ~StackTraceTable(); + + // Adds stack trace "t" to table. + // + // REQUIRES: L >= pageheap_lock + void AddTrace(const StackTrace& t); + + // Returns stack traces formatted per MallocExtension guidelines. + // May return NULL on error. Clears state before returning. 
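  // A sketch of how a caller might walk the result, following the layout
  // written by the .cc file above (per bucket: count, cumulative size,
  // depth, then 'depth' PCs, ending with a 0 sentinel); freeing the array
  // with delete[] is an assumption here:
  //
  //   void** entries = table.ReadStackTracesAndClear();
  //   for (int i = 0; entries != NULL && entries[i] != NULL; ) {
  //     uintptr_t count = reinterpret_cast<uintptr_t>(entries[i]);
  //     uintptr_t size  = reinterpret_cast<uintptr_t>(entries[i + 1]);
  //     uintptr_t depth = reinterpret_cast<uintptr_t>(entries[i + 2]);
  //     void* const* pcs = &entries[i + 3];
  //     ... consume (count, size, pcs[0 .. depth-1]) ...
  //     i += 3 + static_cast<int>(depth);
  //   }
  //   delete[] entries;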
+ // + // REQUIRES: L < pageheap_lock + void** ReadStackTracesAndClear(); + + // Exposed for PageHeapAllocator + struct Bucket { + // Key + uintptr_t hash; + StackTrace trace; + + // Payload + int count; + Bucket* next; + + bool KeyEqual(uintptr_t h, const StackTrace& t) const; + }; + + // For testing + int depth_total() const { return depth_total_; } + int bucket_total() const { return bucket_total_; } + + private: + static const int kHashTableSize = 1 << 14; // => table_ is 128k + + bool error_; + int depth_total_; + int bucket_total_; + Bucket** table_; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_STACK_TRACE_TABLE_H_ diff --git a/third_party/tcmalloc/chromium/src/stacktrace.cc b/third_party/tcmalloc/chromium/src/stacktrace.cc new file mode 100644 index 0000000..d158eea --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Produce stack trace. +// +// There are three different ways we can try to get the stack trace: +// +// 1) Our hand-coded stack-unwinder. This depends on a certain stack +// layout, which is used by gcc (and those systems using a +// gcc-compatible ABI) on x86 systems, at least since gcc 2.95. +// It uses the frame pointer to do its work. +// +// 2) The libunwind library. This is still in development, and as a +// separate library adds a new dependency, abut doesn't need a frame +// pointer. It also doesn't call malloc. +// +// 3) The gdb unwinder -- also the one used by the c++ exception code. +// It's obviously well-tested, but has a fatal flaw: it can call +// malloc() from the unwinder. This is a problem because we're +// trying to use the unwinder to instrument malloc(). +// +// Note: if you add a new implementation here, make sure it works +// correctly when GetStackTrace() is called with max_depth == 0. +// Some code may do that. 
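Whichever backend gets selected, callers see the same <google/stacktrace.h> interface. A minimal sketch of a caller; the depth of 32 and the fprintf() consumer are arbitrary choices:

#include <stdio.h>
#include <google/stacktrace.h>

void LogCallers() {
  void* pcs[32];
  // skip_count == 1 leaves out LogCallers() itself.
  const int depth = GetStackTrace(pcs, 32, 1);
  for (int i = 0; i < depth; ++i) {
    fprintf(stderr, "  %p\n", pcs[i]);   // e.g. feed these to pprof/addr2line
  }
}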
+ +#include <config.h> +#include <google/stacktrace.h> +#include "stacktrace_config.h" + +#if defined(STACKTRACE_INL_HEADER) +# include STACKTRACE_INL_HEADER +#elif 0 +// This is for the benefit of code analysis tools that may have +// trouble with the computed #include above. +# include "base/stacktrace_x86-inl.h" +# include "base/stacktrace_libunwind-inl.h" +# include "base/stacktrace_generic-inl.h" +# include "base/stacktrace_powerpc-inl.h" +# include "base/stacktrace_win32-inl.h" +#else +# error Cannot calculate stack trace: will need to write for your environment +#endif diff --git a/third_party/tcmalloc/chromium/src/stacktrace_config.h b/third_party/tcmalloc/chromium/src/stacktrace_config.h new file mode 100644 index 0000000..b58ab1d --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace_config.h @@ -0,0 +1,76 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// Figure out which unwinder to use on a given platform. +// +// Defines STACKTRACE_INL_HEADER to the *-inl.h containing +// actual unwinder implementation. +// +// Defines STACKTRACE_SKIP_CONTEXT_ROUTINES if a separate +// GetStack{Trace,Frames}WithContext should not be provided. +// +// This header is "private" to stacktrace.cc and +// stacktrace_with_context.cc. +// +// DO NOT include it into any other files. + +#ifndef BASE_STACKTRACE_CONFIG_H_ +#define BASE_STACKTRACE_CONFIG_H_ + +// First, the i386 and x86_64 case. 
+#if (defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 2 +# if !defined(NO_FRAME_POINTER) +# define STACKTRACE_INL_HEADER "stacktrace_x86-inl.h" +# define STACKTRACE_SKIP_CONTEXT_ROUTINES 1 +# elif defined(HAVE_LIBUNWIND_H) // a proxy for having libunwind installed +# define STACKTRACE_INL_HEADER "stacktrace_libunwind-inl.h" +# elif defined(__linux) +# error Cannnot calculate stack trace: need either libunwind or frame-pointers (see INSTALL file) +# else +# error Cannnot calculate stack trace: need libunwind (see INSTALL file) +# endif + +// The PowerPC case +#elif (defined(__ppc__) || defined(__PPC__)) && __GNUC__ >= 2 +# if !defined(NO_FRAME_POINTER) +# define STACKTRACE_INL_HEADER "stacktrace_powerpc-inl.h" +# else +# define STACKTRACE_INL_HEADER "stacktrace_generic-inl.h" +# endif + +// The Windows case -- probably cygwin and mingw will use one of the +// x86-includes above, but if not, we can fall back to windows intrinsics. +#elif defined(_WIN32) || defined(__CYGWIN__) || defined(__CYGWIN32__) || defined(__MINGW32__) +# define STACKTRACE_INL_HEADER "stacktrace_win32-inl.h" + +#endif // all the cases +#endif // BASE_STACKTRACE_CONFIG_H_ diff --git a/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h new file mode 100644 index 0000000..490cd9d --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace_generic-inl.h @@ -0,0 +1,106 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Portable implementation - just use glibc +// +// Note: The glibc implementation may cause a call to malloc. +// This can cause a deadlock in HeapProfiler. +#include <execinfo.h> +#include <string.h> +#include "google/stacktrace.h" + +// If you change this function, also change GetStackFrames below. 
+int GetStackTrace(void** result, int max_depth, int skip_count) { + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + + size = backtrace(stack, kStackLength); + skip_count++; // we want to skip the current frame as well + int result_count = size - skip_count; + if (result_count < 0) + result_count = 0; + if (result_count > max_depth) + result_count = max_depth; + for (int i = 0; i < result_count; i++) + result[i] = stack[i + skip_count]; + + return result_count; +} + +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. +// +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { + static const int kStackLength = 64; + void * stack[kStackLength]; + int size; + + size = backtrace(stack, kStackLength); + skip_count++; // we want to skip the current frame as well + int result_count = size - skip_count; + if (result_count < 0) + result_count = 0; + if (result_count > max_depth) + result_count = max_depth; + for (int i = 0; i < result_count; i++) + pcs[i] = stack[i + skip_count]; + + // No implementation for finding out the stack frame sizes yet. + memset(sizes, 0, sizeof(*sizes) * result_count); + + return result_count; +} diff --git a/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h new file mode 100644 index 0000000..d9d829a --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace_libunwind-inl.h @@ -0,0 +1,156 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Arun Sharma +// +// Produce stack trace using libunwind + +// We only need local unwinder. +#define UNW_LOCAL_ONLY + +extern "C" { +#include <assert.h> +#include <string.h> // for memset() +#include <libunwind.h> +} +#include "google/stacktrace.h" +#include "base/logging.h" + +// Sometimes, we can try to get a stack trace from within a stack +// trace, because libunwind can call mmap (maybe indirectly via an +// internal mmap based memory allocator), and that mmap gets trapped +// and causes a stack-trace request. If were to try to honor that +// recursive request, we'd end up with infinite recursion or deadlock. +// Luckily, it's safe to ignore those subsequent traces. In such +// cases, we return 0 to indicate the situation. +static __thread int recursive; + +// If you change this function, also change GetStackFrames below. +int GetStackTrace(void** result, int max_depth, int skip_count) { + void *ip; + int n = 0; + unw_cursor_t cursor; + unw_context_t uc; + + if (recursive) { + return 0; + } + ++recursive; + + unw_getcontext(&uc); + int ret = unw_init_local(&cursor, &uc); + assert(ret >= 0); + skip_count++; // Do not include the "GetStackTrace" frame + + while (n < max_depth) { + if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) { + break; + } + if (skip_count > 0) { + skip_count--; + } else { + result[n++] = ip; + } + if (unw_step(&cursor) <= 0) { + break; + } + } + --recursive; + return n; +} + +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. +// +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". But we need this guarantee or the +// frame counts may be off by one. 
+// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { + void *ip; + int n = 0; + unw_cursor_t cursor; + unw_context_t uc; + unw_word_t sp = 0, next_sp = 0; + + if (recursive) { + return 0; + } + ++recursive; + + unw_getcontext(&uc); + RAW_CHECK(unw_init_local(&cursor, &uc) >= 0, "unw_init_local failed"); + skip_count++; // Do not include the "GetStackFrames" frame + + while (skip_count--) { + if (unw_step(&cursor) <= 0 || + unw_get_reg(&cursor, UNW_REG_SP, &next_sp) < 0) { + goto out; + } + } + while (n < max_depth) { + sp = next_sp; + if (unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *) &ip) < 0) + break; + if (unw_step(&cursor) <= 0 || + unw_get_reg(&cursor, UNW_REG_SP, &next_sp)) { + // We couldn't step any further (possibly because we reached _start). + // Provide the last good PC we've got, and get out. + sizes[n] = 0; + pcs[n++] = ip; + break; + } + sizes[n] = next_sp - sp; + pcs[n++] = ip; + } + out: + --recursive; + return n; +} diff --git a/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h new file mode 100644 index 0000000..5631e49 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace_powerpc-inl.h @@ -0,0 +1,206 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// Produce stack trace. I'm guessing (hoping!) the code is much like +// for x86. 
For apple machines, at least, it seems to be; see +// http://developer.apple.com/documentation/mac/runtimehtml/RTArch-59.html +// http://www.linux-foundation.org/spec/ELF/ppc64/PPC-elf64abi-1.9.html#STACK +// Linux has similar code: http://patchwork.ozlabs.org/linuxppc/patch?id=8882 + +#include <stdint.h> // for uintptr_t +#include <stdlib.h> // for NULL +#include <google/stacktrace.h> + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return NULL if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template<bool STRICT_UNWINDING> +static void **NextStackFrame(void **old_sp) { + void **new_sp = (void **) *old_sp; + + // Check that the transition from frame pointer old_sp to frame + // pointer new_sp isn't clearly bogus + if (STRICT_UNWINDING) { + // With the stack growing downwards, older stack frame must be + // at a greater address that the current one. + if (new_sp <= old_sp) return NULL; + // Assume stack frames larger than 100,000 bytes are bogus. + if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL; + } else { + // In the non-strict mode, allow discontiguous stack frames. + // (alternate-signal-stacks for example). + if (new_sp == old_sp) return NULL; + // And allow frames upto about 1MB. + if ((new_sp > old_sp) + && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL; + } + if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL; + return new_sp; +} + +// This ensures that GetStackTrace stes up the Link Register properly. +void StacktracePowerPCDummyFunction() __attribute__((noinline)); +void StacktracePowerPCDummyFunction() { __asm__ volatile(""); } + +// If you change this function, also change GetStackFrames below. +int GetStackTrace(void** result, int max_depth, int skip_count) { + void **sp; + // Apple OS X uses an old version of gnu as -- both Darwin 7.9.0 (Panther) + // and Darwin 8.8.1 (Tiger) use as 1.38. This means we have to use a + // different asm syntax. I don't know quite the best way to discriminate + // systems using the old as from the new one; I've gone with __APPLE__. + // TODO(csilvers): use autoconf instead, to look for 'as --version' == 1 or 2 +#ifdef __APPLE__ + __asm__ volatile ("mr %0,r1" : "=r" (sp)); +#else + __asm__ volatile ("mr %0,1" : "=r" (sp)); +#endif + + // On PowerPC, the "Link Register" or "Link Record" (LR), is a stack + // entry that holds the return address of the subroutine call (what + // instruction we run after our function finishes). This is the + // same as the stack-pointer of our parent routine, which is what we + // want here. While the compiler will always(?) set up LR for + // subroutine calls, it may not for leaf functions (such as this one). + // This routine forces the compiler (at least gcc) to push it anyway. + StacktracePowerPCDummyFunction(); + + // The LR save area is used by the callee, so the top entry is bogus. + skip_count++; + + int n = 0; + while (sp && n < max_depth) { + if (skip_count > 0) { + skip_count--; + } else { + // PowerPC has 3 main ABIs, which say where in the stack the + // Link Register is. For DARWIN and AIX (used by apple and + // linux ppc64), it's in sp[2]. For SYSV (used by linux ppc), + // it's in sp[1]. 
+#if defined(_CALL_AIX) || defined(_CALL_DARWIN) + result[n++] = *(sp+2); +#elif defined(_CALL_SYSV) + result[n++] = *(sp+1); +#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__)) + // This check is in case the compiler doesn't define _CALL_AIX/etc. + result[n++] = *(sp+2); +#elif defined(__linux) + // This check is in case the compiler doesn't define _CALL_SYSV. + result[n++] = *(sp+1); +#else +#error Need to specify the PPC ABI for your archiecture. +#endif + } + // Use strict unwinding rules. + sp = NextStackFrame<true>(sp); + } + return n; +} + +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. +// +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int *sizes, int max_depth, int skip_count) { + void **sp; +#ifdef __APPLE__ + __asm__ volatile ("mr %0,r1" : "=r" (sp)); +#else + __asm__ volatile ("mr %0,1" : "=r" (sp)); +#endif + + StacktracePowerPCDummyFunction(); + // Note we do *not* increment skip_count here for the SYSV ABI. If + // we did, the list of stack frames wouldn't properly match up with + // the list of return addresses. Note this means the top pc entry + // is probably bogus for linux/ppc (and other SYSV-ABI systems). + + int n = 0; + while (sp && n < max_depth) { + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few bogus + // entries in some rare cases). + void **next_sp = NextStackFrame<false>(sp); + if (skip_count > 0) { + skip_count--; + } else { +#if defined(_CALL_AIX) || defined(_CALL_DARWIN) + pcs[n++] = *(sp+2); +#elif defined(_CALL_SYSV) + pcs[n++] = *(sp+1); +#elif defined(__APPLE__) || (defined(__linux) && defined(__PPC64__)) + // This check is in case the compiler doesn't define _CALL_AIX/etc. + pcs[n++] = *(sp+2); +#elif defined(__linux) + // This check is in case the compiler doesn't define _CALL_SYSV. + pcs[n++] = *(sp+1); +#else +#error Need to specify the PPC ABI for your archiecture. 
+#endif + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } + n++; + } + sp = next_sp; + } + return n; +} diff --git a/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h new file mode 100644 index 0000000..892cd7c --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace_win32-inl.h @@ -0,0 +1,84 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Produces a stack trace for Windows. Normally, one could use +// stacktrace_x86-inl.h or stacktrace_x86_64-inl.h -- and indeed, that +// should work for binaries compiled using MSVC in "debug" mode. +// However, in "release" mode, Windows uses frame-pointer +// optimization, which makes getting a stack trace very difficult. +// +// There are several approaches one can take. One is to use Windows +// intrinsics like StackWalk64. These can work, but have restrictions +// on how successful they can be. Another attempt is to write a +// version of stacktrace_x86-inl.h that has heuristic support for +// dealing with FPO, similar to what WinDbg does (see +// http://www.nynaeve.net/?p=97). +// +// The solution we've ended up doing is to call the undocumented +// windows function RtlCaptureStackBackTrace, which probably doesn't +// work with FPO but at least is fast, and doesn't require a symbol +// server. +// +// This code is inspired by a patch from David Vitek: +// http://code.google.com/p/google-perftools/issues/detail?id=83 + +#include "config.h" +#include <windows.h> // for GetProcAddress and GetModuleHandle +#include <assert.h> + +typedef USHORT NTAPI RtlCaptureStackBackTrace_Function( + IN ULONG frames_to_skip, + IN ULONG frames_to_capture, + OUT PVOID *backtrace, + OUT PULONG backtrace_hash); + +// Load the function we need at static init time, where we don't have +// to worry about someone else holding the loader's lock. 
+static RtlCaptureStackBackTrace_Function* const RtlCaptureStackBackTrace_fn = + (RtlCaptureStackBackTrace_Function*) + GetProcAddress(GetModuleHandleA("ntdll.dll"), "RtlCaptureStackBackTrace"); + +PERFTOOLS_DLL_DECL int GetStackTrace(void** result, int max_depth, + int skip_count) { + if (!RtlCaptureStackBackTrace_fn) { + // TODO(csilvers): should we log an error here? + return 0; // can't find a stacktrace with no function to call + } + return (int)RtlCaptureStackBackTrace_fn(skip_count + 2, max_depth, + result, 0); +} + +PERFTOOLS_DLL_DECL int GetStackFrames(void** /* pcs */, + int* /* sizes */, + int /* max_depth */, + int /* skip_count */) { + assert(0 == "Not yet implemented"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/stacktrace_with_context.cc b/third_party/tcmalloc/chromium/src/stacktrace_with_context.cc new file mode 100644 index 0000000..ed7bfe3 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace_with_context.cc @@ -0,0 +1,61 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Paul Pluzhnikov +// +// This code logically belongs in stacktrace.cc, but +// it is moved into (this) separate file in order to +// prevent inlining of routines defined here. +// +// Inlining causes skip_count to be incorrect, and there +// is no portable way to prevent it. +// +// Eventually LTO (link-time optimization) and/or LLVM +// may inline this code anyway. Let's hope they respect +// ATTRIBUTE_NOINLINE. 
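The noinline wrappers this file defines exist solely to put one predictable frame between the caller and the underlying unwinder, so skip_count stays correct. As an editor's illustration (not part of the patch), a caller such as a failure signal handler might use the ...WithContext entry point like the sketch below; the handler name and the depth of 32 are invented, and it assumes the ...WithContext declarations are exported from <google/stacktrace.h>, as the other files in this patch suggest.

#include <signal.h>
#include <google/stacktrace.h>

// Hypothetical SA_SIGINFO handler: the ucontext pointer the kernel delivers
// is forwarded as the opaque 'uc' argument so the unwinder can recover
// registers from the interrupted frame.
static void FailureSignalHandler(int signo, siginfo_t* info, void* ucontext) {
  void* frames[32];
  // skip_count == 0: the first reported PC falls inside this handler; the
  // noinline wrapper bumps the count internally, so its own frame never
  // appears in the output.
  int depth = GetStackTraceWithContext(frames, 32, 0, ucontext);
  (void)signo; (void)info; (void)depth;  // a real handler would log the trace
}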
+ +#include <config.h> +#include <google/stacktrace.h> +#include "stacktrace_config.h" +#include "base/basictypes.h" + +#if !defined(STACKTRACE_SKIP_CONTEXT_ROUTINES) +ATTRIBUTE_NOINLINE PERFTOOLS_DLL_DECL +int GetStackFramesWithContext(void** pcs, int* sizes, int max_depth, + int skip_count, const void * /* uc */) { + return GetStackFrames(pcs, sizes, max_depth, skip_count + 1); +} + +ATTRIBUTE_NOINLINE PERFTOOLS_DLL_DECL +int GetStackTraceWithContext(void** result, int max_depth, + int skip_count, const void * /* uc */) { + return GetStackTrace(result, max_depth, skip_count + 1); +} +#endif diff --git a/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h new file mode 100644 index 0000000..5650c86 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace_x86-inl.h @@ -0,0 +1,530 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Produce stack trace +// +// NOTE: there is code duplication between +// GetStackTrace, GetStackTraceWithContext, GetStackFrames and +// GetStackFramesWithContext. If you update one, update them all. +// +// There is no easy way to avoid this, because inlining +// interferes with skip_count, and there is no portable +// way to turn inlining off, or force it always on. 
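Before the x86 implementation below, here is the bare technique it hardens, as an editor's sketch (not part of the patch): walking saved frame pointers, assuming a GCC-compatible compiler and code built with frame pointers (-fno-omit-frame-pointer), with the conventional layout fp[0] = caller's saved frame pointer and fp[1] = return address. All names are invented; the real code adds the VDSO, alignment, and readability checks that follow.

#include <stddef.h>  // for NULL

// Naive frame-pointer walk: no sanity checks, for illustration only.
static int NaiveFramePointerTrace(void** result, int max_depth) {
  void** fp = static_cast<void**>(__builtin_frame_address(0));
  int n = 0;
  while (fp != NULL && n < max_depth) {
    void* ret = fp[1];        // return address sits just above the saved fp
    if (ret == NULL) break;   // a zero return address usually means the top
    result[n++] = ret;
    void** next = static_cast<void**>(fp[0]);
    if (next <= fp) break;    // stack grows down; a non-increasing fp is bogus
    fp = next;
  }
  return n;
}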
+ +#include "config.h" + +#include <stdlib.h> // for NULL +#include <assert.h> +#if defined(HAVE_SYS_UCONTEXT_H) +#include <sys/ucontext.h> +#elif defined(HAVE_UCONTEXT_H) +#include <ucontext.h> // for ucontext_t +#elif defined(HAVE_CYGWIN_SIGNAL_H) +#include <cygwin/signal.h> +typedef ucontext ucontext_t; +#endif +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> // for msync +#include "base/vdso_support.h" +#endif + +#include "google/stacktrace.h" + +#if defined(__linux__) && defined(__i386__) && defined(__ELF__) && defined(HAVE_MMAP) +// Count "push %reg" instructions in VDSO __kernel_vsyscall(), +// preceeding "syscall" or "sysenter". +// If __kernel_vsyscall uses frame pointer, answer 0. +// +// kMaxBytes tells how many instruction bytes of __kernel_vsyscall +// to analyze before giving up. Up to kMaxBytes+1 bytes of +// instructions could be accessed. +// +// Here are known __kernel_vsyscall instruction sequences: +// +// SYSENTER (linux-2.6.26/arch/x86/vdso/vdso32/sysenter.S). +// Used on Intel. +// 0xffffe400 <__kernel_vsyscall+0>: push %ecx +// 0xffffe401 <__kernel_vsyscall+1>: push %edx +// 0xffffe402 <__kernel_vsyscall+2>: push %ebp +// 0xffffe403 <__kernel_vsyscall+3>: mov %esp,%ebp +// 0xffffe405 <__kernel_vsyscall+5>: sysenter +// +// SYSCALL (see linux-2.6.26/arch/x86/vdso/vdso32/syscall.S). +// Used on AMD. +// 0xffffe400 <__kernel_vsyscall+0>: push %ebp +// 0xffffe401 <__kernel_vsyscall+1>: mov %ecx,%ebp +// 0xffffe403 <__kernel_vsyscall+3>: syscall +// +// i386 (see linux-2.6.26/arch/x86/vdso/vdso32/int80.S) +// 0xffffe400 <__kernel_vsyscall+0>: int $0x80 +// 0xffffe401 <__kernel_vsyscall+1>: ret +// +static const int kMaxBytes = 10; + +// We use assert()s instead of DCHECK()s -- this is too low level +// for DCHECK(). + +static int CountPushInstructions(const unsigned char *const addr) { + int result = 0; + for (int i = 0; i < kMaxBytes; ++i) { + if (addr[i] == 0x89) { + // "mov reg,reg" + if (addr[i + 1] == 0xE5) { + // Found "mov %esp,%ebp". + return 0; + } + ++i; // Skip register encoding byte. + } else if (addr[i] == 0x0F && + (addr[i + 1] == 0x34 || addr[i + 1] == 0x05)) { + // Found "sysenter" or "syscall". + return result; + } else if ((addr[i] & 0xF0) == 0x50) { + // Found "push %reg". + ++result; + } else if (addr[i] == 0xCD && addr[i + 1] == 0x80) { + // Found "int $0x80" + assert(result == 0); + return 0; + } else { + // Unexpected instruction. + assert(0 == "unexpected instruction in __kernel_vsyscall"); + return 0; + } + } + // Unexpected: didn't find SYSENTER or SYSCALL in + // [__kernel_vsyscall, __kernel_vsyscall + kMaxBytes) interval. + assert(0 == "did not find SYSENTER or SYSCALL in __kernel_vsyscall"); + return 0; +} +#endif + +// Given a pointer to a stack frame, locate and return the calling +// stackframe, or return NULL if no stackframe can be found. Perform sanity +// checks (the strictness of which is controlled by the boolean parameter +// "STRICT_UNWINDING") to reduce the chance that a bad pointer is returned. +template<bool STRICT_UNWINDING, bool WITH_CONTEXT> +static void **NextStackFrame(void **old_sp, const void *uc) { + void **new_sp = (void **) *old_sp; + +#if defined(__linux__) && defined(__i386__) && defined(HAVE_VDSO_SUPPORT) + if (WITH_CONTEXT && uc != NULL) { + // How many "push %reg" instructions are there at __kernel_vsyscall? + // This is constant for a given kernel and processor, so compute + // it only once. 
+ static int num_push_instructions = -1; // Sentinel: not computed yet. + // Initialize with sentinel value: __kernel_rt_sigreturn can not possibly + // be there. + static const unsigned char *kernel_rt_sigreturn_address = NULL; + static const unsigned char *kernel_vsyscall_address = NULL; + if (num_push_instructions == -1) { + base::VDSOSupport vdso; + if (vdso.IsPresent()) { + base::VDSOSupport::SymbolInfo rt_sigreturn_symbol_info; + base::VDSOSupport::SymbolInfo vsyscall_symbol_info; + if (!vdso.LookupSymbol("__kernel_rt_sigreturn", "LINUX_2.5", + STT_FUNC, &rt_sigreturn_symbol_info) || + !vdso.LookupSymbol("__kernel_vsyscall", "LINUX_2.5", + STT_FUNC, &vsyscall_symbol_info) || + rt_sigreturn_symbol_info.address == NULL || + vsyscall_symbol_info.address == NULL) { + // Unexpected: 32-bit VDSO is present, yet one of the expected + // symbols is missing or NULL. + assert(0 == "VDSO is present, but doesn't have expected symbols"); + num_push_instructions = 0; + } else { + kernel_rt_sigreturn_address = + reinterpret_cast<const unsigned char *>( + rt_sigreturn_symbol_info.address); + kernel_vsyscall_address = + reinterpret_cast<const unsigned char *>( + vsyscall_symbol_info.address); + num_push_instructions = + CountPushInstructions(kernel_vsyscall_address); + } + } else { + num_push_instructions = 0; + } + } + if (num_push_instructions != 0 && kernel_rt_sigreturn_address != NULL && + old_sp[1] == kernel_rt_sigreturn_address) { + const ucontext_t *ucv = static_cast<const ucontext_t *>(uc); + // This kernel does not use frame pointer in its VDSO code, + // and so %ebp is not suitable for unwinding. + const void **const reg_ebp = + reinterpret_cast<const void **>(ucv->uc_mcontext.gregs[REG_EBP]); + const unsigned char *const reg_eip = + reinterpret_cast<unsigned char *>(ucv->uc_mcontext.gregs[REG_EIP]); + if (new_sp == reg_ebp && + kernel_vsyscall_address <= reg_eip && + reg_eip - kernel_vsyscall_address < kMaxBytes) { + // We "stepped up" to __kernel_vsyscall, but %ebp is not usable. + // Restore from 'ucv' instead. + void **const reg_esp = + reinterpret_cast<void **>(ucv->uc_mcontext.gregs[REG_ESP]); + // Check that alleged %esp is not NULL and is reasonably aligned. + if (reg_esp && + ((uintptr_t)reg_esp & (sizeof(reg_esp) - 1)) == 0) { + // Check that alleged %esp is actually readable. This is to prevent + // "double fault" in case we hit the first fault due to e.g. stack + // corruption. + // + // page_size is linker-initalized to avoid async-unsafe locking + // that GCC would otherwise insert (__cxa_guard_acquire etc). + static int page_size; + if (page_size == 0) { + // First time through. + page_size = getpagesize(); + } + void *const reg_esp_aligned = + reinterpret_cast<void *>( + (uintptr_t)(reg_esp + num_push_instructions - 1) & + ~(page_size - 1)); + if (msync(reg_esp_aligned, page_size, MS_ASYNC) == 0) { + // Alleged %esp is readable, use it for further unwinding. + new_sp = reinterpret_cast<void **>( + reg_esp[num_push_instructions - 1]); + } + } + } + } + } +#endif + + // Check that the transition from frame pointer old_sp to frame + // pointer new_sp isn't clearly bogus + if (STRICT_UNWINDING) { + // With the stack growing downwards, older stack frame must be + // at a greater address that the current one. + if (new_sp <= old_sp) return NULL; + // Assume stack frames larger than 100,000 bytes are bogus. + if ((uintptr_t)new_sp - (uintptr_t)old_sp > 100000) return NULL; + } else { + // In the non-strict mode, allow discontiguous stack frames. 
+ // (alternate-signal-stacks for example). + if (new_sp == old_sp) return NULL; + // And allow frames upto about 1MB. + if ((new_sp > old_sp) + && ((uintptr_t)new_sp - (uintptr_t)old_sp > 1000000)) return NULL; + } + if ((uintptr_t)new_sp & (sizeof(void *) - 1)) return NULL; +#ifdef __i386__ + // On 64-bit machines, the stack pointer can be very close to + // 0xffffffff, so we explicitly check for a pointer into the + // last two pages in the address space + if ((uintptr_t)new_sp >= 0xffffe000) return NULL; +#endif +#ifdef HAVE_MMAP + if (!STRICT_UNWINDING) { + // Lax sanity checks cause a crash on AMD-based machines with + // VDSO-enabled kernels. + // Make an extra sanity check to insure new_sp is readable. + // Note: NextStackFrame<false>() is only called while the program + // is already on its last leg, so it's ok to be slow here. + static int page_size = getpagesize(); + void *new_sp_aligned = (void *)((uintptr_t)new_sp & ~(page_size - 1)); + if (msync(new_sp_aligned, page_size, MS_ASYNC) == -1) + return NULL; + } +#endif + return new_sp; +} + +// If you change this function, see NOTE at the top of file. +// Same as above, but with signal ucontext_t pointer. +int GetStackTraceWithContext(void** result, + int max_depth, + int skip_count, + const void *uc) { + void **sp; +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ + // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. + // It's always correct on llvm, and the techniques below aren't (in + // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), + // so we also prefer __builtin_frame_address when running under llvm. + sp = reinterpret_cast<void**>(__builtin_frame_address(0)); +#elif defined(__i386__) + // Stack frame format: + // sp[0] pointer to previous frame + // sp[1] caller address + // sp[2] first argument + // ... + // NOTE: This will break under llvm, since result is a copy and not in sp[2] + sp = (void **)&result - 2; +#elif defined(__x86_64__) + unsigned long rbp; + // Move the value of the register %rbp into the local variable rbp. + // We need 'volatile' to prevent this instruction from getting moved + // around during optimization to before function prologue is done. + // An alternative way to achieve this + // would be (before this __asm__ instruction) to call Noop() defined as + // static void Noop() __attribute__ ((noinline)); // prevent inlining + // static void Noop() { asm(""); } // prevent optimizing-away + __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); + // Arguments are passed in registers on x86-64, so we can't just + // offset from &result + sp = (void **) rbp; +#else +# error Using stacktrace_x86-inl.h on a non x86 architecture! +#endif + + int n = 0; + while (sp && n < max_depth) { + if (*(sp+1) == reinterpret_cast<void *>(0)) { + // In 64-bit code, we often see a frame that + // points to itself and has a return address of 0. + break; + } + if (skip_count > 0) { + skip_count--; + } else { + result[n++] = *(sp+1); + } + // Use strict unwinding rules. + sp = NextStackFrame<true, true>(sp, uc); + } + return n; +} + +int GetStackTrace(void** result, int max_depth, int skip_count) { + void **sp; +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ + // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. 
+ // It's always correct on llvm, and the techniques below aren't (in + // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), + // so we also prefer __builtin_frame_address when running under llvm. + sp = reinterpret_cast<void**>(__builtin_frame_address(0)); +#elif defined(__i386__) + // Stack frame format: + // sp[0] pointer to previous frame + // sp[1] caller address + // sp[2] first argument + // ... + // NOTE: This will break under llvm, since result is a copy and not in sp[2] + sp = (void **)&result - 2; +#elif defined(__x86_64__) + unsigned long rbp; + // Move the value of the register %rbp into the local variable rbp. + // We need 'volatile' to prevent this instruction from getting moved + // around during optimization to before function prologue is done. + // An alternative way to achieve this + // would be (before this __asm__ instruction) to call Noop() defined as + // static void Noop() __attribute__ ((noinline)); // prevent inlining + // static void Noop() { asm(""); } // prevent optimizing-away + __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); + // Arguments are passed in registers on x86-64, so we can't just + // offset from &result + sp = (void **) rbp; +#else +# error Using stacktrace_x86-inl.h on a non x86 architecture! +#endif + + int n = 0; + while (sp && n < max_depth) { + if (*(sp+1) == reinterpret_cast<void *>(0)) { + // In 64-bit code, we often see a frame that + // points to itself and has a return address of 0. + break; + } + if (skip_count > 0) { + skip_count--; + } else { + result[n++] = *(sp+1); + } + // Use strict unwinding rules. + sp = NextStackFrame<true, false>(sp, NULL); + } + return n; +} + +// If you change this function, see NOTE at the top of file. +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. +// +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. +int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { + void **sp; +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ + // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. 
+ // It's always correct on llvm, and the techniques below aren't (in + // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), + // so we also prefer __builtin_frame_address when running under llvm. + sp = reinterpret_cast<void**>(__builtin_frame_address(0)); +#elif defined(__i386__) + // Stack frame format: + // sp[0] pointer to previous frame + // sp[1] caller address + // sp[2] first argument + // ... + sp = (void **)&pcs - 2; +#elif defined(__x86_64__) + unsigned long rbp; + // Move the value of the register %rbp into the local variable rbp. + // We need 'volatile' to prevent this instruction from getting moved + // around during optimization to before function prologue is done. + // An alternative way to achieve this + // would be (before this __asm__ instruction) to call Noop() defined as + // static void Noop() __attribute__ ((noinline)); // prevent inlining + // static void Noop() { asm(""); } // prevent optimizing-away + __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); + // Arguments are passed in registers on x86-64, so we can't just + // offset from &result + sp = (void **) rbp; +#else +# error Using stacktrace_x86-inl.h on a non x86 architecture! +#endif + + int n = 0; + while (sp && n < max_depth) { + if (*(sp+1) == reinterpret_cast<void *>(0)) { + // In 64-bit code, we often see a frame that + // points to itself and has a return address of 0. + break; + } + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few bogus + // entries in some rare cases). + void **next_sp = NextStackFrame<false, false>(sp, NULL); + if (skip_count > 0) { + skip_count--; + } else { + pcs[n] = *(sp+1); + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } + n++; + } + sp = next_sp; + } + return n; +} + +// If you change this function, see NOTE at the top of file. +// Same as above, but with signal ucontext_t pointer. +int GetStackFramesWithContext(void** pcs, + int* sizes, + int max_depth, + int skip_count, + const void *uc) { + void **sp; +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2) || __llvm__ + // __builtin_frame_address(0) can return the wrong address on gcc-4.1.0-k8. + // It's always correct on llvm, and the techniques below aren't (in + // particular, llvm-gcc will make a copy of pcs, so it's not in sp[2]), + // so we also prefer __builtin_frame_address when running under llvm. + sp = reinterpret_cast<void**>(__builtin_frame_address(0)); +#elif defined(__i386__) + // Stack frame format: + // sp[0] pointer to previous frame + // sp[1] caller address + // sp[2] first argument + // ... + // NOTE: This will break under llvm, since result is a copy and not in sp[2] + sp = (void **)&pcs - 2; +#elif defined(__x86_64__) + unsigned long rbp; + // Move the value of the register %rbp into the local variable rbp. + // We need 'volatile' to prevent this instruction from getting moved + // around during optimization to before function prologue is done. 
+ // An alternative way to achieve this + // would be (before this __asm__ instruction) to call Noop() defined as + // static void Noop() __attribute__ ((noinline)); // prevent inlining + // static void Noop() { asm(""); } // prevent optimizing-away + __asm__ volatile ("mov %%rbp, %0" : "=r" (rbp)); + // Arguments are passed in registers on x86-64, so we can't just + // offset from &result + sp = (void **) rbp; +#else +# error Using stacktrace_x86-inl.h on a non x86 architecture! +#endif + + int n = 0; + while (sp && n < max_depth) { + if (*(sp+1) == reinterpret_cast<void *>(0)) { + // In 64-bit code, we often see a frame that + // points to itself and has a return address of 0. + break; + } + // The GetStackFrames routine is called when we are in some + // informational context (the failure signal handler for example). + // Use the non-strict unwinding rules to produce a stack trace + // that is as complete as possible (even if it contains a few bogus + // entries in some rare cases). + void **next_sp = NextStackFrame<false, true>(sp, uc); + if (skip_count > 0) { + skip_count--; + } else { + pcs[n] = *(sp+1); + if (next_sp > sp) { + sizes[n] = (uintptr_t)next_sp - (uintptr_t)sp; + } else { + // A frame-size of 0 is used to indicate unknown frame size. + sizes[n] = 0; + } + n++; + } + sp = next_sp; + } + return n; +} diff --git a/third_party/tcmalloc/chromium/src/stacktrace_x86_64-inl.h b/third_party/tcmalloc/chromium/src/stacktrace_x86_64-inl.h new file mode 100644 index 0000000..767ef9b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/stacktrace_x86_64-inl.h @@ -0,0 +1,151 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
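The file that follows takes yet another route and leans on the C++ ABI unwinder from libgcc. As an editor's sketch (not part of the patch), the core of that approach looks like this; the struct and function names are invented, and the real code additionally honors skip_count and guards against running before static initializers have finished.

#include <unwind.h>

struct BacktraceState {
  void** frames;
  int max_depth;
  int count;
};

// Callback invoked once per frame by _Unwind_Backtrace.
static _Unwind_Reason_Code CollectFrame(struct _Unwind_Context* ctx, void* arg) {
  BacktraceState* state = static_cast<BacktraceState*>(arg);
  if (state->count >= state->max_depth)
    return _URC_END_OF_STACK;              // stop once the buffer is full
  state->frames[state->count++] =
      reinterpret_cast<void*>(_Unwind_GetIP(ctx));
  return _URC_NO_REASON;                   // keep walking
}

static int UnwindBacktrace(void** frames, int max_depth) {
  BacktraceState state = { frames, max_depth, 0 };
  _Unwind_Backtrace(CollectFrame, &state);
  return state.count;
}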
+ +// --- +// Author: Arun Sharma +// +// Produce stack trace using libgcc + +extern "C" { +#include <stdlib.h> // for NULL +#include <unwind.h> // ABI defined unwinder +#include <string.h> // for memset +} +#include "google/stacktrace.h" + +typedef struct { + void **result; + int max_depth; + int skip_count; + int count; +} trace_arg_t; + + +// Workaround for the malloc() in _Unwind_Backtrace() issue. +static _Unwind_Reason_Code nop_backtrace(struct _Unwind_Context *uc, void *opq) { + return _URC_NO_REASON; +} + + +// This code is not considered ready to run until +// static initializers run so that we are guaranteed +// that any malloc-related initialization is done. +static bool ready_to_run = false; +class StackTraceInit { + public: + StackTraceInit() { + // Extra call to force initialization + _Unwind_Backtrace(nop_backtrace, NULL); + ready_to_run = true; + } +}; + +static StackTraceInit module_initializer; // Force initialization + +static _Unwind_Reason_Code GetOneFrame(struct _Unwind_Context *uc, void *opq) { + trace_arg_t *targ = (trace_arg_t *) opq; + + if (targ->skip_count > 0) { + targ->skip_count--; + } else { + targ->result[targ->count++] = (void *) _Unwind_GetIP(uc); + } + + if (targ->count == targ->max_depth) + return _URC_END_OF_STACK; + + return _URC_NO_REASON; +} + +// If you change this function, also change GetStackFrames below. +int GetStackTrace(void** result, int max_depth, int skip_count) { + if (!ready_to_run) + return 0; + + trace_arg_t targ; + + skip_count += 1; // Do not include the "GetStackTrace" frame + + targ.result = result; + targ.max_depth = max_depth; + targ.skip_count = skip_count; + targ.count = 0; + + _Unwind_Backtrace(GetOneFrame, &targ); + + return targ.count; +} + +// If you change this function, also change GetStackTrace above: +// +// This GetStackFrames routine shares a lot of code with GetStackTrace +// above. This code could have been refactored into a common routine, +// and then both GetStackTrace/GetStackFrames could call that routine. +// There are two problems with that: +// +// (1) The performance of the refactored-code suffers substantially - the +// refactored needs to be able to record the stack trace when called +// from GetStackTrace, and both the stack trace and stack frame sizes, +// when called from GetStackFrames - this introduces enough new +// conditionals that GetStackTrace performance can degrade by as much +// as 50%. +// +// (2) Whether the refactored routine gets inlined into GetStackTrace and +// GetStackFrames depends on the compiler, and we can't guarantee the +// behavior either-way, even with "__attribute__ ((always_inline))" +// or "__attribute__ ((noinline))". But we need this guarantee or the +// frame counts may be off by one. +// +// Both (1) and (2) can be addressed without this code duplication, by +// clever use of template functions, and by defining GetStackTrace and +// GetStackFrames as macros that expand to these template functions. +// However, this approach comes with its own set of problems - namely, +// macros and preprocessor trouble - for example, if GetStackTrace +// and/or GetStackFrames is ever defined as a member functions in some +// class, we are in trouble. 
+int GetStackFrames(void** pcs, int* sizes, int max_depth, int skip_count) { + if (!ready_to_run) + return 0; + + trace_arg_t targ; + + skip_count += 1; // Do not include the "GetStackFrames" frame + + targ.result = pcs; + targ.max_depth = max_depth; + targ.skip_count = skip_count; + targ.count = 0; + + _Unwind_Backtrace(GetOneFrame, &targ); + + // No implementation for finding out the stack frame sizes yet. + memset(sizes, 0, sizeof(*sizes) * targ.count); + + return targ.count; +} diff --git a/third_party/tcmalloc/chromium/src/static_vars.cc b/third_party/tcmalloc/chromium/src/static_vars.cc new file mode 100644 index 0000000..18d5146 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/static_vars.cc @@ -0,0 +1,65 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
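static_vars.cc, whose implementation follows, sidesteps C++ static-initialization-order problems by keeping raw storage for the page heap and constructing it with placement new only when InitStaticVars() runs. A stripped-down sketch of that pattern (editor's illustration, not part of the patch; MyHeap and Globals are invented names):

#include <cstddef>
#include <new>

class MyHeap {
 public:
  MyHeap() : bytes_reserved_(0) {}
  void Reserve(size_t n) { bytes_reserved_ += n; }
 private:
  size_t bytes_reserved_;
};

class Globals {
 public:
  // Called once, explicitly, before any accessor below is used.
  static void Init() { new (heap_memory_) MyHeap; }
  static MyHeap* heap() { return reinterpret_cast<MyHeap*>(heap_memory_); }
 private:
  // Raw bytes: a char array has no constructor, so it cannot take part in
  // static-initialization-order problems. Real code should also guarantee
  // suitable alignment (e.g. with alignas or a union).
  static char heap_memory_[sizeof(MyHeap)];
};

char Globals::heap_memory_[sizeof(MyHeap)];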
+ +// --- +// Author: Ken Ashcraft <opensource@google.com> + +#include "static_vars.h" +#include "sampler.h" // for the init function + +namespace tcmalloc { + +SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED); +SizeMap Static::sizemap_; +CentralFreeListPadded Static::central_cache_[kNumClasses]; +PageHeapAllocator<Span> Static::span_allocator_; +PageHeapAllocator<StackTrace> Static::stacktrace_allocator_; +Span Static::sampled_objects_; +PageHeapAllocator<StackTraceTable::Bucket> Static::bucket_allocator_; +StackTrace* Static::growth_stacks_ = NULL; +char Static::pageheap_memory_[sizeof(PageHeap)]; + +void Static::InitStaticVars() { + sizemap_.Init(); + span_allocator_.Init(); + span_allocator_.New(); // Reduce cache conflicts + span_allocator_.New(); // Reduce cache conflicts + stacktrace_allocator_.Init(); + bucket_allocator_.Init(); + // Do a bit of sanitizing: make sure central_cache is aligned properly + CHECK_CONDITION((sizeof(central_cache_[0]) % 64) == 0); + for (int i = 0; i < kNumClasses; ++i) { + central_cache_[i].Init(i); + } + new ((void*)pageheap_memory_) PageHeap; + DLL_Init(&sampled_objects_); + Sampler::InitStatics(); +} + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/static_vars.h b/third_party/tcmalloc/chromium/src/static_vars.h new file mode 100644 index 0000000..b21fe2b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/static_vars.h @@ -0,0 +1,116 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ken Ashcraft <opensource@google.com> +// +// Static variables shared by multiple classes. + +#ifndef TCMALLOC_STATIC_VARS_H_ +#define TCMALLOC_STATIC_VARS_H_ + +#include <config.h> +#include "base/spinlock.h" +#include "central_freelist.h" +#include "common.h" +#include "page_heap.h" +#include "page_heap_allocator.h" +#include "span.h" +#include "stack_trace_table.h" + +namespace tcmalloc { + +class Static { + public: + // Linker initialized, so this lock can be accessed at any time. 
+ static SpinLock* pageheap_lock() { return &pageheap_lock_; } + + // Must be called before calling any of the accessors below. + static void InitStaticVars(); + + // Central cache -- an array of free-lists, one per size-class. + // We have a separate lock per free-list to reduce contention. + static CentralFreeListPadded* central_cache() { return central_cache_; } + + static SizeMap* sizemap() { return &sizemap_; } + + ////////////////////////////////////////////////////////////////////// + // In addition to the explicit initialization comment, the variables below + // must be protected by pageheap_lock. + + // Page-level allocator. + static PageHeap* pageheap() { + return reinterpret_cast<PageHeap*>(pageheap_memory_); + } + + static PageHeapAllocator<Span>* span_allocator() { return &span_allocator_; } + + static PageHeapAllocator<StackTrace>* stacktrace_allocator() { + return &stacktrace_allocator_; + } + + static StackTrace* growth_stacks() { return growth_stacks_; } + static void set_growth_stacks(StackTrace* s) { growth_stacks_ = s; } + + // State kept for sampled allocations (/pprof/heap support) + static Span* sampled_objects() { return &sampled_objects_; } + static PageHeapAllocator<StackTraceTable::Bucket>* bucket_allocator() { + return &bucket_allocator_; + } + + private: + static SpinLock pageheap_lock_; + + // These static variables require explicit initialization. We cannot + // count on their constructors to do any initialization because other + // static variables may try to allocate memory before these variables + // can run their constructors. + + static SizeMap sizemap_; + static CentralFreeListPadded central_cache_[kNumClasses]; + static PageHeapAllocator<Span> span_allocator_; + static PageHeapAllocator<StackTrace> stacktrace_allocator_; + static Span sampled_objects_; + static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_; + + // Linked list of stack traces recorded every time we allocated memory + // from the system. Useful for finding allocation sites that cause + // increase in the footprint of the system. The linked list pointer + // is stored in trace->stack[kMaxStackDepth-1]. + static StackTrace* growth_stacks_; + + // PageHeap uses a constructor for initialization. Like the members above, + // we can't depend on initialization order, so pageheap is new'd + // into this buffer. + static char pageheap_memory_[sizeof(PageHeap)]; +}; + +} // namespace tcmalloc + +#endif // TCMALLOC_STATIC_VARS_H_ diff --git a/third_party/tcmalloc/chromium/src/symbolize.cc b/third_party/tcmalloc/chromium/src/symbolize.cc new file mode 100644 index 0000000..6fe44b9 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/symbolize.cc @@ -0,0 +1,183 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// This forks out to pprof to do the actual symbolizing. We might +// be better off writing our own in C++. + +#include "config.h" +#include "symbolize.h" +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for write() +#endif +#ifdef HAVE_SYS_SOCKET_H +#include <sys/socket.h> // for socketpair() -- needed by Symbolize +#endif +#ifdef HAVE_SYS_WAIT_H +#include <sys/wait.h> // for wait() -- needed by Symbolize +#endif +#ifdef HAVE_POLL_H +#include <poll.h> +#endif +#include <string> +#include "base/commandlineflags.h" +#include "base/sysinfo.h" + +using std::string; +using tcmalloc::DumpProcSelfMaps; // from sysinfo.h + + +DEFINE_string(symbolize_pprof, + EnvToString("PPROF_PATH", "pprof"), + "Path to pprof to call for reporting function names."); + +// heap_profile_table_pprof may be referenced after destructors are +// called (since that's when leak-checking is done), so we make +// a more-permanent copy that won't ever get destroyed. +static string* g_pprof_path = new string(FLAGS_symbolize_pprof); + +// Updates symbolization_table with the pointers to symbol names corresponding +// to its keys. The symbol names are stored in out, which is allocated and +// freed by the caller of this routine. +// Note that the forking/etc is not thread-safe or re-entrant. That's +// ok for the purpose we need -- reporting leaks detected by heap-checker +// -- but be careful if you decide to use this routine for other purposes. +extern bool Symbolize(char *out, int out_size, + SymbolMap *symbolization_table) { +#if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H) + return false; +#elif !defined(HAVE_PROGRAM_INVOCATION_NAME) + return false; // TODO(csilvers): get argv[0] somehow +#else + // All this work is to do two-way communication. ugh. 
+ extern char* program_invocation_name; // gcc provides this + int child_in[2]; // file descriptors + int child_out[2]; // for now, we don't worry about child_err + if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_in) == -1) { + return false; + } + if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_out) == -1) { + close(child_in[0]); + close(child_in[1]); + return false; + } + switch (fork()) { + case -1: { // error + close(child_in[0]); + close(child_in[1]); + close(child_out[0]); + close(child_out[1]); + return false; + } + case 0: { // child + close(child_in[1]); // child uses the 0's, parent uses the 1's + close(child_out[1]); // child uses the 0's, parent uses the 1's + close(0); + close(1); + if (dup2(child_in[0], 0) == -1) _exit(1); + if (dup2(child_out[0], 1) == -1) _exit(2); + // Unset vars that might cause trouble when we fork + unsetenv("CPUPROFILE"); + unsetenv("HEAPPROFILE"); + unsetenv("HEAPCHECK"); + unsetenv("PERFTOOLS_VERBOSE"); + execlp(g_pprof_path->c_str(), g_pprof_path->c_str(), + "--symbols", program_invocation_name, NULL); + _exit(3); // if execvp fails, it's bad news for us + } + default: { // parent + close(child_in[0]); // child uses the 0's, parent uses the 1's + close(child_out[0]); // child uses the 0's, parent uses the 1's +#ifdef HAVE_POLL_H + // For maximum safety, we check to make sure the execlp + // succeeded before trying to write. (Otherwise we'll get a + // SIGPIPE.) For systems without poll.h, we'll just skip this + // check, and trust that the user set PPROF_PATH correctly! + struct pollfd pfd = { child_in[1], POLLOUT, 0 }; + if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) || + (pfd.revents & (POLLHUP|POLLERR))) { + return false; + } +#endif + DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin + + char pcstr[64]; // enough for a single address + for (SymbolMap::const_iterator iter = symbolization_table->begin(); + iter != symbolization_table->end(); ++iter) { + snprintf(pcstr, sizeof(pcstr), // pprof expects format to be 0xXXXXXX + "0x%" PRIxPTR "\n", iter->first); + // TODO(glider): the number of write()s can be reduced by using + // snprintf() here. + write(child_in[1], pcstr, strlen(pcstr)); + } + close(child_in[1]); // that's all we need to write + + int total_bytes_read = 0; + memset(out, '\0', out_size); + while (1) { + int bytes_read = read(child_out[1], out + total_bytes_read, + out_size - total_bytes_read); + if (bytes_read < 0) { + close(child_out[1]); + return false; + } else if (bytes_read == 0) { + close(child_out[1]); + wait(NULL); + break; + } else { + total_bytes_read += bytes_read; + } + } + // We have successfully read the output of pprof into out. Make sure + // the last symbol is full (we can tell because it ends with a \n). + // TODO(glider): even when the last symbol is full, the list of symbols + // may be incomplete. We should check for that and return the number of + // symbols we actually get from pprof. 
+ if (total_bytes_read == 0 || out[total_bytes_read - 1] != '\n') + return false; + // make the symbolization_table values point to the output vector + SymbolMap::iterator fill = symbolization_table->begin(); + const char *current_name = out; + for (int i = 0; i < total_bytes_read; i++) { + if (out[i] == '\n') { + fill->second = current_name; + out[i] = '\0'; + current_name = out + i + 1; + fill++; + } + } + return true; + } + } + return false; // shouldn't be reachable +#endif +} diff --git a/third_party/tcmalloc/chromium/src/symbolize.h b/third_party/tcmalloc/chromium/src/symbolize.h new file mode 100644 index 0000000..8fb0366 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/symbolize.h @@ -0,0 +1,54 @@ +// Copyright (c) 2009, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein + +#ifndef TCMALLOC_SYMBOLIZE_H_ +#define TCMALLOC_SYMBOLIZE_H_ + +#include "config.h" +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#include <map> + +using std::map; + +// An average size of memory allocated for a stack trace symbol. +static const int kSymbolSize = 1024; + +// TODO(glider): it's better to make SymbolMap a class that encapsulates the +// address operations and has the Symbolize() method. +typedef map<uintptr_t, const char*> SymbolMap; + +extern bool Symbolize(char *out, int out_size, + SymbolMap *symbolization_table); + +#endif // TCMALLOC_SYMBOLIZE_H_ diff --git a/third_party/tcmalloc/chromium/src/system-alloc.cc b/third_party/tcmalloc/chromium/src/system-alloc.cc new file mode 100644 index 0000000..3341f17 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/system-alloc.cc @@ -0,0 +1,504 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// ---
+// Author: Sanjay Ghemawat
+
+#include <config.h>
+#if defined HAVE_STDINT_H
+#include <stdint.h>
+#elif defined HAVE_INTTYPES_H
+#include <inttypes.h>
+#else
+#include <sys/types.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <fcntl.h> // for open()
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#endif
+#include <errno.h>
+#include "system-alloc.h"
+#include "internal_logging.h"
+#include "base/logging.h"
+#include "base/commandlineflags.h"
+#include "base/spinlock.h"
+
+// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
+// form of the name instead.
+#ifndef MAP_ANONYMOUS
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// Solaris has a bug where it doesn't declare madvise() for C++.
+// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0
+#if defined(__sun) && defined(__SVR4)
+# include <sys/types.h> // for caddr_t
+ extern "C" { extern int madvise(caddr_t, size_t, int); }
+#endif
+
+// Set kDebugMode so that we can use C++ conditionals
+// instead of preprocessor conditionals.
+#ifdef NDEBUG
+static const bool kDebugMode = false;
+#else
+static const bool kDebugMode = true;
+#endif
+
+// Structure for discovering alignment
+union MemoryAligner {
+ void* p;
+ double d;
+ size_t s;
+};
+
+static SpinLock spinlock(SpinLock::LINKER_INITIALIZED);
+
+#if defined(HAVE_MMAP) || defined(MADV_DONTNEED)
+// Page size is initialized on demand (only needed for mmap-based allocators)
+static size_t pagesize = 0;
+#endif
+
+// Configuration parameters.
+
+DEFINE_int32(malloc_devmem_start,
+ EnvToInt("TCMALLOC_DEVMEM_START", 0),
+ "Physical memory starting location in MB for /dev/mem allocation."
+ " Setting this to 0 disables /dev/mem allocation");
+DEFINE_int32(malloc_devmem_limit,
+ EnvToInt("TCMALLOC_DEVMEM_LIMIT", 0),
+ "Physical memory limit location in MB for /dev/mem allocation."
+ " Setting this to 0 means no limit."); +DEFINE_bool(malloc_skip_sbrk, + EnvToBool("TCMALLOC_SKIP_SBRK", false), + "Whether sbrk can be used to obtain memory."); +DEFINE_bool(malloc_skip_mmap, + EnvToBool("TCMALLOC_SKIP_MMAP", false), + "Whether mmap can be used to obtain memory."); + +// static allocators +class SbrkSysAllocator : public SysAllocator { +public: + SbrkSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + void DumpStats(TCMalloc_Printer* printer); +}; +static char sbrk_space[sizeof(SbrkSysAllocator)]; + +class MmapSysAllocator : public SysAllocator { +public: + MmapSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + void DumpStats(TCMalloc_Printer* printer); +}; +static char mmap_space[sizeof(MmapSysAllocator)]; + +class DevMemSysAllocator : public SysAllocator { +public: + DevMemSysAllocator() : SysAllocator() { + } + void* Alloc(size_t size, size_t *actual_size, size_t alignment); + void DumpStats(TCMalloc_Printer* printer); +}; +static char devmem_space[sizeof(DevMemSysAllocator)]; + +static const int kStaticAllocators = 3; +// kMaxDynamicAllocators + kStaticAllocators; +static const int kMaxAllocators = 5; +static SysAllocator *allocators[kMaxAllocators]; + +bool RegisterSystemAllocator(SysAllocator *a, int priority) { + SpinLockHolder lock_holder(&spinlock); + + // No two allocators should have a priority conflict, since the order + // is determined at compile time. + CHECK_CONDITION(allocators[priority] == NULL); + allocators[priority] = a; + return true; +} + + +void* SbrkSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#ifndef HAVE_SBRK + failed_ = true; + return NULL; +#else + // Check if we should use sbrk allocation. + // FLAGS_malloc_skip_sbrk starts out as false (its uninitialized + // state) and eventually gets initialized to the specified value. Note + // that this code runs for a while before the flags are initialized. + // That means that even if this flag is set to true, some (initial) + // memory will be allocated with sbrk before the flag takes effect. + if (FLAGS_malloc_skip_sbrk) { + return NULL; + } + + // sbrk will release memory if passed a negative number, so we do + // a strict check here + if (static_cast<ptrdiff_t>(size + alignment) < 0) return NULL; + + // could theoretically return the "extra" bytes here, but this + // is simple and correct. + if (actual_size) { + *actual_size = size; + } + + // This doesn't overflow because TCMalloc_SystemAlloc has already + // tested for overflow at the alignment boundary. + size = ((size + alignment - 1) / alignment) * alignment; + + // Check that we we're not asking for so much more memory that we'd + // wrap around the end of the virtual address space. (This seems + // like something sbrk() should check for us, and indeed opensolaris + // does, but glibc does not: + // http://src.opensolaris.org/source/xref/onnv/onnv-gate/usr/src/lib/libc/port/sys/sbrk.c?a=true + // http://sourceware.org/cgi-bin/cvsweb.cgi/~checkout~/libc/misc/sbrk.c?rev=1.1.2.1&content-type=text/plain&cvsroot=glibc + // Without this check, sbrk may succeed when it ought to fail.) + if (reinterpret_cast<intptr_t>(sbrk(0)) + size < size) { + failed_ = true; + return NULL; + } + + void* result = sbrk(size); + if (result == reinterpret_cast<void*>(-1)) { + failed_ = true; + return NULL; + } + + // Is it aligned? 
+ uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + if ((ptr & (alignment-1)) == 0) return result; + + // Try to get more memory for alignment + size_t extra = alignment - (ptr & (alignment-1)); + void* r2 = sbrk(extra); + if (reinterpret_cast<uintptr_t>(r2) == (ptr + size)) { + // Contiguous with previous result + return reinterpret_cast<void*>(ptr + extra); + } + + // Give up and ask for "size + alignment - 1" bytes so + // that we can find an aligned region within it. + result = sbrk(size + alignment - 1); + if (result == reinterpret_cast<void*>(-1)) { + failed_ = true; + return NULL; + } + ptr = reinterpret_cast<uintptr_t>(result); + if ((ptr & (alignment-1)) != 0) { + ptr += alignment - (ptr & (alignment-1)); + } + return reinterpret_cast<void*>(ptr); +#endif // HAVE_SBRK +} + +void SbrkSysAllocator::DumpStats(TCMalloc_Printer* printer) { + printer->printf("SbrkSysAllocator: failed_=%d\n", failed_); +} + +void* MmapSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#ifndef HAVE_MMAP + failed_ = true; + return NULL; +#else + // Check if we should use mmap allocation. + // FLAGS_malloc_skip_mmap starts out as false (its uninitialized + // state) and eventually gets initialized to the specified value. Note + // that this code runs for a while before the flags are initialized. + // Chances are we never get here before the flags are initialized since + // sbrk is used until the heap is exhausted (before mmap is used). + if (FLAGS_malloc_skip_mmap) { + return NULL; + } + + // could theoretically return the "extra" bytes here, but this + // is simple and correct. + if (actual_size) { + *actual_size = size; + } + + // Enforce page alignment + if (pagesize == 0) pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + if (aligned_size < size) { + return NULL; + } + size = aligned_size; + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS). + // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void* result = mmap(NULL, size + extra, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + failed_ = true; + return NULL; + } + + // Adjust the return memory so it is aligned + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + // Return the unused memory to the system + if (adjust > 0) { + munmap(reinterpret_cast<void*>(ptr), adjust); + } + if (adjust < extra) { + munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); + } + + ptr += adjust; + return reinterpret_cast<void*>(ptr); +#endif // HAVE_MMAP +} + +void MmapSysAllocator::DumpStats(TCMalloc_Printer* printer) { + printer->printf("MmapSysAllocator: failed_=%d\n", failed_); +} + +void* DevMemSysAllocator::Alloc(size_t size, size_t *actual_size, + size_t alignment) { +#ifndef HAVE_MMAP + failed_ = true; + return NULL; +#else + static bool initialized = false; + static off_t physmem_base; // next physical memory address to allocate + static off_t physmem_limit; // maximum physical address allowed + static int physmem_fd; // file descriptor for /dev/mem + + // Check if we should use /dev/mem allocation. 
Note that it may take + // a while to get this flag initialized, so meanwhile we fall back to + // the next allocator. (It looks like 7MB gets allocated before + // this flag gets initialized -khr.) + if (FLAGS_malloc_devmem_start == 0) { + // NOTE: not a devmem_failure - we'd like TCMalloc_SystemAlloc to + // try us again next time. + return NULL; + } + + if (!initialized) { + physmem_fd = open("/dev/mem", O_RDWR); + if (physmem_fd < 0) { + failed_ = true; + return NULL; + } + physmem_base = FLAGS_malloc_devmem_start*1024LL*1024LL; + physmem_limit = FLAGS_malloc_devmem_limit*1024LL*1024LL; + initialized = true; + } + + // could theoretically return the "extra" bytes here, but this + // is simple and correct. + if (actual_size) { + *actual_size = size; + } + + // Enforce page alignment + if (pagesize == 0) pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size_t aligned_size = ((size + alignment - 1) / alignment) * alignment; + if (aligned_size < size) { + return NULL; + } + size = aligned_size; + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + + // check to see if we have any memory left + if (physmem_limit != 0 && + ((size + extra) > (physmem_limit - physmem_base))) { + failed_ = true; + return NULL; + } + + // Note: size + extra does not overflow since: + // size + alignment < (1<<NBITS). + // and extra <= alignment + // therefore size + extra < (1<<NBITS) + void *result = mmap(0, size + extra, PROT_WRITE|PROT_READ, + MAP_SHARED, physmem_fd, physmem_base); + if (result == reinterpret_cast<void*>(MAP_FAILED)) { + failed_ = true; + return NULL; + } + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + + // Adjust the return memory so it is aligned + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + // Return the unused virtual memory to the system + if (adjust > 0) { + munmap(reinterpret_cast<void*>(ptr), adjust); + } + if (adjust < extra) { + munmap(reinterpret_cast<void*>(ptr + adjust + size), extra - adjust); + } + + ptr += adjust; + physmem_base += adjust + size; + + return reinterpret_cast<void*>(ptr); +#endif // HAVE_MMAP +} + +void DevMemSysAllocator::DumpStats(TCMalloc_Printer* printer) { + printer->printf("DevMemSysAllocator: failed_=%d\n", failed_); +} + +static bool system_alloc_inited = false; +void InitSystemAllocators(void) { + // This determines the order in which system allocators are called + int i = kMaxDynamicAllocators; + allocators[i++] = new (devmem_space) DevMemSysAllocator(); + + // In 64-bit debug mode, place the mmap allocator first since it + // allocates pointers that do not fit in 32 bits and therefore gives + // us better testing of code's 64-bit correctness. It also leads to + // less false negatives in heap-checking code. (Numbers are less + // likely to look like pointers and therefore the conservative gc in + // the heap-checker is less likely to misinterpret a number as a + // pointer). 
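+ // Resulting table layout: slots [0, kMaxDynamicAllocators) are reserved
+ // for RegisterSystemAllocator(), the next slot holds the /dev/mem
+ // allocator, and the final two hold sbrk and mmap in the order picked
+ // below. TCMalloc_SystemAlloc() then tries the slots in index order.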
+ if (kDebugMode && sizeof(void*) > 4) { + allocators[i++] = new (mmap_space) MmapSysAllocator(); + allocators[i++] = new (sbrk_space) SbrkSysAllocator(); + } else { + allocators[i++] = new (sbrk_space) SbrkSysAllocator(); + allocators[i++] = new (mmap_space) MmapSysAllocator(); + } +} + +void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, + size_t alignment) { + // Discard requests that overflow + if (size + alignment < size) return NULL; + + SpinLockHolder lock_holder(&spinlock); + + if (!system_alloc_inited) { + InitSystemAllocators(); + system_alloc_inited = true; + } + + // Enforce minimum alignment + if (alignment < sizeof(MemoryAligner)) alignment = sizeof(MemoryAligner); + + // Try twice, once avoiding allocators that failed before, and once + // more trying all allocators even if they failed before. + for (int i = 0; i < 2; i++) { + for (int j = 0; j < kMaxAllocators; j++) { + SysAllocator *a = allocators[j]; + if (a == NULL) continue; + if (a->usable_ && !a->failed_) { + void* result = a->Alloc(size, actual_size, alignment); + if (result != NULL) return result; + } + } + + // nothing worked - reset failed_ flags and try again + for (int j = 0; j < kMaxAllocators; j++) { + SysAllocator *a = allocators[j]; + if (a == NULL) continue; + a->failed_ = false; + } + } + return NULL; +} + +void TCMalloc_SystemRelease(void* start, size_t length) { +#ifdef MADV_DONTNEED + if (FLAGS_malloc_devmem_start) { + // It's not safe to use MADV_DONTNEED if we've been mapping + // /dev/mem for heap memory + return; + } + if (pagesize == 0) pagesize = getpagesize(); + const size_t pagemask = pagesize - 1; + + size_t new_start = reinterpret_cast<size_t>(start); + size_t end = new_start + length; + size_t new_end = end; + + // Round up the starting address and round down the ending address + // to be page aligned: + new_start = (new_start + pagesize - 1) & ~pagemask; + new_end = new_end & ~pagemask; + + ASSERT((new_start & pagemask) == 0); + ASSERT((new_end & pagemask) == 0); + ASSERT(new_start >= reinterpret_cast<size_t>(start)); + ASSERT(new_end <= end); + + if (new_end > new_start) { + // Note -- ignoring most return codes, because if this fails it + // doesn't matter... + while (madvise(reinterpret_cast<char*>(new_start), new_end - new_start, + MADV_DONTNEED) == -1 && + errno == EAGAIN) { + // NOP + } + } +#endif +} + +void DumpSystemAllocatorStats(TCMalloc_Printer* printer) { + for (int j = 0; j < kMaxAllocators; j++) { + SysAllocator *a = allocators[j]; + if (a == NULL) continue; + if (a->usable_) { + a->DumpStats(printer); + } + } +} diff --git a/third_party/tcmalloc/chromium/src/system-alloc.h b/third_party/tcmalloc/chromium/src/system-alloc.h new file mode 100644 index 0000000..44b0333 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/system-alloc.h @@ -0,0 +1,113 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Routine that uses sbrk/mmap to allocate memory from the system. +// Useful for implementing malloc. + +#ifndef TCMALLOC_SYSTEM_ALLOC_H_ +#define TCMALLOC_SYSTEM_ALLOC_H_ + +#include <config.h> +#include "internal_logging.h" + +// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment +// +// Allocate and return "N" bytes of zeroed memory. +// +// If actual_bytes is NULL then the returned memory is exactly the +// requested size. If actual bytes is non-NULL then the allocator +// may optionally return more bytes than asked for (i.e. return an +// entire "huge" page if a huge page allocator is in use). +// +// The returned pointer is a multiple of "alignment" if non-zero. +// +// Returns NULL when out of memory. +extern void* TCMalloc_SystemAlloc(size_t bytes, size_t *actual_bytes, + size_t alignment = 0); + +// This call is a hint to the operating system that the pages +// contained in the specified range of memory will not be used for a +// while, and can be released for use by other processes or the OS. +// Pages which are released in this way may be destroyed (zeroed) by +// the OS. The benefit of this function is that it frees memory for +// use by the system, the cost is that the pages are faulted back into +// the address space next time they are touched, which can impact +// performance. (Only pages fully covered by the memory region will +// be released, partial pages will not.) +extern void TCMalloc_SystemRelease(void* start, size_t length); + +// Interface to a pluggable system allocator. +class SysAllocator { + public: + SysAllocator() + : usable_(true), + failed_(false) { + }; + virtual ~SysAllocator() {}; + + virtual void* Alloc(size_t size, size_t *actual_size, size_t alignment) = 0; + + // Populate the map with whatever properties the specified allocator finds + // useful for debugging (such as number of bytes allocated and whether the + // allocator has failed). The callee is responsible for any necessary + // locking (and avoiding deadlock). + virtual void DumpStats(TCMalloc_Printer* printer) = 0; + + // So the allocator can be turned off at compile time + bool usable_; + + // Did this allocator fail? If so, we don't need to retry more than twice. + bool failed_; +}; + +// Register a new system allocator. The priority determines the order in +// which the allocators will be invoked. Allocators with numerically lower +// priority are tried first. To keep things simple, the priority of various +// allocators is known at compile time. 
+// +// Valid range of priorities: [0, kMaxDynamicAllocators) +// +// Please note that we can't use complex data structures and cause +// recursive calls to malloc within this function. So all data structures +// are statically allocated. +// +// Returns true on success. Does nothing on failure. +extern PERFTOOLS_DLL_DECL bool RegisterSystemAllocator(SysAllocator *allocator, + int priority); + +// Number of SysAllocators known to call RegisterSystemAllocator +static const int kMaxDynamicAllocators = 2; + +// Retrieve the current state of various system allocators. +extern PERFTOOLS_DLL_DECL void DumpSystemAllocatorStats(TCMalloc_Printer* printer); + +#endif /* TCMALLOC_SYSTEM_ALLOC_H_ */ diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.cc b/third_party/tcmalloc/chromium/src/tcmalloc.cc new file mode 100644 index 0000000..450c1ab --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tcmalloc.cc @@ -0,0 +1,1434 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> +// +// A malloc that uses a per-thread cache to satisfy small malloc requests. +// (The time for malloc/free of a small object drops from 300 ns to 50 ns.) +// +// See doc/tcmalloc.html for a high-level +// description of how this malloc works. +// +// SYNCHRONIZATION +// 1. The thread-specific lists are accessed without acquiring any locks. +// This is safe because each such list is only accessed by one thread. +// 2. We have a lock per central free-list, and hold it while manipulating +// the central free list for a particular size. +// 3. The central page allocator is protected by "pageheap_lock". +// 4. The pagemap (which maps from page-number to descriptor), +// can be read without holding any locks, and written while holding +// the "pageheap_lock". +// 5. To improve performance, a subset of the information one can get +// from the pagemap is cached in a data structure, pagemap_cache_, +// that atomically reads and writes its entries. 
This cache can be +// read and written without locking. +// +// This multi-threaded access to the pagemap is safe for fairly +// subtle reasons. We basically assume that when an object X is +// allocated by thread A and deallocated by thread B, there must +// have been appropriate synchronization in the handoff of object +// X from thread A to thread B. The same logic applies to pagemap_cache_. +// +// THE PAGEID-TO-SIZECLASS CACHE +// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache +// returns 0 for a particular PageID then that means "no information," not that +// the sizeclass is 0. The cache may have stale information for pages that do +// not hold the beginning of any free()'able object. Staleness is eliminated +// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and +// do_memalign() for all other relevant pages. +// +// PAGEMAP +// ------- +// Page map contains a mapping from page id to Span. +// +// If Span s occupies pages [p..q], +// pagemap[p] == s +// pagemap[q] == s +// pagemap[p+1..q-1] are undefined +// pagemap[p-1] and pagemap[q+1] are defined: +// NULL if the corresponding page is not yet in the address space. +// Otherwise it points to a Span. This span may be free +// or allocated. If free, it is in one of pageheap's freelist. +// +// TODO: Bias reclamation to larger addresses +// TODO: implement mallinfo/mallopt +// TODO: Better testing +// +// 9/28/2003 (new page-level allocator replaces ptmalloc2): +// * malloc/free of small objects goes from ~300 ns to ~50 ns. +// * allocation of a reasonably complicated struct +// goes from about 1100 ns to about 300 ns. + +#include <config.h> +#include <new> +#include <stdio.h> +#include <stddef.h> +#if defined HAVE_STDINT_H +#include <stdint.h> +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> +#else +#include <sys/types.h> +#endif +#if defined(HAVE_MALLOC_H) && defined(HAVE_STRUCT_MALLINFO) +#include <malloc.h> // for struct mallinfo +#endif +#include <string.h> +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <errno.h> +#include <stdarg.h> +#include <algorithm> +#include <google/tcmalloc.h> +#include "base/commandlineflags.h" +#include "base/basictypes.h" // gets us PRIu64 +#include "base/sysinfo.h" +#include "base/spinlock.h" +#include "common.h" +#include "malloc_hook-inl.h" +#include <google/malloc_hook.h> +#include <google/malloc_extension.h> +#include "central_freelist.h" +#include "internal_logging.h" +#include "linked_list.h" +#include "maybe_threads.h" +#include "page_heap.h" +#include "page_heap_allocator.h" +#include "pagemap.h" +#include "span.h" +#include "static_vars.h" +#include "system-alloc.h" +#include "tcmalloc_guard.h" +#include "thread_cache.h" + +#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS) +# define WIN32_DO_PATCHING 1 +#endif + +using std::max; +using tcmalloc::PageHeap; +using tcmalloc::PageHeapAllocator; +using tcmalloc::SizeMap; +using tcmalloc::Span; +using tcmalloc::StackTrace; +using tcmalloc::Static; +using tcmalloc::ThreadCache; + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. 
+#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +DECLARE_int64(tcmalloc_sample_parameter); +DECLARE_double(tcmalloc_release_rate); + +// For windows, the printf we use to report large allocs is +// potentially dangerous: it could cause a malloc that would cause an +// infinite loop. So by default we set the threshold to a huge number +// on windows, so this bad situation will never trigger. You can +// always set TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD manually if you +// want this functionality. +#ifdef _WIN32 +const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 62; +#else +const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 30; +#endif +DEFINE_int64(tcmalloc_large_alloc_report_threshold, + EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD", + kDefaultLargeAllocReportThreshold), + "Allocations larger than this value cause a stack " + "trace to be dumped to stderr. The threshold for " + "dumping stack traces is increased by a factor of 1.125 " + "every time we print a message so that the threshold " + "automatically goes up by a factor of ~1000 every 60 " + "messages. This bounds the amount of extra logging " + "generated by this flag. Default value of this flag " + "is very large and therefore you should see no extra " + "logging unless the flag is overridden. Set to 0 to " + "disable reporting entirely."); + + +// We already declared these functions in tcmalloc.h, but we have to +// declare them again to give them an ATTRIBUTE_SECTION: we want to +// put all callers of MallocHook::Invoke* in this module into +// ATTRIBUTE_SECTION(google_malloc) section, so that +// MallocHook::GetCallerStackTrace can function accurately. +#ifndef _WIN32 // windows doesn't have attribute_section, so don't bother +extern "C" { + void* tc_malloc(size_t size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void tc_free(void* ptr) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_realloc(void* ptr, size_t size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_calloc(size_t nmemb, size_t size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void tc_cfree(void* ptr) __THROW + ATTRIBUTE_SECTION(google_malloc); + + void* tc_memalign(size_t __alignment, size_t __size) __THROW + ATTRIBUTE_SECTION(google_malloc); + int tc_posix_memalign(void** ptr, size_t align, size_t size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_valloc(size_t __size) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_pvalloc(size_t __size) __THROW + ATTRIBUTE_SECTION(google_malloc); + + void tc_malloc_stats(void) __THROW + ATTRIBUTE_SECTION(google_malloc); + int tc_mallopt(int cmd, int value) __THROW + ATTRIBUTE_SECTION(google_malloc); +#ifdef HAVE_STRUCT_MALLINFO // struct mallinfo isn't defined on freebsd + struct mallinfo tc_mallinfo(void) __THROW + ATTRIBUTE_SECTION(google_malloc); +#endif + + void* tc_new(size_t size) + ATTRIBUTE_SECTION(google_malloc); + void tc_delete(void* p) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_newarray(size_t size) + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray(void* p) __THROW + ATTRIBUTE_SECTION(google_malloc); + + // And the nothrow variants of these: + void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); + void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); + // Surprisingly, compilers use a nothrow-delete internally. 
See, eg: + // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html + void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); + void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW + ATTRIBUTE_SECTION(google_malloc); +} // extern "C" +#endif // #ifndef _WIN32 + +// Override the libc functions to prefer our own instead. This comes +// first so code in tcmalloc.cc can use the overridden versions. One +// exception: in windows, by default, we patch our code into these +// functions (via src/windows/patch_function.cc) rather than override +// them. In that case, we don't want to do this overriding here. +#if !defined(WIN32_DO_PATCHING) && !defined(TCMALLOC_FOR_DEBUGALLOCATION) + +#if defined(__GNUC__) && !defined(__MACH__) + // Potentially faster variants that use the gcc alias extension. + // FreeBSD does support aliases, but apparently not correctly. :-( + // NOTE: we make many of these symbols weak, but do so in the makefile + // (via objcopy -W) and not here. That ends up being more portable. +# define ALIAS(x) __attribute__ ((alias (x))) +void* operator new(size_t size) ALIAS("tc_new"); +void operator delete(void* p) __THROW ALIAS("tc_delete"); +void* operator new[](size_t size) ALIAS("tc_newarray"); +void operator delete[](void* p) __THROW ALIAS("tc_deletearray"); +void* operator new(size_t size, const std::nothrow_t&) __THROW + ALIAS("tc_new_nothrow"); +void* operator new[](size_t size, const std::nothrow_t&) __THROW + ALIAS("tc_newarray_nothrow"); +void operator delete(void* size, const std::nothrow_t&) __THROW + ALIAS("tc_delete_nothrow"); +void operator delete[](void* size, const std::nothrow_t&) __THROW + ALIAS("tc_deletearray_nothrow"); +extern "C" { + void* malloc(size_t size) __THROW ALIAS("tc_malloc"); + void free(void* ptr) __THROW ALIAS("tc_free"); + void* realloc(void* ptr, size_t size) __THROW ALIAS("tc_realloc"); + void* calloc(size_t n, size_t size) __THROW ALIAS("tc_calloc"); + void cfree(void* ptr) __THROW ALIAS("tc_cfree"); + void* memalign(size_t align, size_t s) __THROW ALIAS("tc_memalign"); + void* valloc(size_t size) __THROW ALIAS("tc_valloc"); + void* pvalloc(size_t size) __THROW ALIAS("tc_pvalloc"); + int posix_memalign(void** r, size_t a, size_t s) __THROW + ALIAS("tc_posix_memalign"); + void malloc_stats(void) __THROW ALIAS("tc_malloc_stats"); + int mallopt(int cmd, int value) __THROW ALIAS("tc_mallopt"); +#ifdef HAVE_STRUCT_MALLINFO + struct mallinfo mallinfo(void) __THROW ALIAS("tc_mallinfo"); +#endif +} // extern "C" +#else // #if defined(__GNUC__) && !defined(__MACH__) +// Portable wrappers +void* operator new(size_t size) { return tc_new(size); } +void operator delete(void* p) __THROW { tc_delete(p); } +void* operator new[](size_t size) { return tc_newarray(size); } +void operator delete[](void* p) __THROW { tc_deletearray(p); } +void* operator new(size_t size, const std::nothrow_t& nt) __THROW { + return tc_new_nothrow(size, nt); +} +void* operator new[](size_t size, const std::nothrow_t& nt) __THROW { + return tc_newarray_nothrow(size, nt); +} +void operator delete(void* ptr, const std::nothrow_t& nt) __THROW { + return tc_delete_nothrow(ptr, nt); +} +void operator delete[](void* ptr, const std::nothrow_t& nt) __THROW { + return tc_deletearray_nothrow(ptr, nt); +} +extern "C" { + void* malloc(size_t s) __THROW { return tc_malloc(s); } + void free(void* p) __THROW { tc_free(p); } + void* realloc(void* p, size_t s) __THROW { return tc_realloc(p, s); } + void* calloc(size_t n, size_t s) 
__THROW { return tc_calloc(n, s); } + void cfree(void* p) __THROW { tc_cfree(p); } + void* memalign(size_t a, size_t s) __THROW { return tc_memalign(a, s); } + void* valloc(size_t s) __THROW { return tc_valloc(s); } + void* pvalloc(size_t s) __THROW { return tc_pvalloc(s); } + int posix_memalign(void** r, size_t a, size_t s) __THROW { + return tc_posix_memalign(r, a, s); + } + void malloc_stats(void) __THROW { tc_malloc_stats(); } + int mallopt(int cmd, int v) __THROW { return tc_mallopt(cmd, v); } +#ifdef HAVE_STRUCT_MALLINFO + struct mallinfo mallinfo(void) __THROW { return tc_mallinfo(); } +#endif +} // extern "C" +#endif // #if defined(__GNUC__) + +// Some library routines on RedHat 9 allocate memory using malloc() +// and free it using __libc_free() (or vice-versa). Since we provide +// our own implementations of malloc/free, we need to make sure that +// the __libc_XXX variants (defined as part of glibc) also point to +// the same implementations. +#ifdef __GLIBC__ // only glibc defines __libc_* +extern "C" { +#ifdef ALIAS + void* __libc_malloc(size_t size) ALIAS("tc_malloc"); + void __libc_free(void* ptr) ALIAS("tc_free"); + void* __libc_realloc(void* ptr, size_t size) ALIAS("tc_realloc"); + void* __libc_calloc(size_t n, size_t size) ALIAS("tc_calloc"); + void __libc_cfree(void* ptr) ALIAS("tc_cfree"); + void* __libc_memalign(size_t align, size_t s) ALIAS("tc_memalign"); + void* __libc_valloc(size_t size) ALIAS("tc_valloc"); + void* __libc_pvalloc(size_t size) ALIAS("tc_pvalloc"); + int __posix_memalign(void** r, size_t a, size_t s) ALIAS("tc_posix_memalign"); +#else // #ifdef ALIAS + void* __libc_malloc(size_t size) { return malloc(size); } + void __libc_free(void* ptr) { free(ptr); } + void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } + void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); } + void __libc_cfree(void* ptr) { cfree(ptr); } + void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); } + void* __libc_valloc(size_t size) { return valloc(size); } + void* __libc_pvalloc(size_t size) { return pvalloc(size); } + int __posix_memalign(void** r, size_t a, size_t s) { + return posix_memalign(r, a, s); + } +#endif // #ifdef ALIAS +} // extern "C" +#endif // ifdef __GLIBC__ + +#undef ALIAS + +#endif // #ifndef(WIN32_DO_PATCHING) && ndef(TCMALLOC_FOR_DEBUGALLOCATION) + + +// ----------------------- IMPLEMENTATION ------------------------------- + +// Routines such as free() and realloc() catch some erroneous pointers +// passed to them, and invoke the below when they do. (An erroneous pointer +// won't be caught if it's within a valid span or a stale span for which +// the pagemap cache has a non-zero sizeclass.) This is a cheap (source-editing +// required) kind of exception handling for these routines. 
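+// The default handlers below simply CRASH() on a bad pointer; callers that
+// need different behavior (e.g. the Windows patching code) can supply their
+// own handlers through the *_with_callback variants later in this file.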
+namespace { +void InvalidFree(void* ptr) { + CRASH("Attempt to free invalid pointer: %p\n", ptr); +} + +size_t InvalidGetSizeForRealloc(void* old_ptr) { + CRASH("Attempt to realloc invalid pointer: %p\n", old_ptr); + return 0; +} + +size_t InvalidGetAllocatedSize(void* ptr) { + CRASH("Attempt to get the size of an invalid pointer: %p\n", ptr); + return 0; +} +} // unnamed namespace + +// Extract interesting stats +struct TCMallocStats { + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t transfer_bytes; // Bytes in central transfer cache + uint64_t metadata_bytes; // Bytes alloced for metadata + PageHeap::Stats pageheap; // Stats from page heap +}; + +// Get stats into "r". Also get per-size-class counts if class_count != NULL +static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { + r->central_bytes = 0; + r->transfer_bytes = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + const int length = Static::central_cache()[cl].length(); + const int tc_length = Static::central_cache()[cl].tc_length(); + const size_t size = static_cast<uint64_t>( + Static::sizemap()->ByteSizeForClass(cl)); + r->central_bytes += (size * length); + r->transfer_bytes += (size * tc_length); + if (class_count) class_count[cl] = length + tc_length; + } + + // Add stats from per-thread heaps + r->thread_bytes = 0; + { // scope + SpinLockHolder h(Static::pageheap_lock()); + ThreadCache::GetThreadStats(&r->thread_bytes, class_count); + r->metadata_bytes = tcmalloc::metadata_system_bytes(); + r->pageheap = Static::pageheap()->stats(); + } +} + +// WRITE stats to "out" +static void DumpStats(TCMalloc_Printer* out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + ExtractStats(&stats, (level >= 2 ? 
class_count : NULL)); + + static const double MB = 1048576.0; + + if (level >= 2) { + out->printf("------------------------------------------------\n"); + out->printf("Size class breakdown\n"); + out->printf("------------------------------------------------\n"); + uint64_t cumulative = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + if (class_count[cl] > 0) { + uint64_t class_bytes = + class_count[cl] * Static::sizemap()->ByteSizeForClass(cl); + cumulative += class_bytes; + out->printf("class %3d [ %8" PRIuS " bytes ] : " + "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n", + cl, Static::sizemap()->ByteSizeForClass(cl), + class_count[cl], + class_bytes / MB, + cumulative / MB); + } + } + + SpinLockHolder h(Static::pageheap_lock()); + Static::pageheap()->Dump(out); + + out->printf("------------------------------------------------\n"); + DumpSystemAllocatorStats(out); + } + + const uint64_t bytes_in_use = stats.pageheap.system_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.thread_bytes; + + out->printf("------------------------------------------------\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Heap size\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes in use by application\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in page heap\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes unmapped in page heap\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in central cache\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in transfer cache\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Bytes free in thread caches\n" + "MALLOC: %12" PRIu64 " Spans in use\n" + "MALLOC: %12" PRIu64 " Thread heaps in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MB) Metadata allocated\n" + "------------------------------------------------\n", + stats.pageheap.system_bytes, stats.pageheap.system_bytes / MB, + bytes_in_use, bytes_in_use / MB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MB, + stats.central_bytes, stats.central_bytes / MB, + stats.transfer_bytes, stats.transfer_bytes / MB, + stats.thread_bytes, stats.thread_bytes / MB, + uint64_t(Static::span_allocator()->inuse()), + uint64_t(ThreadCache::HeapsInUse()), + stats.metadata_bytes, stats.metadata_bytes / MB); +} + +static void PrintStats(int level) { + const int kBufferSize = 16 << 10; + char* buffer = new char[kBufferSize]; + TCMalloc_Printer printer(buffer, kBufferSize); + DumpStats(&printer, level); + write(STDERR_FILENO, buffer, strlen(buffer)); + delete[] buffer; +} + +static void** DumpHeapGrowthStackTraces() { + // Count how much space we need + int needed_slots = 0; + { + SpinLockHolder h(Static::pageheap_lock()); + for (StackTrace* t = Static::growth_stacks(); + t != NULL; + t = reinterpret_cast<StackTrace*>( + t->stack[tcmalloc::kMaxStackDepth-1])) { + needed_slots += 3 + t->depth; + } + needed_slots += 100; // Slop in case list grows + needed_slots += needed_slots/8; // An extra 12.5% slop + } + + void** result = new void*[needed_slots]; + if (result == NULL) { + MESSAGE("tcmalloc: allocation failed for stack trace slots", + needed_slots * sizeof(*result)); + return NULL; + } + + SpinLockHolder h(Static::pageheap_lock()); + int used_slots = 0; + for (StackTrace* t = Static::growth_stacks(); + t != NULL; + t = reinterpret_cast<StackTrace*>( + t->stack[tcmalloc::kMaxStackDepth-1])) { + ASSERT(used_slots < needed_slots); // Need to leave room for terminator + if (used_slots + 3 + t->depth >= needed_slots) { + // No 
more room + break; + } + + result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1)); + result[used_slots+1] = reinterpret_cast<void*>(t->size); + result[used_slots+2] = reinterpret_cast<void*>(t->depth); + for (int d = 0; d < t->depth; d++) { + result[used_slots+3+d] = t->stack[d]; + } + used_slots += 3 + t->depth; + } + result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0)); + return result; +} + +static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) { + PageID page = 1; // Some code may assume that page==0 is never used + bool done = false; + while (!done) { + // Accumulate a small number of ranges in a local buffer + static const int kNumRanges = 16; + static base::MallocRange ranges[kNumRanges]; + int n = 0; + { + SpinLockHolder h(Static::pageheap_lock()); + while (n < kNumRanges) { + if (!Static::pageheap()->GetNextRange(page, &ranges[n])) { + done = true; + break; + } else { + uintptr_t limit = ranges[n].address + ranges[n].length; + page = (limit + kPageSize - 1) >> kPageShift; + n++; + } + } + } + + for (int i = 0; i < n; i++) { + (*func)(arg, &ranges[i]); + } + } +} + +// TCMalloc's support for extra malloc interfaces +class TCMallocImplementation : public MallocExtension { + private: + // ReleaseToSystem() might release more than the requested bytes because + // the page heap releases at the span granularity, and spans are of wildly + // different sizes. This member keeps track of the extra bytes bytes + // released so that the app can periodically call ReleaseToSystem() to + // release memory at a constant rate. + // NOTE: Protected by Static::pageheap_lock(). + size_t extra_bytes_released_; + + public: + TCMallocImplementation() + : extra_bytes_released_(0) { + } + + virtual void GetStats(char* buffer, int buffer_length) { + ASSERT(buffer_length > 0); + TCMalloc_Printer printer(buffer, buffer_length); + + // Print level one stats unless lots of space is available + if (buffer_length < 10000) { + DumpStats(&printer, 1); + } else { + DumpStats(&printer, 2); + } + } + + virtual void** ReadStackTraces(int* sample_period) { + tcmalloc::StackTraceTable table; + { + SpinLockHolder h(Static::pageheap_lock()); + Span* sampled = Static::sampled_objects(); + for (Span* s = sampled->next; s != sampled; s = s->next) { + table.AddTrace(*reinterpret_cast<StackTrace*>(s->objects)); + } + } + *sample_period = ThreadCache::GetCache()->GetSamplePeriod(); + return table.ReadStackTracesAndClear(); // grabs and releases pageheap_lock + } + + virtual void** ReadHeapGrowthStackTraces() { + return DumpHeapGrowthStackTraces(); + } + + virtual void Ranges(void* arg, RangeFunction func) { + IterateOverRanges(arg, func); + } + + virtual bool GetNumericProperty(const char* name, size_t* value) { + ASSERT(name != NULL); + + if (strcmp(name, "generic.current_allocated_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL); + *value = stats.pageheap.system_bytes + - stats.thread_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes; + return true; + } + + if (strcmp(name, "generic.heap_size") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL); + *value = stats.pageheap.system_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.slack_bytes") == 0) { + // We assume that bytes in the page heap are not fragmented too + // badly, and are therefore available for allocation without + // growing the pageheap system byte count. 
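+ // In other words, "slack" as reported here is free_bytes plus
+ // unmapped_bytes, the two quantities summed just below; callers reach
+ // this value through MallocExtension::instance()->GetNumericProperty(
+ // "tcmalloc.slack_bytes", &value).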
+ SpinLockHolder l(Static::pageheap_lock()); + PageHeap::Stats stats = Static::pageheap()->stats(); + *value = stats.free_bytes + stats.unmapped_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().free_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = Static::pageheap()->stats().unmapped_bytes; + return true; + } + + if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + *value = ThreadCache::overall_thread_cache_size(); + return true; + } + + if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) { + TCMallocStats stats; + ExtractStats(&stats, NULL); + *value = stats.thread_bytes; + return true; + } + + return false; + } + + virtual bool SetNumericProperty(const char* name, size_t value) { + ASSERT(name != NULL); + + if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) { + SpinLockHolder l(Static::pageheap_lock()); + ThreadCache::set_overall_thread_cache_size(value); + return true; + } + + return false; + } + + virtual void MarkThreadIdle() { + ThreadCache::BecomeIdle(); + } + + virtual void MarkThreadBusy(); // Implemented below + + virtual void ReleaseToSystem(ssize_t num_bytes) { + if (num_bytes <= 0) { + return; + } + SpinLockHolder h(Static::pageheap_lock()); + if (num_bytes <= extra_bytes_released_) { + // We released too much on a prior call, so don't release any + // more this time. + extra_bytes_released_ = extra_bytes_released_ - num_bytes; + return; + } + num_bytes = num_bytes - extra_bytes_released_; + // num_bytes might be less than one page. If we pass zero to + // ReleaseAtLeastNPages, it won't do anything, so we release a whole + // page now and let extra_bytes_released_ smooth it out over time. + Length num_pages = max<Length>(num_bytes >> kPageShift, 1); + size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages( + num_pages) << kPageShift; + if (bytes_released > num_bytes) { + extra_bytes_released_ = bytes_released - num_bytes; + } else { + // The PageHeap wasn't able to release num_bytes. Don't try to + // compensate with a big release next time. Specifically, + // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX). + extra_bytes_released_ = 0; + } + } + + virtual void SetMemoryReleaseRate(double rate) { + FLAGS_tcmalloc_release_rate = rate; + } + + virtual double GetMemoryReleaseRate() { + return FLAGS_tcmalloc_release_rate; + } + virtual size_t GetEstimatedAllocatedSize(size_t size) { + if (size <= kMaxSize) { + const size_t cl = Static::sizemap()->SizeClass(size); + const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl); + return alloc_size; + } else { + return tcmalloc::pages(size) << kPageShift; + } + } + + // This just calls GetSizeWithCallback, but because that's in an + // unnamed namespace, we need to move the definition below it in the + // file. + virtual size_t GetAllocatedSize(void* ptr); +}; + +// The constructor allocates an object to ensure that initialization +// runs before main(), and therefore we do not have a chance to become +// multi-threaded before initialization. We also create the TSD key +// here. Presumably by the time this constructor runs, glibc is in +// good enough shape to handle pthread_key_create(). +// +// The constructor also takes the opportunity to tell STL to use +// tcmalloc. 
We want to do this early, before construct time, so +// all user STL allocations go through tcmalloc (which works really +// well for STL). +// +// The destructor prints stats when the program exits. +static int tcmallocguard_refcount = 0; // no lock needed: runs before main() +TCMallocGuard::TCMallocGuard() { + if (tcmallocguard_refcount++ == 0) { +#ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS + // Check whether the kernel also supports TLS (needs to happen at runtime) + tcmalloc::CheckIfKernelSupportsTLS(); +#endif +#ifdef WIN32_DO_PATCHING + // patch the windows VirtualAlloc, etc. + PatchWindowsFunctions(); // defined in windows/patch_functions.cc +#endif + free(malloc(1)); + ThreadCache::InitTSD(); + free(malloc(1)); + MallocExtension::Register(new TCMallocImplementation); + } +} + +TCMallocGuard::~TCMallocGuard() { + if (--tcmallocguard_refcount == 0) { + const char* env = getenv("MALLOCSTATS"); + if (env != NULL) { + int level = atoi(env); + if (level < 1) level = 1; + PrintStats(level); + } + } +} +#ifndef WIN32_OVERRIDE_ALLOCATORS +static TCMallocGuard module_enter_exit_hook; +#endif + +//------------------------------------------------------------------- +// Helpers for the exported routines below +//------------------------------------------------------------------- + +static Span* DoSampledAllocation(size_t size) { + // Grab the stack trace outside the heap lock + StackTrace tmp; + tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1); + tmp.size = size; + + SpinLockHolder h(Static::pageheap_lock()); + // Allocate span + Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size)); + if (span == NULL) { + return NULL; + } + + // Allocate stack trace + StackTrace *stack = Static::stacktrace_allocator()->New(); + if (stack == NULL) { + // Sampling failed because of lack of memory + return span; + } + + *stack = tmp; + span->sample = 1; + span->objects = stack; + tcmalloc::DLL_Prepend(Static::sampled_objects(), span); + + return span; +} + +static inline bool CheckCachedSizeClass(void *ptr) { + PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p); + return cached_value == 0 || + cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass; +} + +static inline void* CheckedMallocResult(void *result) +{ + ASSERT(result == 0 || CheckCachedSizeClass(result)); + return result; +} + +static inline void* SpanToMallocResult(Span *span) { + Static::pageheap()->CacheSizeClass(span->start, 0); + return + CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); +} + +// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with +// automatic increases factored in. +static int64_t large_alloc_threshold = + (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold + ? kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold); + +static void ReportLargeAlloc(Length num_pages, void* result) { + StackTrace stack; + stack.depth = GetStackTrace(stack.stack, tcmalloc::kMaxStackDepth, 1); + + static const int N = 1000; + char buffer[N]; + TCMalloc_Printer printer(buffer, N); + printer.printf("tcmalloc: large alloc %llu bytes == %p @ ", + static_cast<unsigned long long>(num_pages) << kPageShift, + result); + for (int i = 0; i < stack.depth; i++) { + printer.printf(" %p", stack.stack[i]); + } + printer.printf("\n"); + write(STDERR_FILENO, buffer, strlen(buffer)); +} + +namespace { + +// Helper for do_malloc(). 
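+// Allocates "num_pages" pages straight from the page heap (the path taken
+// for requests bigger than kMaxSize). The pageheap lock is held only while
+// the span is grabbed and large_alloc_threshold is updated; the large-alloc
+// report itself is printed after the lock is released.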
+inline void* do_malloc_pages(Length num_pages) { + Span *span; + bool report_large = false; + { + SpinLockHolder h(Static::pageheap_lock()); + span = Static::pageheap()->New(num_pages); + const int64 threshold = large_alloc_threshold; + if (threshold > 0 && num_pages >= (threshold >> kPageShift)) { + // Increase the threshold by 1/8 every time we generate a report. + // We cap the threshold at 8GB to avoid overflow problems. + large_alloc_threshold = (threshold + threshold/8 < 8ll<<30 + ? threshold + threshold/8 : 8ll<<30); + report_large = true; + } + } + + void* result = (span == NULL ? NULL : SpanToMallocResult(span)); + if (report_large) { + ReportLargeAlloc(num_pages, result); + } + return result; +} + +inline void* do_malloc(size_t size) { + void* ret = NULL; + + // The following call forces module initialization + ThreadCache* heap = ThreadCache::GetCache(); + if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { + Span* span = DoSampledAllocation(size); + if (span != NULL) { + ret = SpanToMallocResult(span); + } + } else if (size <= kMaxSize) { + // The common case, and also the simplest. This just pops the + // size-appropriate freelist, after replenishing it if it's empty. + ret = CheckedMallocResult(heap->Allocate(size)); + } else { + ret = do_malloc_pages(tcmalloc::pages(size)); + } + if (ret == NULL) errno = ENOMEM; + return ret; +} + +inline void* do_calloc(size_t n, size_t elem_size) { + // Overflow check + const size_t size = n * elem_size; + if (elem_size != 0 && size / elem_size != n) return NULL; + + void* result = do_malloc(size); + if (result != NULL) { + memset(result, 0, size); + } + return result; +} + +static inline ThreadCache* GetCacheIfPresent() { + void* const p = ThreadCache::GetCacheIfPresent(); + return reinterpret_cast<ThreadCache*>(p); +} + +// This lets you call back to a given function pointer if ptr is invalid. +// It is used primarily by windows code which wants a specialized callback. +inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) { + if (ptr == NULL) return; + ASSERT(Static::pageheap() != NULL); // Should not call free() before malloc() + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + Span* span = NULL; + size_t cl = Static::pageheap()->GetSizeClassIfCached(p); + + if (cl == 0) { + span = Static::pageheap()->GetDescriptor(p); + if (!span) { + // span can be NULL because the pointer passed in is invalid + // (not something returned by malloc or friends), or because the + // pointer was allocated with some other allocator besides + // tcmalloc. The latter can happen if tcmalloc is linked in via + // a dynamic library, but is not listed last on the link line. + // In that case, libraries after it on the link line will + // allocate with libc malloc, but free with tcmalloc's free. 
+ (*invalid_free_fn)(ptr); // Decide how to handle the bad free request + return; + } + cl = span->sizeclass; + Static::pageheap()->CacheSizeClass(p, cl); + } + if (cl != 0) { + ASSERT(!Static::pageheap()->GetDescriptor(p)->sample); + ThreadCache* heap = GetCacheIfPresent(); + if (heap != NULL) { + heap->Deallocate(ptr, cl); + } else { + // Delete directly into central cache + tcmalloc::SLL_SetNext(ptr, NULL); + Static::central_cache()[cl].InsertRange(ptr, ptr, 1); + } + } else { + SpinLockHolder h(Static::pageheap_lock()); + ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); + ASSERT(span != NULL && span->start == p); + if (span->sample) { + tcmalloc::DLL_Remove(span); + Static::stacktrace_allocator()->Delete( + reinterpret_cast<StackTrace*>(span->objects)); + span->objects = NULL; + } + Static::pageheap()->Delete(span); + } +} + +// The default "do_free" that uses the default callback. +inline void do_free(void* ptr) { + return do_free_with_callback(ptr, &InvalidFree); +} + +inline size_t GetSizeWithCallback(void* ptr, + size_t (*invalid_getsize_fn)(void*)) { + if (ptr == NULL) + return 0; + const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; + size_t cl = Static::pageheap()->GetSizeClassIfCached(p); + if (cl != 0) { + return Static::sizemap()->ByteSizeForClass(cl); + } else { + Span *span = Static::pageheap()->GetDescriptor(p); + if (span == NULL) { // means we do not own this memory + return (*invalid_getsize_fn)(ptr); + } else if (span->sizeclass != 0) { + Static::pageheap()->CacheSizeClass(p, span->sizeclass); + return Static::sizemap()->ByteSizeForClass(span->sizeclass); + } else { + return span->length << kPageShift; + } + } +} + +// This lets you call back to a given function pointer if ptr is invalid. +// It is used primarily by windows code which wants a specialized callback. +inline void* do_realloc_with_callback( + void* old_ptr, size_t new_size, + void (*invalid_free_fn)(void*), + size_t (*invalid_get_size_fn)(void*)) { + // Get the size of the old entry + const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn); + + // Reallocate if the new size is larger than the old size, + // or if the new size is significantly smaller than the old size. + // We do hysteresis to avoid resizing ping-pongs: + // . If we need to grow, grow to max(new_size, old_size * 1.X) + // . Don't shrink unless new_size < old_size * 0.Y + // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5. + const int lower_bound_to_grow = old_size + old_size / 4; + const int upper_bound_to_shrink = old_size / 2; + if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) { + // Need to reallocate. + void* new_ptr = NULL; + + if (new_size > old_size && new_size < lower_bound_to_grow) { + new_ptr = do_malloc(lower_bound_to_grow); + } + if (new_ptr == NULL) { + // Either new_size is not a tiny increment, or last do_malloc failed. + new_ptr = do_malloc(new_size); + } + if (new_ptr == NULL) { + return NULL; + } + MallocHook::InvokeNewHook(new_ptr, new_size); + memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); + MallocHook::InvokeDeleteHook(old_ptr); + // We could use a variant of do_free() that leverages the fact + // that we already know the sizeclass of old_ptr. The benefit + // would be small, so don't bother. 
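+    // For a concrete view of the hysteresis above: with old_size == 1000,
+    // lower_bound_to_grow is 1250 and upper_bound_to_shrink is 500, so a
+    // realloc to 600 bytes is handled in place by the else-branch below,
+    // while a realloc to 1100 bytes reaches this point after first trying
+    // do_malloc(1250).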
+ do_free_with_callback(old_ptr, invalid_free_fn); + return new_ptr; + } else { + // We still need to call hooks to report the updated size: + MallocHook::InvokeDeleteHook(old_ptr); + MallocHook::InvokeNewHook(old_ptr, new_size); + return old_ptr; + } +} + +inline void* do_realloc(void* old_ptr, size_t new_size) { + return do_realloc_with_callback(old_ptr, new_size, + &InvalidFree, &InvalidGetSizeForRealloc); +} + +// For use by exported routines below that want specific alignments +// +// Note: this code can be slow, and can significantly fragment memory. +// The expectation is that memalign/posix_memalign/valloc/pvalloc will +// not be invoked very often. This requirement simplifies our +// implementation and allows us to tune for expected allocation +// patterns. +void* do_memalign(size_t align, size_t size) { + ASSERT((align & (align - 1)) == 0); + ASSERT(align > 0); + if (size + align < size) return NULL; // Overflow + + if (Static::pageheap() == NULL) ThreadCache::InitModule(); + + // Allocate at least one byte to avoid boundary conditions below + if (size == 0) size = 1; + + if (size <= kMaxSize && align < kPageSize) { + // Search through acceptable size classes looking for one with + // enough alignment. This depends on the fact that + // InitSizeClasses() currently produces several size classes that + // are aligned at powers of two. We will waste time and space if + // we miss in the size class array, but that is deemed acceptable + // since memalign() should be used rarely. + int cl = Static::sizemap()->SizeClass(size); + while (cl < kNumClasses && + ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) { + cl++; + } + if (cl < kNumClasses) { + ThreadCache* heap = ThreadCache::GetCache(); + return CheckedMallocResult(heap->Allocate( + Static::sizemap()->class_to_size(cl))); + } + } + + // We will allocate directly from the page heap + SpinLockHolder h(Static::pageheap_lock()); + + if (align <= kPageSize) { + // Any page-level allocation will be fine + // TODO: We could put the rest of this page in the appropriate + // TODO: cache but it does not seem worth it. + Span* span = Static::pageheap()->New(tcmalloc::pages(size)); + return span == NULL ? NULL : SpanToMallocResult(span); + } + + // Allocate extra pages and carve off an aligned portion + const Length alloc = tcmalloc::pages(size + align); + Span* span = Static::pageheap()->New(alloc); + if (span == NULL) return NULL; + + // Skip starting portion so that we end up aligned + Length skip = 0; + while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { + skip++; + } + ASSERT(skip < alloc); + if (skip > 0) { + Span* rest = Static::pageheap()->Split(span, skip); + Static::pageheap()->Delete(span); + span = rest; + } + + // Skip trailing portion that we do not need to return + const Length needed = tcmalloc::pages(size); + ASSERT(span->length >= needed); + if (span->length > needed) { + Span* trailer = Static::pageheap()->Split(span, needed); + Static::pageheap()->Delete(trailer); + } + return SpanToMallocResult(span); +} + +// Helpers for use by exported routines below: + +inline void do_malloc_stats() { + PrintStats(1); +} + +inline int do_mallopt(int cmd, int value) { + return 1; // Indicates error +} + +#ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance +inline struct mallinfo do_mallinfo() { + TCMallocStats stats; + ExtractStats(&stats, NULL); + + // Just some of the fields are filled in. 
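+  // Specifically: only arena, fsmblks, fordblks and uordblks are computed
+  // below; the remaining fields stay zero from the memset.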
+ struct mallinfo info; + memset(&info, 0, sizeof(info)); + + // Unfortunately, the struct contains "int" field, so some of the + // size values will be truncated. + info.arena = static_cast<int>(stats.pageheap.system_bytes); + info.fsmblks = static_cast<int>(stats.thread_bytes + + stats.central_bytes + + stats.transfer_bytes); + info.fordblks = static_cast<int>(stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); + info.uordblks = static_cast<int>(stats.pageheap.system_bytes + - stats.thread_bytes + - stats.central_bytes + - stats.transfer_bytes + - stats.pageheap.free_bytes + - stats.pageheap.unmapped_bytes); + + return info; +} +#endif // #ifndef HAVE_STRUCT_MALLINFO + +static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED); + +inline void* cpp_alloc(size_t size, bool nothrow) { + for (;;) { + void* p = do_malloc(size); +#ifdef PREANSINEW + return p; +#else + if (p == NULL) { // allocation failed + // Get the current new handler. NB: this function is not + // thread-safe. We make a feeble stab at making it so here, but + // this lock only protects against tcmalloc interfering with + // itself, not with other libraries calling set_new_handler. + std::new_handler nh; + { + SpinLockHolder h(&set_new_handler_lock); + nh = std::set_new_handler(0); + (void) std::set_new_handler(nh); + } +#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + if (nh) { + // Since exceptions are disabled, we don't really know if new_handler + // failed. Assume it will abort if it fails. + (*nh)(); + continue; + } + return 0; +#else + // If no new_handler is established, the allocation failed. + if (!nh) { + if (nothrow) return 0; + throw std::bad_alloc(); + } + // Otherwise, try the new_handler. If it returns, retry the + // allocation. If it throws std::bad_alloc, fail the allocation. + // if it throws something else, don't interfere. + try { + (*nh)(); + } catch (const std::bad_alloc&) { + if (!nothrow) throw; + return p; + } +#endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS) + } else { // allocation success + return p; + } +#endif // PREANSINEW + } +} + +} // end unnamed namespace + +// As promised, the definition of this function, declared above. +size_t TCMallocImplementation::GetAllocatedSize(void* ptr) { + return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize); +} + +void TCMallocImplementation::MarkThreadBusy() { + // Allocate to force the creation of a thread cache, but avoid + // invoking any hooks. + do_free(do_malloc(0)); +} + +//------------------------------------------------------------------- +// Exported routines +//------------------------------------------------------------------- + +extern "C" PERFTOOLS_DLL_DECL const char* tc_version( + int* major, int* minor, const char** patch) __THROW { + if (major) *major = TC_VERSION_MAJOR; + if (minor) *minor = TC_VERSION_MINOR; + if (patch) *patch = TC_VERSION_PATCH; + return TC_VERSION_STRING; +} + +// CAVEAT: The code structure below ensures that MallocHook methods are always +// called from the stack frame of the invoked allocation function. +// heap-checker.cc depends on this to start a stack trace from +// the call to the (de)allocation function. + +static int tc_new_mode = 0; // See tc_set_new_mode(). +extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW { + void* result = (tc_new_mode ? 
cpp_alloc(size, false) : do_malloc(size)); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + do_free(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n, + size_t elem_size) __THROW { + void* result = do_calloc(n, elem_size); + MallocHook::InvokeNewHook(result, n * elem_size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + do_free(ptr); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr, + size_t new_size) __THROW { + if (old_ptr == NULL) { + void* result = do_malloc(new_size); + MallocHook::InvokeNewHook(result, new_size); + return result; + } + if (new_size == 0) { + MallocHook::InvokeDeleteHook(old_ptr); + do_free(old_ptr); + return NULL; + } + return do_realloc(old_ptr, new_size); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) { + void* p = cpp_alloc(size, false); + // We keep this next instruction out of cpp_alloc for a reason: when + // it's in, and new just calls cpp_alloc, the optimizer may fold the + // new call into cpp_alloc, which messes up our whole section-based + // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc + // isn't the last thing this fn calls, and prevents the folding. + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow( + size_t size, const std::nothrow_t&) __THROW { + void* p = cpp_alloc(size, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free(p); +} + +// Compilers define and use this (via ::operator delete(ptr, nothrow)). +// But it's really the same as normal delete, so we just do the same thing. +extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow( + void* p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free(p); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) { + void* p = cpp_alloc(size, false); + // We keep this next instruction out of cpp_alloc for a reason: when + // it's in, and new just calls cpp_alloc, the optimizer may fold the + // new call into cpp_alloc, which messes up our whole section-based + // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc + // isn't the last thing this fn calls, and prevents the folding. 
+ MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow( + size_t size, const std::nothrow_t&) __THROW { + void* p = cpp_alloc(size, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free(p); +} + +extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow( + void* p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free(p); +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align, + size_t size) __THROW { + void* result = do_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign( + void** result_ptr, size_t align, size_t size) __THROW { + if (((align % sizeof(void*)) != 0) || + ((align & (align - 1)) != 0) || + (align == 0)) { + return EINVAL; + } + + void* result = do_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + if (result == NULL) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +static size_t pagesize = 0; + +extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) __THROW { + // Allocate page-aligned object of length >= size bytes + if (pagesize == 0) pagesize = getpagesize(); + void* result = do_memalign(pagesize, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) __THROW { + // Round up size to a multiple of pagesize + if (pagesize == 0) pagesize = getpagesize(); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } + size = (size + pagesize - 1) & ~(pagesize - 1); + void* result = do_memalign(pagesize, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW { + do_malloc_stats(); +} + +extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW { + return do_mallopt(cmd, value); +} + +#ifdef HAVE_STRUCT_MALLINFO +extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW { + return do_mallinfo(); +} +#endif + +// This function behaves similarly to MSVC's _set_new_mode. +// If flag is 0 (default), calls to malloc will behave normally. +// If flag is 1, calls to malloc will behave like calls to new, +// and the std_new_handler will be invoked on failure. +// Returns the previous mode. +extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW { + int old_mode = tc_new_mode; + tc_new_mode = flag; + return old_mode; +} + + +// Override __libc_memalign in libc on linux boxes specially. +// They have a bug in libc that causes them to (very rarely) allocate +// with __libc_memalign() yet deallocate with free() and the +// definitions above don't catch it. +// This function is an exception to the rule of calling MallocHook method +// from the stack frame of the allocation function; +// heap-checker handles this special case explicitly. 
+#ifndef TCMALLOC_FOR_DEBUGALLOCATION +static void *MemalignOverride(size_t align, size_t size, const void *caller) + __THROW ATTRIBUTE_SECTION(google_malloc); + +static void *MemalignOverride(size_t align, size_t size, const void *caller) + __THROW { + void* result = do_memalign(align, size); + MallocHook::InvokeNewHook(result, size); + return result; +} +void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; +#endif // #ifndef TCMALLOC_FOR_DEBUGALLOCATION diff --git a/third_party/tcmalloc/chromium/src/tcmalloc.h b/third_party/tcmalloc/chromium/src/tcmalloc.h new file mode 100644 index 0000000..3b0fe7c --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tcmalloc.h @@ -0,0 +1,69 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein <opensource@google.com> +// +// Some obscure memory-allocation routines may not be declared on all +// systems. In those cases, we'll just declare them ourselves. +// This file is meant to be used only internally, for unittests. + +#include <config.h> + +#ifndef _XOPEN_SOURCE +# define _XOPEN_SOURCE 600 // for posix_memalign +#endif +#include <stdlib.h> // for posix_memalign +// FreeBSD has malloc.h, but complains if you use it +#if defined(HAVE_MALLOC_H) && !defined(__FreeBSD__) +#include <malloc.h> // for memalign, valloc, pvalloc +#endif + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. 
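+// (On glibc itself, __THROW typically expands to an empty exception
+// specification, "throw ()", under C++, or to a nothrow function attribute
+// under plain C.)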
+#ifndef __THROW // I guess we're not on a glibc system +# define __THROW // __THROW is just an optimization, so ok to make it "" +#endif + +#if !HAVE_CFREE_SYMBOL +extern "C" void cfree(void* ptr) __THROW; +#endif +#if !HAVE_POSIX_MEMALIGN_SYMBOL +extern "C" int posix_memalign(void** ptr, size_t align, size_t size) __THROW; +#endif +#if !HAVE_MEMALIGN_SYMBOL +extern "C" void* memalign(size_t __alignment, size_t __size) __THROW; +#endif +#if !HAVE_VALLOC_SYMBOL +extern "C" void* valloc(size_t __size) __THROW; +#endif +#if !HAVE_PVALLOC_SYMBOL +extern "C" void* pvalloc(size_t __size) __THROW; +#endif diff --git a/third_party/tcmalloc/chromium/src/tcmalloc_guard.h b/third_party/tcmalloc/chromium/src/tcmalloc_guard.h new file mode 100644 index 0000000..7874dad --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tcmalloc_guard.h @@ -0,0 +1,48 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// We expose the TCMallocGuard class -- which initializes the tcmalloc +// allocator -- so classes that need to be sure tcmalloc is loaded +// before they do stuff -- notably heap-profiler -- can. To use this +// create a static TCMallocGuard instance at the top of a file where +// you need tcmalloc to be initialized before global constructors run. + +#ifndef TCMALLOC_TCMALLOC_GUARD_H_ +#define TCMALLOC_TCMALLOC_GUARD_H_ + +class TCMallocGuard { + public: + TCMallocGuard(); + ~TCMallocGuard(); +}; + +#endif // TCMALLOC_TCMALLOC_GUARD_H_ diff --git a/third_party/tcmalloc/chromium/src/tests/addressmap_unittest.cc b/third_party/tcmalloc/chromium/src/tests/addressmap_unittest.cc new file mode 100644 index 0000000..bfbb9a8 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/addressmap_unittest.cc @@ -0,0 +1,170 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat + +#include <stdlib.h> // for rand() +#include <vector> +#include <set> +#include <algorithm> +#include <utility> +#include "addressmap-inl.h" +#include "base/logging.h" +#include "base/commandlineflags.h" + +DEFINE_int32(iters, 20, "Number of test iterations"); +DEFINE_int32(N, 100000, "Number of elements to test per iteration"); + +using std::pair; +using std::make_pair; +using std::vector; +using std::set; +using std::random_shuffle; + +struct UniformRandomNumberGenerator { + size_t Uniform(size_t max_size) { + if (max_size == 0) + return 0; + return rand() % max_size; // not a great random-number fn, but portable + } +}; +static UniformRandomNumberGenerator rnd; + + +// pair of associated value and object size +typedef pair<int, size_t> ValueT; + +struct PtrAndSize { + char* ptr; + size_t size; + PtrAndSize(char* p, size_t s) : ptr(p), size(s) {} +}; + +size_t SizeFunc(const ValueT& v) { return v.second; } + +static void SetCheckCallback(const void* ptr, ValueT* val, + set<pair<const void*, int> >* check_set) { + check_set->insert(make_pair(ptr, val->first)); +} + +int main(int argc, char** argv) { + // Get a bunch of pointers + const int N = FLAGS_N; + static const int kMaxRealSize = 49; + // 100Mb to stress not finding previous object (AddressMap's cluster is 1Mb): + static const size_t kMaxSize = 100*1000*1000; + vector<PtrAndSize> ptrs_and_sizes; + for (int i = 0; i < N; ++i) { + size_t s = rnd.Uniform(kMaxRealSize); + ptrs_and_sizes.push_back(PtrAndSize(new char[s], s)); + } + + for (int x = 0; x < FLAGS_iters; ++x) { + RAW_LOG(INFO, "Iteration %d/%d...\n", x, FLAGS_iters); + + // Permute pointers to get rid of allocation order issues + random_shuffle(ptrs_and_sizes.begin(), ptrs_and_sizes.end()); + + AddressMap<ValueT> map(malloc, free); + const ValueT* result; + const void* res_p; + + // Insert a bunch of entries + for (int i = 0; i < N; ++i) { + char* p = ptrs_and_sizes[i].ptr; + CHECK(!map.Find(p)); + int offs = rnd.Uniform(ptrs_and_sizes[i].size); + 
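+      // Nothing has been inserted for p yet, so looking up an interior
+      // address of its object (p + offs) should also find nothing.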
CHECK(!map.FindInside(&SizeFunc, kMaxSize, p + offs, &res_p)); + map.Insert(p, make_pair(i, ptrs_and_sizes[i].size)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i); + CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p)); + CHECK_EQ(res_p, p); + CHECK_EQ(result->first, i); + map.Insert(p, make_pair(i + N, ptrs_and_sizes[i].size)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + N); + } + + // Delete the even entries + for (int i = 0; i < N; i += 2) { + void* p = ptrs_and_sizes[i].ptr; + ValueT removed; + CHECK(map.FindAndRemove(p, &removed)); + CHECK_EQ(removed.first, i + N); + } + + // Lookup the odd entries and adjust them + for (int i = 1; i < N; i += 2) { + char* p = ptrs_and_sizes[i].ptr; + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + N); + int offs = rnd.Uniform(ptrs_and_sizes[i].size); + CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p)); + CHECK_EQ(res_p, p); + CHECK_EQ(result->first, i + N); + map.Insert(p, make_pair(i + 2*N, ptrs_and_sizes[i].size)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + 2*N); + } + + // Insert even entries back + for (int i = 0; i < N; i += 2) { + char* p = ptrs_and_sizes[i].ptr; + int offs = rnd.Uniform(ptrs_and_sizes[i].size); + CHECK(!map.FindInside(&SizeFunc, kMaxSize, p + offs, &res_p)); + map.Insert(p, make_pair(i + 2*N, ptrs_and_sizes[i].size)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + 2*N); + CHECK(result = map.FindInside(&SizeFunc, kMaxRealSize, p + offs, &res_p)); + CHECK_EQ(res_p, p); + CHECK_EQ(result->first, i + 2*N); + } + + // Check all entries + set<pair<const void*, int> > check_set; + map.Iterate(SetCheckCallback, &check_set); + CHECK_EQ(check_set.size(), N); + for (int i = 0; i < N; ++i) { + void* p = ptrs_and_sizes[i].ptr; + check_set.erase(make_pair(p, i + 2*N)); + CHECK(result = map.Find(p)); + CHECK_EQ(result->first, i + 2*N); + } + CHECK_EQ(check_set.size(), 0); + } + + for (int i = 0; i < N; ++i) { + delete[] ptrs_and_sizes[i].ptr; + } + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/atomicops_unittest.cc b/third_party/tcmalloc/chromium/src/tests/atomicops_unittest.cc new file mode 100644 index 0000000..5a620f5 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/atomicops_unittest.cc @@ -0,0 +1,257 @@ +/* Copyright (c) 2006, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat + */ + +#include <stdio.h> +#include "base/logging.h" +#include "base/atomicops.h" + +#define GG_ULONGLONG(x) static_cast<uint64>(x) + +template <class AtomicType> +static void TestAtomicIncrement() { + // For now, we just test single threaded execution + + // use a guard value to make sure the NoBarrier_AtomicIncrement doesn't go + // outside the expected address bounds. This is in particular to + // test that some future change to the asm code doesn't cause the + // 32-bit NoBarrier_AtomicIncrement doesn't do the wrong thing on 64-bit + // machines. + struct { + AtomicType prev_word; + AtomicType count; + AtomicType next_word; + } s; + + AtomicType prev_word_value, next_word_value; + memset(&prev_word_value, 0xFF, sizeof(AtomicType)); + memset(&next_word_value, 0xEE, sizeof(AtomicType)); + + s.prev_word = prev_word_value; + s.count = 0; + s.next_word = next_word_value; + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 1), 1); + CHECK_EQ(s.count, 1); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 2), 3); + CHECK_EQ(s.count, 3); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 3), 6); + CHECK_EQ(s.count, 6); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -3), 3); + CHECK_EQ(s.count, 3); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -2), 1); + CHECK_EQ(s.count, 1); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -1), 0); + CHECK_EQ(s.count, 0); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -1), -1); + CHECK_EQ(s.count, -1); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, -4), -5); + CHECK_EQ(s.count, -5); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); + + CHECK_EQ(base::subtle::NoBarrier_AtomicIncrement(&s.count, 5), 0); + CHECK_EQ(s.count, 0); + CHECK_EQ(s.prev_word, prev_word_value); + CHECK_EQ(s.next_word, next_word_value); +} + + +#define NUM_BITS(T) (sizeof(T) * 8) + + +template <class AtomicType> +static void TestCompareAndSwap() { + AtomicType value = 0; + AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1); + CHECK_EQ(1, value); + CHECK_EQ(0, prev); + + // Use test value that has non-zero bits in both halves, more for testing + // 64-bit implementation on 32-bit platforms. 
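+  // For a 32-bit AtomicType this is (1 << 30) + 11 == 0x4000000B; for a
+  // 64-bit type it is (1 << 62) + 11 == 0x400000000000000B.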
+ const AtomicType k_test_val = (GG_ULONGLONG(1) << + (NUM_BITS(AtomicType) - 2)) + 11; + value = k_test_val; + prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5); + CHECK_EQ(k_test_val, value); + CHECK_EQ(k_test_val, prev); + + value = k_test_val; + prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5); + CHECK_EQ(5, value); + CHECK_EQ(k_test_val, prev); +} + + +template <class AtomicType> +static void TestAtomicExchange() { + AtomicType value = 0; + AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1); + CHECK_EQ(1, value); + CHECK_EQ(0, new_value); + + // Use test value that has non-zero bits in both halves, more for testing + // 64-bit implementation on 32-bit platforms. + const AtomicType k_test_val = (GG_ULONGLONG(1) << + (NUM_BITS(AtomicType) - 2)) + 11; + value = k_test_val; + new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val); + CHECK_EQ(k_test_val, value); + CHECK_EQ(k_test_val, new_value); + + value = k_test_val; + new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5); + CHECK_EQ(5, value); + CHECK_EQ(k_test_val, new_value); +} + + +template <class AtomicType> +static void TestAtomicIncrementBounds() { + // Test increment at the half-width boundary of the atomic type. + // It is primarily for testing at the 32-bit boundary for 64-bit atomic type. + AtomicType test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2); + AtomicType value = test_val - 1; + AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1); + CHECK_EQ(test_val, value); + CHECK_EQ(value, new_value); + + base::subtle::NoBarrier_AtomicIncrement(&value, -1); + CHECK_EQ(test_val - 1, value); +} + +// This is a simple sanity check that values are correct. Not testing +// atomicity +template <class AtomicType> +static void TestStore() { + const AtomicType kVal1 = static_cast<AtomicType>(0xa5a5a5a5a5a5a5a5LL); + const AtomicType kVal2 = static_cast<AtomicType>(-1); + + AtomicType value; + + base::subtle::NoBarrier_Store(&value, kVal1); + CHECK_EQ(kVal1, value); + base::subtle::NoBarrier_Store(&value, kVal2); + CHECK_EQ(kVal2, value); + + base::subtle::Acquire_Store(&value, kVal1); + CHECK_EQ(kVal1, value); + base::subtle::Acquire_Store(&value, kVal2); + CHECK_EQ(kVal2, value); + + base::subtle::Release_Store(&value, kVal1); + CHECK_EQ(kVal1, value); + base::subtle::Release_Store(&value, kVal2); + CHECK_EQ(kVal2, value); +} + +// This is a simple sanity check that values are correct. 
Not testing +// atomicity +template <class AtomicType> +static void TestLoad() { + const AtomicType kVal1 = static_cast<AtomicType>(0xa5a5a5a5a5a5a5a5LL); + const AtomicType kVal2 = static_cast<AtomicType>(-1); + + AtomicType value; + + value = kVal1; + CHECK_EQ(kVal1, base::subtle::NoBarrier_Load(&value)); + value = kVal2; + CHECK_EQ(kVal2, base::subtle::NoBarrier_Load(&value)); + + value = kVal1; + CHECK_EQ(kVal1, base::subtle::Acquire_Load(&value)); + value = kVal2; + CHECK_EQ(kVal2, base::subtle::Acquire_Load(&value)); + + value = kVal1; + CHECK_EQ(kVal1, base::subtle::Release_Load(&value)); + value = kVal2; + CHECK_EQ(kVal2, base::subtle::Release_Load(&value)); +} + +template <class AtomicType> +static void TestAtomicOps() { + TestCompareAndSwap<AtomicType>(); + TestAtomicExchange<AtomicType>(); + TestAtomicIncrementBounds<AtomicType>(); + TestStore<AtomicType>(); + TestLoad<AtomicType>(); +} + +int main(int argc, char** argv) { + TestAtomicIncrement<AtomicWord>(); + TestAtomicIncrement<Atomic32>(); + + TestAtomicOps<AtomicWord>(); + TestAtomicOps<Atomic32>(); + + // I've commented the Atomic64 tests out for now, because Atomic64 + // doesn't work on x86 systems that are not compiled to support mmx + // registers. Since I want this project to be as portable as + // possible -- that is, not to assume we've compiled for mmx or even + // that the processor supports it -- and we don't actually use + // Atomic64 anywhere, I've commented it out of the test for now. + // (Luckily, if we ever do use Atomic64 by accident, we'll get told + // via a compiler error rather than some obscure runtime failure, so + // this course of action is safe.) + // If we ever *do* want to enable this, try adding -msse (or -mmmx?) + // to the CXXFLAGS in Makefile.am. +#if 0 and defined(BASE_HAS_ATOMIC64) + TestAtomicIncrement<base::subtle::Atomic64>(); + TestAtomicOps<base::subtle::Atomic64>(); +#endif + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc new file mode 100644 index 0000000..4274b7e --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.cc @@ -0,0 +1,223 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Fred Akalin + +#include <stdio.h> +#include <stdlib.h> +#include <vector> +#include "google/malloc_extension.h" +#include "base/logging.h" + +using std::vector; + +vector<void (*)()> g_testlist; // the tests to run + +#define TEST(a, b) \ + struct Test_##a##_##b { \ + Test_##a##_##b() { g_testlist.push_back(&Run); } \ + static void Run(); \ + }; \ + static Test_##a##_##b g_test_##a##_##b; \ + void Test_##a##_##b::Run() + + +static int RUN_ALL_TESTS() { + vector<void (*)()>::const_iterator it; + for (it = g_testlist.begin(); it != g_testlist.end(); ++it) { + (*it)(); // The test will error-exit if there's a problem. + } + fprintf(stderr, "\nPassed %d tests\n\nPASS\n", + static_cast<int>(g_testlist.size())); + return 0; +} + +// The death tests are meant to be run from a shell-script driver, which +// passes in an integer saying which death test to run. We store that +// test-to-run here, and in the macro use a counter to see when we get +// to that test, so we can run it. +static int test_to_run = 0; // set in main() based on argv +static int test_counter = 0; // incremented every time the macro is called +#define IF_DEBUG_EXPECT_DEATH(statement, regex) do { \ + if (test_counter++ == test_to_run) { \ + fprintf(stderr, "Expected regex:%s\n", regex); \ + statement; \ + } \ +} while (false) + +// This flag won't be compiled in in opt mode. +DECLARE_int32(max_free_queue_size); + +TEST(DebugAllocationTest, DeallocMismatch) { + // Allocate with malloc. + { + int* x = static_cast<int*>(malloc(sizeof(*x))); + IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete"); + IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]"); + // Should work fine. + free(x); + } + + // Allocate with new. + { + int* x = new int; + IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); + IF_DEBUG_EXPECT_DEATH(delete [] x, "mismatch.*being dealloc.*delete *[[]"); + delete x; + } + + // Allocate with new[]. + { + int* x = new int[1]; + IF_DEBUG_EXPECT_DEATH(free(x), "mismatch.*being dealloc.*free"); + IF_DEBUG_EXPECT_DEATH(delete x, "mismatch.*being dealloc.*delete"); + delete [] x; + } +} + +TEST(DebugAllocationTest, FreeQueueTest) { + // Verify that the allocator doesn't return blocks that were recently freed. + int* x = new int; + int* old_x = x; + delete x; + x = new int; + #if 1 + // This check should not be read as a universal guarantee of behavior. If + // other threads are executing, it would be theoretically possible for this + // check to fail despite the efforts of debugallocation.cc to the contrary. + // It should always hold under the controlled conditions of this unittest, + // however. + EXPECT_NE(x, old_x); // Allocator shouldn't return recently freed blocks + #else + // The below check passes, but since it isn't *required* to pass, I've left + // it commented out. + // EXPECT_EQ(x, old_x); + #endif + old_x = NULL; // avoid breaking opt build with an unused variable warning. 
+ delete x; +} + +TEST(DebugAllocationTest, DanglingPointerWriteTest) { + // This test can only be run if debugging. + // + // If not debugging, the 'new' following the dangling write might not be + // safe. When debugging, we expect the (trashed) deleted block to be on the + // list of recently-freed blocks, so the following 'new' will be safe. +#if 1 + int* x = new int; + delete x; + int poisoned_x_value = *x; + *x = 1; // a dangling write. + + char* s = new char[FLAGS_max_free_queue_size]; + // When we delete s, we push the storage that was previously allocated to x + // off the end of the free queue. At that point, the write to that memory + // will be detected. + IF_DEBUG_EXPECT_DEATH(delete [] s, "Memory was written to after being freed."); + + // restore the poisoned value of x so that we can delete s without causing a + // crash. + *x = poisoned_x_value; + delete [] s; +#endif +} + +TEST(DebugAllocationTest, DanglingWriteAtExitTest) { + int *x = new int; + delete x; + int old_x_value = *x; + *x = 1; + // verify that dangling writes are caught at program termination if the + // corrupted block never got pushed off of the end of the free queue. + IF_DEBUG_EXPECT_DEATH(exit(0), "Memory was written to after being freed."); + *x = old_x_value; // restore x so that the test can exit successfully. +} + +static size_t CurrentlyAllocatedBytes() { + size_t value; + CHECK(MallocExtension::instance()->GetNumericProperty( + "generic.current_allocated_bytes", &value)); + return value; +} + +TEST(DebugAllocationTest, CurrentlyAllocated) { + // Clear the free queue +#if 1 + FLAGS_max_free_queue_size = 0; + // Force a round-trip through the queue management code so that the + // new size is seen and the queue of recently-freed blocks is flushed. + free(malloc(1)); + FLAGS_max_free_queue_size = 1048576; +#endif + + // Free something and check that it disappears from allocated bytes + // immediately. + char* p = new char[1000]; + size_t after_malloc = CurrentlyAllocatedBytes(); + delete[] p; + size_t after_free = CurrentlyAllocatedBytes(); + EXPECT_LE(after_free, after_malloc - 1000); +} + +TEST(DebugAllocationTest, GetAllocatedSizeTest) { +#if 1 + // When debug_allocation is in effect, GetAllocatedSize should return + // exactly requested size, since debug_allocation doesn't allow users + // to write more than that. + for (int i = 0; i < 10; ++i) { + void *p = malloc(i); + EXPECT_EQ(i, MallocExtension::instance()->GetAllocatedSize(p)); + free(p); + } +#endif + void* a = malloc(1000); + EXPECT_GE(MallocExtension::instance()->GetAllocatedSize(a), 1000); + // This is just a sanity check. If we allocated too much, alloc is broken + EXPECT_LE(MallocExtension::instance()->GetAllocatedSize(a), 5000); + EXPECT_GE(MallocExtension::instance()->GetEstimatedAllocatedSize(1000), 1000); + free(a); +} + +int main(int argc, char** argv) { + // If you run without args, we run the non-death parts of the test. + // Otherwise, argv[1] should be a number saying which death-test + // to run. We will output a regexp we expect the death-message + // to include, and then run the given death test (which hopefully + // will produce that error message). If argv[1] > the number of + // death tests, we will run only the non-death parts. One way to + // tell when you are done with all tests is when no 'expected + // regexp' message is printed for a given argv[1]. 
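+  // For example, "debugallocation_test 0" first prints
+  //   Expected regex:mismatch.*being dealloc.*delete
+  // and then (with debug allocation active) dies inside the first
+  // IF_DEBUG_EXPECT_DEATH above; debugallocation_test.sh drives argv[1]
+  // from 0 upward until no "Expected regex:" line is printed.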
+ if (argc < 2) { + test_to_run = -1; // will never match + } else { + test_to_run = atoi(argv[1]); + } + return RUN_ALL_TESTS(); +} diff --git a/third_party/tcmalloc/chromium/src/tests/debugallocation_test.sh b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.sh new file mode 100644 index 0000000..2568d54 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/debugallocation_test.sh @@ -0,0 +1,78 @@ +#!/bin/sh + +# Copyright (c) 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# --- +# Author: Craig Silverstein + +BINDIR="${BINDIR:-.}" + +if [ "x$1" = "x-h" -o "x$1" = "x--help" ]; then + echo "USAGE: $0 [unittest dir]" + echo " By default, unittest_dir=$BINDIR" + exit 1 +fi + +DEBUGALLOCATION_TEST="${1:-$BINDIR/debugallocation_test}" + +num_failures=0 + +# Run the i-th death test and make sure the test has the expected +# regexp. We can depend on the first line of the output being +# Expected regex:<regex> +# Evaluates to "done" if we are not actually a death-test (so $1 is +# too big a number, and we can stop). Evaluates to "" otherwise. +# Increments num_failures if the death test does not succeed. +OneDeathTest() { + "$DEBUGALLOCATION_TEST" "$1" 2>&1 | { + read regex_line + regex=`expr "$regex_line" : "Expected regex:\(.*\)"` + test -z "$regex" && echo "done" # no regex line, not a death-case + grep "$regex" >/dev/null 2>&1 # pass the rest of the lines through grep + } || num_failures=`expr $num_failures + 1` +} + +death_test_num=0 # which death test to run +while test -z `OneDeathTest "$death_test_num"`; do + echo "Done with death test $death_test_num" + death_test_num=`expr $death_test_num + 1` +done + +# Test the non-death parts of the test too +if ! 
"$DEBUGALLOCATION_TEST"; then + num_failures=`expr $num_failures + 1` +fi + +if [ "$num_failures" = 0 ]; then + echo "PASS" +else + echo "Failed with $num_failures failures" +fi +exit $num_failures diff --git a/third_party/tcmalloc/chromium/src/tests/frag_unittest.cc b/third_party/tcmalloc/chromium/src/tests/frag_unittest.cc new file mode 100644 index 0000000..08494b4 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/frag_unittest.cc @@ -0,0 +1,121 @@ +// Copyright (c) 2003, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat +// +// Test speed of handling fragmented heap + +#include "config_for_unittests.h" +#include <stdlib.h> +#include <stdio.h> +#ifdef HAVE_SYS_RESOURCE_H +#include <sys/time.h> // for struct timeval +#include <sys/resource.h> // for getrusage +#endif +#ifdef _WIN32 +#include <windows.h> // for GetTickCount() +#endif +#include <vector> +#include "base/logging.h" +#include <google/malloc_extension.h> + +using std::vector; + +int main(int argc, char** argv) { + static const int kAllocSize = 36<<10; // Bigger than tcmalloc page size + static const int kTotalAlloc = 400 << 20; // Allocate 400MB in total + static const int kAllocIterations = kTotalAlloc / kAllocSize; + + // Allocate lots of objects + vector<char*> saved(kAllocIterations); + for (int i = 0; i < kAllocIterations; i++) { + saved[i] = new char[kAllocSize]; + } + + // Free alternating ones to fragment heap + size_t free_bytes = 0; + for (int i = 0; i < saved.size(); i += 2) { + delete[] saved[i]; + free_bytes += kAllocSize; + } + + // Check that slack is within 10% of expected + size_t slack; + MallocExtension::instance()->GetNumericProperty("tcmalloc.slack_bytes", + &slack); + CHECK_GT(double(slack), 0.9*free_bytes); + CHECK_LT(double(slack), 1.1*free_bytes); + + // Dump malloc stats + static const int kBufSize = 1<<20; + char* buffer = new char[kBufSize]; + MallocExtension::instance()->GetStats(buffer, kBufSize); + VLOG(1, "%s", buffer); + delete[] buffer; + + // Now do timing tests + for (int i = 0; i < 5; i++) { + static const int kIterations = 100000; +#ifdef HAVE_SYS_RESOURCE_H + struct rusage r; + getrusage(RUSAGE_SELF, &r); // figure out user-time spent on this + struct timeval tv_start = r.ru_utime; +#elif defined(_WIN32) + long long int tv_start = GetTickCount(); +#else +# error No way to calculate time on your system +#endif + + for (int i = 0; i < kIterations; i++) { + size_t s; + MallocExtension::instance()->GetNumericProperty("tcmalloc.slack_bytes", + &s); + } + +#ifdef HAVE_SYS_RESOURCE_H + getrusage(RUSAGE_SELF, &r); + struct timeval tv_end = r.ru_utime; + int64 sumsec = static_cast<int64>(tv_end.tv_sec) - tv_start.tv_sec; + int64 sumusec = static_cast<int64>(tv_end.tv_usec) - tv_start.tv_usec; +#elif defined(_WIN32) + long long int tv_end = GetTickCount(); + int64 sumsec = (tv_end - tv_start) / 1000; + // Resolution in windows is only to the millisecond, alas + int64 sumusec = ((tv_end - tv_start) % 1000) * 1000; +#else +# error No way to calculate time on your system +#endif + fprintf(stderr, "getproperty: %6.1f ns/call\n", + (sumsec * 1e9 + sumusec * 1e3) / kIterations); + } + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/getpc_test.cc b/third_party/tcmalloc/chromium/src/tests/getpc_test.cc new file mode 100644 index 0000000..f1497d5 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/getpc_test.cc @@ -0,0 +1,120 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// This verifies that GetPC works correctly. This test uses a minimum +// of Google infrastructure, to make it very easy to port to various +// O/Ses and CPUs and test that GetPC is working. + +#include "config.h" +#include "getpc.h" // should be first to get the _GNU_SOURCE dfn +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <sys/time.h> // for setitimer + +// Needs to be volatile so compiler doesn't try to optimize it away +static volatile void* getpc_retval = NULL; // what GetPC returns +static volatile bool prof_handler_called = false; + +static void prof_handler(int sig, siginfo_t*, void* signal_ucontext) { + if (!prof_handler_called) + getpc_retval = GetPC(*reinterpret_cast<ucontext_t*>(signal_ucontext)); + prof_handler_called = true; // only store the retval once +} + +static void RoutineCallingTheSignal() { + struct sigaction sa; + sa.sa_sigaction = prof_handler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGPROF, &sa, NULL) != 0) { + perror("sigaction"); + exit(1); + } + + struct itimerval timer; + timer.it_interval.tv_sec = 0; + timer.it_interval.tv_usec = 1000; + timer.it_value = timer.it_interval; + setitimer(ITIMER_PROF, &timer, 0); + + // Now we need to do some work for a while, that doesn't call any + // other functions, so we can be guaranteed that when the SIGPROF + // fires, we're the routine executing. + int r = 0; + for (int i = 0; !prof_handler_called; ++i) { + for (int j = 0; j < i; j++) { + r ^= i; + r <<= 1; + r ^= j; + r >>= 1; + } + } + + // Now make sure the above loop doesn't get optimized out + srand(r); +} + +// This is an upper bound of how many bytes the instructions for +// RoutineCallingTheSignal might be. There's probably a more +// principled way to do this, but I don't know how portable it would be. +// (The function is 372 bytes when compiled with -g on Mac OS X 10.4. +// I can imagine it would be even bigger in 64-bit architectures.) +const int kRoutineSize = 512 * sizeof(void*)/4; // allow 1024 for 64-bit + +int main(int argc, char** argv) { + RoutineCallingTheSignal(); + + // Annoyingly, C++ disallows casting pointer-to-function to + // pointer-to-object, so we use a C-style cast instead. + char* expected = (char*)&RoutineCallingTheSignal; + char* actual = (char*)getpc_retval; + + // For ia64, ppc64, and parisc64, the function pointer is actually + // a struct. For instance, ia64's dl-fptr.h: + // struct fdesc { /* An FDESC is a function descriptor. 
*/ + // ElfW(Addr) ip; /* code entry point */ + // ElfW(Addr) gp; /* global pointer */ + // }; + // We want the code entry point. +#if defined(__ia64) || defined(__ppc64) // NOTE: ppc64 is UNTESTED + expected = ((char**)expected)[0]; // this is "ip" +#endif + + if (actual < expected || actual > expected + kRoutineSize) { + printf("Test FAILED: actual PC: %p, expected PC: %p\n", actual, expected); + return 1; + } else { + printf("PASS\n"); + return 0; + } +} diff --git a/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh new file mode 100644 index 0000000..9f0c08c --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/heap-checker-death_unittest.sh @@ -0,0 +1,176 @@ +#!/bin/sh +# Copyright (c) 2005, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Author: Maxim Lifantsev +# +# Run the heap checker unittest in a mode where it is supposed to crash and +# return an error if it doesn't. + +# We expect BINDIR to be set in the environment. +# If not, we set it to some reasonable value. +BINDIR="${BINDIR:-.}" + +if [ "x$1" = "x-h" -o "x$1" = "x--help" ]; then + echo "USAGE: $0 [unittest dir]" + echo " By default, unittest_dir=$BINDIR" + exit 1 +fi + +EXE="${1:-$BINDIR}/heap-checker_unittest" +TMPDIR="/tmp/heap_check_death_info" + +ALARM() { + # You need perl to run pprof, so I assume it's installed + perl -e ' + $timeout=$ARGV[0]; shift; + $retval = 255; # the default retval, for the case where we timed out + eval { # need to run in an eval-block to trigger during system() + local $SIG{ALRM} = sub { die "alarm\n" }; # \n is required! + alarm $timeout; + $retval = system(@ARGV); + # Make retval bash-style: exit status, or 128+n if terminated by signal n + $retval = ($retval & 127) ? 
(128 + $retval) : ($retval >> 8); + alarm 0; + }; + exit $retval; # return system()-retval, or 255 if system() never returned +' "$@" +} + +# $1: timeout for alarm; +# $2: regexp of expected exit code(s); +# $3: regexp to match a line in the output; +# $4: regexp to not match a line in the output; +# $5+ args to pass to $EXE +Test() { + # Note: make sure these varnames don't conflict with any vars outside Test()! + timeout="$1" + shift + expected_ec="$1" + shift + expected_regexp="$1" + shift + unexpected_regexp="$1" + shift + + echo -n "Testing $EXE with $@ ... " + output="$TMPDIR/output" + ALARM $timeout env "$@" $EXE > "$output" 2>&1 + actual_ec=$? + ec_ok=$(expr "$actual_ec" : "$expected_ec$" >/dev/null || echo false) + matches_ok=$(test -z "$expected_regexp" || \ + grep -q "$expected_regexp" "$output" || echo false) + negmatches_ok=$(test -z "$unexpected_regexp" || \ + ! grep -q "$unexpected_regexp" "$output" || echo false) + if $ec_ok && $matches_ok && $negmatches_ok; then + echo "PASS" + return 0 # 0: success + fi + # If we get here, we failed. Now we just need to report why + echo "FAIL" + if [ $actual_ec -eq 255 ]; then # 255 == SIGTERM due to $ALARM + echo "Test was taking unexpectedly long time to run and so we aborted it." + echo "Try the test case manually or raise the timeout from $timeout" + echo "to distinguish test slowness from a real problem." + else + $ec_ok || \ + echo "Wrong exit code: expected: '$expected_ec'; actual: $actual_ec" + $matches_ok || \ + echo "Output did not match '$expected_regexp'" + $negmatches_ok || \ + echo "Output unexpectedly matched '$unexpected_regexp'" + fi + echo "Output from failed run:" + echo "---" + cat "$output" + echo "---" + return 1 # 1: failure +} + +TMPDIR=/tmp/heap_check_death_info +rm -rf $TMPDIR || exit 1 +mkdir $TMPDIR || exit 2 + +export HEAPCHECK=strict # default mode + +# These invocations should pass (0 == PASS): + +# This tests that turning leak-checker off dynamically works fine +Test 120 0 "^PASS$" "" HEAPCHECK="" || exit 1 + +# This disables threads so we can cause leaks reliably and test finding them +Test 120 0 "^PASS$" "" HEAP_CHECKER_TEST_NO_THREADS=1 || exit 2 + +# Test that --test_cancel_global_check works +Test 20 0 "Canceling .* whole-program .* leak check$" "" \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_TEST_CANCEL_GLOBAL_CHECK=1 || exit 3 +Test 20 0 "Canceling .* whole-program .* leak check$" "" \ + HEAP_CHECKER_TEST_TEST_LOOP_LEAK=1 HEAP_CHECKER_TEST_TEST_CANCEL_GLOBAL_CHECK=1 || exit 4 + +# Test that very early log messages are present and controllable: +EARLY_MSG="Starting tracking the heap$" + +Test 60 0 "$EARLY_MSG" "" \ + HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ + PERFTOOLS_VERBOSE=1 || exit 5 +Test 60 0 "MemoryRegionMap Init$" "" \ + HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ + PERFTOOLS_VERBOSE=2 || exit 6 +Test 60 0 "" "$EARLY_MSG" \ + HEAPCHECK="" HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 \ + PERFTOOLS_VERBOSE=-2 || exit 7 + +# These invocations should fail with very high probability, +# rather than return 0 or hang (1 == exit(1), 134 == abort(), 139 = SIGSEGV): + +Test 60 1 "Exiting .* because of .* leaks$" "" \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 || exit 8 +Test 60 1 "Exiting .* because of .* leaks$" "" \ + HEAP_CHECKER_TEST_TEST_LOOP_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 || exit 9 + +# Test that we produce a reasonable textual leak report. 
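+# ("MakeALeak" below is the name of the allocating function in
+# heap-checker_unittest.cc, so it is expected to show up in the symbolized
+# stack trace that the leak report prints.)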
+Test 60 1 "MakeALeak" "" \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECK_TEST_NO_THREADS=1 \ + || exit 10 + +# Test that very early log messages are present and controllable: +Test 60 1 "Starting tracking the heap$" "" \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=1 \ + || exit 11 +Test 60 1 "" "Starting tracking the heap" \ + HEAP_CHECKER_TEST_TEST_LEAK=1 HEAP_CHECKER_TEST_NO_THREADS=1 PERFTOOLS_VERBOSE=-1 \ + || exit 12 + +cd / # so we're not in TMPDIR when we delete it +rm -rf $TMPDIR + +echo "PASS" + +exit 0 diff --git a/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.cc b/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.cc new file mode 100644 index 0000000..3f4e7f1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.cc @@ -0,0 +1,1493 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Maxim Lifantsev +// +// Running: +// ./heap-checker_unittest +// +// If the unittest crashes because it can't find pprof, try: +// PPROF_PATH=/usr/local/someplace/bin/pprof ./heap-checker_unittest +// +// To test that the whole-program heap checker will actually cause a leak, try: +// HEAPCHECK_TEST_LEAK= ./heap-checker_unittest +// HEAPCHECK_TEST_LOOP_LEAK= ./heap-checker_unittest +// +// Note: Both of the above commands *should* abort with an error message. + +// CAVEAT: Do not use vector<> and string on-heap objects in this test, +// otherwise the test can sometimes fail for tricky leak checks +// when we want some allocated object not to be found live by the heap checker. +// This can happen with memory allocators like tcmalloc that can allocate +// heap objects back to back without any book-keeping data in between. +// What happens is that end-of-storage pointers of a live vector +// (or a string depending on the STL implementation used) +// can happen to point to that other heap-allocated +// object that is not reachable otherwise and that +// we don't want to be reachable. 
+// +// The implication of this for real leak checking +// is just one more chance for the liveness flood to be inexact +// (see the comment in our .h file). + +#include "config_for_unittests.h" +#ifdef HAVE_POLL_H +#include <poll.h> +#endif +#if defined HAVE_STDINT_H +#include <stdint.h> // to get uint16_t (ISO naming madness) +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place uint16_t might be defined +#endif +#include <sys/types.h> +#include <stdlib.h> +#include <errno.h> // errno +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for sleep(), geteuid() +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> +#endif +#include <fcntl.h> // for open(), close() +// FreeBSD has malloc.h, but complains if you use it +#if defined(HAVE_MALLOC_H) && !defined(__FreeBSD__) +#include <malloc.h> +#endif + +#ifdef HAVE_EXECINFO_H +#include <execinfo.h> // backtrace +#endif +#ifdef HAVE_GRP_H +#include <grp.h> // getgrent, getgrnam +#endif +#ifdef HAVE_PWD_H +#include <pwd.h> +#endif + +#include <iostream> // for cout +#include <iomanip> // for hex +#include <set> +#include <map> +#include <list> +#include <memory> +#include <vector> +#include <string> + +#include "base/googleinit.h" +#include "base/logging.h" +#include "base/commandlineflags.h" +#include "base/thread_lister.h" +#include <google/heap-checker.h> +#include "memory_region_map.h" +#include <google/malloc_extension.h> +#include <google/stacktrace.h> + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +using namespace std; + +// ========================================================================= // + +// TODO(maxim): write a shell script to test that these indeed crash us +// (i.e. we do detect leaks) +// Maybe add more such crash tests. + +DEFINE_bool(test_leak, + EnvToBool("HEAP_CHECKER_TEST_TEST_LEAK", false), + "If should cause a leak crash"); +DEFINE_bool(test_loop_leak, + EnvToBool("HEAP_CHECKER_TEST_TEST_LOOP_LEAK", false), + "If should cause a looped leak crash"); +DEFINE_bool(test_register_leak, + EnvToBool("HEAP_CHECKER_TEST_TEST_REGISTER_LEAK", false), + "If should cause a leak crash by hiding a pointer " + "that is only in a register"); +DEFINE_bool(test_cancel_global_check, + EnvToBool("HEAP_CHECKER_TEST_TEST_CANCEL_GLOBAL_CHECK", false), + "If should test HeapLeakChecker::CancelGlobalCheck " + "when --test_leak or --test_loop_leak are given; " + "the test should not fail then"); +DEFINE_bool(maybe_stripped, + EnvToBool("HEAP_CHECKER_TEST_MAYBE_STRIPPED", true), + "If we think we can be a stripped binary"); +DEFINE_bool(interfering_threads, + EnvToBool("HEAP_CHECKER_TEST_INTERFERING_THREADS", true), + "If we should use threads trying " + "to interfere with leak checking"); +DEFINE_bool(hoarding_threads, + EnvToBool("HEAP_CHECKER_TEST_HOARDING_THREADS", true), + "If threads (usually the manager thread) are known " + "to retain some old state in their global buffers, " + "so that it's hard to force leaks when threads are around"); + // TODO(maxim): Chage the default to false + // when the standard environment used NTPL threads: + // they do not seem to have this problem. +DEFINE_bool(no_threads, + EnvToBool("HEAP_CHECKER_TEST_NO_THREADS", false), + "If we should not use any threads"); + // This is used so we can make can_create_leaks_reliably true + // for any pthread implementation and test with that. 
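+// All of the flags above take their defaults from environment variables (via
+// EnvToBool), which is how the companion heap-checker-death_unittest.sh
+// drives this binary.  A typical invocation from that script looks roughly
+// like this (illustrative only; see the script for the exact combinations):
+//   HEAPCHECK=strict HEAP_CHECKER_TEST_TEST_LEAK=1 \
+//     HEAP_CHECKER_TEST_NO_THREADS=1 ./heap-checker_unittest
+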
+ +DECLARE_int64(heap_check_max_pointer_offset); // heap-checker.cc +DECLARE_string(heap_check); // in heap-checker.cc + +#define WARN_IF(cond, msg) LOG_IF(WARNING, cond, msg) + +// This is an evil macro! Be very careful using it... +#undef VLOG // and we start by evilling overriding logging.h VLOG +#define VLOG(lvl) if (FLAGS_verbose >= (lvl)) cout << "\n" +// This is, likewise, evil +#define LOGF VLOG(INFO) + +static void RunHeapBusyThreads(); // below + + +class Closure { + public: + virtual ~Closure() { } + virtual void Run() = 0; +}; + +class Callback0 : public Closure { + public: + typedef void (*FunctionSignature)(); + + inline Callback0(FunctionSignature f) : f_(f) {} + virtual void Run() { (*f_)(); delete this; } + + private: + FunctionSignature f_; +}; + +template <class P1> class Callback1 : public Closure { + public: + typedef void (*FunctionSignature)(P1); + + inline Callback1<P1>(FunctionSignature f, P1 p1) : f_(f), p1_(p1) {} + virtual void Run() { (*f_)(p1_); delete this; } + + private: + FunctionSignature f_; + P1 p1_; +}; + +template <class P1, class P2> class Callback2 : public Closure { + public: + typedef void (*FunctionSignature)(P1,P2); + + inline Callback2<P1,P2>(FunctionSignature f, P1 p1, P2 p2) : f_(f), p1_(p1), p2_(p2) {} + virtual void Run() { (*f_)(p1_, p2_); delete this; } + + private: + FunctionSignature f_; + P1 p1_; + P2 p2_; +}; + +inline Callback0* NewCallback(void (*function)()) { + return new Callback0(function); +} + +template <class P1> +inline Callback1<P1>* NewCallback(void (*function)(P1), P1 p1) { + return new Callback1<P1>(function, p1); +} + +template <class P1, class P2> +inline Callback2<P1,P2>* NewCallback(void (*function)(P1,P2), P1 p1, P2 p2) { + return new Callback2<P1,P2>(function, p1, p2); +} + + +// Set to true at end of main, so threads know. Not entirely thread-safe!, +// but probably good enough. +static bool g_have_exited_main = false; + +// If we can reliably create leaks (i.e. make leaked object +// really unreachable from any global data). +static bool can_create_leaks_reliably = false; + +// We use a simple allocation wrapper +// to make sure we wipe out the newly allocated objects +// in case they still happened to contain some pointer data +// accidentally left by the memory allocator. +struct Initialized { }; +static Initialized initialized; +void* operator new(size_t size, const Initialized&) { + // Below we use "p = new(initialized) Foo[1];" and "delete[] p;" + // instead of "p = new(initialized) Foo;" + // when we need to delete an allocated object. + void* p = malloc(size); + memset(p, 0, size); + return p; +} +void* operator new[](size_t size, const Initialized&) { + char* p = new char[size]; + memset(p, 0, size); + return p; +} + +static void DoWipeStack(int n); // defined below +static void WipeStack() { DoWipeStack(20); } + +static void Pause() { + poll(NULL, 0, 77); // time for thread activity in HeapBusyThreadBody + + // Indirectly test malloc_extension.*: + CHECK(MallocExtension::instance()->VerifyAllMemory()); + int blocks; + size_t total; + int histogram[kMallocHistogramSize]; + if (MallocExtension::instance() + ->MallocMemoryStats(&blocks, &total, histogram) && total != 0) { + VLOG(3) << "Malloc stats: " << blocks << " blocks of " + << total << " bytes"; + for (int i = 0; i < kMallocHistogramSize; ++i) { + if (histogram[i]) { + VLOG(3) << " Malloc histogram at " << i << " : " << histogram[i]; + } + } + } + WipeStack(); // e.g. 
MallocExtension::VerifyAllMemory + // can leave pointers to heap objects on stack +} + +// Make gcc think a pointer is "used" +template <class T> +static void Use(T** foo) { + VLOG(2) << "Dummy-using " << static_cast<void*>(*foo) << " at " << foo; +} + +// Arbitrary value, but not such that xor'ing with it is likely +// to map one valid pointer to another valid pointer: +static const uintptr_t kHideMask = 0xF03A5F7B; + +// Helpers to hide a pointer from live data traversal. +// We just xor the pointer so that (with high probability) +// it's not a valid address of a heap object anymore. +// Both Hide and UnHide must be executed within RunHidden() below +// to prevent leaving stale data on active stack that can be a pointer +// to a heap object that is not actually reachable via live variables. +// (UnHide might leave heap pointer value for an object +// that will be deallocated but later another object +// can be allocated at the same heap address.) +template <class T> +static void Hide(T** ptr) { + // we cast values, not dereferenced pointers, so no aliasing issues: + *ptr = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(*ptr) ^ kHideMask); + VLOG(2) << "hid: " << static_cast<void*>(*ptr); +} + +template <class T> +static void UnHide(T** ptr) { + VLOG(2) << "unhiding: " << static_cast<void*>(*ptr); + // we cast values, not dereferenced pointers, so no aliasing issues: + *ptr = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(*ptr) ^ kHideMask); +} + +static void LogHidden(const char* message, const void* ptr) { + LOGF << message << " : " + << ptr << " ^ " << reinterpret_cast<void*>(kHideMask) << endl; +} + +// volatile to fool the compiler against inlining the calls to these +void (*volatile run_hidden_ptr)(Closure* c, int n); +void (*volatile wipe_stack_ptr)(int n); + +static void DoRunHidden(Closure* c, int n) { + if (n) { + VLOG(10) << "Level " << n << " at " << &n; + (*run_hidden_ptr)(c, n-1); + (*wipe_stack_ptr)(n); + sleep(0); // undo -foptimize-sibling-calls + } else { + c->Run(); + } +} + +/*static*/ void DoWipeStack(int n) { + VLOG(10) << "Wipe level " << n << " at " << &n; + if (n) { + const int sz = 30; + volatile int arr[sz]; + for (int i = 0; i < sz; ++i) arr[i] = 0; + (*wipe_stack_ptr)(n-1); + sleep(0); // undo -foptimize-sibling-calls + } +} + +// This executes closure c several stack frames down from the current one +// and then makes an effort to also wipe out the stack data that was used by +// the closure. +// This way we prevent leak checker from finding any temporary pointers +// of the closure execution on the stack and deciding that +// these pointers (and the pointed objects) are still live. 
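+// Concretely: DoRunHidden() recurses 15 frames down before invoking the
+// closure, and DoWipeStack() then recurses a little deeper (20 frames),
+// zeroing a scratch array in each frame, so the stack region the closure
+// used gets scrubbed before any leak check runs.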
+static void RunHidden(Closure* c) { + DoRunHidden(c, 15); + DoWipeStack(20); +} + +static void DoAllocHidden(size_t size, void** ptr) { + void* p = new(initialized) char[size]; + Hide(&p); + Use(&p); // use only hidden versions + VLOG(2) << "Allocated hidden " << p << " at " << &p; + *ptr = p; // assign the hidden versions +} + +static void* AllocHidden(size_t size) { + void* r; + RunHidden(NewCallback(DoAllocHidden, size, &r)); + return r; +} + +static void DoDeAllocHidden(void** ptr) { + Use(ptr); // use only hidden versions + void* p = *ptr; + VLOG(2) << "Deallocating hidden " << p; + UnHide(&p); + delete [] reinterpret_cast<char*>(p); +} + +static void DeAllocHidden(void** ptr) { + RunHidden(NewCallback(DoDeAllocHidden, ptr)); + *ptr = NULL; + Use(ptr); +} + +void PreventHeapReclaiming(size_t size) { +#ifdef NDEBUG + if (true) { + static void** no_reclaim_list = NULL; + CHECK(size >= sizeof(void*)); + // We can't use malloc_reclaim_memory flag in opt mode as debugallocation.cc + // is not used. Instead we allocate a bunch of heap objects that are + // of the same size as what we are going to leak to ensure that the object + // we are about to leak is not at the same address as some old allocated + // and freed object that might still have pointers leading to it. + for (int i = 0; i < 100; ++i) { + void** p = reinterpret_cast<void**>(new(initialized) char[size]); + p[0] = no_reclaim_list; + no_reclaim_list = p; + } + } +#endif +} + +static bool RunSilent(HeapLeakChecker* check, + bool (HeapLeakChecker::* func)()) { + // By default, don't print the 'we detected a leak' message in the + // cases we're expecting a leak (we still print when --v is >= 1). + // This way, the logging output is less confusing: we only print + // "we detected a leak", and how to diagnose it, for *unexpected* leaks. + int32 old_FLAGS_verbose = FLAGS_verbose; + if (!VLOG_IS_ON(1)) // not on a verbose setting + FLAGS_verbose = FATAL; // only log fatal errors + const bool retval = (check->*func)(); + FLAGS_verbose = old_FLAGS_verbose; + return retval; +} + +#define RUN_SILENT(check, func) RunSilent(&(check), &HeapLeakChecker::func) + +enum CheckType { SAME_HEAP, NO_LEAKS }; + +static void VerifyLeaks(HeapLeakChecker* check, CheckType type, + int leaked_bytes, int leaked_objects) { + WipeStack(); // to help with can_create_leaks_reliably + const bool no_leaks = + type == NO_LEAKS ? 
RUN_SILENT(*check, BriefNoLeaks) + : RUN_SILENT(*check, BriefSameHeap); + if (can_create_leaks_reliably) { + // these might still fail occasionally, but it should be very rare + CHECK_EQ(no_leaks, false); + CHECK_EQ(check->BytesLeaked(), leaked_bytes); + CHECK_EQ(check->ObjectsLeaked(), leaked_objects); + } else { + WARN_IF(no_leaks != false, + "Expected leaks not found: " + "Some liveness flood must be too optimistic"); + } +} + +// not deallocates +static void TestHeapLeakCheckerDeathSimple() { + HeapLeakChecker check("death_simple"); + void* foo = AllocHidden(100 * sizeof(int)); + Use(&foo); + void* bar = AllocHidden(300); + Use(&bar); + LogHidden("Leaking", foo); + LogHidden("Leaking", bar); + Pause(); + VerifyLeaks(&check, NO_LEAKS, 300 + 100 * sizeof(int), 2); + DeAllocHidden(&foo); + DeAllocHidden(&bar); +} + +static void MakeDeathLoop(void** arr1, void** arr2) { + PreventHeapReclaiming(2 * sizeof(void*)); + void** a1 = new(initialized) void*[2]; + void** a2 = new(initialized) void*[2]; + a1[1] = reinterpret_cast<void*>(a2); + a2[1] = reinterpret_cast<void*>(a1); + Hide(&a1); + Hide(&a2); + Use(&a1); + Use(&a2); + VLOG(2) << "Made hidden loop at " << &a1 << " to " << arr1; + *arr1 = a1; + *arr2 = a2; +} + +// not deallocates two objects linked together +static void TestHeapLeakCheckerDeathLoop() { + HeapLeakChecker check("death_loop"); + void* arr1; + void* arr2; + RunHidden(NewCallback(MakeDeathLoop, &arr1, &arr2)); + Use(&arr1); + Use(&arr2); + LogHidden("Leaking", arr1); + LogHidden("Leaking", arr2); + Pause(); + VerifyLeaks(&check, NO_LEAKS, 4 * sizeof(void*), 2); + DeAllocHidden(&arr1); + DeAllocHidden(&arr2); +} + +// deallocates more than allocates +static void TestHeapLeakCheckerDeathInverse() { + void* bar = AllocHidden(250 * sizeof(int)); + Use(&bar); + LogHidden("Pre leaking", bar); + Pause(); + HeapLeakChecker check("death_inverse"); + void* foo = AllocHidden(100 * sizeof(int)); + Use(&foo); + LogHidden("Leaking", foo); + DeAllocHidden(&bar); + Pause(); + VerifyLeaks(&check, SAME_HEAP, + 100 * static_cast<int64>(sizeof(int)), + 1); + DeAllocHidden(&foo); +} + +// deallocates more than allocates +static void TestHeapLeakCheckerDeathNoLeaks() { + void* foo = AllocHidden(100 * sizeof(int)); + Use(&foo); + void* bar = AllocHidden(250 * sizeof(int)); + Use(&bar); + HeapLeakChecker check("death_noleaks"); + DeAllocHidden(&bar); + CHECK_EQ(check.BriefNoLeaks(), true); + DeAllocHidden(&foo); +} + +// have less objecs +static void TestHeapLeakCheckerDeathCountLess() { + void* bar1 = AllocHidden(50 * sizeof(int)); + Use(&bar1); + void* bar2 = AllocHidden(50 * sizeof(int)); + Use(&bar2); + LogHidden("Pre leaking", bar1); + LogHidden("Pre leaking", bar2); + Pause(); + HeapLeakChecker check("death_count_less"); + void* foo = AllocHidden(100 * sizeof(int)); + Use(&foo); + LogHidden("Leaking", foo); + DeAllocHidden(&bar1); + DeAllocHidden(&bar2); + Pause(); + VerifyLeaks(&check, SAME_HEAP, + 100 * sizeof(int), + 1); + DeAllocHidden(&foo); +} + +// have more objecs +static void TestHeapLeakCheckerDeathCountMore() { + void* foo = AllocHidden(100 * sizeof(int)); + Use(&foo); + LogHidden("Pre leaking", foo); + Pause(); + HeapLeakChecker check("death_count_more"); + void* bar1 = AllocHidden(50 * sizeof(int)); + Use(&bar1); + void* bar2 = AllocHidden(50 * sizeof(int)); + Use(&bar2); + LogHidden("Leaking", bar1); + LogHidden("Leaking", bar2); + DeAllocHidden(&foo); + Pause(); + VerifyLeaks(&check, SAME_HEAP, + 100 * sizeof(int), + 2); + DeAllocHidden(&bar1); + DeAllocHidden(&bar2); +} + 
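+// Most of the death tests above share one pattern: allocate through
+// AllocHidden() so the only live reference is XOR-masked, then let
+// VerifyLeaks() confirm that the checker reports the expected number of
+// leaked bytes and objects.  The client-side API they exercise boils down
+// to roughly this (see google/heap-checker.h for the real interface):
+//
+//   HeapLeakChecker checker("some_scope");
+//   ... code under test ...
+//   CHECK(checker.NoLeaks());   // or SameHeap(), BriefNoLeaks(), ...
+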
+static void TestHiddenPointer() { + int i; + void* foo = &i; + HiddenPointer<void> p(foo); + CHECK_EQ(foo, p.get()); + + // Confirm pointer doesn't appear to contain a byte sequence + // that == the pointer. We don't really need to test that + // the xor trick itself works, as without it nothing in this + // test suite would work. See the Hide/Unhide/*Hidden* set + // of helper methods. + CHECK_NE(foo, *reinterpret_cast<void**>(&p)); +} + +// simple tests that deallocate what they allocated +static void TestHeapLeakChecker() { + { HeapLeakChecker check("trivial"); + int foo = 5; + int* p = &foo; + Use(&p); + Pause(); + CHECK(check.BriefSameHeap()); + } + Pause(); + { HeapLeakChecker check("simple"); + void* foo = AllocHidden(100 * sizeof(int)); + Use(&foo); + void* bar = AllocHidden(200 * sizeof(int)); + Use(&bar); + DeAllocHidden(&foo); + DeAllocHidden(&bar); + Pause(); + CHECK(check.BriefSameHeap()); + } +} + +// no false positives +static void TestHeapLeakCheckerNoFalsePositives() { + { HeapLeakChecker check("trivial_p"); + int foo = 5; + int* p = &foo; + Use(&p); + Pause(); + CHECK(check.BriefSameHeap()); + } + Pause(); + { HeapLeakChecker check("simple_p"); + void* foo = AllocHidden(100 * sizeof(int)); + Use(&foo); + void* bar = AllocHidden(200 * sizeof(int)); + Use(&bar); + DeAllocHidden(&foo); + DeAllocHidden(&bar); + Pause(); + CHECK(check.SameHeap()); + } +} + +// test that we detect leaks when we have same total # of bytes and +// objects, but different individual object sizes +static void TestLeakButTotalsMatch() { + void* bar1 = AllocHidden(240 * sizeof(int)); + Use(&bar1); + void* bar2 = AllocHidden(160 * sizeof(int)); + Use(&bar2); + LogHidden("Pre leaking", bar1); + LogHidden("Pre leaking", bar2); + Pause(); + HeapLeakChecker check("trick"); + void* foo1 = AllocHidden(280 * sizeof(int)); + Use(&foo1); + void* foo2 = AllocHidden(120 * sizeof(int)); + Use(&foo2); + LogHidden("Leaking", foo1); + LogHidden("Leaking", foo2); + DeAllocHidden(&bar1); + DeAllocHidden(&bar2); + Pause(); + + // foo1 and foo2 leaked + VerifyLeaks(&check, NO_LEAKS, (280+120)*sizeof(int), 2); + + DeAllocHidden(&foo1); + DeAllocHidden(&foo2); +} + +// no false negatives from pprof +static void TestHeapLeakCheckerDeathTrick() { + void* bar1 = AllocHidden(240 * sizeof(int)); + Use(&bar1); + void* bar2 = AllocHidden(160 * sizeof(int)); + Use(&bar2); + HeapLeakChecker check("death_trick"); + DeAllocHidden(&bar1); + DeAllocHidden(&bar2); + void* foo1 = AllocHidden(280 * sizeof(int)); + Use(&foo1); + void* foo2 = AllocHidden(120 * sizeof(int)); + Use(&foo2); + // TODO(maxim): use the above if we make pprof work in automated test runs + if (!FLAGS_maybe_stripped) { + CHECK_EQ(RUN_SILENT(check, SameHeap), false); + // pprof checking should catch the leak + } else { + WARN_IF(RUN_SILENT(check, SameHeap) != false, + "death_trick leak is not caught; " + "we must be using a stripped binary"); + } + DeAllocHidden(&foo1); + DeAllocHidden(&foo2); +} + +// simple leak +static void TransLeaks() { + AllocHidden(1 * sizeof(char)); +} + +// range-based disabling using Disabler +static void ScopedDisabledLeaks() { + HeapLeakChecker::Disabler disabler; + AllocHidden(3 * sizeof(int)); + TransLeaks(); + malloc(10); // Direct leak +} + +// have different disabled leaks +static void* RunDisabledLeaks(void* a) { + ScopedDisabledLeaks(); + return a; +} + +// have different disabled leaks inside of a thread +static void ThreadDisabledLeaks() { + if (FLAGS_no_threads) return; + pthread_t tid; + pthread_attr_t attr; + 
CHECK_EQ(pthread_attr_init(&attr), 0); + CHECK_EQ(pthread_create(&tid, &attr, RunDisabledLeaks, NULL), 0); + void* res; + CHECK_EQ(pthread_join(tid, &res), 0); +} + +// different disabled leaks (some in threads) +static void TestHeapLeakCheckerDisabling() { + HeapLeakChecker check("disabling"); + + RunDisabledLeaks(NULL); + RunDisabledLeaks(NULL); + ThreadDisabledLeaks(); + RunDisabledLeaks(NULL); + ThreadDisabledLeaks(); + ThreadDisabledLeaks(); + + Pause(); + + CHECK(check.SameHeap()); +} + +typedef set<int> IntSet; + +static int some_ints[] = { 1, 2, 3, 21, 22, 23, 24, 25 }; + +static void DoTestSTLAlloc() { + IntSet* x = new(initialized) IntSet[1]; + *x = IntSet(some_ints, some_ints + 6); + for (int i = 0; i < 1000; i++) { + x->insert(i*3); + } + delete [] x; +} + +// Check that normal STL usage does not result in a leak report. +// (In particular we test that there's no complex STL's own allocator +// running on top of our allocator with hooks to heap profiler +// that can result in false leak report in this case.) +static void TestSTLAlloc() { + HeapLeakChecker check("stl"); + RunHidden(NewCallback(DoTestSTLAlloc)); + CHECK_EQ(check.BriefSameHeap(), true); +} + +static void DoTestSTLAllocInverse(IntSet** setx) { + IntSet* x = new(initialized) IntSet[1]; + *x = IntSet(some_ints, some_ints + 3); + for (int i = 0; i < 100; i++) { + x->insert(i*2); + } + Hide(&x); + *setx = x; +} + +static void FreeTestSTLAllocInverse(IntSet** setx) { + IntSet* x = *setx; + UnHide(&x); + delete [] x; +} + +// Check that normal leaked STL usage *does* result in a leak report. +// (In particular we test that there's no complex STL's own allocator +// running on top of our allocator with hooks to heap profiler +// that can result in false absence of leak report in this case.) +static void TestSTLAllocInverse() { + HeapLeakChecker check("death_inverse_stl"); + IntSet* x; + RunHidden(NewCallback(DoTestSTLAllocInverse, &x)); + LogHidden("Leaking", x); + if (can_create_leaks_reliably) { + WipeStack(); // to help with can_create_leaks_reliably + // these might still fail occasionally, but it should be very rare + CHECK_EQ(RUN_SILENT(check, BriefNoLeaks), false); + CHECK_GE(check.BytesLeaked(), 100 * sizeof(int)); + CHECK_GE(check.ObjectsLeaked(), 100); + // assumes set<>s are represented by some kind of binary tree + // or something else allocating >=1 heap object per set object + } else { + WARN_IF(RUN_SILENT(check, BriefNoLeaks) != false, + "Expected leaks not found: " + "Some liveness flood must be too optimistic"); + } + RunHidden(NewCallback(FreeTestSTLAllocInverse, &x)); +} + +template<class Alloc> +static void DirectTestSTLAlloc(Alloc allocator, const char* name) { + HeapLeakChecker check((string("direct_stl-") + name).c_str()); + static const int kSize = 1000; + typename Alloc::pointer ptrs[kSize]; + for (int i = 0; i < kSize; ++i) { + typename Alloc::pointer p = allocator.allocate(i*3+1); + HeapLeakChecker::IgnoreObject(p); + // This will crash if p is not known to heap profiler: + // (i.e. 
STL's "allocator" does not have a direct hook to heap profiler) + HeapLeakChecker::UnIgnoreObject(p); + ptrs[i] = p; + } + for (int i = 0; i < kSize; ++i) { + allocator.deallocate(ptrs[i], i*3+1); + ptrs[i] = NULL; + } + CHECK(check.BriefSameHeap()); // just in case +} + +static struct group* grp = NULL; +static const int kKeys = 50; +static pthread_key_t key[kKeys]; + +static void KeyFree(void* ptr) { + delete [] reinterpret_cast<char*>(ptr); +} + +static bool key_init_has_run = false; + +static void KeyInit() { + for (int i = 0; i < kKeys; ++i) { + CHECK_EQ(pthread_key_create(&key[i], KeyFree), 0); + VLOG(2) << "pthread key " << i << " : " << key[i]; + } + key_init_has_run = true; // needed for a sanity-check +} + +// force various C library static and thread-specific allocations +static void TestLibCAllocate() { + CHECK(key_init_has_run); + for (int i = 0; i < kKeys; ++i) { + void* p = pthread_getspecific(key[i]); + if (NULL == p) { + if (i == 0) { + // Test-logging inside threads which (potentially) creates and uses + // thread-local data inside standard C++ library: + VLOG(0) << "Adding pthread-specifics for thread " << pthread_self() + << " pid " << getpid(); + } + p = new(initialized) char[77 + i]; + VLOG(2) << "pthread specific " << i << " : " << p; + pthread_setspecific(key[i], p); + } + } + + strerror(errno); + const time_t now = time(NULL); + ctime(&now); +#ifdef HAVE_EXECINFO_H + void *stack[1]; + backtrace(stack, 1); +#endif +#ifdef HAVE_GRP_H + gid_t gid = getgid(); + getgrgid(gid); + if (grp == NULL) grp = getgrent(); // a race condition here is okay + getgrnam(grp->gr_name); +#endif +#ifdef HAVE_PWD_H + getpwuid(geteuid()); +#endif +} + +// Continuous random heap memory activity to try to disrupt heap checking. +static void* HeapBusyThreadBody(void* a) { + const int thread_num = reinterpret_cast<intptr_t>(a); + VLOG(0) << "A new HeapBusyThread " << thread_num; + TestLibCAllocate(); + + int user = 0; + // Try to hide ptr from heap checker in a CPU register: + // Here we are just making a best effort to put the only pointer + // to a heap object into a thread register to test + // the thread-register finding machinery in the heap checker. +#if defined(__i386__) && defined(__GNUC__) + register int** ptr asm("esi"); +#elif defined(__x86_64__) && defined(__GNUC__) + register int** ptr asm("r15"); +#else + register int** ptr; +#endif + ptr = NULL; + typedef set<int> Set; + Set s1; + while (1) { + // TestLibCAllocate() calls libc functions that don't work so well + // after main() has exited. So we just don't do the test then. + if (!g_have_exited_main) + TestLibCAllocate(); + + if (ptr == NULL) { + ptr = new(initialized) int*[1]; + *ptr = new(initialized) int[1]; + } + set<int>* s2 = new(initialized) set<int>[1]; + s1.insert(random()); + s2->insert(*s1.begin()); + user += *s2->begin(); + **ptr += user; + if (random() % 51 == 0) { + s1.clear(); + if (random() % 2 == 0) { + s1.~Set(); + new(&s1) Set; + } + } + VLOG(3) << pthread_self() << " (" << getpid() << "): in wait: " + << ptr << ", " << *ptr << "; " << s1.size(); + VLOG(2) << pthread_self() << " (" << getpid() << "): in wait, ptr = " + << reinterpret_cast<void*>( + reinterpret_cast<uintptr_t>(ptr) ^ kHideMask) + << "^" << reinterpret_cast<void*>(kHideMask); + if (FLAGS_test_register_leak && thread_num % 5 == 0) { + // Hide the register "ptr" value with an xor mask. 
+ // If one provides --test_register_leak flag, the test should + // (with very high probability) crash on some leak check + // with a leak report (of some x * sizeof(int) + y * sizeof(int*) bytes) + // pointing at the two lines above in this function + // with "new(initialized) int" in them as the allocators + // of the leaked objects. + // CAVEAT: We can't really prevent a compiler to save some + // temporary values of "ptr" on the stack and thus let us find + // the heap objects not via the register. + // Hence it's normal if for certain compilers or optimization modes + // --test_register_leak does not cause a leak crash of the above form + // (this happens e.g. for gcc 4.0.1 in opt mode). + ptr = reinterpret_cast<int **>( + reinterpret_cast<uintptr_t>(ptr) ^ kHideMask); + // busy loop to get the thread interrupted at: + for (int i = 1; i < 10000000; ++i) user += (1 + user * user * 5) / i; + ptr = reinterpret_cast<int **>( + reinterpret_cast<uintptr_t>(ptr) ^ kHideMask); + } else { + poll(NULL, 0, random() % 100); + } + VLOG(2) << pthread_self() << ": continuing"; + if (random() % 3 == 0) { + delete [] *ptr; + delete [] ptr; + ptr = NULL; + } + delete [] s2; + } + return a; +} + +static void RunHeapBusyThreads() { + KeyInit(); + if (!FLAGS_interfering_threads || FLAGS_no_threads) return; + + const int n = 17; // make many threads + + pthread_t tid; + pthread_attr_t attr; + CHECK_EQ(pthread_attr_init(&attr), 0); + // make them and let them run + for (int i = 0; i < n; ++i) { + VLOG(0) << "Creating extra thread " << i + 1; + CHECK(pthread_create(&tid, &attr, HeapBusyThreadBody, + reinterpret_cast<void*>(i)) == 0); + } + + Pause(); + Pause(); +} + +// ========================================================================= // + +// This code section is to test that objects that are reachable from global +// variables are not reported as leaks +// as well as that (Un)IgnoreObject work for such objects fine. + +// An object making functions: +// returns a "weird" pointer to a new object for which +// it's worth checking that the object is reachable via that pointer. +typedef void* (*ObjMakerFunc)(); +static list<ObjMakerFunc> obj_makers; // list of registered object makers + +// Helper macro to register an object making function +// 'name' is an identifier of this object maker, +// 'body' is its function body that must declare +// pointer 'p' to the nex object to return. +// Usage example: +// REGISTER_OBJ_MAKER(trivial, int* p = new(initialized) int;) +#define REGISTER_OBJ_MAKER(name, body) \ + void* ObjMaker_##name##_() { \ + VLOG(1) << "Obj making " << #name; \ + body; \ + return p; \ + } \ + static ObjMakerRegistrar maker_reg_##name##__(&ObjMaker_##name##_); +// helper class for REGISTER_OBJ_MAKER +struct ObjMakerRegistrar { + ObjMakerRegistrar(ObjMakerFunc obj_maker) { obj_makers.push_back(obj_maker); } +}; + +// List of the objects/pointers made with all the obj_makers +// to test reachability via global data pointers during leak checks. +static list<void*>* live_objects = new list<void*>; + // pointer so that it does not get destructed on exit + +// Exerciser for one ObjMakerFunc. 
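+// It exercises IgnoreObject()/UnIgnoreObject() on freshly made objects and
+// also pushes one more object onto live_objects, so the final whole-program
+// check has to prove that object reachable through that global list.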
+static void TestPointerReach(ObjMakerFunc obj_maker) { + HeapLeakChecker::IgnoreObject(obj_maker()); // test IgnoreObject + + void* obj = obj_maker(); + HeapLeakChecker::IgnoreObject(obj); + HeapLeakChecker::UnIgnoreObject(obj); // test UnIgnoreObject + HeapLeakChecker::IgnoreObject(obj); // not to need deletion for obj + + live_objects->push_back(obj_maker()); // test reachability at leak check +} + +// Test all ObjMakerFunc registred via REGISTER_OBJ_MAKER. +static void TestObjMakers() { + for (list<ObjMakerFunc>::const_iterator i = obj_makers.begin(); + i != obj_makers.end(); ++i) { + TestPointerReach(*i); + TestPointerReach(*i); // a couple more times would not hurt + TestPointerReach(*i); + } +} + +// A dummy class to mimic allocation behavior of string-s. +template<class T> +struct Array { + Array() { + size = 3 + random() % 30; + ptr = new(initialized) T[size]; + } + ~Array() { delete [] ptr; } + Array(const Array& x) { + size = x.size; + ptr = new(initialized) T[size]; + for (size_t i = 0; i < size; ++i) { + ptr[i] = x.ptr[i]; + } + } + void operator=(const Array& x) { + delete [] ptr; + size = x.size; + ptr = new(initialized) T[size]; + for (size_t i = 0; i < size; ++i) { + ptr[i] = x.ptr[i]; + } + } + void append(const Array& x) { + T* p = new(initialized) T[size + x.size]; + for (size_t i = 0; i < size; ++i) { + p[i] = ptr[i]; + } + for (size_t i = 0; i < x.size; ++i) { + p[size+i] = x.ptr[i]; + } + size += x.size; + delete [] ptr; + ptr = p; + } + private: + size_t size; + T* ptr; +}; + +// to test pointers to objects, built-in arrays, string, etc: +REGISTER_OBJ_MAKER(plain, int* p = new(initialized) int;) +REGISTER_OBJ_MAKER(int_array_1, int* p = new(initialized) int[1];) +REGISTER_OBJ_MAKER(int_array, int* p = new(initialized) int[10];) +REGISTER_OBJ_MAKER(string, Array<char>* p = new(initialized) Array<char>();) +REGISTER_OBJ_MAKER(string_array, + Array<char>* p = new(initialized) Array<char>[5];) +REGISTER_OBJ_MAKER(char_array, char* p = new(initialized) char[5];) +REGISTER_OBJ_MAKER(appended_string, + Array<char>* p = new Array<char>(); + p->append(Array<char>()); +) +REGISTER_OBJ_MAKER(plain_ptr, int** p = new(initialized) int*;) +REGISTER_OBJ_MAKER(linking_ptr, + int** p = new(initialized) int*; + *p = new(initialized) int; +) + +// small objects: +REGISTER_OBJ_MAKER(0_sized, void* p = malloc(0);) // 0-sized object (important) +REGISTER_OBJ_MAKER(1_sized, void* p = malloc(1);) +REGISTER_OBJ_MAKER(2_sized, void* p = malloc(2);) +REGISTER_OBJ_MAKER(3_sized, void* p = malloc(3);) +REGISTER_OBJ_MAKER(4_sized, void* p = malloc(4);) + +static int set_data[] = { 1, 2, 3, 4, 5, 6, 7, 21, 22, 23, 24, 25, 26, 27 }; +static set<int> live_leak_set(set_data, set_data+7); +static const set<int> live_leak_const_set(set_data, set_data+14); + +REGISTER_OBJ_MAKER(set, + set<int>* p = new(initialized) set<int>(set_data, set_data + 13); +) + +class ClassA { + public: + explicit ClassA(int a) : ptr(NULL) { } + mutable char* ptr; +}; +static const ClassA live_leak_mutable(1); + +template<class C> +class TClass { + public: + explicit TClass(int a) : ptr(NULL) { } + mutable C val; + mutable C* ptr; +}; +static const TClass<Array<char> > live_leak_templ_mutable(1); + +class ClassB { + public: + ClassB() { } + char b[7]; + virtual void f() { } + virtual ~ClassB() { } +}; + +class ClassB2 { + public: + ClassB2() { } + char b2[11]; + virtual void f2() { } + virtual ~ClassB2() { } +}; + +class ClassD1 : public ClassB { + char d1[15]; + virtual void f() { } +}; + +class ClassD2 : public ClassB2 { + 
char d2[19]; + virtual void f2() { } +}; + +class ClassD : public ClassD1, public ClassD2 { + char d[3]; + virtual void f() { } + virtual void f2() { } +}; + +// to test pointers to objects of base subclasses: + +REGISTER_OBJ_MAKER(B, ClassB* p = new(initialized) ClassB;) +REGISTER_OBJ_MAKER(D1, ClassD1* p = new(initialized) ClassD1;) +REGISTER_OBJ_MAKER(D2, ClassD2* p = new(initialized) ClassD2;) +REGISTER_OBJ_MAKER(D, ClassD* p = new(initialized) ClassD;) + +REGISTER_OBJ_MAKER(D1_as_B, ClassB* p = new(initialized) ClassD1;) +REGISTER_OBJ_MAKER(D2_as_B2, ClassB2* p = new(initialized) ClassD2;) +REGISTER_OBJ_MAKER(D_as_B, ClassB* p = new(initialized) ClassD;) +REGISTER_OBJ_MAKER(D_as_D1, ClassD1* p = new(initialized) ClassD;) +// inside-object pointers: +REGISTER_OBJ_MAKER(D_as_B2, ClassB2* p = new(initialized) ClassD;) +REGISTER_OBJ_MAKER(D_as_D2, ClassD2* p = new(initialized) ClassD;) + +class InterfaceA { + public: + virtual void A() = 0; + virtual ~InterfaceA() { } + protected: + InterfaceA() { } +}; + +class InterfaceB { + public: + virtual void B() = 0; + virtual ~InterfaceB() { } + protected: + InterfaceB() { } +}; + +class InterfaceC : public InterfaceA { + public: + virtual void C() = 0; + virtual ~InterfaceC() { } + protected: + InterfaceC() { } +}; + +class ClassMltD1 : public ClassB, public InterfaceB, public InterfaceC { + public: + char d1[11]; + virtual void f() { } + virtual void A() { } + virtual void B() { } + virtual void C() { } +}; + +class ClassMltD2 : public InterfaceA, public InterfaceB, public ClassB { + public: + char d2[15]; + virtual void f() { } + virtual void A() { } + virtual void B() { } +}; + +// to specifically test heap reachability under +// inerface-only multiple inheritance (some use inside-object pointers): +REGISTER_OBJ_MAKER(MltD1, ClassMltD1* p = new(initialized) ClassMltD1;) +REGISTER_OBJ_MAKER(MltD1_as_B, ClassB* p = new(initialized) ClassMltD1;) +REGISTER_OBJ_MAKER(MltD1_as_IA, InterfaceA* p = new(initialized) ClassMltD1;) +REGISTER_OBJ_MAKER(MltD1_as_IB, InterfaceB* p = new(initialized) ClassMltD1;) +REGISTER_OBJ_MAKER(MltD1_as_IC, InterfaceC* p = new(initialized) ClassMltD1;) + +REGISTER_OBJ_MAKER(MltD2, ClassMltD2* p = new(initialized) ClassMltD2;) +REGISTER_OBJ_MAKER(MltD2_as_B, ClassB* p = new(initialized) ClassMltD2;) +REGISTER_OBJ_MAKER(MltD2_as_IA, InterfaceA* p = new(initialized) ClassMltD2;) +REGISTER_OBJ_MAKER(MltD2_as_IB, InterfaceB* p = new(initialized) ClassMltD2;) + +// to mimic UnicodeString defined in third_party/icu, +// which store a platform-independent-sized refcount in the first +// few bytes and keeps a pointer pointing behind the refcount. 
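+// The registered pointer is therefore an interior pointer, offset by
+// sizeof(uint32) from the true start of the allocation; the checker still
+// has to treat the block as reachable (cf. heap_check_max_pointer_offset).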
+REGISTER_OBJ_MAKER(unicode_string, + char* p = new char[sizeof(uint32) * 10]; + p += sizeof(uint32); +) +// similar, but for platform-dependent-sized refcount +REGISTER_OBJ_MAKER(ref_counted, + char* p = new char[sizeof(int) * 20]; + p += sizeof(int); +) + +struct Nesting { + struct Inner { + Nesting* parent; + Inner(Nesting* p) : parent(p) {} + }; + Inner i0; + char n1[5]; + Inner i1; + char n2[11]; + Inner i2; + char n3[27]; + Inner i3; + Nesting() : i0(this), i1(this), i2(this), i3(this) {} +}; + +// to test inside-object pointers pointing at objects nested into heap objects: +REGISTER_OBJ_MAKER(nesting_i0, Nesting::Inner* p = &((new Nesting())->i0);) +REGISTER_OBJ_MAKER(nesting_i1, Nesting::Inner* p = &((new Nesting())->i1);) +REGISTER_OBJ_MAKER(nesting_i2, Nesting::Inner* p = &((new Nesting())->i2);) +REGISTER_OBJ_MAKER(nesting_i3, Nesting::Inner* p = &((new Nesting())->i3);) + +// allocate many objects reachable from global data +static void TestHeapLeakCheckerLiveness() { + live_leak_mutable.ptr = new(initialized) char[77]; + live_leak_templ_mutable.ptr = new(initialized) Array<char>(); + live_leak_templ_mutable.val = Array<char>(); + + TestObjMakers(); +} + +// ========================================================================= // + +// Get address (PC value) following the mmap call into addr_after_mmap_call +static void* Mmapper(uintptr_t* addr_after_mmap_call) { + void* r = mmap(NULL, 100, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + // Get current PC value into addr_after_mmap_call + void* stack[1]; + CHECK_EQ(GetStackTrace(stack, 1, 0), 1); + *addr_after_mmap_call = reinterpret_cast<uintptr_t>(stack[0]); + sleep(0); // undo -foptimize-sibling-calls + return r; +} + +// to trick complier into preventing inlining +static void* (*mmapper_addr)(uintptr_t* addr) = &Mmapper; + +// TODO(maxim): copy/move this to memory_region_map_unittest +// TODO(maxim): expand this test to include mmap64, mremap and sbrk calls. +static void VerifyMemoryRegionMapStackGet() { + uintptr_t caller_addr_limit; + void* addr = (*mmapper_addr)(&caller_addr_limit); + uintptr_t caller = 0; + { MemoryRegionMap::LockHolder l; + for (MemoryRegionMap::RegionIterator + i = MemoryRegionMap::BeginRegionLocked(); + i != MemoryRegionMap::EndRegionLocked(); ++i) { + if (i->start_addr == reinterpret_cast<uintptr_t>(addr)) { + CHECK_EQ(caller, 0); + caller = i->caller(); + } + } + } + // caller must point into Mmapper function: + if (!(reinterpret_cast<uintptr_t>(mmapper_addr) <= caller && + caller < caller_addr_limit)) { + LOGF << std::hex << "0x" << caller + << " does not seem to point into code of function Mmapper at " + << "0x" << reinterpret_cast<uintptr_t>(mmapper_addr) + << "! Stack frame collection must be off in MemoryRegionMap!"; + LOG(FATAL, "\n"); + } + munmap(addr, 100); +} + +static void* Mallocer(uintptr_t* addr_after_malloc_call) { + void* r = malloc(100); + sleep(0); // undo -foptimize-sibling-calls + // Get current PC value into addr_after_malloc_call + void* stack[1]; + CHECK_EQ(GetStackTrace(stack, 1, 0), 1); + *addr_after_malloc_call = reinterpret_cast<uintptr_t>(stack[0]); + return r; +} + +// to trick complier into preventing inlining +static void* (*mallocer_addr)(uintptr_t* addr) = &Mallocer; + +// non-static for friendship with HeapProfiler +// TODO(maxim): expand this test to include +// realloc, calloc, memalign, valloc, pvalloc, new, and new[]. 
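+// Like VerifyMemoryRegionMapStackGet() above, but for the malloc path:
+// GetAllocCaller(addr) must return a PC that lies inside Mallocer(), i.e.
+// at or after Mallocer's entry point and before the program-counter value
+// recorded near the end of its body.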
+extern void VerifyHeapProfileTableStackGet() { + uintptr_t caller_addr_limit; + void* addr = (*mallocer_addr)(&caller_addr_limit); + uintptr_t caller = + reinterpret_cast<uintptr_t>(HeapLeakChecker::GetAllocCaller(addr)); + // caller must point into Mallocer function: + if (!(reinterpret_cast<uintptr_t>(mallocer_addr) <= caller && + caller < caller_addr_limit)) { + LOGF << std::hex << "0x" << caller + << " does not seem to point into code of function Mallocer at " + << "0x" << reinterpret_cast<uintptr_t>(mallocer_addr) + << "! Stack frame collection must be off in heap profiler!"; + LOG(FATAL, "\n"); + } + free(addr); +} + +// ========================================================================= // + +static void MakeALeak(void** arr) { + PreventHeapReclaiming(10 * sizeof(int)); + void* a = new(initialized) int[10]; + Hide(&a); + *arr = a; +} + +// Helper to do 'return 0;' inside main(): insted we do 'return Pass();' +static int Pass() { + fprintf(stdout, "PASS\n"); + g_have_exited_main = true; + return 0; +} + +int main(int argc, char** argv) { + run_hidden_ptr = DoRunHidden; + wipe_stack_ptr = DoWipeStack; + if (!HeapLeakChecker::IsActive()) { + CHECK_EQ(FLAGS_heap_check, ""); + LOG(WARNING, "HeapLeakChecker got turned off; we won't test much..."); + } else { + VerifyMemoryRegionMapStackGet(); + VerifyHeapProfileTableStackGet(); + } + + KeyInit(); + + // glibc 2.4, on x86_64 at least, has a lock-ordering bug, which + // means deadlock is possible when one thread calls dl_open at the + // same time another thread is calling dl_iterate_phdr. libunwind + // calls dl_iterate_phdr, and TestLibCAllocate calls dl_open (or the + // various syscalls in it do), at least the first time it's run. + // To avoid the deadlock, we run TestLibCAllocate once before getting + // multi-threaded. + // TODO(csilvers): once libc is fixed, or libunwind can work around it, + // get rid of this early call. We *want* our test to + // find potential problems like this one! 
+ TestLibCAllocate(); + + if (FLAGS_interfering_threads) { + RunHeapBusyThreads(); // add interference early + } + TestLibCAllocate(); + + LOGF << "In main(): heap_check=" << FLAGS_heap_check << endl; + + CHECK(HeapLeakChecker::NoGlobalLeaks()); // so far, so good + + if (FLAGS_test_leak) { + void* arr; + RunHidden(NewCallback(MakeALeak, &arr)); + Use(&arr); + LogHidden("Leaking", arr); + if (FLAGS_test_cancel_global_check) HeapLeakChecker::CancelGlobalCheck(); + return Pass(); + // whole-program leak-check should (with very high probability) + // catch the leak of arr (10 * sizeof(int) bytes) + // (when !FLAGS_test_cancel_global_check) + } + + if (FLAGS_test_loop_leak) { + void* arr1; + void* arr2; + RunHidden(NewCallback(MakeDeathLoop, &arr1, &arr2)); + Use(&arr1); + Use(&arr2); + LogHidden("Loop leaking", arr1); + LogHidden("Loop leaking", arr2); + if (FLAGS_test_cancel_global_check) HeapLeakChecker::CancelGlobalCheck(); + return Pass(); + // whole-program leak-check should (with very high probability) + // catch the leak of arr1 and arr2 (4 * sizeof(void*) bytes) + // (when !FLAGS_test_cancel_global_check) + } + + if (FLAGS_test_register_leak) { + // make us fail only where the .sh test expects: + Pause(); + for (int i = 0; i < 100; ++i) { // give it some time to crash + CHECK(HeapLeakChecker::NoGlobalLeaks()); + Pause(); + } + return Pass(); + } + + TestHeapLeakCheckerLiveness(); + + HeapLeakChecker heap_check("all"); + + TestHiddenPointer(); + + TestHeapLeakChecker(); + Pause(); + TestLeakButTotalsMatch(); + Pause(); + + TestHeapLeakCheckerDeathSimple(); + Pause(); + TestHeapLeakCheckerDeathLoop(); + Pause(); + TestHeapLeakCheckerDeathInverse(); + Pause(); + TestHeapLeakCheckerDeathNoLeaks(); + Pause(); + TestHeapLeakCheckerDeathCountLess(); + Pause(); + TestHeapLeakCheckerDeathCountMore(); + Pause(); + + TestHeapLeakCheckerDeathTrick(); + Pause(); + + CHECK(HeapLeakChecker::NoGlobalLeaks()); // so far, so good + + TestHeapLeakCheckerNoFalsePositives(); + Pause(); + + TestHeapLeakCheckerDisabling(); + Pause(); + + TestSTLAlloc(); + Pause(); + TestSTLAllocInverse(); + Pause(); + + // Test that various STL allocators work. Some of these are redundant, but + // we don't know how STL might change in the future. For example, + // http://wiki/Main/StringNeStdString. +#define DTSL(a) { DirectTestSTLAlloc(a, #a); \ + Pause(); } + DTSL(std::allocator<char>()); + DTSL(std::allocator<int>()); + DTSL(std::string().get_allocator()); + DTSL(string().get_allocator()); + DTSL(vector<int>().get_allocator()); + DTSL(vector<double>().get_allocator()); + DTSL(vector<vector<int> >().get_allocator()); + DTSL(vector<string>().get_allocator()); + DTSL((map<string, string>().get_allocator())); + DTSL((map<string, int>().get_allocator())); + DTSL(set<char>().get_allocator()); +#undef DTSL + + TestLibCAllocate(); + Pause(); + + CHECK(HeapLeakChecker::NoGlobalLeaks()); // so far, so good + + Pause(); + + if (!FLAGS_maybe_stripped) { + CHECK(heap_check.SameHeap()); + } else { + WARN_IF(heap_check.SameHeap() != true, + "overall leaks are caught; we must be using a stripped binary"); + } + + CHECK(HeapLeakChecker::NoGlobalLeaks()); // so far, so good + + return Pass(); +} diff --git a/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.sh b/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.sh new file mode 100644 index 0000000..765e6c7 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/heap-checker_unittest.sh @@ -0,0 +1,89 @@ +#!/bin/sh + +# Copyright (c) 2005, Google Inc. 
+# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Author: Craig Silverstein +# +# Runs the heap-checker unittest with various environment variables. +# This is necessary because we turn on features like the heap profiler +# and heap checker via environment variables. This test makes sure +# they all play well together. + +# We expect BINDIR and PPROF_PATH to be set in the environment. +# If not, we set them to some reasonable values +BINDIR="${BINDIR:-.}" +PPROF_PATH="${PPROF_PATH:-$BINDIR/src/pprof}" + +if [ "x$1" = "x-h" -o "$1" = "x--help" ]; then + echo "USAGE: $0 [unittest dir] [path to pprof]" + echo " By default, unittest_dir=$BINDIR, pprof_path=$PPROF_PATH" + exit 1 +fi + +HEAP_CHECKER="${1:-$BINDIR}/heap-checker_unittest" +PPROF_PATH="${2:-$PPROF_PATH}" + +TMPDIR=/tmp/heap_check_info +rm -rf $TMPDIR || exit 2 +mkdir $TMPDIR || exit 3 + +# $1: value of heap-check env. var. +run_check() { + export PPROF_PATH="$PPROF_PATH" + [ -n "$1" ] && export HEAPCHECK="$1" || unset HEAPPROFILE + + echo -n "Testing $HEAP_CHECKER with HEAPCHECK=$1 ... " + if $HEAP_CHECKER > $TMPDIR/output 2>&1; then + echo "OK" + else + echo "FAILED" + echo "Output from the failed run:" + echo "----" + cat $TMPDIR/output + echo "----" + exit 4 + fi + + # If we set HEAPPROFILE, then we expect it to actually have emitted + # a profile. Check that it did. + if [ -n "$HEAPPROFILE" ]; then + [ -e "$HEAPPROFILE.0001.heap" ] || exit 5 + fi +} + +run_check "" +run_check "local" +run_check "normal" +run_check "strict" + +rm -rf $TMPDIR # clean up + +echo "PASS" diff --git a/third_party/tcmalloc/chromium/src/tests/heap-profiler_unittest.cc b/third_party/tcmalloc/chromium/src/tests/heap-profiler_unittest.cc new file mode 100644 index 0000000..e5dedee --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/heap-profiler_unittest.cc @@ -0,0 +1,163 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// A small program that just exercises our heap profiler by allocating +// memory and letting the heap-profiler emit a profile. We don't test +// threads (TODO). By itself, this unittest tests that the heap-profiler +// doesn't crash on simple programs, but its output can be analyzed by +// another testing script to actually verify correctness. See, eg, +// heap-profiler_unittest.sh. + +#include "config_for_unittests.h" +#include <stdlib.h> +#include <stdio.h> +#include <fcntl.h> // for mkdir() +#include <sys/stat.h> // for mkdir() on freebsd and os x +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for fork() +#endif +#include <sys/wait.h> // for wait() +#include <string> +#include "base/basictypes.h" +#include "base/logging.h" +#include <google/heap-profiler.h> + +using std::string; + +static const int kMaxCount = 100000; +int* g_array[kMaxCount]; // an array of int-vectors + +static ATTRIBUTE_NOINLINE void Allocate(int start, int end, int size) { + for (int i = start; i < end; ++i) { + if (i < kMaxCount) + g_array[i] = new int[size]; + } +} + +static ATTRIBUTE_NOINLINE void Allocate2(int start, int end, int size) { + for (int i = start; i < end; ++i) { + if (i < kMaxCount) + g_array[i] = new int[size]; + } +} + +static void Deallocate(int start, int end) { + for (int i = start; i < end; ++i) { + delete[] g_array[i]; + g_array[i] = 0; + } +} + +static void TestHeapProfilerStartStopIsRunning() { + // If you run this with whole-program heap-profiling on, than + // IsHeapProfilerRunning should return true. 
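+  // In that case the start/stop sequence below is skipped, so the test does
+  // not restart the profiler on top of a profile that is already being
+  // collected.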
+ if (!IsHeapProfilerRunning()) { + const char* tmpdir = getenv("TMPDIR"); + if (tmpdir == NULL) + tmpdir = "/tmp"; + mkdir(tmpdir, 0755); // if necessary + HeapProfilerStart((string(tmpdir) + "/start_stop").c_str()); + CHECK(IsHeapProfilerRunning()); + + Allocate(0, 40, 100); + Deallocate(0, 40); + + HeapProfilerStop(); + CHECK(!IsHeapProfilerRunning()); + } +} + +static void TestDumpHeapProfiler() { + // If you run this with whole-program heap-profiling on, than + // IsHeapProfilerRunning should return true. + if (!IsHeapProfilerRunning()) { + const char* tmpdir = getenv("TMPDIR"); + if (tmpdir == NULL) + tmpdir = "/tmp"; + mkdir(tmpdir, 0755); // if necessary + HeapProfilerStart((string(tmpdir) + "/dump").c_str()); + CHECK(IsHeapProfilerRunning()); + + Allocate(0, 40, 100); + Deallocate(0, 40); + + char* output = GetHeapProfile(); + free(output); + HeapProfilerStop(); + } +} + + +int main(int argc, char** argv) { + if (argc > 2 || (argc == 2 && argv[1][0] == '-')) { + printf("USAGE: %s [number of children to fork]\n", argv[0]); + exit(0); + } + int num_forks = 0; + if (argc == 2) { + num_forks = atoi(argv[1]); + } + + TestHeapProfilerStartStopIsRunning(); + TestDumpHeapProfiler(); + + Allocate(0, 40, 100); + Deallocate(0, 40); + + Allocate(0, 40, 100); + Allocate(0, 40, 100); + Allocate2(40, 400, 1000); + Allocate2(400, 1000, 10000); + Deallocate(0, 1000); + + Allocate(0, 100, 100000); + Deallocate(0, 10); + Deallocate(10, 20); + Deallocate(90, 100); + Deallocate(20, 90); + + while (num_forks-- > 0) { + switch (fork()) { + case -1: + printf("FORK failed!\n"); + return 1; + case 0: // child + return execl(argv[0], argv[0], NULL); // run child with no args + default: + wait(NULL); // we'll let the kids run one at a time + } + } + + printf("DONE.\n"); + + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/heap-profiler_unittest.sh b/third_party/tcmalloc/chromium/src/tests/heap-profiler_unittest.sh new file mode 100644 index 0000000..ad0a1ec --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/heap-profiler_unittest.sh @@ -0,0 +1,150 @@ +#!/bin/sh + +# Copyright (c) 2005, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Author: Craig Silverstein +# +# Runs the heap-profiler unittest and makes sure the profile looks appropriate. +# +# We run under the assumption that if $HEAP_PROFILER is run with --help, +# it prints a usage line of the form +# USAGE: <actual executable being run> [...] +# +# This is because libtool sometimes turns the 'executable' into a +# shell script which runs an actual binary somewhere else. + +# We expect BINDIR and PPROF_PATH to be set in the environment. +# If not, we set them to some reasonable values +BINDIR="${BINDIR:-.}" +PPROF_PATH="${PPROF_PATH:-$BINDIR/src/pprof}" + +if [ "x$1" = "x-h" -o "x$1" = "x--help" ]; then + echo "USAGE: $0 [unittest dir] [path to pprof]" + echo " By default, unittest_dir=$BINDIR, pprof_path=$PPROF_PATH" + exit 1 +fi + +HEAP_PROFILER="${1:-$BINDIR}/heap-profiler_unittest" +PPROF="${2:-$PPROF_PATH}" +TEST_TMPDIR=/tmp/heap_profile_info + +# It's meaningful to the profiler, so make sure we know its state +unset HEAPPROFILE + +rm -rf "$TEST_TMPDIR" +mkdir "$TEST_TMPDIR" || exit 2 + +num_failures=0 + +# Given one profile (to check the contents of that profile) or two +# profiles (to check the diff between the profiles), and a function +# name, verify that the function name takes up at least 90% of the +# allocated memory. The function name is actually specified first. +VerifyMemFunction() { + function="$1" + shift + + # get program name. Note we have to unset HEAPPROFILE so running + # help doesn't overwrite existing profiles. + exec=`unset HEAPPROFILE; $HEAP_PROFILER --help | awk '{print $2; exit;}'` + + if [ $# = 2 ]; then + [ -f "$1" ] || { echo "Profile not found: $1"; exit 1; } + [ -f "$2" ] || { echo "Profile not found: $2"; exit 1; } + $PPROF --base="$1" $exec "$2" >"$TEST_TMPDIR/output.pprof" 2>&1 + else + [ -f "$1" ] || { echo "Profile not found: $1"; exit 1; } + $PPROF $exec "$1" >"$TEST_TMPDIR/output.pprof" 2>&1 + fi + + cat "$TEST_TMPDIR/output.pprof" \ + | tr -d % | awk '$6 ~ /^'$function'$/ && $2 > 90 {exit 1;}' + if [ $? != 1 ]; then + echo + echo "--- Test failed for $function: didn't account for 90% of executable memory" + echo "--- Program output:" + cat "$TEST_TMPDIR/output" + echo "--- pprof output:" + cat "$TEST_TMPDIR/output.pprof" + echo "---" + num_failures=`expr $num_failures + 1` + fi +} + +VerifyOutputContains() { + text="$1" + + if ! 
grep "$text" "$TEST_TMPDIR/output" >/dev/null 2>&1; then
+ echo "--- Test failed: output does not contain '$text'"
+ echo "--- Program output:"
+ cat "$TEST_TMPDIR/output"
+ echo "---"
+ num_failures=`expr $num_failures + 1`
+ fi
+}
+
+HEAPPROFILE="$TEST_TMPDIR/test"
+HEAP_PROFILE_INUSE_INTERVAL="10240" # need this to be 10Kb
+HEAP_PROFILE_ALLOCATION_INTERVAL="$HEAP_PROFILE_INUSE_INTERVAL"
+HEAP_PROFILE_DEALLOCATION_INTERVAL="$HEAP_PROFILE_INUSE_INTERVAL"
+export HEAPPROFILE
+export HEAP_PROFILE_INUSE_INTERVAL
+export HEAP_PROFILE_ALLOCATION_INTERVAL
+export HEAP_PROFILE_DEALLOCATION_INTERVAL
+
+# We make the unittest run a child process, to test that the child
+# process doesn't try to write a heap profile as well and step on the
+# parent's toes. If it does, we expect the parent-test to fail.
+$HEAP_PROFILER 1 >$TEST_TMPDIR/output 2>&1 # run program, with 1 child proc
+
+VerifyMemFunction Allocate2 "$HEAPPROFILE.1329.heap"
+VerifyMemFunction Allocate "$HEAPPROFILE.1448.heap" "$HEAPPROFILE.1548.heap"
+
+# Check the child process got to emit its own profile as well.
+VerifyMemFunction Allocate2 "$HEAPPROFILE"_*.1329.heap
+VerifyMemFunction Allocate "$HEAPPROFILE"_*.1448.heap "$HEAPPROFILE"_*.1548.heap
+
+# Make sure we logged both about allocating and deallocating memory
+VerifyOutputContains "62 MB allocated"
+VerifyOutputContains "62 MB freed"
+
+# Now try running without HEAPPROFILE specified, to allow
+# testing of the HeapProfilerStart/Stop functionality.
+$HEAP_PROFILER >"$TEST_TMPDIR/output2" 2>&1
+
+rm -rf "$TEST_TMPDIR" # clean up
+
+if [ $num_failures = 0 ]; then
+ echo "PASS"
+else
+ echo "Tests finished with $num_failures failures"
+fi
+exit $num_failures
diff --git a/third_party/tcmalloc/chromium/src/tests/low_level_alloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/low_level_alloc_unittest.cc
new file mode 100644
index 0000000..f98f8a5
--- /dev/null
+++ b/third_party/tcmalloc/chromium/src/tests/low_level_alloc_unittest.cc
@@ -0,0 +1,204 @@
+/* Copyright (c) 2006, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +// A test for low_level_alloc.cc + +#include <stdio.h> +#include <map> +#include "base/low_level_alloc.h" +#include "base/logging.h" +#include <google/malloc_hook.h> + +using std::map; + +// a block of memory obtained from the allocator +struct BlockDesc { + char *ptr; // pointer to memory + int len; // number of bytes + int fill; // filled with data starting with this +}; + +// Check that the pattern placed in the block d +// by RandomizeBlockDesc is still there. +static void CheckBlockDesc(const BlockDesc &d) { + for (int i = 0; i != d.len; i++) { + CHECK((d.ptr[i] & 0xff) == ((d.fill + i) & 0xff)); + } +} + +// Fill the block "*d" with a pattern +// starting with a random byte. +static void RandomizeBlockDesc(BlockDesc *d) { + d->fill = rand() & 0xff; + for (int i = 0; i != d->len; i++) { + d->ptr[i] = (d->fill + i) & 0xff; + } +} + +// Use to indicate to the malloc hooks that +// this calls is from LowLevelAlloc. +static bool using_low_level_alloc = false; + +// n times, toss a coin, and based on the outcome +// either allocate a new block or deallocate an old block. +// New blocks are placed in a map with a random key +// and initialized with RandomizeBlockDesc(). +// If keys conflict, the older block is freed. +// Old blocks are always checked with CheckBlockDesc() +// before being freed. At the end of the run, +// all remaining allocated blocks are freed. +// If use_new_arena is true, use a fresh arena, and then delete it. +// If call_malloc_hook is true and user_arena is true, +// allocations and deallocations are reported via the MallocHook +// interface. +static void Test(bool use_new_arena, bool call_malloc_hook, int n) { + typedef map<int, BlockDesc> AllocMap; + AllocMap allocated; + AllocMap::iterator it; + BlockDesc block_desc; + int rnd; + LowLevelAlloc::Arena *arena = 0; + if (use_new_arena) { + int32 flags = call_malloc_hook? LowLevelAlloc::kCallMallocHook : 0; + arena = LowLevelAlloc::NewArena(flags, LowLevelAlloc::DefaultArena()); + } + for (int i = 0; i != n; i++) { + if (i != 0 && i % 10000 == 0) { + printf("."); + fflush(stdout); + } + + switch(rand() & 1) { // toss a coin + case 0: // coin came up heads: add a block + using_low_level_alloc = true; + block_desc.len = rand() & 0x3fff; + block_desc.ptr = + reinterpret_cast<char *>( + arena == 0 + ? 
LowLevelAlloc::Alloc(block_desc.len) + : LowLevelAlloc::AllocWithArena(block_desc.len, arena)); + using_low_level_alloc = false; + RandomizeBlockDesc(&block_desc); + rnd = rand(); + it = allocated.find(rnd); + if (it != allocated.end()) { + CheckBlockDesc(it->second); + using_low_level_alloc = true; + LowLevelAlloc::Free(it->second.ptr); + using_low_level_alloc = false; + it->second = block_desc; + } else { + allocated[rnd] = block_desc; + } + break; + case 1: // coin came up tails: remove a block + it = allocated.begin(); + if (it != allocated.end()) { + CheckBlockDesc(it->second); + using_low_level_alloc = true; + LowLevelAlloc::Free(it->second.ptr); + using_low_level_alloc = false; + allocated.erase(it); + } + break; + } + } + // remove all remaniing blocks + while ((it = allocated.begin()) != allocated.end()) { + CheckBlockDesc(it->second); + using_low_level_alloc = true; + LowLevelAlloc::Free(it->second.ptr); + using_low_level_alloc = false; + allocated.erase(it); + } + if (use_new_arena) { + CHECK(LowLevelAlloc::DeleteArena(arena)); + } +} + +// used for counting allocates and frees +static int32 allocates; +static int32 frees; +static MallocHook::NewHook old_alloc_hook; +static MallocHook::DeleteHook old_free_hook; + +// called on each alloc if kCallMallocHook specified +static void AllocHook(const void *p, size_t size) { + if (using_low_level_alloc) { + allocates++; + } + if (old_alloc_hook != 0) { + (*old_alloc_hook)(p, size); + } +} + +// called on each free if kCallMallocHook specified +static void FreeHook(const void *p) { + if (using_low_level_alloc) { + frees++; + } + if (old_free_hook != 0) { + (*old_free_hook)(p); + } +} + +int main(int argc, char *argv[]) { + // This is needed by maybe_threads_unittest.sh, which parses argv[0] + // to figure out what directory low_level_alloc_unittest is in. + if (argc != 1) { + fprintf(stderr, "USAGE: %s\n", argv[0]); + return 1; + } + + old_alloc_hook = MallocHook::SetNewHook(AllocHook); + old_free_hook = MallocHook::SetDeleteHook(FreeHook); + CHECK_EQ(allocates, 0); + CHECK_EQ(frees, 0); + Test(false, false, 50000); + CHECK_NE(allocates, 0); // default arena calls hooks + CHECK_NE(frees, 0); + for (int i = 0; i != 16; i++) { + bool call_hooks = ((i & 1) == 1); + allocates = 0; + frees = 0; + Test(true, call_hooks, 15000); + if (call_hooks) { + CHECK_GT(allocates, 5000); // arena calls hooks + CHECK_GT(frees, 5000); + } else { + CHECK_EQ(allocates, 0); // arena doesn't call hooks + CHECK_EQ(frees, 0); + } + } + printf("\nPASS\n"); + CHECK_EQ(MallocHook::SetNewHook(old_alloc_hook), AllocHook); + CHECK_EQ(MallocHook::SetDeleteHook(old_free_hook), FreeHook); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c b/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c new file mode 100644 index 0000000..b6319a1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/malloc_extension_c_test.c @@ -0,0 +1,129 @@ +/* Copyright (c) 2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + * + * This tests the c shims: malloc_extension_c.h and malloc_hook_c.h. + * Mostly, we'll just care that these shims compile under gcc + * (*not* g++!) + * + * NOTE: this is C code, not C++ code! + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> /* for size_t */ +#include <google/malloc_extension_c.h> +#include <google/malloc_hook_c.h> + +#define FAIL(msg) do { \ + fprintf(stderr, "FATAL ERROR: %s\n", msg); \ + exit(1); \ +} while (0) + +static int g_new_hook_calls = 0; +static int g_delete_hook_calls = 0; + +void TestNewHook(const void* ptr, size_t size) { + g_new_hook_calls++; +} + +void TestDeleteHook(const void* ptr) { + g_delete_hook_calls++; +} + +void TestMallocHook(void) { + /* TODO(csilvers): figure out why we get: + * E0100 00:00:00.000000 7383 malloc_hook.cc:244] RAW: google_malloc section is missing, thus InHookCaller is broken! 
+ */ +#if 0 + void* result[5]; + + if (MallocHook_GetCallerStackTrace(result, sizeof(result)/sizeof(*result), + 0) < 2) { /* should have this and main */ + FAIL("GetCallerStackTrace failed"); + } +#endif + + MallocHook_SetNewHook(&TestNewHook); + MallocHook_SetDeleteHook(&TestDeleteHook); + free(malloc(10)); + free(malloc(20)); + if (g_new_hook_calls != 2) { + FAIL("Wrong number of calls to the new hook"); + } + if (g_delete_hook_calls != 2) { + FAIL("Wrong number of calls to the delete hook"); + } +} + +void TestMallocExtension(void) { + int blocks; + size_t total; + int hist[64]; + char buffer[200]; + char* x = (char*)malloc(10); + + MallocExtension_VerifyAllMemory(); + MallocExtension_VerifyMallocMemory(x); + MallocExtension_MallocMemoryStats(&blocks, &total, hist); + MallocExtension_GetStats(buffer, sizeof(buffer)); + if (!MallocExtension_GetNumericProperty("generic.current_allocated_bytes", + &total)) { + FAIL("GetNumericProperty failed for generic.current_allocated_bytes"); + } + if (total < 10) { + FAIL("GetNumericProperty had bad return for generic.current_allocated_bytes"); + } + if (!MallocExtension_GetNumericProperty("generic.current_allocated_bytes", + &total)) { + FAIL("GetNumericProperty failed for generic.current_allocated_bytes"); + } + MallocExtension_MarkThreadIdle(); + MallocExtension_MarkThreadBusy(); + MallocExtension_ReleaseToSystem(1); + MallocExtension_ReleaseFreeMemory(); + if (MallocExtension_GetEstimatedAllocatedSize(10) < 10) { + FAIL("GetEstimatedAllocatedSize returned a bad value (too small)"); + } + if (MallocExtension_GetAllocatedSize(x) < 10) { + FAIL("GetEstimatedAllocatedSize returned a bad value (too small)"); + } + + free(x); +} + +int main(int argc, char** argv) { + TestMallocHook(); + TestMallocExtension(); + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc b/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc new file mode 100644 index 0000000..1f00f73 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/malloc_extension_test.cc @@ -0,0 +1,77 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// Simple test of malloc_extension. Includes test of C shims. + +#include "config_for_unittests.h" +#include <stdio.h> +#include <sys/types.h> +#include "base/logging.h" +#include <google/malloc_extension.h> +#include <google/malloc_extension_c.h> + +int main(int argc, char** argv) { + void* a = malloc(1000); + + size_t cxx_bytes_used, c_bytes_used; + CHECK(MallocExtension::instance()->GetNumericProperty( + "generic.current_allocated_bytes", &cxx_bytes_used)); + CHECK(MallocExtension_GetNumericProperty( + "generic.current_allocated_bytes", &c_bytes_used)); + CHECK_GT(cxx_bytes_used, 1000); + CHECK_EQ(cxx_bytes_used, c_bytes_used); + + CHECK(MallocExtension::instance()->VerifyAllMemory()); + CHECK(MallocExtension_VerifyAllMemory()); + + CHECK_GE(MallocExtension::instance()->GetAllocatedSize(a), 1000); + // This is just a sanity check. If we allocated too much, tcmalloc is broken + CHECK_LE(MallocExtension::instance()->GetAllocatedSize(a), 5000); + CHECK_GE(MallocExtension::instance()->GetEstimatedAllocatedSize(1000), 1000); + + for (int i = 0; i < 10; ++i) { + void *p = malloc(i); + CHECK_GE(MallocExtension::instance()->GetAllocatedSize(p), + MallocExtension::instance()->GetEstimatedAllocatedSize(i)); + free(p); + } + + // Check the c-shim version too. + CHECK_GE(MallocExtension_GetAllocatedSize(a), 1000); + CHECK_LE(MallocExtension_GetAllocatedSize(a), 5000); + CHECK_GE(MallocExtension_GetEstimatedAllocatedSize(1000), 1000); + + free(a); + + printf("DONE\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/markidle_unittest.cc b/third_party/tcmalloc/chromium/src/tests/markidle_unittest.cc new file mode 100644 index 0000000..ac9971f --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/markidle_unittest.cc @@ -0,0 +1,108 @@ +// Copyright (c) 2003, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// MallocExtension::MarkThreadIdle() testing +#include <stdio.h> + +#include "config_for_unittests.h" +#include "base/logging.h" +#include <google/malloc_extension.h> +#include "tests/testutil.h" // for RunThread() + +// Helper routine to do lots of allocations +static void TestAllocation() { + static const int kNum = 100; + void* ptr[kNum]; + for (int size = 8; size <= 65536; size*=2) { + for (int i = 0; i < kNum; i++) { + ptr[i] = malloc(size); + } + for (int i = 0; i < kNum; i++) { + free(ptr[i]); + } + } +} + +// Routine that does a bunch of MarkThreadIdle() calls in sequence +// without any intervening allocations +static void MultipleIdleCalls() { + for (int i = 0; i < 4; i++) { + MallocExtension::instance()->MarkThreadIdle(); + } +} + +// Routine that does a bunch of MarkThreadIdle() calls in sequence +// with intervening allocations +static void MultipleIdleNonIdlePhases() { + for (int i = 0; i < 4; i++) { + TestAllocation(); + MallocExtension::instance()->MarkThreadIdle(); + } +} + +// Get current thread cache usage +static size_t GetTotalThreadCacheSize() { + size_t result; + CHECK(MallocExtension::instance()->GetNumericProperty( + "tcmalloc.current_total_thread_cache_bytes", + &result)); + return result; +} + +// Check that MarkThreadIdle() actually reduces the amount +// of per-thread memory. +static void TestIdleUsage() { + const size_t original = GetTotalThreadCacheSize(); + + TestAllocation(); + const size_t post_allocation = GetTotalThreadCacheSize(); + CHECK_GT(post_allocation, original); + + MallocExtension::instance()->MarkThreadIdle(); + const size_t post_idle = GetTotalThreadCacheSize(); + CHECK_LE(post_idle, original); + + // Log after testing because logging can allocate heap memory. + VLOG(0, "Original usage: %"PRIuS"\n", original); + VLOG(0, "Post allocation: %"PRIuS"\n", post_allocation); + VLOG(0, "Post idle: %"PRIuS"\n", post_idle); +} + +int main(int argc, char** argv) { + RunThread(&TestIdleUsage); + RunThread(&TestAllocation); + RunThread(&MultipleIdleCalls); + RunThread(&MultipleIdleNonIdlePhases); + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/maybe_threads_unittest.sh b/third_party/tcmalloc/chromium/src/tests/maybe_threads_unittest.sh new file mode 100644 index 0000000..77b3b78 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/maybe_threads_unittest.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +# Copyright (c) 2007, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Author: Craig Silverstein +# +# maybe_threads.cc was written to allow LD_PRELOAD=libtcmalloc.so to +# work even on binaries that were not linked with pthreads. This +# unittest tests that, by running low_level_alloc_unittest with an +# LD_PRELOAD. (low_level_alloc_unittest was chosen because it doesn't +# link in tcmalloc.) +# +# We assume all the .so files are in the same directory as both +# addressmap_unittest and profiler1_unittest. The reason we need +# profiler1_unittest is because it's instrumented to show the directory +# it's "really" in when run without any args. In practice this will either +# be BINDIR, or, when using libtool, BINDIR/.lib. + +# We expect BINDIR to be set in the environment. +# If not, we set them to some reasonable values. +BINDIR="${BINDIR:-.}" + +if [ "x$1" = "x-h" -o "x$1" = "x--help" ]; then + echo "USAGE: $0 [unittest dir]" + echo " By default, unittest_dir=$BINDIR" + exit 1 +fi + +UNITTEST_DIR=${1:-$BINDIR} + +# Figure out the "real" unittest directory. Also holds the .so files. +UNITTEST_DIR=`$UNITTEST_DIR/low_level_alloc_unittest --help 2>&1 \ + | awk '{print $2; exit;}' \ + | xargs dirname` + +# Figure out where libtcmalloc lives. It should be in UNITTEST_DIR, +# but with libtool it might be in a subdir. +if [ -r "$UNITTEST_DIR/libtcmalloc_minimal.so" ]; then + LIB_PATH="$UNITTEST_DIR/libtcmalloc_minimal.so" +elif [ -r "$UNITTEST_DIR/.libs/libtcmalloc_minimal.so" ]; then + LIB_PATH="$UNITTEST_DIR/.libs/libtcmalloc_minimal.so" +elif [ -r "$UNITTEST_DIR/libtcmalloc_minimal.dylib" ]; then # for os x + LIB_PATH="$UNITTEST_DIR/libtcmalloc_minimal.dylib" +elif [ -r "$UNITTEST_DIR/.libs/libtcmalloc_minimal.dylib" ]; then + LIB_PATH="$UNITTEST_DIR/.libs/libtcmalloc_minimal.dylib" +else + echo "Cannot run $0: cannot find libtcmalloc_minimal.so" + exit 2 +fi + +LD_PRELOAD="$LIB_PATH" $UNITTEST_DIR/low_level_alloc_unittest diff --git a/third_party/tcmalloc/chromium/src/tests/memalign_unittest.cc b/third_party/tcmalloc/chromium/src/tests/memalign_unittest.cc new file mode 100644 index 0000000..d5b60db --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/memalign_unittest.cc @@ -0,0 +1,214 @@ +// Copyright (c) 2004, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Check memalign related routines. +// +// We can't really do a huge amount of checking, but at the very +// least, the following code checks that return values are properly +// aligned, and that writing into the objects works. + +#include "config_for_unittests.h" + +// Complicated ordering requirements. tcmalloc.h defines (indirectly) +// _POSIX_C_SOURCE, which it needs so stdlib.h defines posix_memalign. +// unistd.h, on the other hand, requires _POSIX_C_SOURCE to be unset, +// at least on Mac OS X, in order to define getpagesize. The solution +// is to #include unistd.h first. This is safe because unistd.h +// doesn't sub-include stdlib.h, so we'll still get posix_memalign +// when we #include stdlib.h. Blah. +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for getpagesize() +#endif +#include "tcmalloc.h" // must come early, to pick up posix_memalign +#include <stdlib.h> // defines posix_memalign +#include <stdio.h> // for the printf at the end +#ifdef HAVE_STDINT_H +#include <stdint.h> // for uintptr_t +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for getpagesize() +#endif +#ifdef HAVE_MALLOC_H +#include <malloc.h> +#endif +#include "base/basictypes.h" +#include "base/logging.h" +#include "tests/testutil.h" + + +// Return the next interesting size/delta to check. Returns -1 if no more. 
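+// (Repeatedly applying this starting from 0 yields 1,2,...,100, then
+// 127,128,129, 255,256,257, ..., up through 1048575 and 1048576, and
+// finally -1.)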
+static int NextSize(int size) { + if (size < 100) { + return size+1; + } else if (size < 1048576) { + // Find next power of two + int power = 1; + while (power < size) { + power <<= 1; + } + + // Yield (power-1, power, power+1) + if (size < power-1) { + return power-1; + } else if (size == power-1) { + return power; + } else { + assert(size == power); + return power+1; + } + } else { + return -1; + } +} + +// Shortform for cast +static uintptr_t Number(void* p) { + return reinterpret_cast<uintptr_t>(p); +} + +// Check alignment +static void CheckAlignment(void* p, int align) { + if ((Number(p) & (align-1)) != 0) + LOG(FATAL, "wrong alignment; wanted 0x%x; got %p\n", align, p); +} + +// Fill a buffer of the specified size with a predetermined pattern +static void Fill(void* p, int n, char seed) { + unsigned char* buffer = reinterpret_cast<unsigned char*>(p); + for (int i = 0; i < n; i++) { + buffer[i] = ((seed + i) & 0xff); + } +} + +// Check that the specified buffer has the predetermined pattern +// generated by Fill() +static bool Valid(const void* p, int n, char seed) { + const unsigned char* buffer = reinterpret_cast<const unsigned char*>(p); + for (int i = 0; i < n; i++) { + if (buffer[i] != ((seed + i) & 0xff)) { + return false; + } + } + return true; +} + +int main(int argc, char** argv) { + SetTestResourceLimit(); + + // Try allocating data with a bunch of alignments and sizes + for (int a = 1; a < 1048576; a *= 2) { + for (int s = 0; s != -1; s = NextSize(s)) { + void* ptr = memalign(a, s); + CheckAlignment(ptr, a); + Fill(ptr, s, 'x'); + CHECK(Valid(ptr, s, 'x')); + free(ptr); + + if ((a >= sizeof(void*)) && ((a & (a-1)) == 0)) { + CHECK(posix_memalign(&ptr, a, s) == 0); + CheckAlignment(ptr, a); + Fill(ptr, s, 'y'); + CHECK(Valid(ptr, s, 'y')); + free(ptr); + } + } + } + + { + // Check various corner cases + void* p1 = memalign(1<<20, 1<<19); + void* p2 = memalign(1<<19, 1<<19); + void* p3 = memalign(1<<21, 1<<19); + CheckAlignment(p1, 1<<20); + CheckAlignment(p2, 1<<19); + CheckAlignment(p3, 1<<21); + Fill(p1, 1<<19, 'a'); + Fill(p2, 1<<19, 'b'); + Fill(p3, 1<<19, 'c'); + CHECK(Valid(p1, 1<<19, 'a')); + CHECK(Valid(p2, 1<<19, 'b')); + CHECK(Valid(p3, 1<<19, 'c')); + free(p1); + free(p2); + free(p3); + } + + { + // posix_memalign + void* ptr; + CHECK(posix_memalign(&ptr, 0, 1) == EINVAL); + CHECK(posix_memalign(&ptr, sizeof(void*)/2, 1) == EINVAL); + CHECK(posix_memalign(&ptr, sizeof(void*)+1, 1) == EINVAL); + CHECK(posix_memalign(&ptr, 4097, 1) == EINVAL); + + // Grab some memory so that the big allocation below will definitely fail. 
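+  // (The "big" sizes below are computed as 0 - i on a size_t, which wraps
+  // around to just under SIZE_MAX, so posix_memalign cannot satisfy them and
+  // should report ENOMEM.)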
+ void* p_small = malloc(4*1048576); + CHECK(p_small != NULL); + + // Make sure overflow is returned as ENOMEM + const size_t zero = 0; + static const size_t kMinusNTimes = 10; + for ( size_t i = 1; i < kMinusNTimes; ++i ) { + int r = posix_memalign(&ptr, 1024, zero - i); + CHECK(r == ENOMEM); + } + + free(p_small); + } + + const int pagesize = getpagesize(); + { + // valloc + for (int s = 0; s != -1; s = NextSize(s)) { + void* p = valloc(s); + CheckAlignment(p, pagesize); + Fill(p, s, 'v'); + CHECK(Valid(p, s, 'v')); + free(p); + } + } + + { + // pvalloc + for (int s = 0; s != -1; s = NextSize(s)) { + void* p = pvalloc(s); + CheckAlignment(p, pagesize); + int alloc_needed = ((s + pagesize - 1) / pagesize) * pagesize; + Fill(p, alloc_needed, 'x'); + CHECK(Valid(p, alloc_needed, 'x')); + free(p); + } + } + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/packed-cache_test.cc b/third_party/tcmalloc/chromium/src/tests/packed-cache_test.cc new file mode 100644 index 0000000..7f9aea6 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/packed-cache_test.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Geoff Pike + +#include <stdio.h> +#include "base/logging.h" +#include "packed-cache-inl.h" + +static const int kHashbits = PackedCache<64, uint64>::kHashbits; + +// A basic sanity test. +void PackedCacheTest_basic() { + PackedCache<32, uint32> cache(0); + CHECK_EQ(cache.GetOrDefault(0, 1), 0); + cache.Put(0, 17); + CHECK(cache.Has(0)); + CHECK_EQ(cache.GetOrDefault(0, 1), 17); + cache.Put(19, 99); + CHECK(cache.Has(0) && cache.Has(19)); + CHECK_EQ(cache.GetOrDefault(0, 1), 17); + CHECK_EQ(cache.GetOrDefault(19, 1), 99); + // Knock <0, 17> out by using a conflicting key. 
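+  // (1 << kHashbits presumably maps to the same direct-mapped slot as key 0,
+  // since only the low kHashbits bits of the key select the slot.)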
+ cache.Put(1 << kHashbits, 22); + CHECK(!cache.Has(0)); + CHECK_EQ(cache.GetOrDefault(0, 1), 1); + CHECK_EQ(cache.GetOrDefault(1 << kHashbits, 1), 22); +} + +int main(int argc, char **argv) { + PackedCacheTest_basic(); + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc new file mode 100644 index 0000000..9120b78 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/page_heap_test.cc @@ -0,0 +1,55 @@ +// Copyright 2009 Google Inc. All Rights Reserved. +// Author: fikes@google.com (Andrew Fikes) + +#include <stdio.h> +#include "config_for_unittests.h" +#include "base/logging.h" +#include "common.h" +#include "page_heap.h" + +namespace { + +static void CheckStats(const tcmalloc::PageHeap* ph, + uint64_t system_pages, + uint64_t free_pages, + uint64_t unmapped_pages) { + tcmalloc::PageHeap::Stats stats = ph->stats(); + EXPECT_EQ(system_pages, stats.system_bytes >> kPageShift); + EXPECT_EQ(free_pages, stats.free_bytes >> kPageShift); + EXPECT_EQ(unmapped_pages, stats.unmapped_bytes >> kPageShift); +} + +static void TestPageHeap_Stats() { + tcmalloc::PageHeap* ph = new tcmalloc::PageHeap(); + + // Empty page heap + CheckStats(ph, 0, 0, 0); + + // Allocate a span 's1' + tcmalloc::Span* s1 = ph->New(256); + CheckStats(ph, 256, 0, 0); + + // Split span 's1' into 's1', 's2'. Delete 's2' + tcmalloc::Span* s2 = ph->Split(s1, 128); + Length s2_len = s2->length; + ph->Delete(s2); + CheckStats(ph, 256, 128, 0); + + // Unmap deleted span 's2' + EXPECT_EQ(s2_len, ph->ReleaseAtLeastNPages(1)); + CheckStats(ph, 256, 0, 128); + + // Delete span 's1' + ph->Delete(s1); + CheckStats(ph, 256, 128, 128); + + delete ph; +} + +} // namespace + +int main(int argc, char **argv) { + TestPageHeap_Stats(); + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/pagemap_unittest.cc b/third_party/tcmalloc/chromium/src/tests/pagemap_unittest.cc new file mode 100644 index 0000000..83e76e2 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/pagemap_unittest.cc @@ -0,0 +1,177 @@ +// Copyright (c) 2003, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat + +#include "config_for_unittests.h" +#include <stdio.h> +#include <stdlib.h> +#if defined HAVE_STDINT_H +#include <stdint.h> // to get intptr_t +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place intptr_t might be defined +#endif +#include <sys/types.h> +#include <vector> +#include "base/logging.h" +#include "pagemap.h" + +using std::vector; + +static void Permute(vector<intptr_t>* elements) { + if (elements->empty()) + return; + const size_t num_elements = elements->size(); + for (size_t i = num_elements - 1; i > 0; --i) { + const size_t newpos = rand() % (i + 1); + const intptr_t tmp = (*elements)[i]; // swap + (*elements)[i] = (*elements)[newpos]; + (*elements)[newpos] = tmp; + } +} + +// Note: we leak memory every time a map is constructed, so do not +// create too many maps. + +// Test specified map type +template <class Type> +void TestMap(int limit, bool limit_is_below_the_overflow_boundary) { + RAW_LOG(INFO, "Running test with %d iterations...\n", limit); + + { // Test sequential ensure/assignment + Type map(malloc); + for (intptr_t i = 0; i < static_cast<intptr_t>(limit); i++) { + map.Ensure(i, 1); + map.set(i, (void*)(i+1)); + CHECK_EQ(map.get(i), (void*)(i+1)); + } + for (intptr_t i = 0; i < static_cast<intptr_t>(limit); i++) { + CHECK_EQ(map.get(i), (void*)(i+1)); + } + } + + { // Test bulk Ensure + Type map(malloc); + map.Ensure(0, limit); + for (intptr_t i = 0; i < static_cast<intptr_t>(limit); i++) { + map.set(i, (void*)(i+1)); + CHECK_EQ(map.get(i), (void*)(i+1)); + } + for (intptr_t i = 0; i < static_cast<intptr_t>(limit); i++) { + CHECK_EQ(map.get(i), (void*)(i+1)); + } + } + + // Test that we correctly notice overflow + { + Type map(malloc); + CHECK_EQ(map.Ensure(limit, limit+1), limit_is_below_the_overflow_boundary); + } + + { // Test randomized accesses + srand(301); // srand isn't great, but it's portable + vector<intptr_t> elements; + for (intptr_t i = 0; i < static_cast<intptr_t>(limit); i++) elements.push_back(i); + Permute(&elements); + + Type map(malloc); + for (intptr_t i = 0; i < static_cast<intptr_t>(limit); i++) { + map.Ensure(elements[i], 1); + map.set(elements[i], (void*)(elements[i]+1)); + CHECK_EQ(map.get(elements[i]), (void*)(elements[i]+1)); + } + for (intptr_t i = 0; i < static_cast<intptr_t>(limit); i++) { + CHECK_EQ(map.get(i), (void*)(i+1)); + } + } +} + +// REQUIRES: BITS==10, i.e., valid range is [0,1023]. 
+// Representations for different types will end up being: +// PageMap1: array[1024] +// PageMap2: array[32][32] +// PageMap3: array[16][16][4] +template <class Type> +void TestNext(const char* name) { + RAW_LOG(ERROR, "Running NextTest %s\n", name); + Type map(malloc); + char a, b, c, d, e; + + // When map is empty + CHECK(map.Next(0) == NULL); + CHECK(map.Next(5) == NULL); + CHECK(map.Next(1<<30) == NULL); + + // Add a single value + map.Ensure(40, 1); + map.set(40, &a); + CHECK(map.Next(0) == &a); + CHECK(map.Next(39) == &a); + CHECK(map.Next(40) == &a); + CHECK(map.Next(41) == NULL); + CHECK(map.Next(1<<30) == NULL); + + // Add a few values + map.Ensure(41, 1); + map.Ensure(100, 3); + map.set(41, &b); + map.set(100, &c); + map.set(101, &d); + map.set(102, &e); + CHECK(map.Next(0) == &a); + CHECK(map.Next(39) == &a); + CHECK(map.Next(40) == &a); + CHECK(map.Next(41) == &b); + CHECK(map.Next(42) == &c); + CHECK(map.Next(63) == &c); + CHECK(map.Next(64) == &c); + CHECK(map.Next(65) == &c); + CHECK(map.Next(99) == &c); + CHECK(map.Next(100) == &c); + CHECK(map.Next(101) == &d); + CHECK(map.Next(102) == &e); + CHECK(map.Next(103) == NULL); +} + +int main(int argc, char** argv) { + TestMap< TCMalloc_PageMap1<10> > (100, true); + TestMap< TCMalloc_PageMap1<10> > (1 << 10, false); + TestMap< TCMalloc_PageMap2<20> > (100, true); + TestMap< TCMalloc_PageMap2<20> > (1 << 20, false); + TestMap< TCMalloc_PageMap3<20> > (100, true); + TestMap< TCMalloc_PageMap3<20> > (1 << 20, false); + + TestNext< TCMalloc_PageMap1<10> >("PageMap1"); + TestNext< TCMalloc_PageMap2<10> >("PageMap2"); + TestNext< TCMalloc_PageMap3<10> >("PageMap3"); + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/profile-handler_unittest.cc b/third_party/tcmalloc/chromium/src/tests/profile-handler_unittest.cc new file mode 100644 index 0000000..1e72b2e --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/profile-handler_unittest.cc @@ -0,0 +1,437 @@ +// Copyright 2009 Google Inc. All Rights Reserved. +// Author: Nabeel Mian (nabeelmian@google.com) +// Chris Demetriou (cgd@google.com) +// +// This file contains the unit tests for profile-handler.h interface. + +#include "config.h" +#include "profile-handler.h" + +#include <assert.h> +#include <pthread.h> +#include <sys/time.h> +#include <time.h> +#include "base/logging.h" +#include "base/simple_mutex.h" + +// Some helpful macros for the test class +#define TEST_F(cls, fn) void cls :: fn() + +namespace { + +// TODO(csilvers): error-checking on the pthreads routines +class Thread { + public: + Thread() : joinable_(false) { } + void SetJoinable(bool value) { joinable_ = value; } + void Start() { + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, joinable_ ? PTHREAD_CREATE_JOINABLE + : PTHREAD_CREATE_DETACHED); + pthread_create(&thread_, &attr, &DoRun, this); + pthread_attr_destroy(&attr); + } + void Join() { + assert(joinable_); + pthread_join(thread_, NULL); + } + virtual void Run() = 0; + private: + static void* DoRun(void* cls) { + ProfileHandlerRegisterThread(); + reinterpret_cast<Thread*>(cls)->Run(); + return NULL; + } + pthread_t thread_; + bool joinable_; +}; + +// timespec of the sleep interval. To ensure a SIGPROF timer interrupt under +// heavy load, this is set to a 20x of ProfileHandler timer interval (i.e 100Hz) +// TODO(nabeelmian) Under very heavy loads, the worker thread may not accumulate +// enough cpu usage to get a profile tick. 
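+// (At 100Hz one timer tick is 10ms, so 20 ticks is the 200ms / 200000000ns
+// value used below.)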
+const struct timespec sleep_interval = { 0, 200000000 }; // 200 ms + +// Whether each thread has separate timers. +static bool timer_separate_ = false; + +// Checks whether the profile timer is enabled for the current thread. +bool IsTimerEnabled() { + itimerval current_timer; + EXPECT_EQ(0, getitimer(ITIMER_PROF, ¤t_timer)); + return (current_timer.it_value.tv_sec != 0 || + current_timer.it_value.tv_usec != 0); +} + +class VirtualTimerGetterThread : public Thread { + public: + VirtualTimerGetterThread() { + memset(&virtual_timer_, 0, sizeof virtual_timer_); + } + struct itimerval virtual_timer_; + + private: + void Run() { + CHECK_EQ(0, getitimer(ITIMER_VIRTUAL, &virtual_timer_)); + } +}; + +// This function checks whether the timers are shared between thread. This +// function spawns a thread, so use it carefully when testing thread-dependent +// behaviour. +static bool threads_have_separate_timers() { + struct itimerval new_timer_val; + + // Enable the virtual timer in the current thread. + memset(&new_timer_val, 0, sizeof new_timer_val); + new_timer_val.it_value.tv_sec = 1000000; // seconds + CHECK_EQ(0, setitimer(ITIMER_VIRTUAL, &new_timer_val, NULL)); + + // Spawn a thread, get the virtual timer's value there. + VirtualTimerGetterThread thread; + thread.SetJoinable(true); + thread.Start(); + thread.Join(); + + // Disable timer here. + memset(&new_timer_val, 0, sizeof new_timer_val); + CHECK_EQ(0, setitimer(ITIMER_VIRTUAL, &new_timer_val, NULL)); + + bool target_timer_enabled = (thread.virtual_timer_.it_value.tv_sec != 0 || + thread.virtual_timer_.it_value.tv_usec != 0); + if (!target_timer_enabled) { + LOG(INFO, "threads have separate timers"); + return true; + } else { + LOG(INFO, "threads have shared timers"); + return false; + } +} + +// Dummy worker thread to accumulate cpu time. +class BusyThread : public Thread { + public: + BusyThread() : stop_work_(false) { + } + + // Setter/Getters + bool stop_work() { + MutexLock lock(&mu_); + return stop_work_; + } + void set_stop_work(bool stop_work) { + MutexLock lock(&mu_); + stop_work_ = stop_work; + } + + private: + // Protects stop_work_ below. + Mutex mu_; + // Whether to stop work? + bool stop_work_; + + // Do work until asked to stop. + void Run() { + while (!stop_work()) { + } + // If timers are separate, check that timer is enabled for this thread. + EXPECT_TRUE(!timer_separate_ || IsTimerEnabled()); + } +}; + +class NullThread : public Thread { + private: + void Run() { + // If timers are separate, check that timer is enabled for this thread. + EXPECT_TRUE(!timer_separate_ || IsTimerEnabled()); + } +}; + +// Signal handler which tracks the profile timer ticks. +static void TickCounter(int sig, siginfo_t* sig_info, void *vuc, + void* tick_counter) { + int* counter = static_cast<int*>(tick_counter); + ++(*counter); +} + +// This class tests the profile-handler.h interface. +class ProfileHandlerTest { + protected: + + // Determines whether threads have separate timers. + static void SetUpTestCase() { + timer_separate_ = threads_have_separate_timers(); + } + + // Sets up the profile timers and SIGPROF handler in a known state. It does + // the following: + // 1. Unregisters all the callbacks, stops the timer (if shared) and + // clears out timer_sharing state in the ProfileHandler. This clears + // out any state left behind by the previous test or during module + // initialization when the test program was started. + // 2. Spawns two threads which will be registered with the ProfileHandler. 
+ // At this time ProfileHandler knows if the timers are shared. + // 3. Starts a busy worker thread to accumulate CPU usage. + virtual void SetUp() { + // Reset the state of ProfileHandler between each test. This unregisters + // all callbacks, stops timer (if shared) and clears timer sharing state. + ProfileHandlerReset(); + EXPECT_EQ(GetCallbackCount(), 0); + VerifyDisabled(); + // ProfileHandler requires at least two threads to be registerd to determine + // whether timers are shared. + RegisterThread(); + RegisterThread(); + // Now that two threads are started, verify that the signal handler is + // disabled and the timers are correctly enabled/disabled. + VerifyDisabled(); + // Start worker to accumulate cpu usage. + StartWorker(); + } + + virtual void TearDown() { + ProfileHandlerReset(); + // Stops the worker thread. + StopWorker(); + } + + // Starts a no-op thread that gets registered with the ProfileHandler. Waits + // for the thread to stop. + void RegisterThread() { + NullThread t; + t.SetJoinable(true); + t.Start(); + t.Join(); + } + + // Starts a busy worker thread to accumulate cpu time. There should be only + // one busy worker running. This is required for the case where there are + // separate timers for each thread. + void StartWorker() { + busy_worker_ = new BusyThread(); + busy_worker_->SetJoinable(true); + busy_worker_->Start(); + // Wait for worker to start up and register with the ProfileHandler. + // TODO(nabeelmian) This may not work under very heavy load. + nanosleep(&sleep_interval, NULL); + } + + // Stops the worker thread. + void StopWorker() { + busy_worker_->set_stop_work(true); + busy_worker_->Join(); + delete busy_worker_; + } + + // Checks whether SIGPROF signal handler is enabled. + bool IsSignalEnabled() { + struct sigaction sa; + CHECK_EQ(sigaction(SIGPROF, NULL, &sa), 0); + return ((sa.sa_handler == SIG_IGN) || (sa.sa_handler == SIG_DFL)) ? + false : true; + } + + // Gets the number of callbacks registered with the ProfileHandler. + uint32 GetCallbackCount() { + ProfileHandlerState state; + ProfileHandlerGetState(&state); + return state.callback_count; + } + + // Gets the current ProfileHandler interrupt count. + uint64 GetInterruptCount() { + ProfileHandlerState state; + ProfileHandlerGetState(&state); + return state.interrupts; + } + + // Verifies that a callback is correctly registered and receiving + // profile ticks. + void VerifyRegistration(const int& tick_counter) { + // Check the callback count. + EXPECT_GT(GetCallbackCount(), 0); + // Check that the profile timer is enabled. + EXPECT_TRUE(IsTimerEnabled()); + // Check that the signal handler is enabled. + EXPECT_TRUE(IsSignalEnabled()); + uint64 interrupts_before = GetInterruptCount(); + // Sleep for a bit and check that tick counter is making progress. + int old_tick_count = tick_counter; + nanosleep(&sleep_interval, NULL); + int new_tick_count = tick_counter; + EXPECT_GT(new_tick_count, old_tick_count); + uint64 interrupts_after = GetInterruptCount(); + EXPECT_GT(interrupts_after, interrupts_before); + } + + // Verifies that a callback is not receiving profile ticks. + void VerifyUnregistration(const int& tick_counter) { + // Sleep for a bit and check that tick counter is not making progress. + int old_tick_count = tick_counter; + nanosleep(&sleep_interval, NULL); + int new_tick_count = tick_counter; + EXPECT_EQ(new_tick_count, old_tick_count); + // If no callbacks, signal handler and shared timer should be disabled. 
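+    // (With per-thread timers, the current thread's timer stays armed even
+    // when no callbacks are registered, hence the branch on timer_separate_
+    // below.)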
+ if (GetCallbackCount() == 0) { + EXPECT_FALSE(IsSignalEnabled()); + if (timer_separate_) { + EXPECT_TRUE(IsTimerEnabled()); + } else { + EXPECT_FALSE(IsTimerEnabled()); + } + } + } + + // Verifies that the SIGPROF interrupt handler is disabled and the timer, + // if shared, is disabled. Expects the worker to be running. + void VerifyDisabled() { + // Check that the signal handler is disabled. + EXPECT_FALSE(IsSignalEnabled()); + // Check that the callback count is 0. + EXPECT_EQ(GetCallbackCount(), 0); + // Check that the timer is disabled if shared, enabled otherwise. + if (timer_separate_) { + EXPECT_TRUE(IsTimerEnabled()); + } else { + EXPECT_FALSE(IsTimerEnabled()); + } + // Verify that the ProfileHandler is not accumulating profile ticks. + uint64 interrupts_before = GetInterruptCount(); + nanosleep(&sleep_interval, NULL); + uint64 interrupts_after = GetInterruptCount(); + EXPECT_EQ(interrupts_after, interrupts_before); + } + + // Busy worker thread to accumulate cpu usage. + BusyThread* busy_worker_; + + private: + // The tests to run + void RegisterUnregisterCallback(); + void MultipleCallbacks(); + void Reset(); + void RegisterCallbackBeforeThread(); + + public: +#define RUN(test) do { \ + printf("Running %s\n", #test); \ + ProfileHandlerTest pht; \ + pht.SetUp(); \ + pht.test(); \ + pht.TearDown(); \ +} while (0) + + static int RUN_ALL_TESTS() { + SetUpTestCase(); + RUN(RegisterUnregisterCallback); + RUN(MultipleCallbacks); + RUN(Reset); + RUN(RegisterCallbackBeforeThread); + printf("Done\n"); + return 0; + } +}; + +// Verifies ProfileHandlerRegisterCallback and +// ProfileHandlerUnregisterCallback. +TEST_F(ProfileHandlerTest, RegisterUnregisterCallback) { + int tick_count = 0; + ProfileHandlerToken* token = ProfileHandlerRegisterCallback( + TickCounter, &tick_count); + VerifyRegistration(tick_count); + ProfileHandlerUnregisterCallback(token); + VerifyUnregistration(tick_count); +} + +// Verifies that multiple callbacks can be registered. +TEST_F(ProfileHandlerTest, MultipleCallbacks) { + // Register first callback. + int first_tick_count; + ProfileHandlerToken* token1 = ProfileHandlerRegisterCallback( + TickCounter, &first_tick_count); + // Check that callback was registered correctly. + VerifyRegistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + + // Register second callback. + int second_tick_count; + ProfileHandlerToken* token2 = ProfileHandlerRegisterCallback( + TickCounter, &second_tick_count); + // Check that callback was registered correctly. + VerifyRegistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 2); + + // Unregister first callback. + ProfileHandlerUnregisterCallback(token1); + VerifyUnregistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + // Verify that second callback is still registered. + VerifyRegistration(second_tick_count); + + // Unregister second callback. + ProfileHandlerUnregisterCallback(token2); + VerifyUnregistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 0); + + // Verify that the signal handler and timers are correctly disabled. + VerifyDisabled(); +} + +// Verifies ProfileHandlerReset +TEST_F(ProfileHandlerTest, Reset) { + // Verify that the profile timer interrupt is disabled. + VerifyDisabled(); + int first_tick_count; + ProfileHandlerRegisterCallback(TickCounter, &first_tick_count); + VerifyRegistration(first_tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + + // Register second callback. 
+ int second_tick_count; + ProfileHandlerRegisterCallback(TickCounter, &second_tick_count); + VerifyRegistration(second_tick_count); + EXPECT_EQ(GetCallbackCount(), 2); + + // Reset the profile handler and verify that callback were correctly + // unregistered and timer/signal are disabled. + ProfileHandlerReset(); + VerifyUnregistration(first_tick_count); + VerifyUnregistration(second_tick_count); + VerifyDisabled(); +} + +// Verifies that ProfileHandler correctly handles a case where a callback was +// registered before the second thread started. +TEST_F(ProfileHandlerTest, RegisterCallbackBeforeThread) { + // Stop the worker. + StopWorker(); + // Unregister all existing callbacks, stop the timer (if shared), disable + // the signal handler and reset the timer sharing state in the Profile + // Handler. + ProfileHandlerReset(); + EXPECT_EQ(GetCallbackCount(), 0); + VerifyDisabled(); + + // Start the worker. At this time ProfileHandler doesn't know if timers are + // shared as only one thread has registered so far. + StartWorker(); + // Register a callback and check that profile ticks are being delivered. + int tick_count; + ProfileHandlerRegisterCallback(TickCounter, &tick_count); + EXPECT_EQ(GetCallbackCount(), 1); + VerifyRegistration(tick_count); + + // Register a second thread and verify that timer and signal handler are + // correctly enabled. + RegisterThread(); + EXPECT_EQ(GetCallbackCount(), 1); + EXPECT_TRUE(IsTimerEnabled()); + EXPECT_TRUE(IsSignalEnabled()); +} + +} // namespace + +int main(int argc, char** argv) { + return ProfileHandlerTest::RUN_ALL_TESTS(); +} diff --git a/third_party/tcmalloc/chromium/src/tests/profiledata_unittest.cc b/third_party/tcmalloc/chromium/src/tests/profiledata_unittest.cc new file mode 100644 index 0000000..f569f64 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/profiledata_unittest.cc @@ -0,0 +1,610 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// --- +// Author: Chris Demetriou +// +// This file contains the unit tests for the ProfileData class. + +#if defined HAVE_STDINT_H +#include <stdint.h> // to get uintptr_t +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place uintptr_t might be defined +#endif +#include <sys/stat.h> +#include <sys/types.h> +#include <fcntl.h> +#include <string.h> +#include <string> + +#include "profiledata.h" + +#include "base/commandlineflags.h" +#include "base/logging.h" + +using std::string; + +// Some helpful macros for the test class +#define TEST_F(cls, fn) void cls :: fn() + +namespace { + +template<typename T> class scoped_array { + public: + scoped_array(T* data) : data_(data) { } + ~scoped_array() { delete[] data_; } + T* get() { return data_; } + T& operator[](int i) { return data_[i]; } + private: + T* const data_; +}; + +// Re-runs fn until it doesn't cause EINTR. +#define NO_INTR(fn) do {} while ((fn) < 0 && errno == EINTR) + +// Read up to "count" bytes from file descriptor "fd" into the buffer +// starting at "buf" while handling short reads and EINTR. On +// success, return the number of bytes read. Otherwise, return -1. +static ssize_t ReadPersistent(const int fd, void *buf, const size_t count) { + CHECK_GE(fd, 0); + char *buf0 = reinterpret_cast<char *>(buf); + ssize_t num_bytes = 0; + while (num_bytes < count) { + ssize_t len; + NO_INTR(len = read(fd, buf0 + num_bytes, count - num_bytes)); + if (len < 0) { // There was an error other than EINTR. + return -1; + } + if (len == 0) { // Reached EOF. + break; + } + num_bytes += len; + } + CHECK(num_bytes <= count); + return num_bytes; +} + +// Thin wrapper around a file descriptor so that the file descriptor +// gets closed for sure. +struct FileDescriptor { + const int fd_; + explicit FileDescriptor(int fd) : fd_(fd) {} + ~FileDescriptor() { + if (fd_ >= 0) { + NO_INTR(close(fd_)); + } + } + int get() { return fd_; } +}; + +// must be the same as with ProfileData::Slot. +typedef uintptr_t ProfileDataSlot; + +// Quick and dirty function to make a number into a void* for use in a +// sample. +inline void* V(intptr_t x) { return reinterpret_cast<void*>(x); } + +// String returned by ProfileDataChecker helper functions to indicate success. +const char kNoError[] = ""; + +class ProfileDataChecker { + public: + ProfileDataChecker() { + const char* tmpdir = getenv("TMPDIR"); + if (tmpdir == NULL) + tmpdir = "/tmp"; + mkdir(tmpdir, 0755); // if necessary + filename_ = string(tmpdir) + "/profiledata_unittest.tmp"; + } + + string filename() const { return filename_; } + + // Checks the first 'num_slots' profile data slots in the file + // against the data pointed to by 'slots'. Returns kNoError if the + // data matched, otherwise returns an indication of the cause of the + // mismatch. + string Check(const ProfileDataSlot* slots, int num_slots) { + return CheckWithSkips(slots, num_slots, NULL, 0); + } + + // Checks the first 'num_slots' profile data slots in the file + // against the data pointed to by 'slots', skipping over entries + // described by 'skips' and 'num_skips'. + // + // 'skips' must be a sorted list of (0-based) slot numbers to be + // skipped, of length 'num_skips'. Note that 'num_slots' includes + // any skipped slots, i.e., the first 'num_slots' profile data slots + // will be considered, but some may be skipped. + // + // Returns kNoError if the data matched, otherwise returns an + // indication of the cause of the mismatch. 
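+  //
+  // For example, to check a header while ignoring the sampling period
+  // stored in slot 3 (as the StartStopNoOptionsEmpty test below does):
+  //
+  //   int slots_to_skip[] = { 3 };
+  //   checker_.CheckWithSkips(slots, arraysize(slots),
+  //                           slots_to_skip, arraysize(slots_to_skip));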
+ string CheckWithSkips(const ProfileDataSlot* slots, int num_slots, + const int* skips, int num_skips); + + // Validate that a profile is correctly formed. The profile is + // assumed to have been created by the same kind of binary (e.g., + // same slot size, same endian, etc.) as is validating the profile. + // + // Returns kNoError if the profile appears valid, otherwise returns + // an indication of the problem with the profile. + string ValidateProfile(); + + private: + string filename_; +}; + +string ProfileDataChecker::CheckWithSkips(const ProfileDataSlot* slots, + int num_slots, const int* skips, + int num_skips) { + FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); + if (fd.get() < 0) + return "file open error"; + + scoped_array<ProfileDataSlot> filedata(new ProfileDataSlot[num_slots]); + size_t expected_bytes = num_slots * sizeof filedata[0]; + ssize_t bytes_read = ReadPersistent(fd.get(), filedata.get(), expected_bytes); + if (expected_bytes != bytes_read) + return "file too small"; + + for (int i = 0; i < num_slots; i++) { + if (num_skips > 0 && *skips == i) { + num_skips--; + skips++; + continue; + } + if (slots[i] != filedata[i]) + return "data mismatch"; + } + return kNoError; +} + +string ProfileDataChecker::ValidateProfile() { + FileDescriptor fd(open(filename_.c_str(), O_RDONLY)); + if (fd.get() < 0) + return "file open error"; + + struct stat statbuf; + if (fstat(fd.get(), &statbuf) != 0) + return "fstat error"; + if (statbuf.st_size != static_cast<ssize_t>(statbuf.st_size)) + return "file impossibly large"; + ssize_t filesize = statbuf.st_size; + + scoped_array<char> filedata(new char[filesize]); + if (ReadPersistent(fd.get(), filedata.get(), filesize) != filesize) + return "read of whole file failed"; + + // Must have enough data for the header and the trailer. + if (filesize < (5 + 3) * sizeof(ProfileDataSlot)) + return "not enough data in profile for header + trailer"; + + // Check the header + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[0] != 0) + return "error in header: non-zero count"; + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[1] != 3) + return "error in header: num_slots != 3"; + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[2] != 0) + return "error in header: non-zero format version"; + // Period (slot 3) can have any value. + if (reinterpret_cast<ProfileDataSlot*>(filedata.get())[4] != 0) + return "error in header: non-zero padding value"; + ssize_t cur_offset = 5 * sizeof(ProfileDataSlot); + + // While there are samples, skip them. Each sample consists of + // at least three slots. + bool seen_trailer = false; + while (!seen_trailer) { + if (cur_offset > filesize - 3 * sizeof(ProfileDataSlot)) + return "truncated sample header"; + ProfileDataSlot* sample = + reinterpret_cast<ProfileDataSlot*>(filedata.get() + cur_offset); + ProfileDataSlot slots_this_sample = 2 + sample[1]; + ssize_t size_this_sample = slots_this_sample * sizeof(ProfileDataSlot); + if (cur_offset > filesize - size_this_sample) + return "truncated sample"; + + if (sample[0] == 0 && sample[1] == 1 && sample[2] == 0) { + seen_trailer = true; + } else { + if (sample[0] < 1) + return "error in sample: sample count < 1"; + if (sample[1] < 1) + return "error in sample: num_pcs < 1"; + for (int i = 2; i < slots_this_sample; i++) { + if (sample[i] == 0) + return "error in sample: NULL PC"; + } + } + cur_offset += size_this_sample; + } + + // There must be at least one line in the (text) list of mapped objects, + // and it must be terminated by a newline. 
Note, the use of newline + // here and below Might not be reasonable on non-UNIX systems. + if (cur_offset >= filesize) + return "no list of mapped objects"; + if (filedata[filesize - 1] != '\n') + return "profile did not end with a complete line"; + + while (cur_offset < filesize) { + char* line_start = filedata.get() + cur_offset; + + // Find the end of the line, and replace it with a NUL for easier + // scanning. + char* line_end = strchr(line_start, '\n'); + *line_end = '\0'; + + // Advance past any leading space. It's allowed in some lines, + // but not in others. + bool has_leading_space = false; + char* line_cur = line_start; + while (*line_cur == ' ') { + has_leading_space = true; + line_cur++; + } + + bool found_match = false; + + // Check for build lines. + if (!found_match) { + found_match = (strncmp(line_cur, "build=", 6) == 0); + // Anything may follow "build=", and leading space is allowed. + } + + // A line from ProcMapsIterator::FormatLine, of the form: + // + // 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + // + // Leading space is not allowed. The filename may be omitted or + // may consist of multiple words, so we scan only up to the + // space before the filename. + if (!found_match) { + int chars_scanned = -1; + sscanf(line_cur, "%*x-%*x %*c%*c%*c%*c %*x %*x:%*x %*d %n", + &chars_scanned); + found_match = (chars_scanned > 0 && !has_leading_space); + } + + // A line from DumpAddressMap, of the form: + // + // 40000000-40015000: /lib/ld-2.3.2.so + // + // Leading space is allowed. The filename may be omitted or may + // consist of multiple words, so we scan only up to the space + // before the filename. + if (!found_match) { + int chars_scanned = -1; + sscanf(line_cur, "%*x-%*x: %n", &chars_scanned); + found_match = (chars_scanned > 0); + } + + if (!found_match) + return "unrecognized line in text section"; + + cur_offset += (line_end - line_start) + 1; + } + + return kNoError; +} + +class ProfileDataTest { + protected: + void ExpectStopped() { + EXPECT_FALSE(collector_.enabled()); + } + + void ExpectRunningSamples(int samples) { + ProfileData::State state; + collector_.GetCurrentState(&state); + EXPECT_TRUE(state.enabled); + EXPECT_EQ(samples, state.samples_gathered); + } + + void ExpectSameState(const ProfileData::State& before, + const ProfileData::State& after) { + EXPECT_EQ(before.enabled, after.enabled); + EXPECT_EQ(before.samples_gathered, after.samples_gathered); + EXPECT_EQ(before.start_time, after.start_time); + EXPECT_STREQ(before.profile_name, after.profile_name); + } + + ProfileData collector_; + ProfileDataChecker checker_; + + private: + // The tests to run + void OpsWhenStopped(); + void StartStopEmpty(); + void StartStopNoOptionsEmpty(); + void StartWhenStarted(); + void StartStopEmpty2(); + void CollectOne(); + void CollectTwoMatching(); + void CollectTwoFlush(); + void StartResetRestart(); + + public: +#define RUN(test) do { \ + printf("Running %s\n", #test); \ + ProfileDataTest pdt; \ + pdt.test(); \ +} while (0) + + static int RUN_ALL_TESTS() { + RUN(OpsWhenStopped); + RUN(StartStopEmpty); + RUN(StartWhenStarted); + RUN(StartStopEmpty2); + RUN(CollectOne); + RUN(CollectTwoMatching); + RUN(CollectTwoFlush); + RUN(StartResetRestart); + return 0; + } +}; + +// Check that various operations are safe when stopped. 
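+// (Stop, FlushTable and Add should all be harmless no-ops on a stopped
+// collector, leaving its recorded state unchanged.)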
+TEST_F(ProfileDataTest, OpsWhenStopped) { + ExpectStopped(); + EXPECT_FALSE(collector_.enabled()); + + // Verify that state is disabled, all-empty/all-0 + ProfileData::State state_before; + collector_.GetCurrentState(&state_before); + EXPECT_FALSE(state_before.enabled); + EXPECT_EQ(0, state_before.samples_gathered); + EXPECT_EQ(0, state_before.start_time); + EXPECT_STREQ("", state_before.profile_name); + + // Safe to call stop again. + collector_.Stop(); + + // Safe to call FlushTable. + collector_.FlushTable(); + + // Safe to call Add. + const void *trace[] = { V(100), V(101), V(102), V(103), V(104) }; + collector_.Add(arraysize(trace), trace); + + ProfileData::State state_after; + collector_.GetCurrentState(&state_after); + + ExpectSameState(state_before, state_after); +} + +// Start and Stop, collecting no samples. Verify output contents. +TEST_F(ProfileDataTest, StartStopEmpty) { + const int frequency = 1; + ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 0, 1, 0 // binary trailer + }; + + ExpectStopped(); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +// Start and Stop with no options, collecting no samples. Verify +// output contents. +TEST_F(ProfileDataTest, StartStopNoOptionsEmpty) { + // We're not requesting a specific period, implementation can do + // whatever it likes. + ProfileDataSlot slots[] = { + 0, 3, 0, 0 /* skipped */, 0, // binary header + 0, 1, 0 // binary trailer + }; + int slots_to_skip[] = { 3 }; + + ExpectStopped(); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), + ProfileData::Options())); + ExpectRunningSamples(0); + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.CheckWithSkips(slots, arraysize(slots), + slots_to_skip, + arraysize(slots_to_skip))); +} + +// Start after already started. Should return false and not impact +// collected data or state. +TEST_F(ProfileDataTest, StartWhenStarted) { + const int frequency = 1; + ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 0, 1, 0 // binary trailer + }; + + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + + ProfileData::State state_before; + collector_.GetCurrentState(&state_before); + + options.set_frequency(frequency * 2); + CHECK(!collector_.Start("foobar", options)); + + ProfileData::State state_after; + collector_.GetCurrentState(&state_after); + ExpectSameState(state_before, state_after); + + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +// Like StartStopEmpty, but uses a different file name and frequency. 
+TEST_F(ProfileDataTest, StartStopEmpty2) { + const int frequency = 2; + ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 0, 1, 0 // binary trailer + }; + + ExpectStopped(); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +TEST_F(ProfileDataTest, CollectOne) { + const int frequency = 2; + ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 1, 5, 100, 101, 102, 103, 104, // our sample + 0, 1, 0 // binary trailer + }; + + ExpectStopped(); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + + const void *trace[] = { V(100), V(101), V(102), V(103), V(104) }; + collector_.Add(arraysize(trace), trace); + ExpectRunningSamples(1); + + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +TEST_F(ProfileDataTest, CollectTwoMatching) { + const int frequency = 2; + ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 2, 5, 100, 201, 302, 403, 504, // our two samples + 0, 1, 0 // binary trailer + }; + + ExpectStopped(); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + + for (int i = 0; i < 2; ++i) { + const void *trace[] = { V(100), V(201), V(302), V(403), V(504) }; + collector_.Add(arraysize(trace), trace); + ExpectRunningSamples(i + 1); + } + + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +TEST_F(ProfileDataTest, CollectTwoFlush) { + const int frequency = 2; + ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 1, 5, 100, 201, 302, 403, 504, // first sample (flushed) + 1, 5, 100, 201, 302, 403, 504, // second identical sample + 0, 1, 0 // binary trailer + }; + + ExpectStopped(); + ProfileData::Options options; + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + + const void *trace[] = { V(100), V(201), V(302), V(403), V(504) }; + + collector_.Add(arraysize(trace), trace); + ExpectRunningSamples(1); + collector_.FlushTable(); + + collector_.Add(arraysize(trace), trace); + ExpectRunningSamples(2); + + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +// Start then reset, verify that the result is *not* a valid profile. +// Then start again and make sure the result is OK. +TEST_F(ProfileDataTest, StartResetRestart) { + ExpectStopped(); + ProfileData::Options options; + options.set_frequency(1); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + collector_.Reset(); + ExpectStopped(); + // We expect the resulting file to be empty. This is a minimal test + // of ValidateProfile. 
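+  // (An empty file fails validation because ValidateProfile requires at
+  // least the 5-slot binary header plus the 3-slot trailer.)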
+ EXPECT_NE(kNoError, checker_.ValidateProfile()); + + struct stat statbuf; + EXPECT_EQ(0, stat(checker_.filename().c_str(), &statbuf)); + EXPECT_EQ(0, statbuf.st_size); + + const int frequency = 2; // Different frequency than used above. + ProfileDataSlot slots[] = { + 0, 3, 0, 1000000 / frequency, 0, // binary header + 0, 1, 0 // binary trailer + }; + + options.set_frequency(frequency); + EXPECT_TRUE(collector_.Start(checker_.filename().c_str(), options)); + ExpectRunningSamples(0); + collector_.Stop(); + ExpectStopped(); + EXPECT_EQ(kNoError, checker_.ValidateProfile()); + EXPECT_EQ(kNoError, checker_.Check(slots, arraysize(slots))); +} + +} // namespace + +int main(int argc, char** argv) { + int rc = ProfileDataTest::RUN_ALL_TESTS(); + printf("%s\n", rc == 0 ? "PASS" : "FAIL"); + return rc; +} diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc new file mode 100644 index 0000000..1908b03 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.cc @@ -0,0 +1,144 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// Does some simple arithmetic and a few libc routines, so we can profile it. +// Define WITH_THREADS to add pthread functionality as well (otherwise, btw, +// the num_threads argument to this program is ingored). 
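+//
+// A typical invocation (see the usage message in main() below), with an
+// example output path, is:
+//   profiler1_unittest 50 1 /tmp/profile.out
+// Alternatively, omit the filename and set CPUPROFILE in the environment.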
+ +#include "config_for_unittests.h" +#include <stdio.h> +#include <stdlib.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for fork() +#endif +#include <sys/wait.h> // for wait() +#include "google/profiler.h" +#include "base/simple_mutex.h" +#include "tests/testutil.h" + +static int result = 0; +static int g_iters = 0; // argv[1] + +Mutex mutex(Mutex::LINKER_INITIALIZED); + +static void test_other_thread() { +#ifndef NO_THREADS + ProfilerRegisterThread(); + + int i, m; + char b[128]; + for (m = 0; m < 1000000; ++m) { // run millions of times + for (i = 0; i < g_iters; ++i ) { + MutexLock ml(&mutex); + result ^= i; + } + MutexLock ml(&mutex); + snprintf(b, sizeof(b), "%d", result); // get some libc action + } +#endif +} + +static void test_main_thread() { + int i, m; + char b[128]; + for (m = 0; m < 1000000; ++m) { // run millions of times + for (i = 0; i < g_iters; ++i ) { + MutexLock ml(&mutex); + result ^= i; + } + MutexLock ml(&mutex); + snprintf(b, sizeof(b), "%d", result); // get some libc action + } +} + +int main(int argc, char** argv) { + if ( argc <= 1 ) { + fprintf(stderr, "USAGE: %s <iters> [num_threads] [filename]\n", argv[0]); + fprintf(stderr, " iters: How many million times to run the XOR test.\n"); + fprintf(stderr, " num_threads: how many concurrent threads.\n"); + fprintf(stderr, " 0 or 1 for single-threaded mode,\n"); + fprintf(stderr, " -# to fork instead of thread.\n"); + fprintf(stderr, " filename: The name of the output profile.\n"); + fprintf(stderr, (" If you don't specify, set CPUPROFILE " + "in the environment instead!\n")); + return 1; + } + + g_iters = atoi(argv[1]); + int num_threads = 1; + const char* filename = NULL; + if (argc > 2) { + num_threads = atoi(argv[2]); + } + if (argc > 3) { + filename = argv[3]; + } + + if (filename) { + ProfilerStart(filename); + } + + test_main_thread(); + + ProfilerFlush(); // just because we can + + // The other threads, if any, will run only half as long as the main thread + RunManyThreads(test_other_thread, num_threads); + + // Or maybe they asked to fork. The fork test is only interesting + // when we use CPUPROFILE to name, so check for that +#ifdef HAVE_UNISTD_H + for (; num_threads < 0; ++num_threads) { // -<num_threads> to fork + if (filename) { + printf("FORK test only makes sense when no filename is specified.\n"); + return 2; + } + switch (fork()) { + case -1: + printf("FORK failed!\n"); + return 1; + case 0: // child + return execl(argv[0], argv[0], argv[1], NULL); + default: + wait(NULL); // we'll let the kids run one at a time + } + } +#endif + + test_main_thread(); + + if (filename) { + ProfilerStop(); + } + + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh new file mode 100644 index 0000000..73be680 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/profiler_unittest.sh @@ -0,0 +1,256 @@ +#!/bin/sh + +# Copyright (c) 2005, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. 
nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Author: Craig Silverstein +# +# Runs the 4 profiler unittests and makes sure their profiles look +# appropriate. We expect two commandline args, as described below. +# +# We run under the assumption that if $PROFILER1 is run with no +# arguments, it prints a usage line of the form +# USAGE: <actual executable being run> [...] +# +# This is because libtool sometimes turns the 'executable' into a +# shell script which runs an actual binary somewhere else. + +# We expect BINDIR and PPROF_PATH to be set in the environment. +# If not, we set them to some reasonable values +BINDIR="${BINDIR:-.}" +PPROF_PATH="${PPROF_PATH:-$BINDIR/src/pprof}" + +if [ "x$1" = "x-h" -o "x$1" = "x--help" ]; then + echo "USAGE: $0 [unittest dir] [path to pprof]" + echo " By default, unittest_dir=$BINDIR, pprof_path=$PPROF_PATH" + exit 1 +fi + +TMPDIR=/tmp/profile_info + +UNITTEST_DIR=${1:-$BINDIR} +PPROF=${2:-$PPROF_PATH} + +# We test the sliding-window functionality of the cpu-profile reader +# by using a small stride, forcing lots of reads. +PPROF_FLAGS="--test_stride=128" + +PROFILER1="$UNITTEST_DIR/profiler1_unittest" +PROFILER2="$UNITTEST_DIR/profiler2_unittest" +PROFILER3="$UNITTEST_DIR/profiler3_unittest" +PROFILER4="$UNITTEST_DIR/profiler4_unittest" + +# Unfortunately, for us, libtool can replace executables with a shell +# script that does some work before calling the 'real' executable +# under a different name. We need the 'real' executable name to run +# pprof on it. We've constructed all the binaries used in this +# unittest so when they are called with no arguments, they report +# their argv[0], which is the real binary name. +Realname() { + "$1" 2>&1 | awk '{print $2; exit;}' +} + +PROFILER1_REALNAME=`Realname "$PROFILER1"` +PROFILER2_REALNAME=`Realname "$PROFILER2"` +PROFILER3_REALNAME=`Realname "$PROFILER3"` +PROFILER4_REALNAME=`Realname "$PROFILER4"` + +# It's meaningful to the profiler, so make sure we know its state +unset CPUPROFILE + +rm -rf "$TMPDIR" +mkdir "$TMPDIR" || exit 2 + +num_failures=0 + +RegisterFailure() { + num_failures=`expr $num_failures + 1` +} + +# Takes two filenames representing profiles, with their executable scripts, +# and a multiplier, and verifies that the 'contentful' functions in +# each profile take the same time (possibly scaled by the given +# multiplier). It used to be "same" meant within 50%, after adding an +# noise-reducing X units to each value. But even that would often +# spuriously fail, so now it's "both non-zero". We're pretty forgiving. 
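+#
+# Example (from the runs below): compare a 50-iteration profile against a
+# 100-iteration profile of the same binary, expected to take about 2x as long:
+#   VerifySimilar p1 "$PROFILER1_REALNAME" p2 "$PROFILER1_REALNAME" 2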
+VerifySimilar() { + prof1="$TMPDIR/$1" + exec1="$2" + prof2="$TMPDIR/$3" + exec2="$4" + mult="$5" + + # We are careful not to put exec1 and exec2 in quotes, because if + # they are the empty string, it means we want to use the 1-arg + # version of pprof. + mthread1=`"$PPROF" $PPROF_FLAGS $exec1 "$prof1" | grep test_main_thread | awk '{print $1}'` + mthread2=`"$PPROF" $PPROF_FLAGS $exec2 "$prof2" | grep test_main_thread | awk '{print $1}'` + mthread1_plus=`expr $mthread1 + 5` + mthread2_plus=`expr $mthread2 + 5` + if [ -z "$mthread1" ] || [ -z "$mthread2" ] || \ + [ "$mthread1" -le 0 -o "$mthread2" -le 0 ] +# || [ `expr $mthread1_plus \* $mult` -gt `expr $mthread2_plus \* 2` -o \ +# `expr $mthread1_plus \* $mult \* 2` -lt `expr $mthread2_plus` ] + then + echo + echo ">>> profile on $exec1 vs $exec2 with multiplier $mult failed:" + echo "Actual times (in profiling units) were '$mthread1' vs. '$mthread2'" + echo + RegisterFailure + fi +} + +# Takes two filenames representing profiles, and optionally their +# executable scripts (these may be empty if the profiles include +# symbols), and verifies that the two profiles are identical. +VerifyIdentical() { + prof1="$TMPDIR/$1" + exec1="$2" + prof2="$TMPDIR/$3" + exec2="$4" + + # We are careful not to put exec1 and exec2 in quotes, because if + # they are the empty string, it means we want to use the 1-arg + # version of pprof. + "$PPROF" $PPROF_FLAGS $exec1 "$prof1" > "$TMPDIR/out1" + "$PPROF" $PPROF_FLAGS $exec2 "$prof2" > "$TMPDIR/out2" + diff=`diff "$TMPDIR/out1" "$TMPDIR/out2"` + + if [ ! -z "$diff" ]; then + echo + echo ">>> profile doesn't match, args: $exec1 $prof1 vs. $exec2 $prof2" + echo ">>> Diff:" + echo "$diff" + echo + RegisterFailure + fi +} + +# Takes a filename representing a profile, with its executable, +# and a multiplier, and verifies that the main-thread function takes +# the same amount of time as the other-threads function (possibly scaled +# by the given multiplier). Figuring out the multiplier can be tricky, +# since by design the main thread runs twice as long as each of the +# 'other' threads! It used to be "same" meant within 50%, after adding an +# noise-reducing X units to each value. But even that would often +# spuriously fail, so now it's "both non-zero". We're pretty forgiving. +VerifyAcrossThreads() { + prof1="$TMPDIR/$1" + # We need to run the script with no args to get the actual exe name + exec1="$2" + mult="$3" + + # We are careful not to put exec1 in quotes, because if it is the + # empty string, it means we want to use the 1-arg version of pprof. + mthread=`$PPROF $PPROF_FLAGS $exec1 "$prof1" | grep test_main_thread | awk '{print $1}'` + othread=`$PPROF $PPROF_FLAGS $exec1 "$prof1" | grep test_other_thread | awk '{print $1}'` + if [ -z "$mthread" ] || [ -z "$othread" ] || \ + [ "$mthread" -le 0 -o "$othread" -le 0 ] +# || [ `expr $mthread \* $mult \* 3` -gt `expr $othread \* 10` -o \ +# `expr $mthread \* $mult \* 10` -lt `expr $othread \* 3` ] + then + echo + echo ">>> profile on $exec1 (main vs thread) with multiplier $mult failed:" + echo "Actual times (in profiling units) were '$mthread' vs. '$othread'" + echo + RegisterFailure + fi +} + +echo +echo ">>> WARNING <<<" +echo "This test looks at timing information to determine correctness." +echo "If your system is loaded, the test may spuriously fail." +echo "If the test does fail with an 'Actual times' error, try running again." 
+echo + +# profiler1 is a non-threaded version +"$PROFILER1" 50 1 "$TMPDIR/p1" || RegisterFailure +"$PROFILER1" 100 1 "$TMPDIR/p2" || RegisterFailure +VerifySimilar p1 "$PROFILER1_REALNAME" p2 "$PROFILER1_REALNAME" 2 + +# Verify the same thing works if we statically link +"$PROFILER2" 50 1 "$TMPDIR/p3" || RegisterFailure +"$PROFILER2" 100 1 "$TMPDIR/p4" || RegisterFailure +VerifySimilar p3 "$PROFILER2_REALNAME" p4 "$PROFILER2_REALNAME" 2 + +# Verify the same thing works if we specify via CPUPROFILE +CPUPROFILE="$TMPDIR/p5" "$PROFILER2" 50 || RegisterFailure +CPUPROFILE="$TMPDIR/p6" "$PROFILER2" 100 || RegisterFailure +VerifySimilar p5 "$PROFILER2_REALNAME" p6 "$PROFILER2_REALNAME" 2 + +# When we compile with threads, things take a lot longer even when we only use 1 +CPUPROFILE="$TMPDIR/p5b" "$PROFILER3" 10 || RegisterFailure +CPUPROFILE="$TMPDIR/p5c" "$PROFILER3" 20 || RegisterFailure +VerifySimilar p5b "$PROFILER3_REALNAME" p5c "$PROFILER3_REALNAME" 2 + +# Now try what happens when we use threads +"$PROFILER3" 5 2 "$TMPDIR/p7" || RegisterFailure +"$PROFILER3" 10 2 "$TMPDIR/p8" || RegisterFailure +VerifySimilar p7 "$PROFILER3_REALNAME" p8 "$PROFILER3_REALNAME" 2 + +"$PROFILER4" 5 2 "$TMPDIR/p9" || RegisterFailure +"$PROFILER4" 10 2 "$TMPDIR/p10" || RegisterFailure +VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2 + +# More threads! +"$PROFILER4" 2 3 "$TMPDIR/p9" || RegisterFailure +"$PROFILER4" 4 3 "$TMPDIR/p10" || RegisterFailure +VerifySimilar p9 "$PROFILER4_REALNAME" p10 "$PROFILER4_REALNAME" 2 + +# Compare how much time the main thread takes compared to the other threads +# Recall the main thread runs twice as long as the other threads, by design. +"$PROFILER4" 2 4 "$TMPDIR/p11" || RegisterFailure +VerifyAcrossThreads p11 "$PROFILER4_REALNAME" 2 + +# Test symbol save and restore +"$PROFILER1" 50 1 "$TMPDIR/p12" || RegisterFailure +"$PPROF" $PPROF_FLAGS "$PROFILER1_REALNAME" "$TMPDIR/p12" --raw \ + >"$TMPDIR/p13" 2>/dev/null || RegisterFailure +VerifyIdentical p12 "$PROFILER1_REALNAME" p13 "" || RegisterFailure + +"$PROFILER3" 5 2 "$TMPDIR/p14" || RegisterFailure +"$PPROF" $PPROF_FLAGS "$PROFILER3_REALNAME" "$TMPDIR/p14" --raw \ + >"$TMPDIR/p15" 2>/dev/null || RegisterFailure +VerifyIdentical p14 "$PROFILER3_REALNAME" p15 "" || RegisterFailure + +# Make sure that when we have a process with a fork, the profiles don't +# clobber each other +CPUPROFILE="$TMPDIR/p6" "$PROFILER1" 1 -2 || RegisterFailure +n=`ls $TMPDIR/p6* | wc -l` +if [ $n != 3 ]; then + echo "FORK test FAILED: expected 3 profiles (for main + 2 children), found $n" + num_failures=`expr $num_failures + 1` +fi + +rm -rf "$TMPDIR" # clean up + +echo "Tests finished with $num_failures failures" +exit $num_failures diff --git a/third_party/tcmalloc/chromium/src/tests/raw_printer_test.cc b/third_party/tcmalloc/chromium/src/tests/raw_printer_test.cc new file mode 100644 index 0000000..3138b50 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/raw_printer_test.cc @@ -0,0 +1,60 @@ +// Copyright 2009 Google Inc. All Rights Reserved. 
+// Author: sanjay@google.com (Sanjay Ghemawat) + +#include "raw_printer.h" +#include <stdio.h> +#include <string> +#include "base/logging.h" + +using std::string; + +#define TEST(a, b) void TEST_##a##_##b() +#define RUN_TEST(a, b) TEST_##a##_##b() + +TEST(RawPrinter, Empty) { + char buffer[1]; + base::RawPrinter printer(buffer, arraysize(buffer)); + CHECK_EQ(0, printer.length()); + CHECK_EQ(string(""), buffer); + CHECK_EQ(0, printer.space_left()); + printer.Printf("foo"); + CHECK_EQ(string(""), string(buffer)); + CHECK_EQ(0, printer.length()); + CHECK_EQ(0, printer.space_left()); +} + +TEST(RawPrinter, PartiallyFilled) { + char buffer[100]; + base::RawPrinter printer(buffer, arraysize(buffer)); + printer.Printf("%s %s", "hello", "world"); + CHECK_EQ(string("hello world"), string(buffer)); + CHECK_EQ(11, printer.length()); + CHECK_LT(0, printer.space_left()); +} + +TEST(RawPrinter, Truncated) { + char buffer[3]; + base::RawPrinter printer(buffer, arraysize(buffer)); + printer.Printf("%d", 12345678); + CHECK_EQ(string("12"), string(buffer)); + CHECK_EQ(2, printer.length()); + CHECK_EQ(0, printer.space_left()); +} + +TEST(RawPrinter, ExactlyFilled) { + char buffer[12]; + base::RawPrinter printer(buffer, arraysize(buffer)); + printer.Printf("%s %s", "hello", "world"); + CHECK_EQ(string("hello world"), string(buffer)); + CHECK_EQ(11, printer.length()); + CHECK_EQ(0, printer.space_left()); +} + +int main(int argc, char **argv) { + RUN_TEST(RawPrinter, Empty); + RUN_TEST(RawPrinter, PartiallyFilled); + RUN_TEST(RawPrinter, Truncated); + RUN_TEST(RawPrinter, ExactlyFilled); + printf("PASS\n"); + return 0; // 0 means success +} diff --git a/third_party/tcmalloc/chromium/src/tests/realloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/realloc_unittest.cc new file mode 100644 index 0000000..20edb50 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/realloc_unittest.cc @@ -0,0 +1,121 @@ +// Copyright (c) 2004, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +// --- +// Author: Sanjay Ghemawat +// +// Test realloc() functionality + +#include "config_for_unittests.h" +#include <stdio.h> +#include <stdlib.h> +#include <algorithm> // for min() +#include "base/logging.h" + +using std::min; + +// Fill a buffer of the specified size with a predetermined pattern +static void Fill(unsigned char* buffer, int n) { + for (int i = 0; i < n; i++) { + buffer[i] = (i & 0xff); + } +} + +// Check that the specified buffer has the predetermined pattern +// generated by Fill() +static bool Valid(unsigned char* buffer, int n) { + for (int i = 0; i < n; i++) { + if (buffer[i] != (i & 0xff)) { + return false; + } + } + return true; +} + +// Return the next interesting size/delta to check. Returns -1 if no more. +static int NextSize(int size) { + if (size < 100) { + return size+1; + } else if (size < 100000) { + // Find next power of two + int power = 1; + while (power < size) { + power <<= 1; + } + + // Yield (power-1, power, power+1) + if (size < power-1) { + return power-1; + } else if (size == power-1) { + return power; + } else { + assert(size == power); + return power+1; + } + } else { + return -1; + } +} + +int main(int argc, char** argv) { + for (int src_size = 0; src_size >= 0; src_size = NextSize(src_size)) { + for (int dst_size = 0; dst_size >= 0; dst_size = NextSize(dst_size)) { + unsigned char* src = (unsigned char*) malloc(src_size); + Fill(src, src_size); + unsigned char* dst = (unsigned char*) realloc(src, dst_size); + CHECK(Valid(dst, min(src_size, dst_size))); + Fill(dst, dst_size); + CHECK(Valid(dst, dst_size)); + if (dst != NULL) free(dst); + } + } + + // Now make sure realloc works correctly even when we overflow the + // packed cache, so some entries are evicted from the cache. + // The cache has 2^12 entries, keyed by page number. + const int kNumEntries = 1 << 14; + int** p = (int**)malloc(sizeof(*p) * kNumEntries); + int sum = 0; + for (int i = 0; i < kNumEntries; i++) { + p[i] = (int*)malloc(8192); // no page size is likely to be bigger + p[i][1000] = i; // use memory deep in the heart of p + } + for (int i = 0; i < kNumEntries; i++) { + p[i] = (int*)realloc(p[i], 9000); + } + for (int i = 0; i < kNumEntries; i++) { + sum += p[i][1000]; + free(p[i]); + } + CHECK_EQ(kNumEntries/2 * (kNumEntries - 1), sum); // assume kNE is even + free(p); + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/sampler_test.cc b/third_party/tcmalloc/chromium/src/tests/sampler_test.cc new file mode 100644 index 0000000..fca10ac --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/sampler_test.cc @@ -0,0 +1,652 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// All Rights Reserved. +// +// Author: Daniel Ford +// +// Checks basic properties of the sampler + +#include "config_for_unittests.h" +#include <stdlib.h> // defines posix_memalign +#include <stdio.h> // for the printf at the end +#if defined HAVE_STDINT_H +#include <stdint.h> // to get uintptr_t +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place uintptr_t might be defined +#endif +#include <sys/types.h> +#include <iostream> +#include <algorithm> +#include <vector> +#include <string> +#include <cmath> +#include "base/logging.h" +#include "base/commandlineflags.h" +#include "sampler.h" // The Sampler class being tested + +using std::sort; +using std::min; +using std::max; +using std::vector; +using std::abs; + +vector<void (*)()> g_testlist; // the tests to run + +#define TEST(a, b) \ + struct Test_##a##_##b { \ + Test_##a##_##b() { g_testlist.push_back(&Run); } \ + static void Run(); \ + }; \ + static Test_##a##_##b g_test_##a##_##b; \ + void Test_##a##_##b::Run() + + +static int RUN_ALL_TESTS() { + vector<void (*)()>::const_iterator it; + for (it = g_testlist.begin(); it != g_testlist.end(); ++it) { + (*it)(); // The test will error-exit if there's a problem. + } + fprintf(stderr, "\nPassed %d tests\n\nPASS\n", (int)g_testlist.size()); + return 0; +} + +#undef LOG // defined in base/logging.h +// Ideally, we'd put the newline at the end, but this hack puts the +// newline at the end of the previous log message, which is good enough :-) +#define LOG(level) std::cerr << "\n" + +static std::string StringPrintf(const char* format, ...) { + char buf[256]; // should be big enough for all logging + va_list ap; + va_start(ap, format); + vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + return buf; +} + +namespace { +template<typename T> class scoped_array { + public: + scoped_array(T* p) : p_(p) { } + ~scoped_array() { delete[] p_; } + const T* get() const { return p_; } + T* get() { return p_; } + T& operator[](int i) { return p_[i]; } + private: + T* p_; +}; +} + +// Note that these tests are stochastic. +// This mean that the chance of correct code passing the test is, +// in the case of 5 standard deviations: +// kSigmas=5: ~99.99994267% +// in the case of 4 standard deviations: +// kSigmas=4: ~99.993666% +static const double kSigmas = 4; +static const size_t kSamplingInterval = 512*1024; + +// Tests that GetSamplePeriod returns the expected value +// which is 1<<19 +TEST(Sampler, TestGetSamplePeriod) { + tcmalloc::Sampler sampler; + sampler.Init(1); + uint64_t sample_period; + sample_period = sampler.GetSamplePeriod(); + CHECK_GT(sample_period, 0); +} + +// Tests of the quality of the random numbers generated +// This uses the Anderson Darling test for uniformity. 
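+// (The test compares the empirical distribution of a sorted sample against
+// the uniform CDF and turns the resulting statistic into a p-value; the
+// checks below reject only if that p-value is extreme in either direction.)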
+// See "Evaluating the Anderson-Darling Distribution" by Marsaglia +// for details. + +// Short cut version of ADinf(z), z>0 (from Marsaglia) +// This returns the p-value for Anderson Darling statistic in +// the limit as n-> infinity. For finite n, apply the error fix below. +double AndersonDarlingInf(double z) { + if (z < 2) { + return exp(-1.2337141 / z) / sqrt(z) * (2.00012 + (0.247105 - + (0.0649821 - (0.0347962 - (0.011672 - 0.00168691 + * z) * z) * z) * z) * z); + } + return exp( - exp(1.0776 - (2.30695 - (0.43424 - (0.082433 - + (0.008056 - 0.0003146 * z) * z) * z) * z) * z)); +} + +// Corrects the approximation error in AndersonDarlingInf for small values of n +// Add this to AndersonDarlingInf to get a better approximation +// (from Marsaglia) +double AndersonDarlingErrFix(int n, double x) { + if (x > 0.8) { + return (-130.2137 + (745.2337 - (1705.091 - (1950.646 - + (1116.360 - 255.7844 * x) * x) * x) * x) * x) / n; + } + double cutoff = 0.01265 + 0.1757 / n; + double t; + if (x < cutoff) { + t = x / cutoff; + t = sqrt(t) * (1 - t) * (49 * t - 102); + return t * (0.0037 / (n * n) + 0.00078 / n + 0.00006) / n; + } else { + t = (x - cutoff) / (0.8 - cutoff); + t = -0.00022633 + (6.54034 - (14.6538 - (14.458 - (8.259 - 1.91864 + * t) * t) * t) * t) * t; + return t * (0.04213 + 0.01365 / n) / n; + } +} + +// Returns the AndersonDarling p-value given n and the value of the statistic +double AndersonDarlingPValue(int n, double z) { + double ad = AndersonDarlingInf(z); + double errfix = AndersonDarlingErrFix(n, ad); + return ad + errfix; +} + +double AndersonDarlingStatistic(int n, double* random_sample) { + double ad_sum = 0; + for (int i = 0; i < n; i++) { + ad_sum += (2*i + 1) * log(random_sample[i] * (1 - random_sample[n-1-i])); + } + double ad_statistic = - n - 1/static_cast<double>(n) * ad_sum; + return ad_statistic; +} + +// Tests if the array of doubles is uniformly distributed. +// Returns the p-value of the Anderson Darling Statistic +// for the given set of sorted random doubles +// See "Evaluating the Anderson-Darling Distribution" by +// Marsaglia and Marsaglia for details. +double AndersonDarlingTest(int n, double* random_sample) { + double ad_statistic = AndersonDarlingStatistic(n, random_sample); + LOG(INFO) << StringPrintf("AD stat = %f, n=%d\n", ad_statistic, n); + double p = AndersonDarlingPValue(n, ad_statistic); + return p; +} + +// Test the AD Test. The value of the statistic should go to zero as n->infty +// Not run as part of regular tests +void ADTestTest(int n) { + scoped_array<double> random_sample(new double[n]); + for (int i = 0; i < n; i++) { + random_sample[i] = (i+0.01)/n; + } + sort(random_sample.get(), random_sample.get() + n); + double ad_stat = AndersonDarlingStatistic(n, random_sample.get()); + LOG(INFO) << StringPrintf("Testing the AD test. n=%d, ad_stat = %f", + n, ad_stat); +} + +// Print the CDF of the distribution of the Anderson-Darling Statistic +// Used for checking the Anderson-Darling Test +// Not run as part of regular tests +void ADCDF() { + for (int i = 1; i < 40; i++) { + double x = i/10.0; + LOG(INFO) << "x= " << x << " adpv= " + << AndersonDarlingPValue(100, x) << ", " + << AndersonDarlingPValue(1000, x); + } +} + +// Testing that NextRandom generates uniform +// random numbers. 
+// Applies the Anderson-Darling test for uniformity +void TestNextRandom(int n) { + tcmalloc::Sampler sampler; + sampler.Init(1); + uint64_t x = 1; + // This assumes that the prng returns 48 bit numbers + uint64_t max_prng_value = static_cast<uint64_t>(1)<<48; + // Initialize + for (int i = 1; i <= 20; i++) { // 20 mimics sampler.Init() + x = sampler.NextRandom(x); + } + scoped_array<uint64_t> int_random_sample(new uint64_t[n]); + // Collect samples + for (int i = 0; i < n; i++) { + int_random_sample[i] = x; + x = sampler.NextRandom(x); + } + // First sort them... + sort(int_random_sample.get(), int_random_sample.get() + n); + scoped_array<double> random_sample(new double[n]); + // Convert them to uniform randoms (in the range [0,1]) + for (int i = 0; i < n; i++) { + random_sample[i] = static_cast<double>(int_random_sample[i])/max_prng_value; + } + // Now compute the Anderson-Darling statistic + double ad_pvalue = AndersonDarlingTest(n, random_sample.get()); + LOG(INFO) << StringPrintf("pvalue for AndersonDarlingTest " + "with n= %d is p= %f\n", n, ad_pvalue); + CHECK_GT(min(ad_pvalue, 1 - ad_pvalue), 0.0001); + // << StringPrintf("prng is not uniform, %d\n", n); +} + + +TEST(Sampler, TestNextRandom_MultipleValues) { + TestNextRandom(10); // Check short-range correlation + TestNextRandom(100); + TestNextRandom(1000); + TestNextRandom(10000); // Make sure there's no systematic error +} + +// Tests that PickNextSamplePeriod generates +// geometrically distributed random numbers. +// First converts to uniforms then applied the +// Anderson-Darling test for uniformity. +void TestPickNextSample(int n) { + tcmalloc::Sampler sampler; + sampler.Init(1); + scoped_array<uint64_t> int_random_sample(new uint64_t[n]); + int sample_period = sampler.GetSamplePeriod(); + int ones_count = 0; + for (int i = 0; i < n; i++) { + int_random_sample[i] = sampler.PickNextSamplingPoint(); + CHECK_GE(int_random_sample[i], 1); + if (int_random_sample[i] == 1) { + ones_count += 1; + } + CHECK_LT(ones_count, 4); // << " out of " << i << " samples."; + } + // First sort them... + sort(int_random_sample.get(), int_random_sample.get() + n); + scoped_array<double> random_sample(new double[n]); + // Convert them to uniform random numbers + // by applying the geometric CDF + for (int i = 0; i < n; i++) { + random_sample[i] = 1 - exp(-static_cast<double>(int_random_sample[i]) + / sample_period); + } + // Now compute the Anderson-Darling statistic + double geom_ad_pvalue = AndersonDarlingTest(n, random_sample.get()); + LOG(INFO) << StringPrintf("pvalue for geometric AndersonDarlingTest " + "with n= %d is p= %f\n", n, geom_ad_pvalue); + CHECK_GT(min(geom_ad_pvalue, 1 - geom_ad_pvalue), 0.0001); + // << "PickNextSamplingPoint does not produce good " + // "geometric/exponential random numbers\n"; +} + +TEST(Sampler, TestPickNextSample_MultipleValues) { + TestPickNextSample(10); // Make sure the first few are good (enough) + TestPickNextSample(100); + TestPickNextSample(1000); + TestPickNextSample(10000); // Make sure there's no systematic error +} + + +// This is superceeded by the Anderson-Darling Test +// and it not run now. 
+// Tests how fast nearby values are spread out with LRand64 +// The purpose of this code is to determine how many +// steps to apply to the seed during initialization +void TestLRand64Spread() { + tcmalloc::Sampler sampler; + sampler.Init(1); + uint64_t current_value; + printf("Testing LRand64 Spread\n"); + for (int i = 1; i < 10; i++) { + printf("%d ", i); + current_value = i; + for (int j = 1; j < 100; j++) { + current_value = sampler.NextRandom(current_value); + } + LOG(INFO) << current_value; + } +} + + +// Test for Fastlog2 code +// We care about the percentage error because we're using this +// for choosing step sizes, so "close" is relative to the size of +// the step we would get if we used the built-in log function +TEST(Sampler, FastLog2) { + tcmalloc::Sampler sampler; + sampler.Init(1); + double max_ratio_error = 0; + for (double d = -1021.9; d < 1; d+= 0.13124235) { + double e = pow(2.0, d); + double truelog = log(e) / log(2.0); // log_2(e) + double fastlog = sampler.FastLog2(e); + max_ratio_error = max(max_ratio_error, + max(truelog/fastlog-1, fastlog/truelog-1)); + CHECK_LE(max_ratio_error, 0.01); + // << StringPrintf("d = %f, e=%f, truelog = %f, fastlog= %f\n", + // d, e, truelog, fastlog); + } + LOG(INFO) << StringPrintf("Fastlog2: max_ratio_error = %f\n", + max_ratio_error); +} + +// Futher tests + +bool CheckMean(size_t mean, int num_samples) { + tcmalloc::Sampler sampler; + sampler.Init(1); + size_t total = 0; + for (int i = 0; i < num_samples; i++) { + total += sampler.PickNextSamplingPoint(); + } + double empirical_mean = total / static_cast<double>(num_samples); + double expected_sd = mean / pow(num_samples * 1.0, 0.5); + return(abs(mean-empirical_mean) < expected_sd * kSigmas); +} + +// Prints a sequence so you can look at the distribution +void OutputSequence(int sequence_length) { + tcmalloc::Sampler sampler; + sampler.Init(1); + size_t next_step; + for (int i = 0; i< sequence_length; i++) { + next_step = sampler.PickNextSamplingPoint(); + LOG(INFO) << next_step; + } +} + + +double StandardDeviationsErrorInSample( + int total_samples, int picked_samples, + int alloc_size, int sampling_interval) { + double p = 1 - exp(-(static_cast<double>(alloc_size) / sampling_interval)); + double expected_samples = total_samples * p; + double sd = pow(p*(1-p)*total_samples, 0.5); + return((picked_samples - expected_samples) / sd); +} + +TEST(Sampler, LargeAndSmallAllocs_CombinedTest) { + tcmalloc::Sampler sampler; + sampler.Init(1); + int counter_big = 0; + int counter_small = 0; + int size_big = 129*8*1024+1; + int size_small = 1024*8; + int num_iters = 128*4*8; + // Allocate in mixed chunks + for (int i = 0; i < num_iters; i++) { + if (sampler.SampleAllocation(size_big)) { + counter_big += 1; + } + for (int i = 0; i < 129; i++) { + if (sampler.SampleAllocation(size_small)) { + counter_small += 1; + } + } + } + // Now test that there are the right number of each + double large_allocs_sds = + StandardDeviationsErrorInSample(num_iters, counter_big, + size_big, kSamplingInterval); + double small_allocs_sds = + StandardDeviationsErrorInSample(num_iters*129, counter_small, + size_small, kSamplingInterval); + LOG(INFO) << StringPrintf("large_allocs_sds = %f\n", large_allocs_sds); + LOG(INFO) << StringPrintf("small_allocs_sds = %f\n", small_allocs_sds); + CHECK_LE(abs(large_allocs_sds), kSigmas); + CHECK_LE(abs(small_allocs_sds), kSigmas); +} + +// Tests whether the mean is about right over 1000 samples +TEST(Sampler, IsMeanRight) { + CHECK(CheckMean(kSamplingInterval, 1000)); +} + 
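StandardDeviationsErrorInSample above is a plain binomial z-score: each allocation of alloc_size
bytes is sampled independently with probability p = 1 - exp(-alloc_size / sampling_interval), so
over total_samples allocations the sampled count is approximately Binomial(total_samples, p) and

    z = (picked_samples - total_samples * p) / sqrt(total_samples * p * (1 - p))

The combined large/small test passes only if |z| <= kSigmas for both allocation streams. CheckMean
relies on the analogous fact for the exponential distribution, whose standard deviation equals its
mean, so the sample mean of num_samples draws has standard deviation about mean / sqrt(num_samples).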
+// This flag is for the OldSampler class to use +const int64 FLAGS_mock_tcmalloc_sample_parameter = 1<<19; + +// A cut down and slightly refactored version of the old Sampler +class OldSampler { + public: + void Init(uint32_t seed); + void Cleanup() {} + + // Record allocation of "k" bytes. Return true iff allocation + // should be sampled + bool SampleAllocation(size_t k); + + // Generate a geometric with mean 1M (or FLAG value) + void PickNextSample(size_t k); + + // Initialize the statics for the Sample class + static void InitStatics() { + sample_period = 1048583; + } + size_t bytes_until_sample_; + + private: + uint32_t rnd_; // Cheap random number generator + static uint64_t sample_period; + // Should be a prime just above a power of 2: + // 2, 5, 11, 17, 37, 67, 131, 257, + // 521, 1031, 2053, 4099, 8209, 16411, + // 32771, 65537, 131101, 262147, 524309, 1048583, + // 2097169, 4194319, 8388617, 16777259, 33554467 +}; + +// Statics for OldSampler +uint64_t OldSampler::sample_period; + +void OldSampler::Init(uint32_t seed) { + // Initialize PRNG -- run it for a bit to get to good values + if (seed != 0) { + rnd_ = seed; + } else { + rnd_ = 12345; + } + bytes_until_sample_ = 0; + for (int i = 0; i < 100; i++) { + PickNextSample(sample_period * 2); + } +}; + +// A cut-down version of the old PickNextSampleRoutine +void OldSampler::PickNextSample(size_t k) { + // Make next "random" number + // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers + static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0); + uint32_t r = rnd_; + rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly); + + // Next point is "rnd_ % (sample_period)". I.e., average + // increment is "sample_period/2". + const int flag_value = FLAGS_mock_tcmalloc_sample_parameter; + static int last_flag_value = -1; + + if (flag_value != last_flag_value) { + // There should be a spinlock here, but this code is + // for benchmarking only. + sample_period = 1048583; + last_flag_value = flag_value; + } + + bytes_until_sample_ += rnd_ % sample_period; + + if (k > (static_cast<size_t>(-1) >> 2)) { + // If the user has asked for a huge allocation then it is possible + // for the code below to loop infinitely. Just return (note that + // this throws off the sampling accuracy somewhat, but a user who + // is allocating more than 1G of memory at a time can live with a + // minor inaccuracy in profiling of small allocations, and also + // would rather not wait for the loop below to terminate). + return; + } + + while (bytes_until_sample_ < k) { + // Increase bytes_until_sample_ by enough average sampling periods + // (sample_period >> 1) to allow us to sample past the current + // allocation. 
+ bytes_until_sample_ += (sample_period >> 1); + } + + bytes_until_sample_ -= k; +} + +inline bool OldSampler::SampleAllocation(size_t k) { + if (bytes_until_sample_ < k) { + PickNextSample(k); + return true; + } else { + bytes_until_sample_ -= k; + return false; + } +} + +// This checks that the stated maximum value for the +// tcmalloc_sample_parameter flag never overflows bytes_until_sample_ +TEST(Sampler, bytes_until_sample_Overflow_Underflow) { + tcmalloc::Sampler sampler; + sampler.Init(1); + uint64_t one = 1; + // sample_parameter = 0; // To test the edge case + uint64_t sample_parameter_array[4] = {0, 1, one<<19, one<<58}; + for (int i = 0; i < 4; i++) { + uint64_t sample_parameter = sample_parameter_array[i]; + LOG(INFO) << "sample_parameter = " << sample_parameter; + double sample_scaling = - log(2.0) * sample_parameter; + // Take the top 26 bits as the random number + // (This plus the 1<<26 sampling bound give a max step possible of + // 1209424308 bytes.) + const uint64_t prng_mod_power = 48; // Number of bits in prng + + // First, check the largest_prng value + uint64_t largest_prng_value = (static_cast<uint64_t>(1)<<48) - 1; + double q = (largest_prng_value >> (prng_mod_power - 26)) + 1.0; + LOG(INFO) << StringPrintf("q = %f\n", q); + LOG(INFO) << StringPrintf("FastLog2(q) = %f\n", sampler.FastLog2(q)); + LOG(INFO) << StringPrintf("log2(q) = %f\n", log(q)/log(2.0)); + // Replace min(sampler.FastLog2(q) - 26, 0.0) with + // (sampler.FastLog2(q) - 26.000705) when using that optimization + uint64_t smallest_sample_step + = static_cast<uint64_t>(min(sampler.FastLog2(q) - 26, 0.0) + * sample_scaling + 1); + LOG(INFO) << "Smallest sample step is " << smallest_sample_step; + uint64_t cutoff = static_cast<uint64_t>(10) + * (sample_parameter/(one<<24) + 1); + LOG(INFO) << "Acceptable value is < " << cutoff; + // This checks that the answer is "small" and positive + CHECK_LE(smallest_sample_step, cutoff); + + // Next, check with the smallest prng value + uint64_t smallest_prng_value = 0; + q = (smallest_prng_value >> (prng_mod_power - 26)) + 1.0; + LOG(INFO) << StringPrintf("q = %f\n", q); + // Replace min(sampler.FastLog2(q) - 26, 0.0) with + // (sampler.FastLog2(q) - 26.000705) when using that optimization + uint64_t largest_sample_step + = static_cast<uint64_t>(min(sampler.FastLog2(q) - 26, 0.0) + * sample_scaling + 1); + LOG(INFO) << "Largest sample step is " << largest_sample_step; + CHECK_LE(largest_sample_step, one<<63); + CHECK_GE(largest_sample_step, smallest_sample_step); + } +} + + +// Test that NextRand is in the right range. Unfortunately, this is a +// stochastic test which could miss problems. 
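In the bytes_until_sample_ overflow/underflow test above, the step computation is the inverse-CDF
recipe for an exponential draw. With q = (rnd >> 22) + 1 in [1, 2^26] and U = q / 2^26 in (0, 1],

    (log2(q) - 26) * sample_scaling = (log2(q) - 26) * (-ln(2) * sample_parameter)
                                    = -sample_parameter * ln(U)

which is exponentially distributed with mean sample_parameter when U is uniform. The test only
probes the two extreme prng values: all-ones gives q = 2^26 and hence the smallest possible step
(about 1), while zero gives q = 1 and the largest step (about 26 * ln(2) * sample_parameter + 1).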
+TEST(Sampler, NextRand_range) { + tcmalloc::Sampler sampler; + sampler.Init(1); + uint64_t one = 1; + // The next number should be (one << 48) - 1 + uint64_t max_value = (one << 48) - 1; + uint64_t x = (one << 55); + int n = 22; // 27; + LOG(INFO) << "Running sampler.NextRandom 1<<" << n << " times"; + for (int i = 1; i <= (1<<n); i++) { // 20 mimics sampler.Init() + x = sampler.NextRandom(x); + CHECK_LE(x, max_value); + } +} + +// Tests certain arithmetic operations to make sure they compute what we +// expect them too (for testing across different platforms) +TEST(Sampler, arithmetic_1) { + tcmalloc::Sampler sampler; + sampler.Init(1); + uint64_t rnd; // our 48 bit random number, which we don't trust + const uint64_t prng_mod_power = 48; + uint64_t one = 1; + rnd = one; + uint64_t max_value = (one << 48) - 1; + for (int i = 1; i <= (1>>27); i++) { // 20 mimics sampler.Init() + rnd = sampler.NextRandom(rnd); + CHECK_LE(rnd, max_value); + double q = (rnd >> (prng_mod_power - 26)) + 1.0; + CHECK_GE(q, 0); // << rnd << " " << prng_mod_power; + } + // Test some potentially out of bounds value for rnd + for (int i = 1; i <= 66; i++) { + rnd = one << i; + double q = (rnd >> (prng_mod_power - 26)) + 1.0; + LOG(INFO) << "rnd = " << rnd << " i=" << i << " q=" << q; + CHECK_GE(q, 0); + // << " rnd=" << rnd << " i=" << i << " prng_mod_power" << prng_mod_power; + } +} + +void test_arithmetic(uint64_t rnd) { + const uint64_t prng_mod_power = 48; // Number of bits in prng + uint64_t shifted_rnd = rnd >> (prng_mod_power - 26); + CHECK_GE(shifted_rnd, 0); + CHECK_LT(shifted_rnd, (1<<26)); + LOG(INFO) << shifted_rnd; + LOG(INFO) << static_cast<double>(shifted_rnd); + CHECK_GE(static_cast<double>(static_cast<uint32_t>(shifted_rnd)), 0); + // << " rnd=" << rnd << " srnd=" << shifted_rnd; + CHECK_GE(static_cast<double>(shifted_rnd), 0); + // << " rnd=" << rnd << " srnd=" << shifted_rnd; + double q = static_cast<double>(shifted_rnd) + 1.0; + CHECK_GT(q, 0); +} + +// Tests certain arithmetic operations to make sure they compute what we +// expect them too (for testing across different platforms) +// know bad values under with -c dbg --cpu piii for _some_ binaries: +// rnd=227453640600554 +// shifted_rnd=54229173 +// (hard to reproduce) +TEST(Sampler, arithmetic_2) { + uint64_t rnd = 227453640600554LL; + test_arithmetic(rnd); +} + + +// It's not really a test, but it's good to know +TEST(Sample, size_of_class) { + tcmalloc::Sampler sampler; + sampler.Init(1); + LOG(INFO) << "Size of Sampler class is: " << sizeof(tcmalloc::Sampler); + LOG(INFO) << "Size of Sampler object is: " << sizeof(sampler); +} + +int main(int argc, char **argv) { + return RUN_ALL_TESTS(); +} diff --git a/third_party/tcmalloc/chromium/src/tests/sampling_test.cc b/third_party/tcmalloc/chromium/src/tests/sampling_test.cc new file mode 100644 index 0000000..b75e70e --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/sampling_test.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// This tests ReadStackTraces and ReadGrowthStackTraces. It does this +// by doing a bunch of allocations and then calling those functions. +// A driver shell-script can call this, and then call pprof, and +// verify the expected output. The output is written to +// argv[1].heap and argv[1].growth + +#include "config_for_unittests.h" +#include <stdio.h> +#include <stdlib.h> +#include <string> +#include "base/logging.h" +#include <google/malloc_extension.h> + +using std::string; + +extern "C" void* AllocateAllocate() { + // The VLOG's are mostly to discourage inlining + VLOG(1, "Allocating some more"); + void* p = malloc(10000); + VLOG(1, "Done allocating"); + return p; +} + +static void WriteStringToFile(const string& s, const string& filename) { + FILE* fp = fopen(filename.c_str(), "w"); + fwrite(s.data(), 1, s.length(), fp); + fclose(fp); +} + +int main(int argc, char** argv) { + if (argc < 2) { + fprintf(stderr, "USAGE: %s <base of output files>\n", argv[0]); + exit(1); + } + for (int i = 0; i < 8000; i++) { + AllocateAllocate(); + } + + string s; + MallocExtension::instance()->GetHeapSample(&s); + WriteStringToFile(s, string(argv[1]) + ".heap"); + + s.clear(); + MallocExtension::instance()->GetHeapGrowthStacks(&s); + WriteStringToFile(s, string(argv[1]) + ".growth"); + + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/sampling_test.sh b/third_party/tcmalloc/chromium/src/tests/sampling_test.sh new file mode 100644 index 0000000..149d27b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/sampling_test.sh @@ -0,0 +1,94 @@ +#!/bin/sh + +# Copyright (c) 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# --- +# Author: Craig Silverstein +# +# This is a test that tcmalloc creates, and pprof reads, sampling data +# correctly: both for the heap profile (ReadStackTraces) and for +# growth in the heap sized (ReadGrowthStackTraces). + +BINDIR="${BINDIR:-.}" +PPROF_PATH="${PPROF_PATH:-$BINDIR/src/pprof}" + +if [ "x$1" = "x-h" -o "x$1" = "x--help" ]; then + echo "USAGE: $0 [unittest dir] [path to pprof]" + echo " By default, unittest_dir=$BINDIR, pprof_path=$PPROF_PATH" + exit 1 +fi + +SAMPLING_TEST="${1:-$BINDIR/sampling_test}" +PPROF="${2:-$PPROF_PATH}" +OUTDIR="/tmp/sampling_test_dir" + +# libtool is annoying, and puts the actual executable in a different +# directory, replacing the seeming-executable with a shell script. +# We use the error output of sampling_test to indicate its real location +SAMPLING_TEST_BINARY=`"$SAMPLING_TEST" 2>&1 | awk '/USAGE/ {print $2; exit;}'` + +# A kludge for cygwin. Unfortunately, 'test -f' says that 'foo' exists +# even when it doesn't, and only foo.exe exists. Other unix utilities +# (like nm) need you to say 'foo.exe'. We use one such utility, cat, to +# see what the *real* binary name is. +if ! cat "$SAMPLING_TEST_BINARY" >/dev/null 2>&1; then + SAMPLING_TEST_BINARY="$SAMPLING_TEST_BINARY".exe +fi + +die() { + echo "FAILED" + echo "reason:" + echo "$@" + echo "----" + exit 1 +} + +rm -rf "$OUTDIR" || die "Unable to delete $OUTDIR" +mkdir "$OUTDIR" || die "Unable to create $OUTDIR" + +# This puts the output into out.heap and out.growth. It allocates +# 8*10^7 bytes of memory, which is 76M. Because we sample, the +# estimate may be a bit high or a bit low: we accept anything from +# 50M to 99M. +"$SAMPLING_TEST" "$OUTDIR/out" + +echo -n "Testing heap output..." +"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap" \ + | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ + || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.heap"` +echo "OK" + +echo -n "Testing growth output..." +"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth" \ + | grep '^ *[5-9][0-9]\.[0-9][ 0-9.%]*_*AllocateAllocate' >/dev/null \ + || die `"$PPROF" --text "$SAMPLING_TEST_BINARY" "$OUTDIR/out.growth"` +echo "OK" + +echo "PASS" diff --git a/third_party/tcmalloc/chromium/src/tests/stack_trace_table_test.cc b/third_party/tcmalloc/chromium/src/tests/stack_trace_table_test.cc new file mode 100644 index 0000000..61f9e64 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/stack_trace_table_test.cc @@ -0,0 +1,97 @@ +// Copyright 2009 Google Inc. All Rights Reserved. 
+// Author: fikes@google.com (Andrew Fikes) + +#include "config_for_unittests.h" +#include <stdio.h> // for puts() +#include "stack_trace_table.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "static_vars.h" + +#undef ARRAYSIZE // may be defined on, eg, windows +#define ARRAYSIZE(a) ( sizeof(a) / sizeof(*(a)) ) + +static void CheckTracesAndReset(tcmalloc::StackTraceTable* table, + const uintptr_t* expected, int len) { + void** entries = table->ReadStackTracesAndClear(); + for (int i = 0; i < len; ++i) { + CHECK_EQ(reinterpret_cast<uintptr_t>(entries[i]), expected[i]); + } + delete[] entries; +} + +static void AddTrace(tcmalloc::StackTraceTable* table, + const tcmalloc::StackTrace& t) { + // Normally we'd need this lock, but since the test is single-threaded + // we don't. I comment it out on windows because the DLL-decl thing + // is really annoying in this case. +#ifndef _MSC_VER + SpinLockHolder h(tcmalloc::Static::pageheap_lock()); +#endif + table->AddTrace(t); +} + +int main(int argc, char **argv) { + tcmalloc::StackTraceTable table; + + // Empty table + CHECK_EQ(table.depth_total(), 0); + CHECK_EQ(table.bucket_total(), 0); + static const uintptr_t k1[] = {0}; + CheckTracesAndReset(&table, k1, ARRAYSIZE(k1)); + + tcmalloc::StackTrace t1; + t1.size = static_cast<uintptr_t>(1024); + t1.depth = static_cast<uintptr_t>(2); + t1.stack[0] = reinterpret_cast<void*>(1); + t1.stack[1] = reinterpret_cast<void*>(2); + + + tcmalloc::StackTrace t2; + t2.size = static_cast<uintptr_t>(512); + t2.depth = static_cast<uintptr_t>(2); + t2.stack[0] = reinterpret_cast<void*>(2); + t2.stack[1] = reinterpret_cast<void*>(1); + + // Table w/ just t1 + AddTrace(&table, t1); + CHECK_EQ(table.depth_total(), 2); + CHECK_EQ(table.bucket_total(), 1); + static const uintptr_t k2[] = {1, 1024, 2, 1, 2, 0}; + CheckTracesAndReset(&table, k2, ARRAYSIZE(k2)); + + // Table w/ t1, t2 + AddTrace(&table, t1); + AddTrace(&table, t2); + CHECK_EQ(table.depth_total(), 4); + CHECK_EQ(table.bucket_total(), 2); + static const uintptr_t k3[] = {1, 1024, 2, 1, 2, 1, 512, 2, 2, 1, 0}; + CheckTracesAndReset(&table, k3, ARRAYSIZE(k3)); + + // Table w/ 2 x t1, 1 x t2 + AddTrace(&table, t1); + AddTrace(&table, t2); + AddTrace(&table, t1); + CHECK_EQ(table.depth_total(), 4); + CHECK_EQ(table.bucket_total(), 2); + static const uintptr_t k4[] = {2, 2048, 2, 1, 2, 1, 512, 2, 2, 1, 0}; + CheckTracesAndReset(&table, k4, ARRAYSIZE(k4)); + + // Same stack as t1, but w/ different size + tcmalloc::StackTrace t3; + t3.size = static_cast<uintptr_t>(2); + t3.depth = static_cast<uintptr_t>(2); + t3.stack[0] = reinterpret_cast<void*>(1); + t3.stack[1] = reinterpret_cast<void*>(2); + + // Table w/ t1, t3 + AddTrace(&table, t1); + AddTrace(&table, t3); + CHECK_EQ(table.depth_total(), 2); + CHECK_EQ(table.bucket_total(), 1); + static const uintptr_t k5[] = {2, 1026, 2, 1, 2, 0}; + CheckTracesAndReset(&table, k5, ARRAYSIZE(k5)); + + puts("PASS"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/stacktrace_unittest.cc b/third_party/tcmalloc/chromium/src/tests/stacktrace_unittest.cc new file mode 100644 index 0000000..69e20ba --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/stacktrace_unittest.cc @@ -0,0 +1,194 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. 
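A note on the expected arrays in the stack-trace-table test above: ReadStackTracesAndClear
evidently serializes each bucket as [count, total bytes, depth, PC_1 .. PC_depth] and terminates
the whole list with a single 0, which is why the empty-table case expects just {0}. For example,

    k3 = { 1, 1024, 2, 1, 2,    // bucket for t1: count=1, bytes=1024, depth=2, stack {1, 2}
           1,  512, 2, 2, 1,    // bucket for t2: count=1, bytes=512,  depth=2, stack {2, 1}
           0 };                 // terminator

and k4/k5 show that identical stacks aggregate: the counts and byte totals add up while the stack
itself is stored once.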
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "config_for_unittests.h" +#ifdef HAVE_EXECINFO_H +#include <execinfo.h> +#endif +#include <stdio.h> +#include <stdlib.h> +#include "base/commandlineflags.h" +#include "base/logging.h" +#include <google/stacktrace.h> + +namespace { + +// Obtain a backtrace, verify that the expected callers are present in the +// backtrace, and maybe print the backtrace to stdout. + +// The sequence of functions whose return addresses we expect to see in the +// backtrace. +const int BACKTRACE_STEPS = 6; + +struct AddressRange { + const void *start, *end; +}; + +// Expected function [start,end] range. +AddressRange expected_range[BACKTRACE_STEPS]; + +#if __GNUC__ +// Using GCC extension: address of a label can be taken with '&&label'. +// Start should be a label somewhere before recursive call, end somewhere +// after it. +#define INIT_ADDRESS_RANGE(fn, start_label, end_label, prange) \ + do { \ + (prange)->start = &&start_label; \ + (prange)->end = &&end_label; \ + CHECK_LT((prange)->start, (prange)->end); \ + } while (0) +// This macro expands into "unmovable" code (opaque to GCC), and that +// prevents GCC from moving a_label up or down in the code. +// Without it, there is no code following the 'end' label, and GCC +// (4.3.1, 4.4.0) thinks it safe to assign &&end an address that is before +// the recursive call. +#define DECLARE_ADDRESS_LABEL(a_label) \ + a_label: do { __asm__ __volatile__(""); } while (0) +// Gcc 4.4.0 may split function into multiple chunks, and the chunk +// performing recursive call may end up later in the code then the return +// instruction (this actually happens with FDO). +// Adjust function range from __builtin_return_address. 
+#define ADJUST_ADDRESS_RANGE_FROM_RA(prange) \ + do { \ + void *ra = __builtin_return_address(0); \ + CHECK_LT((prange)->start, ra); \ + if (ra > (prange)->end) { \ + printf("Adjusting range from %p..%p to %p..%p\n", \ + (prange)->start, (prange)->end, \ + (prange)->start, ra); \ + (prange)->end = ra; \ + } \ + } while (0) +#else +// Assume the Check* functions below are not longer than 256 bytes. +#define INIT_ADDRESS_RANGE(fn, start_label, end_label, prange) \ + do { \ + (prange)->start = reinterpret_cast<const void *>(&fn); \ + (prange)->end = reinterpret_cast<const char *>(&fn) + 256; \ + } while (0) +#define DECLARE_ADDRESS_LABEL(a_label) do { } while (0) +#define ADJUST_ADDRESS_RANGE_FROM_RA(prange) do { } while (0) +#endif // __GNUC__ + +//-----------------------------------------------------------------------// + +void CheckRetAddrIsInFunction(void *ret_addr, const AddressRange &range) +{ + CHECK_GE(ret_addr, range.start); + CHECK_LE(ret_addr, range.end); +} + +//-----------------------------------------------------------------------// + +void ATTRIBUTE_NOINLINE CheckStackTrace(int); +void ATTRIBUTE_NOINLINE CheckStackTraceLeaf(void) { + const int STACK_LEN = 10; + void *stack[STACK_LEN]; + int size; + + ADJUST_ADDRESS_RANGE_FROM_RA(&expected_range[1]); + INIT_ADDRESS_RANGE(CheckStackTraceLeaf, start, end, &expected_range[0]); + DECLARE_ADDRESS_LABEL(start); + size = GetStackTrace(stack, STACK_LEN, 0); + printf("Obtained %d stack frames.\n", size); + CHECK_GE(size, 1); + CHECK_LE(size, STACK_LEN); + +#ifdef HAVE_EXECINFO_H + { + char **strings = backtrace_symbols(stack, size); + printf("Obtained %d stack frames.\n", size); + for (int i = 0; i < size; i++) + printf("%s %p\n", strings[i], stack[i]); + printf("CheckStackTrace() addr: %p\n", &CheckStackTrace); + free(strings); + } +#endif + + for (int i = 0; i < BACKTRACE_STEPS; i++) { + printf("Backtrace %d: expected: %p..%p actual: %p ... ", + i, expected_range[i].start, expected_range[i].end, stack[i]); + fflush(stdout); + CheckRetAddrIsInFunction(stack[i], expected_range[i]); + printf("OK\n"); + } + DECLARE_ADDRESS_LABEL(end); +} + +//-----------------------------------------------------------------------// + +/* Dummy functions to make the backtrace more interesting. 
*/ +void ATTRIBUTE_NOINLINE CheckStackTrace4(int i) { + ADJUST_ADDRESS_RANGE_FROM_RA(&expected_range[2]); + INIT_ADDRESS_RANGE(CheckStackTrace4, start, end, &expected_range[1]); + DECLARE_ADDRESS_LABEL(start); + for (int j = i; j >= 0; j--) + CheckStackTraceLeaf(); + DECLARE_ADDRESS_LABEL(end); +} +void ATTRIBUTE_NOINLINE CheckStackTrace3(int i) { + ADJUST_ADDRESS_RANGE_FROM_RA(&expected_range[3]); + INIT_ADDRESS_RANGE(CheckStackTrace3, start, end, &expected_range[2]); + DECLARE_ADDRESS_LABEL(start); + for (int j = i; j >= 0; j--) + CheckStackTrace4(j); + DECLARE_ADDRESS_LABEL(end); +} +void ATTRIBUTE_NOINLINE CheckStackTrace2(int i) { + ADJUST_ADDRESS_RANGE_FROM_RA(&expected_range[4]); + INIT_ADDRESS_RANGE(CheckStackTrace2, start, end, &expected_range[3]); + DECLARE_ADDRESS_LABEL(start); + for (int j = i; j >= 0; j--) + CheckStackTrace3(j); + DECLARE_ADDRESS_LABEL(end); +} +void ATTRIBUTE_NOINLINE CheckStackTrace1(int i) { + ADJUST_ADDRESS_RANGE_FROM_RA(&expected_range[5]); + INIT_ADDRESS_RANGE(CheckStackTrace1, start, end, &expected_range[4]); + DECLARE_ADDRESS_LABEL(start); + for (int j = i; j >= 0; j--) + CheckStackTrace2(j); + DECLARE_ADDRESS_LABEL(end); +} +void ATTRIBUTE_NOINLINE CheckStackTrace(int i) { + INIT_ADDRESS_RANGE(CheckStackTrace, start, end, &expected_range[5]); + DECLARE_ADDRESS_LABEL(start); + for (int j = i; j >= 0; j--) + CheckStackTrace1(j); + DECLARE_ADDRESS_LABEL(end); +} + +} // namespace +//-----------------------------------------------------------------------// + +int main(int argc, char ** argv) { + CheckStackTrace(0); + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/system-alloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/system-alloc_unittest.cc new file mode 100644 index 0000000..a160a34 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/system-alloc_unittest.cc @@ -0,0 +1,106 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
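The stack-trace unit test above captures a trace and checks that frame i lies inside the address
range recorded for the i-th expected caller (label-delimited under GCC, function address plus a
256-byte guess otherwise). A minimal usage sketch of the interface it exercises, google/stacktrace.h;
the buffer size and the helper name PrintCallers are arbitrary, not part of the patch:

    #include <stdio.h>
    #include <google/stacktrace.h>

    void PrintCallers() {
      void* frames[32];
      // With skip_count = 0, frames[0] is a PC inside the function that
      // called GetStackTrace (here PrintCallers), frames[1] its caller, etc.
      int depth = GetStackTrace(frames, 32, /*skip_count=*/0);
      for (int i = 0; i < depth; ++i) {
        printf("frame %d: %p\n", i, frames[i]);
      }
    }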
+ +// --- +// Author: Arun Sharma + +#include "config_for_unittests.h" +#include <stdio.h> +#if defined HAVE_STDINT_H +#include <stdint.h> // to get uintptr_t +#elif defined HAVE_INTTYPES_H +#include <inttypes.h> // another place uintptr_t might be defined +#endif +#include <sys/types.h> +#include "base/logging.h" +#include "system-alloc.h" + +class ArraySysAllocator : public SysAllocator { +public: + // Was this allocator invoked at least once? + bool invoked_; + + ArraySysAllocator() : SysAllocator() { + ptr_ = 0; + invoked_ = false; + } + + void* Alloc(size_t size, size_t *actual_size, size_t alignment) { + invoked_ = true; + void *result = &array_[ptr_]; + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + + if (actual_size) { + *actual_size = size; + } + + // Try to get more memory for alignment + size_t extra = alignment - (ptr & (alignment-1)); + size += extra; + CHECK_LT(ptr_ + size, kArraySize); + + if ((ptr & (alignment-1)) != 0) { + ptr += alignment - (ptr & (alignment-1)); + } + + ptr_ += size; + return reinterpret_cast<void *>(ptr); + } + + void DumpStats(TCMalloc_Printer* printer) { + } + +private: + static const int kArraySize = 8 * 1024 * 1024; + char array_[kArraySize]; + // We allocate the next chunk from here + int ptr_; + +}; +const int ArraySysAllocator::kArraySize; +ArraySysAllocator a; + +static void TestBasicInvoked() { + RegisterSystemAllocator(&a, 0); + + // An allocation size that is likely to trigger the system allocator. + // XXX: this is implementation specific. + char *p = new char[1024 * 1024]; + delete [] p; + + // Make sure that our allocator was invoked. + CHECK(a.invoked_); +} + +int main(int argc, char** argv) { + TestBasicInvoked(); + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_large_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_large_unittest.cc new file mode 100644 index 0000000..260ac29 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_large_unittest.cc @@ -0,0 +1,137 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Michael Chastain +// +// This is a unit test for large allocations in malloc and friends. +// "Large" means "so large that they overflow the address space". +// For 32 bits, this means allocations near 2^32 bytes and 2^31 bytes. +// For 64 bits, this means allocations near 2^64 bytes and 2^63 bytes. + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <set> + +#include "base/logging.h" + +using std::set; + +// Alloc a size that should always fail. + +void TryAllocExpectFail(size_t size) { + void* p1 = malloc(size); + CHECK(p1 == NULL); + + void* p2 = malloc(1); + CHECK(p2 != NULL); + + void* p3 = realloc(p2, size); + CHECK(p3 == NULL); + + free(p2); +} + +// Alloc a size that might work and might fail. +// If it does work, touch some pages. + +void TryAllocMightFail(size_t size) { + unsigned char* p = static_cast<unsigned char*>(malloc(size)); + if ( p != NULL ) { + unsigned char volatile* vp = p; // prevent optimizations + static const size_t kPoints = 1024; + + for ( size_t i = 0; i < kPoints; ++i ) { + vp[i * (size / kPoints)] = static_cast<unsigned char>(i); + } + + for ( size_t i = 0; i < kPoints; ++i ) { + CHECK(vp[i * (size / kPoints)] == static_cast<unsigned char>(i)); + } + + vp[size-1] = 'M'; + CHECK(vp[size-1] == 'M'); + } + + free(p); +} + +int main (int argc, char** argv) { + // Allocate some 0-byte objects. They better be unique. + // 0 bytes is not large but it exercises some paths related to + // large-allocation code. + { + static const int kZeroTimes = 1024; + printf("Test malloc(0) x %d\n", kZeroTimes); + set<char*> p_set; + for ( int i = 0; i < kZeroTimes; ++i ) { + char* p = new char; + CHECK(p != NULL); + CHECK(p_set.find(p) == p_set.end()); + p_set.insert(p_set.end(), p); + } + // Just leak the memory. + } + + // Grab some memory so that some later allocations are guaranteed to fail. + printf("Test small malloc\n"); + void* p_small = malloc(4*1048576); + CHECK(p_small != NULL); + + // Test sizes up near the maximum size_t. + // These allocations test the wrap-around code. + printf("Test malloc(0 - N)\n"); + const size_t zero = 0; + static const size_t kMinusNTimes = 16384; + for ( size_t i = 1; i < kMinusNTimes; ++i ) { + TryAllocExpectFail(zero - i); + } + + // Test sizes a bit smaller. + // The small malloc above guarantees that all these return NULL. + printf("Test malloc(0 - 1048576 - N)\n"); + static const size_t kMinusMBMinusNTimes = 16384; + for ( size_t i = 0; i < kMinusMBMinusNTimes; ++i) { + TryAllocExpectFail(zero - 1048576 - i); + } + + // Test sizes at half of size_t. + // These might or might not fail to allocate. 
+ printf("Test malloc(max/2 +- N)\n"); + static const size_t kHalfPlusMinusTimes = 64; + const size_t half = (zero - 2) / 2 + 1; + for ( size_t i = 0; i < kHalfPlusMinusTimes; ++i) { + TryAllocMightFail(half - i); + TryAllocMightFail(half + i); + } + + printf("PASS\n"); + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc new file mode 100644 index 0000000..8eccb18 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/tcmalloc_unittest.cc @@ -0,0 +1,1179 @@ +// Copyright (c) 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Unittest for the TCMalloc implementation. +// +// * The test consists of a set of threads. +// * Each thread maintains a set of allocated objects, with +// a bound on the total amount of data in the set. +// * Each allocated object's contents are generated by +// hashing the object pointer, and a generation count +// in the object. This allows us to easily check for +// data corruption. +// * At any given step, the thread can do any of the following: +// a. Allocate an object +// b. Increment an object's generation count and update +// its contents. +// c. Pass the object to another thread +// d. Free an object +// Also, at the end of every step, object(s) are freed to maintain +// the memory upper-bound. +// +// If this test is compiled with -DDEBUGALLOCATION, then we don't +// run some tests that test the inner workings of tcmalloc and +// break on debugallocation: that certain allocations are aligned +// in a certain way (even though no standard requires it), and that +// realloc() tries to minimize copying (which debug allocators don't +// care about). + +#include "config_for_unittests.h" +// Complicated ordering requirements. tcmalloc.h defines (indirectly) +// _POSIX_C_SOURCE, which it needs so stdlib.h defines posix_memalign. +// unistd.h, on the other hand, requires _POSIX_C_SOURCE to be unset, +// at least on FreeBSD, in order to define sbrk. 
The solution +// is to #include unistd.h first. This is safe because unistd.h +// doesn't sub-include stdlib.h, so we'll still get posix_memalign +// when we #include stdlib.h. Blah. +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for testing sbrk hooks +#endif +#include "tcmalloc.h" // must come early, to pick up posix_memalign +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#if defined HAVE_STDINT_H +#include <stdint.h> // for intptr_t +#endif +#include <sys/types.h> // for size_t +#ifdef HAVE_FCNTL_H +#include <fcntl.h> // for open; used with mmap-hook test +#endif +#ifdef HAVE_MMAP +#include <sys/mman.h> // for testing mmap hooks +#endif +#ifdef HAVE_MALLOC_H +#include <malloc.h> // defines pvalloc/etc on cygwin +#endif +#include <assert.h> +#include <vector> +#include <algorithm> +#include <string> +#include <new> +#include "base/logging.h" +#include "base/simple_mutex.h" +#include "google/malloc_hook.h" +#include "google/malloc_extension.h" +#include "google/tcmalloc.h" +#include "thread_cache.h" +#include "tests/testutil.h" + +// Windows doesn't define pvalloc and a few other obsolete unix +// functions; nor does it define posix_memalign (which is not obsolete). +#if defined(_MSC_VER) || defined(__MINGW32__) +# define cfree free // don't bother to try to test these obsolete fns +# define valloc malloc +# define pvalloc malloc +# ifdef PERFTOOLS_NO_ALIGNED_MALLOC +# define _aligned_malloc(size, alignment) malloc(size) +# else +# include <malloc.h> // for _aligned_malloc +# endif +# define memalign(alignment, size) _aligned_malloc(size, alignment) +// Assume if we fail, it's because of out-of-memory. +// Note, this isn't a perfect analogue: we don't enforce constraints on "align" +# include <errno.h> +# define posix_memalign(pptr, align, size) \ + ((*(pptr)=_aligned_malloc(size, align)) ? 0 : ENOMEM) +#endif + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. 
+#ifndef MAP_ANONYMOUS +# define MAP_ANONYMOUS MAP_ANON +#endif + +#define LOGSTREAM stdout + +using std::vector; +using std::string; + +DECLARE_double(tcmalloc_release_rate); +DECLARE_int32(max_free_queue_size); // in debugallocation.cc + +namespace testing { + +static const int FLAGS_numtests = 50000; +static const int FLAGS_log_every_n_tests = 50000; // log exactly once + +// Testing parameters +static const int FLAGS_lgmaxsize = 16; // lg() of the max size object to alloc +static const int FLAGS_numthreads = 10; // Number of threads +static const int FLAGS_threadmb = 4; // Max memory size allocated by thread +static const int FLAGS_lg_max_memalign = 18; // lg of max alignment for memalign + +static const double FLAGS_memalign_min_fraction = 0; // min expected% +static const double FLAGS_memalign_max_fraction = 0.4; // max expected% +static const double FLAGS_memalign_max_alignment_ratio = 6; // alignment/size + +// Weights of different operations +static const int FLAGS_allocweight = 50; // Weight for picking allocation +static const int FLAGS_freeweight = 50; // Weight for picking free +static const int FLAGS_updateweight = 10; // Weight for picking update +static const int FLAGS_passweight = 1; // Weight for passing object + +static const int kSizeBits = 8 * sizeof(size_t); +static const size_t kMaxSize = ~static_cast<size_t>(0); +static const size_t kMaxSignedSize = ((size_t(1) << (kSizeBits-1)) - 1); + +static const size_t kNotTooBig = 100000; +static const size_t kTooBig = kMaxSize; + +static int news_handled = 0; + +// Global array of threads +class TesterThread; +static TesterThread** threads; + +// To help with generating random numbers +class TestHarness { + private: + // Information kept per type + struct Type { + string name; + int type; + int weight; + }; + + public: + TestHarness(int seed) + : types_(new vector<Type>), total_weight_(0), num_tests_(0) { + srandom(seed); + } + ~TestHarness() { + delete types_; + } + + // Add operation type with specified weight. When starting a new + // iteration, an operation type is picked with probability + // proportional to its weight. + // + // "type" must be non-negative. + // "weight" must be non-negative. + void AddType(int type, int weight, const char* name); + + // Call this to get the type of operation for the next iteration. + // It returns a random operation type from the set of registered + // operations. Returns -1 if tests should finish. + int PickType(); + + // If n == 0, returns the next pseudo-random number in the range [0 .. 0] + // If n != 0, returns the next pseudo-random number in the range [0 .. n) + int Uniform(int n) { + if (n == 0) { + return random() * 0; + } else { + return random() % n; + } + } + // Pick "base" uniformly from range [0,max_log] and then return + // "base" random bits. The effect is to pick a number in the range + // [0,2^max_log-1] with bias towards smaller numbers. 
+ int Skewed(int max_log) { + const int base = random() % (max_log+1); + return random() % (1 << base); + } + + private: + vector<Type>* types_; // Registered types + int total_weight_; // Total weight of all types + int num_tests_; // Num tests run so far +}; + +void TestHarness::AddType(int type, int weight, const char* name) { + Type t; + t.name = name; + t.type = type; + t.weight = weight; + types_->push_back(t); + total_weight_ += weight; +} + +int TestHarness::PickType() { + if (num_tests_ >= FLAGS_numtests) return -1; + num_tests_++; + + assert(total_weight_ > 0); + // This is a little skewed if total_weight_ doesn't divide 2^31, but it's close + int v = Uniform(total_weight_); + int i; + for (i = 0; i < types_->size(); i++) { + v -= (*types_)[i].weight; + if (v < 0) { + break; + } + } + + assert(i < types_->size()); + if ((num_tests_ % FLAGS_log_every_n_tests) == 0) { + fprintf(LOGSTREAM, " Test %d out of %d: %s\n", + num_tests_, FLAGS_numtests, (*types_)[i].name.c_str()); + } + return (*types_)[i].type; +} + +class AllocatorState : public TestHarness { + public: + explicit AllocatorState(int seed) : TestHarness(seed) { + CHECK_GE(FLAGS_memalign_max_fraction, 0); + CHECK_LE(FLAGS_memalign_max_fraction, 1); + CHECK_GE(FLAGS_memalign_min_fraction, 0); + CHECK_LE(FLAGS_memalign_min_fraction, 1); + double delta = FLAGS_memalign_max_fraction - FLAGS_memalign_min_fraction; + CHECK_GE(delta, 0); + memalign_fraction_ = (Uniform(10000)/10000.0 * delta + + FLAGS_memalign_min_fraction); + //fprintf(LOGSTREAM, "memalign fraction: %f\n", memalign_fraction_); + } + virtual ~AllocatorState() {} + + // Allocate memory. Randomly choose between malloc() or posix_memalign(). + void* alloc(size_t size) { + if (Uniform(100) < memalign_fraction_ * 100) { + // Try a few times to find a reasonable alignment, or fall back on malloc. + for (int i = 0; i < 5; i++) { + size_t alignment = 1 << Uniform(FLAGS_lg_max_memalign); + if (alignment >= sizeof(intptr_t) && + (size < sizeof(intptr_t) || + alignment < FLAGS_memalign_max_alignment_ratio * size)) { + void *result = reinterpret_cast<void*>(static_cast<intptr_t>(0x1234)); + int err = posix_memalign(&result, alignment, size); + if (err != 0) { + CHECK_EQ(err, ENOMEM); + } + return err == 0 ? result : NULL; + } + } + } + return malloc(size); + } + + private: + double memalign_fraction_; +}; + + +// Info kept per thread +class TesterThread { + private: + // Info kept per allocated object + struct Object { + char* ptr; // Allocated pointer + int size; // Allocated size + int generation; // Generation counter of object contents + }; + + Mutex lock_; // For passing in another thread's obj + int id_; // My thread id + AllocatorState rnd_; // For generating random numbers + vector<Object> heap_; // This thread's heap + vector<Object> passed_; // Pending objects passed from others + size_t heap_size_; // Current heap size + int locks_ok_; // Number of OK TryLock() ops + int locks_failed_; // Number of failed TryLock() ops + + // Type of operations + enum Type { ALLOC, FREE, UPDATE, PASS }; + + // ACM minimal standard random number generator. (re-entrant.) 
+ class ACMRandom { + int32 seed_; + public: + explicit ACMRandom(int32 seed) { seed_ = seed; } + int32 Next() { + const int32 M = 2147483647L; // 2^31-1 + const int32 A = 16807; + // In effect, we are computing seed_ = (seed_ * A) % M, where M = 2^31-1 + uint32 lo = A * (int32)(seed_ & 0xFFFF); + uint32 hi = A * (int32)((uint32)seed_ >> 16); + lo += (hi & 0x7FFF) << 16; + if (lo > M) { + lo &= M; + ++lo; + } + lo += hi >> 15; + if (lo > M) { + lo &= M; + ++lo; + } + return (seed_ = (int32) lo); + } + }; + + public: + TesterThread(int id) + : id_(id), + rnd_(id+1), + heap_size_(0), + locks_ok_(0), + locks_failed_(0) { + } + + virtual ~TesterThread() { + if (FLAGS_verbose) + fprintf(LOGSTREAM, "Thread %2d: locks %6d ok; %6d trylocks failed\n", + id_, locks_ok_, locks_failed_); + if (locks_ok_ + locks_failed_ >= 1000) { + CHECK_LE(locks_failed_, locks_ok_ / 2); + } + } + + virtual void Run() { + rnd_.AddType(ALLOC, FLAGS_allocweight, "allocate"); + rnd_.AddType(FREE, FLAGS_freeweight, "free"); + rnd_.AddType(UPDATE, FLAGS_updateweight, "update"); + rnd_.AddType(PASS, FLAGS_passweight, "pass"); + + while (true) { + AcquirePassedObjects(); + + switch (rnd_.PickType()) { + case ALLOC: AllocateObject(); break; + case FREE: FreeObject(); break; + case UPDATE: UpdateObject(); break; + case PASS: PassObject(); break; + case -1: goto done; + default: assert(NULL == "Unknown type"); + } + + ShrinkHeap(); + } + + done: + DeleteHeap(); + } + + // Allocate a new object + void AllocateObject() { + Object object; + object.size = rnd_.Skewed(FLAGS_lgmaxsize); + object.ptr = static_cast<char*>(rnd_.alloc(object.size)); + CHECK(object.ptr); + object.generation = 0; + FillContents(&object); + heap_.push_back(object); + heap_size_ += object.size; + } + + // Mutate a random object + void UpdateObject() { + if (heap_.empty()) return; + const int index = rnd_.Uniform(heap_.size()); + CheckContents(heap_[index]); + heap_[index].generation++; + FillContents(&heap_[index]); + } + + // Free a random object + void FreeObject() { + if (heap_.empty()) return; + const int index = rnd_.Uniform(heap_.size()); + Object object = heap_[index]; + CheckContents(object); + free(object.ptr); + heap_size_ -= object.size; + heap_[index] = heap_[heap_.size()-1]; + heap_.pop_back(); + } + + // Delete all objects in the heap + void DeleteHeap() { + while (!heap_.empty()) { + FreeObject(); + } + } + + // Free objects until our heap is small enough + void ShrinkHeap() { + while (heap_size_ > FLAGS_threadmb << 20) { + assert(!heap_.empty()); + FreeObject(); + } + } + + // Pass a random object to another thread + void PassObject() { + // Pick object to pass + if (heap_.empty()) return; + const int index = rnd_.Uniform(heap_.size()); + Object object = heap_[index]; + CheckContents(object); + + // Pick thread to pass + const int tid = rnd_.Uniform(FLAGS_numthreads); + TesterThread* thread = threads[tid]; + + if (thread->lock_.TryLock()) { + // Pass the object + locks_ok_++; + thread->passed_.push_back(object); + thread->lock_.Unlock(); + heap_size_ -= object.size; + heap_[index] = heap_[heap_.size()-1]; + heap_.pop_back(); + } else { + locks_failed_++; + } + } + + // Grab any objects passed to this thread by another thread + void AcquirePassedObjects() { + // We do not create unnecessary contention by always using + // TryLock(). Plus we unlock immediately after swapping passed + // objects into a local vector. 
+ vector<Object> copy; + { // Locking scope + if (!lock_.TryLock()) { + locks_failed_++; + return; + } + locks_ok_++; + swap(copy, passed_); + lock_.Unlock(); + } + + for (int i = 0; i < copy.size(); ++i) { + const Object& object = copy[i]; + CheckContents(object); + heap_.push_back(object); + heap_size_ += object.size; + } + } + + // Fill object contents according to ptr/generation + void FillContents(Object* object) { + ACMRandom r(reinterpret_cast<intptr_t>(object->ptr) & 0x7fffffff); + for (int i = 0; i < object->generation; ++i) { + r.Next(); + } + const char c = static_cast<char>(r.Next()); + memset(object->ptr, c, object->size); + } + + // Check object contents + void CheckContents(const Object& object) { + ACMRandom r(reinterpret_cast<intptr_t>(object.ptr) & 0x7fffffff); + for (int i = 0; i < object.generation; ++i) { + r.Next(); + } + + // For large objects, we just check a prefix/suffix + const char expected = static_cast<char>(r.Next()); + const int limit1 = object.size < 32 ? object.size : 32; + const int start2 = limit1 > object.size - 32 ? limit1 : object.size - 32; + for (int i = 0; i < limit1; ++i) { + CHECK_EQ(object.ptr[i], expected); + } + for (int i = start2; i < object.size; ++i) { + CHECK_EQ(object.ptr[i], expected); + } + } +}; + +static void RunThread(int thread_id) { + threads[thread_id]->Run(); +} + +static void TryHugeAllocation(size_t s, AllocatorState* rnd) { + void* p = rnd->alloc(s); + CHECK(p == NULL); // huge allocation s should fail! +} + +static void TestHugeAllocations(AllocatorState* rnd) { + // Check that asking for stuff tiny bit smaller than largest possible + // size returns NULL. + for (size_t i = 0; i < 70000; i += rnd->Uniform(20)) { + TryHugeAllocation(kMaxSize - i, rnd); + } + // Asking for memory sizes near signed/unsigned boundary (kMaxSignedSize) + // might work or not, depending on the amount of virtual memory. +#ifndef DEBUGALLOCATION // debug allocation takes forever for huge allocs + for (size_t i = 0; i < 100; i++) { + void* p = NULL; + p = rnd->alloc(kMaxSignedSize + i); + if (p) free(p); // if: free(NULL) is not necessarily defined + p = rnd->alloc(kMaxSignedSize - i); + if (p) free(p); + } +#endif + + // Check that ReleaseFreeMemory has no visible effect (aka, does not + // crash the test): + MallocExtension* inst = MallocExtension::instance(); + CHECK(inst); + inst->ReleaseFreeMemory(); +} + +static void TestCalloc(size_t n, size_t s, bool ok) { + char* p = reinterpret_cast<char*>(calloc(n, s)); + if (FLAGS_verbose) + fprintf(LOGSTREAM, "calloc(%"PRIxS", %"PRIxS"): %p\n", n, s, p); + if (!ok) { + CHECK(p == NULL); // calloc(n, s) should not succeed + } else { + CHECK(p != NULL); // calloc(n, s) should succeed + for (int i = 0; i < n*s; i++) { + CHECK(p[i] == '\0'); + } + free(p); + } +} + +// This makes sure that reallocing a small number of bytes in either +// direction doesn't cause us to allocate new memory. +static void TestRealloc() { +#ifndef DEBUGALLOCATION // debug alloc doesn't try to minimize reallocs + int start_sizes[] = { 100, 1000, 10000, 100000 }; + int deltas[] = { 1, -2, 4, -8, 16, -32, 64, -128 }; + + for (int s = 0; s < sizeof(start_sizes)/sizeof(*start_sizes); ++s) { + void* p = malloc(start_sizes[s]); + CHECK(p); + // The larger the start-size, the larger the non-reallocing delta. + for (int d = 0; d < s*2; ++d) { + void* new_p = realloc(p, start_sizes[s] + deltas[d]); + CHECK(p == new_p); // realloc should not allocate new memory + } + // Test again, but this time reallocing smaller first. 
+ for (int d = 0; d < s*2; ++d) { + void* new_p = realloc(p, start_sizes[s] - deltas[d]); + CHECK(p == new_p); // realloc should not allocate new memory + } + free(p); + } +#endif +} + +static void TestNewHandler() throw (std::bad_alloc) { + ++news_handled; + throw std::bad_alloc(); +} + +static void TestOneNew(void* (*func)(size_t)) { + // success test + try { + void* ptr = (*func)(kNotTooBig); + if (0 == ptr) { + fprintf(LOGSTREAM, "allocation should not have failed.\n"); + abort(); + } + } catch (...) { + fprintf(LOGSTREAM, "allocation threw unexpected exception.\n"); + abort(); + } + + // failure test + // we should always receive a bad_alloc exception + try { + (*func)(kTooBig); + fprintf(LOGSTREAM, "allocation should have failed.\n"); + abort(); + } catch (const std::bad_alloc&) { + // correct + } catch (...) { + fprintf(LOGSTREAM, "allocation threw unexpected exception.\n"); + abort(); + } +} + +static void TestNew(void* (*func)(size_t)) { + news_handled = 0; + + // test without new_handler: + std::new_handler saved_handler = std::set_new_handler(0); + TestOneNew(func); + + // test with new_handler: + std::set_new_handler(TestNewHandler); + TestOneNew(func); + if (news_handled != 1) { + fprintf(LOGSTREAM, "new_handler was not called.\n"); + abort(); + } + std::set_new_handler(saved_handler); +} + +static void TestOneNothrowNew(void* (*func)(size_t, const std::nothrow_t&)) { + // success test + try { + void* ptr = (*func)(kNotTooBig, std::nothrow); + if (0 == ptr) { + fprintf(LOGSTREAM, "allocation should not have failed.\n"); + abort(); + } + } catch (...) { + fprintf(LOGSTREAM, "allocation threw unexpected exception.\n"); + abort(); + } + + // failure test + // we should always receive a bad_alloc exception + try { + if ((*func)(kTooBig, std::nothrow) != 0) { + fprintf(LOGSTREAM, "allocation should have failed.\n"); + abort(); + } + } catch (...) { + fprintf(LOGSTREAM, "nothrow allocation threw unexpected exception.\n"); + abort(); + } +} + +static void TestNothrowNew(void* (*func)(size_t, const std::nothrow_t&)) { + news_handled = 0; + + // test without new_handler: + std::new_handler saved_handler = std::set_new_handler(0); + TestOneNothrowNew(func); + + // test with new_handler: + std::set_new_handler(TestNewHandler); + TestOneNothrowNew(func); + if (news_handled != 1) { + fprintf(LOGSTREAM, "nothrow new_handler was not called.\n"); + abort(); + } + std::set_new_handler(saved_handler); +} + + +// These are used as callbacks by the sanity-check. Set* and Reset* +// register the hook that counts how many times the associated memory +// function is called. After each such call, call Verify* to verify +// that we used the tcmalloc version of the call, and not the libc. +// Note the ... in the hook signature: we don't care what arguments +// the hook takes. +#define MAKE_HOOK_CALLBACK(hook_type) \ + static int g_##hook_type##_calls = 0; \ + static void IncrementCallsTo##hook_type(...) 
{ \ + g_##hook_type##_calls++; \ + } \ + static void Verify##hook_type##WasCalled() { \ + CHECK_GT(g_##hook_type##_calls, 0); \ + g_##hook_type##_calls = 0; /* reset for next call */ \ + } \ + static MallocHook::hook_type g_old_##hook_type; \ + static void Set##hook_type() { \ + g_old_##hook_type = MallocHook::Set##hook_type( \ + (MallocHook::hook_type)&IncrementCallsTo##hook_type); \ + } \ + static void Reset##hook_type() { \ + CHECK_EQ(MallocHook::Set##hook_type(g_old_##hook_type), \ + (MallocHook::hook_type)&IncrementCallsTo##hook_type); \ + } + +// We do one for each hook typedef in malloc_hook.h +MAKE_HOOK_CALLBACK(NewHook); +MAKE_HOOK_CALLBACK(DeleteHook); +MAKE_HOOK_CALLBACK(MmapHook); +MAKE_HOOK_CALLBACK(MremapHook); +MAKE_HOOK_CALLBACK(MunmapHook); +MAKE_HOOK_CALLBACK(SbrkHook); + +static void TestAlignmentForSize(int size) { + fprintf(LOGSTREAM, "Testing alignment of malloc(%d)\n", size); + static const int kNum = 100; + void* ptrs[kNum]; + for (int i = 0; i < kNum; i++) { + ptrs[i] = malloc(size); + uintptr_t p = reinterpret_cast<uintptr_t>(ptrs[i]); + CHECK((p % sizeof(void*)) == 0); + CHECK((p % sizeof(double)) == 0); + + // Must have 16-byte alignment for large enough objects +#ifndef DEBUGALLOCATION // debug allocation doesn't need to align like this + if (size >= 16) { + CHECK((p % 16) == 0); + } +#endif + } + for (int i = 0; i < kNum; i++) { + free(ptrs[i]); + } +} + +static void TestMallocAlignment() { + for (int lg = 0; lg < 16; lg++) { + TestAlignmentForSize((1<<lg) - 1); + TestAlignmentForSize((1<<lg) + 0); + TestAlignmentForSize((1<<lg) + 1); + } +} + +static void TestHugeThreadCache() { + fprintf(LOGSTREAM, "==== Testing huge thread cache\n"); + // More than 2^16 to cause integer overflow of 16 bit counters. + static const int kNum = 70000; + char** array = new char*[kNum]; + for (int i = 0; i < kNum; ++i) { + array[i] = new char[10]; + } + for (int i = 0; i < kNum; ++i) { + delete[] array[i]; + } + delete[] array; +} + +namespace { + +struct RangeCallbackState { + uintptr_t ptr; + base::MallocRange::Type expected_type; + size_t min_size; + bool matched; +}; + +static void RangeCallback(void* arg, const base::MallocRange* r) { + RangeCallbackState* state = reinterpret_cast<RangeCallbackState*>(arg); + if (state->ptr >= r->address && + state->ptr < r->address + r->length) { + CHECK_EQ(r->type, state->expected_type); + CHECK_GE(r->length, state->min_size); + state->matched = true; + } +} + +// Check that at least one of the callbacks from Ranges() contains +// the specified address with the specified type, and has size +// >= min_size. 
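// NOTE (illustrative, not part of the committed file): for reference, a
// minimal caller of the Ranges() enumeration exercised by the function below
// could look like this -- the callback receives one base::MallocRange per
// reported range:
//
//   static void PrintRange(void* arg, const base::MallocRange* r) {
//     fprintf(stderr, "range at %p, length %zu, type %d\n",
//             reinterpret_cast<void*>(r->address), r->length,
//             static_cast<int>(r->type));
//   }
//   ...
//   MallocExtension::instance()->Ranges(NULL, PrintRange);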
+static void CheckRangeCallback(void* ptr, base::MallocRange::Type type, + size_t min_size) { + RangeCallbackState state; + state.ptr = reinterpret_cast<uintptr_t>(ptr); + state.expected_type = type; + state.min_size = min_size; + state.matched = false; + MallocExtension::instance()->Ranges(&state, RangeCallback); + CHECK(state.matched); +} + +} + +static void TestRanges() { + static const int MB = 1048576; + void* a = malloc(MB); + void* b = malloc(MB); + CheckRangeCallback(a, base::MallocRange::INUSE, MB); + CheckRangeCallback(b, base::MallocRange::INUSE, MB); + free(a); + CheckRangeCallback(a, base::MallocRange::FREE, MB); + CheckRangeCallback(b, base::MallocRange::INUSE, MB); + MallocExtension::instance()->ReleaseFreeMemory(); + CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB); + CheckRangeCallback(b, base::MallocRange::INUSE, MB); + free(b); + CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB); + CheckRangeCallback(b, base::MallocRange::FREE, MB); +} + +static size_t GetUnmappedBytes() { + size_t bytes; + CHECK(MallocExtension::instance()->GetNumericProperty( + "tcmalloc.pageheap_unmapped_bytes", &bytes)); + return bytes; +} + +static void TestReleaseToSystem() { + // Debug allocation mode adds overhead to each allocation which + // messes up all the equality tests here. I just disable the + // teset in this mode. TODO(csilvers): get it to work for debugalloc? +#ifndef DEBUGALLOCATION + const double old_tcmalloc_release_rate = FLAGS_tcmalloc_release_rate; + FLAGS_tcmalloc_release_rate = 0; + + static const int MB = 1048576; + void* a = malloc(MB); + void* b = malloc(MB); + MallocExtension::instance()->ReleaseFreeMemory(); + size_t starting_bytes = GetUnmappedBytes(); + + // Calling ReleaseFreeMemory() a second time shouldn't do anything. + MallocExtension::instance()->ReleaseFreeMemory(); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + // ReleaseToSystem shouldn't do anything either. + MallocExtension::instance()->ReleaseToSystem(MB); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + free(a); + + // Negative numbers should be ignored. + MallocExtension::instance()->ReleaseToSystem(-5); + EXPECT_EQ(starting_bytes, GetUnmappedBytes()); + + // The span to release should be 1MB. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + // Should do nothing since the previous call released too much. + MallocExtension::instance()->ReleaseToSystem(MB/4); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + free(b); + + // Use up the extra MB/4 bytes from 'a' and also release 'b'. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + // Should do nothing since the previous call released too much. + MallocExtension::instance()->ReleaseToSystem(MB/2); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + // Nothing else to release. + MallocExtension::instance()->ReleaseFreeMemory(); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + a = malloc(MB); + free(a); + EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes()); + + // Releasing less than a page should still trigger a release. + MallocExtension::instance()->ReleaseToSystem(1); + EXPECT_EQ(starting_bytes + 2*MB, GetUnmappedBytes()); + + FLAGS_tcmalloc_release_rate = old_tcmalloc_release_rate; +#endif // #ifndef DEBUGALLOCATION +} + +static int RunAllTests(int argc, char** argv) { + // Optional argv[1] is the seed + AllocatorState rnd(argc > 1 ? 
atoi(argv[1]) : 100); + + SetTestResourceLimit(); + + // TODO(odo): This test has been disabled because it is only by luck that it + // does not result in fragmentation. When tcmalloc makes an allocation which + // spans previously unused leaves of the pagemap it will allocate and fill in + // the leaves to cover the new allocation. The leaves happen to be 256MiB in + // the 64-bit build, and with the sbrk allocator these allocations just + // happen to fit in one leaf by luck. With other allocators (mmap, + // memfs_malloc when used with small pages) the allocations generally span + // two leaves and this results in a very bad fragmentation pattern with this + // code. The same failure can be forced with the sbrk allocator just by + // allocating something on the order of 128MiB prior to starting this test so + // that the test allocations straddle a 256MiB boundary. + + // TODO(csilvers): port MemoryUsage() over so the test can use that +#if 0 +# include <unistd.h> // for getpid() + // Allocate and deallocate blocks of increasing sizes to check if the alloc + // metadata fragments the memory. (Do not put other allocations/deallocations + // before this test, it may break). + { + size_t memory_usage = MemoryUsage(getpid()); + fprintf(LOGSTREAM, "Testing fragmentation\n"); + for ( int i = 200; i < 240; ++i ) { + int size = i << 20; + void *test1 = rnd.alloc(size); + CHECK(test1); + for ( int j = 0; j < size; j += (1 << 12) ) { + static_cast<char*>(test1)[j] = 1; + } + free(test1); + } + // There may still be a bit of fragmentation at the beginning, until we + // reach kPageMapBigAllocationThreshold bytes so we check for + // 200 + 240 + margin. + CHECK_LT(MemoryUsage(getpid()), memory_usage + (450 << 20) ); + } +#endif + + // Check that empty allocation works + fprintf(LOGSTREAM, "Testing empty allocation\n"); + { + void* p1 = rnd.alloc(0); + CHECK(p1 != NULL); + void* p2 = rnd.alloc(0); + CHECK(p2 != NULL); + CHECK(p1 != p2); + free(p1); + free(p2); + } + + // This code stresses some of the memory allocation via STL. + // In particular, it calls operator delete(void*, nothrow_t). + fprintf(LOGSTREAM, "Testing STL use\n"); + { + std::vector<int> v; + v.push_back(1); + v.push_back(2); + v.push_back(3); + v.push_back(0); + std::stable_sort(v.begin(), v.end()); + } + + // Test each of the memory-allocation functions once, just as a sanity-check + fprintf(LOGSTREAM, "Sanity-testing all the memory allocation functions\n"); + { + // We use new-hook and delete-hook to verify we actually called the + // tcmalloc version of these routines, and not the libc version. 
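// NOTE (illustrative, not part of the committed file): the one-line Set*/
// Verify* calls below work because MAKE_HOOK_CALLBACK(NewHook), defined
// earlier, expands to roughly the following:
//
//   static int g_NewHook_calls = 0;
//   static void IncrementCallsToNewHook(...) { g_NewHook_calls++; }
//   static void VerifyNewHookWasCalled() {
//     CHECK_GT(g_NewHook_calls, 0);
//     g_NewHook_calls = 0;   // reset for the next check
//   }
//   static MallocHook::NewHook g_old_NewHook;
//   static void SetNewHook() {
//     g_old_NewHook = MallocHook::SetNewHook(
//         (MallocHook::NewHook)&IncrementCallsToNewHook);
//   }
//   static void ResetNewHook() {
//     CHECK_EQ(MallocHook::SetNewHook(g_old_NewHook),
//              (MallocHook::NewHook)&IncrementCallsToNewHook);
//   }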
+ SetNewHook(); // defined as part of MAKE_HOOK_CALLBACK, above + SetDeleteHook(); // ditto + + void* p1 = malloc(10); + VerifyNewHookWasCalled(); + free(p1); + VerifyDeleteHookWasCalled(); + + p1 = calloc(10, 2); + VerifyNewHookWasCalled(); + p1 = realloc(p1, 30); + VerifyNewHookWasCalled(); + VerifyDeleteHookWasCalled(); + cfree(p1); // synonym for free + VerifyDeleteHookWasCalled(); + + CHECK_EQ(posix_memalign(&p1, sizeof(p1), 40), 0); + VerifyNewHookWasCalled(); + free(p1); + VerifyDeleteHookWasCalled(); + + p1 = memalign(sizeof(p1) * 2, 50); + VerifyNewHookWasCalled(); + free(p1); + VerifyDeleteHookWasCalled(); + + p1 = valloc(60); + VerifyNewHookWasCalled(); + free(p1); + VerifyDeleteHookWasCalled(); + + p1 = pvalloc(70); + VerifyNewHookWasCalled(); + free(p1); + VerifyDeleteHookWasCalled(); + + char* p2 = new char; + VerifyNewHookWasCalled(); + delete p2; + VerifyDeleteHookWasCalled(); + + p2 = new char[100]; + VerifyNewHookWasCalled(); + delete[] p2; + VerifyDeleteHookWasCalled(); + + p2 = new(std::nothrow) char; + VerifyNewHookWasCalled(); + delete p2; + VerifyDeleteHookWasCalled(); + + p2 = new(std::nothrow) char[100]; + VerifyNewHookWasCalled(); + delete[] p2; + VerifyDeleteHookWasCalled(); + + // Another way of calling operator new + p2 = static_cast<char*>(::operator new(100)); + VerifyNewHookWasCalled(); + ::operator delete(p2); + VerifyDeleteHookWasCalled(); + + // Try to call nothrow's delete too. Compilers use this. + p2 = static_cast<char*>(::operator new(100, std::nothrow)); + VerifyNewHookWasCalled(); + ::operator delete(p2, std::nothrow); + VerifyDeleteHookWasCalled(); + + // Test mmap too: both anonymous mmap and mmap of a file + // Note that for right now we only override mmap on linux + // systems, so those are the only ones for which we check. + SetMmapHook(); + SetMremapHook(); + SetMunmapHook(); +#if defined(HAVE_MMAP) && defined(__linux) && \ + (defined(__i386__) || defined(__x86_64__)) + int size = 8192*2; + p1 = mmap(NULL, size, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, + -1, 0); + VerifyMmapHookWasCalled(); + p1 = mremap(p1, size, size/2, 0); + VerifyMremapHookWasCalled(); + size /= 2; + munmap(p1, size); + VerifyMunmapHookWasCalled(); + + int fd = open("/dev/zero", O_RDONLY); + CHECK_GE(fd, 0); // make sure the open succeeded + p1 = mmap(NULL, 8192, PROT_READ, MAP_SHARED, fd, 0); + VerifyMmapHookWasCalled(); + munmap(p1, 8192); + VerifyMunmapHookWasCalled(); + close(fd); +#else // this is just to quiet the compiler: make sure all fns are called + IncrementCallsToMmapHook(); + IncrementCallsToMunmapHook(); + IncrementCallsToMremapHook(); + VerifyMmapHookWasCalled(); + VerifyMremapHookWasCalled(); + VerifyMunmapHookWasCalled(); +#endif + + // Test sbrk + SetSbrkHook(); +#if defined(HAVE_SBRK) && defined(__linux) && \ + (defined(__i386__) || defined(__x86_64__)) + p1 = sbrk(8192); + VerifySbrkHookWasCalled(); + p1 = sbrk(-8192); + VerifySbrkHookWasCalled(); + // However, sbrk hook should *not* be called with sbrk(0) + p1 = sbrk(0); + CHECK_EQ(g_SbrkHook_calls, 0); +#else // this is just to quiet the compiler: make sure all fns are called + IncrementCallsToSbrkHook(); + VerifySbrkHookWasCalled(); +#endif + + // Reset the hooks to what they used to be. These are all + // defined as part of MAKE_HOOK_CALLBACK, above. 
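// NOTE (illustrative, not part of the committed file): each Reset* call below
// also doubles as an assertion -- the macro CHECK_EQs that restoring the old
// hook returns our counting hook, i.e. that nothing swapped the hook out
// while the sanity checks above were running.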
+ ResetNewHook(); + ResetDeleteHook(); + ResetMmapHook(); + ResetMremapHook(); + ResetMunmapHook(); + ResetSbrkHook(); + } + + // Check that "lots" of memory can be allocated + fprintf(LOGSTREAM, "Testing large allocation\n"); + { + const int mb_to_allocate = 100; + void* p = rnd.alloc(mb_to_allocate << 20); + CHECK(p != NULL); // could not allocate + free(p); + } + + TestMallocAlignment(); + + // Check calloc() with various arguments + fprintf(LOGSTREAM, "Testing calloc\n"); + TestCalloc(0, 0, true); + TestCalloc(0, 1, true); + TestCalloc(1, 1, true); + TestCalloc(1<<10, 0, true); + TestCalloc(1<<20, 0, true); + TestCalloc(0, 1<<10, true); + TestCalloc(0, 1<<20, true); + TestCalloc(1<<20, 2, true); + TestCalloc(2, 1<<20, true); + TestCalloc(1000, 1000, true); + + TestCalloc(kMaxSize, 2, false); + TestCalloc(2, kMaxSize, false); + TestCalloc(kMaxSize, kMaxSize, false); + + TestCalloc(kMaxSignedSize, 3, false); + TestCalloc(3, kMaxSignedSize, false); + TestCalloc(kMaxSignedSize, kMaxSignedSize, false); + + // Test that realloc doesn't always reallocate and copy memory. + fprintf(LOGSTREAM, "Testing realloc\n"); + TestRealloc(); + + fprintf(LOGSTREAM, "Testing operator new(nothrow).\n"); + TestNothrowNew(&::operator new); + fprintf(LOGSTREAM, "Testing operator new[](nothrow).\n"); + TestNothrowNew(&::operator new[]); + fprintf(LOGSTREAM, "Testing operator new.\n"); + TestNew(&::operator new); + fprintf(LOGSTREAM, "Testing operator new[].\n"); + TestNew(&::operator new[]); + + // Create threads + fprintf(LOGSTREAM, "Testing threaded allocation/deallocation (%d threads)\n", + FLAGS_numthreads); + threads = new TesterThread*[FLAGS_numthreads]; + for (int i = 0; i < FLAGS_numthreads; ++i) { + threads[i] = new TesterThread(i); + } + + // This runs all the tests at the same time, with a 1M stack size each + RunManyThreadsWithId(RunThread, FLAGS_numthreads, 1<<20); + + for (int i = 0; i < FLAGS_numthreads; ++i) delete threads[i]; // Cleanup + + // Do the memory intensive tests after threads are done, since exhausting + // the available address space can make pthread_create to fail. 
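// NOTE (illustrative, not part of the committed file): each of the
// FLAGS_numthreads threads above was given a 1 MiB stack (the 1<<20 argument
// to RunManyThreadsWithId), and SetTestResourceLimit() caps the address space
// at about 1 GiB, so the tests that deliberately exhaust memory are deferred
// until those stacks no longer need to be mapped.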
+ + // Check that huge allocations fail with NULL instead of crashing + fprintf(LOGSTREAM, "Testing huge allocations\n"); + TestHugeAllocations(&rnd); + + // Check that large allocations fail with NULL instead of crashing +#ifndef DEBUGALLOCATION // debug allocation takes forever for huge allocs + fprintf(LOGSTREAM, "Testing out of memory\n"); + for (int s = 0; ; s += (10<<20)) { + void* large_object = rnd.alloc(s); + if (large_object == NULL) break; + free(large_object); + } +#endif + + TestHugeThreadCache(); + TestRanges(); + TestReleaseToSystem(); + + return 0; +} + +} + +using testing::RunAllTests; + +int main(int argc, char** argv) { +#ifdef DEBUGALLOCATION // debug allocation takes forever for huge allocs + FLAGS_max_free_queue_size = 0; // return freed blocks to tcmalloc immediately +#endif + + RunAllTests(argc, argv); + + // Test tc_version() + fprintf(LOGSTREAM, "Testing tc_version()\n"); + int major; + int minor; + const char* patch; + char mmp[64]; + const char* human_version = tc_version(&major, &minor, &patch); + snprintf(mmp, sizeof(mmp), "%d.%d%s", major, minor, patch); + CHECK(!strcmp(PACKAGE_STRING, human_version)); + CHECK(!strcmp(PACKAGE_VERSION, mmp)); + + fprintf(LOGSTREAM, "PASS\n"); +} diff --git a/third_party/tcmalloc/chromium/src/tests/testutil.cc b/third_party/tcmalloc/chromium/src/tests/testutil.cc new file mode 100644 index 0000000..f2b8592 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/testutil.cc @@ -0,0 +1,223 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein +// +// A few routines that are useful for multiple tests in this directory. + +#include "config_for_unittests.h" +#include <stdlib.h> // for NULL, abort() +// On FreeBSD, if you #include <sys/resource.h>, you have to get stdint first. 
+#ifdef HAVE_STDINT_H +#include <stdint.h> +#endif +#ifdef HAVE_SYS_RESOURCE_H +#include <sys/resource.h> +#endif +#include "tests/testutil.h" + + +// When compiled 64-bit and run on systems with swap several unittests will end +// up trying to consume all of RAM+swap, and that can take quite some time. By +// limiting the address-space size we get sufficient coverage without blowing +// out job limits. +void SetTestResourceLimit() { +#ifdef HAVE_SYS_RESOURCE_H + // The actual resource we need to set varies depending on which flavour of + // unix. On Linux we need RLIMIT_AS because that covers the use of mmap. + // Otherwise hopefully RLIMIT_RSS is good enough. (Unfortunately 64-bit + // and 32-bit headers disagree on the type of these constants!) +#ifdef RLIMIT_AS +#define USE_RESOURCE RLIMIT_AS +#else +#define USE_RESOURCE RLIMIT_RSS +#endif + + // Restrict the test to 1GiB, which should fit comfortably well on both + // 32-bit and 64-bit hosts, and executes in ~1s. + const rlim_t kMaxMem = 1<<30; + + struct rlimit rlim; + if (getrlimit(USE_RESOURCE, &rlim) == 0) { + if (rlim.rlim_cur == RLIM_INFINITY || rlim.rlim_cur > kMaxMem) { + rlim.rlim_cur = kMaxMem; + setrlimit(USE_RESOURCE, &rlim); // ignore result + } + } +#endif /* HAVE_SYS_RESOURCE_H */ +} + + +struct FunctionAndId { + void (*ptr_to_function)(int); + int id; +}; + +#if defined(NO_THREADS) || !(defined(HAVE_PTHREADS) || defined(_WIN32)) + +extern "C" void RunThread(void (*fn)()) { + (*fn)(); +} + +extern "C" void RunManyThreads(void (*fn)(), int count) { + // I guess the best we can do is run fn sequentially, 'count' times + for (int i = 0; i < count; i++) + (*fn)(); +} + +extern "C" void RunManyThreadsWithId(void (*fn)(int), int count, int) { + for (int i = 0; i < count; i++) + (*fn)(i); // stacksize doesn't make sense in a non-threaded context +} + +#elif defined(_WIN32) + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ +#endif +#include <windows.h> + +extern "C" { + // This helper function has the signature that pthread_create wants. 
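// NOTE (illustrative, not part of the committed file): on this Windows path
// the helper actually matches CreateThread's LPTHREAD_START_ROUTINE shape,
// DWORD WINAPI fn(LPVOID); the pthread_create wording above mirrors the
// comment on the pthread variant further down in this file.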
+ DWORD WINAPI RunFunctionInThread(LPVOID ptr_to_ptr_to_fn) { + (**static_cast<void (**)()>(ptr_to_ptr_to_fn))(); // runs fn + return 0; + } + + DWORD WINAPI RunFunctionInThreadWithId(LPVOID ptr_to_fnid) { + FunctionAndId* fn_and_id = static_cast<FunctionAndId*>(ptr_to_fnid); + (*fn_and_id->ptr_to_function)(fn_and_id->id); // runs fn + return 0; + } + + void RunManyThreads(void (*fn)(), int count) { + DWORD dummy; + HANDLE* hThread = new HANDLE[count]; + for (int i = 0; i < count; i++) { + hThread[i] = CreateThread(NULL, 0, RunFunctionInThread, &fn, 0, &dummy); + if (hThread[i] == NULL) ExitProcess(i); + } + WaitForMultipleObjects(count, hThread, TRUE, INFINITE); + for (int i = 0; i < count; i++) { + CloseHandle(hThread[i]); + } + delete[] hThread; + } + + void RunThread(void (*fn)()) { + RunManyThreads(fn, 1); + } + + void RunManyThreadsWithId(void (*fn)(int), int count, int stacksize) { + DWORD dummy; + HANDLE* hThread = new HANDLE[count]; + FunctionAndId* fn_and_ids = new FunctionAndId[count]; + for (int i = 0; i < count; i++) { + fn_and_ids[i].ptr_to_function = fn; + fn_and_ids[i].id = i; + hThread[i] = CreateThread(NULL, stacksize, RunFunctionInThreadWithId, + &fn_and_ids[i], 0, &dummy); + if (hThread[i] == NULL) ExitProcess(i); + } + WaitForMultipleObjects(count, hThread, TRUE, INFINITE); + for (int i = 0; i < count; i++) { + CloseHandle(hThread[i]); + } + delete[] fn_and_ids; + delete[] hThread; + } +} + +#else // not NO_THREADS, not !HAVE_PTHREAD, not _WIN32 + +#include <pthread.h> + +#define SAFE_PTHREAD(fncall) do { if ((fncall) != 0) abort(); } while (0) + +extern "C" { + // This helper function has the signature that pthread_create wants. + static void* RunFunctionInThread(void *ptr_to_ptr_to_fn) { + (**static_cast<void (**)()>(ptr_to_ptr_to_fn))(); // runs fn + return NULL; + } + + static void* RunFunctionInThreadWithId(void *ptr_to_fnid) { + FunctionAndId* fn_and_id = static_cast<FunctionAndId*>(ptr_to_fnid); + (*fn_and_id->ptr_to_function)(fn_and_id->id); // runs fn + return NULL; + } + + // Run a function in a thread of its own and wait for it to finish. + // This is useful for tcmalloc testing, because each thread is + // handled separately in tcmalloc, so there's interesting stuff to + // test even if the threads are not running concurrently. + void RunThread(void (*fn)()) { + pthread_t thr; + // Even though fn is on the stack, it's safe to pass a pointer to it, + // because we pthread_join immediately (ie, before RunInThread exits). 
+ SAFE_PTHREAD(pthread_create(&thr, NULL, RunFunctionInThread, &fn)); + SAFE_PTHREAD(pthread_join(thr, NULL)); + } + + void RunManyThreads(void (*fn)(), int count) { + pthread_t* thr = new pthread_t[count]; + for (int i = 0; i < count; i++) { + SAFE_PTHREAD(pthread_create(&thr[i], NULL, RunFunctionInThread, &fn)); + } + for (int i = 0; i < count; i++) { + SAFE_PTHREAD(pthread_join(thr[i], NULL)); + } + delete[] thr; + } + + void RunManyThreadsWithId(void (*fn)(int), int count, int stacksize) { + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setstacksize(&attr, stacksize); + + pthread_t* thr = new pthread_t[count]; + FunctionAndId* fn_and_ids = new FunctionAndId[count]; + for (int i = 0; i < count; i++) { + fn_and_ids[i].ptr_to_function = fn; + fn_and_ids[i].id = i; + SAFE_PTHREAD(pthread_create(&thr[i], &attr, + RunFunctionInThreadWithId, &fn_and_ids[i])); + } + for (int i = 0; i < count; i++) { + SAFE_PTHREAD(pthread_join(thr[i], NULL)); + } + delete[] fn_and_ids; + delete[] thr; + + pthread_attr_destroy(&attr); + } +} + +#endif diff --git a/third_party/tcmalloc/chromium/src/tests/testutil.h b/third_party/tcmalloc/chromium/src/tests/testutil.h new file mode 100644 index 0000000..26b04e4 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/testutil.h @@ -0,0 +1,61 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Craig Silverstein + +#ifndef TCMALLOC_TOOLS_TESTUTIL_H_ +#define TCMALLOC_TOOLS_TESTUTIL_H_ + +// Run a function in a thread of its own and wait for it to finish. +// The function you pass in must have the signature +// void MyFunction(); +extern "C" void RunThread(void (*fn)()); + +// Run a function X times, in X threads, and wait for them all to finish. +// The function you pass in must have the signature +// void MyFunction(); +extern "C" void RunManyThreads(void (*fn)(), int count); + +// The 'advanced' version: run a function X times, in X threads, and +// wait for them all to finish. Give them all the specified stack-size. 
+// (If you're curious why this takes a stacksize and the others don't, +// it's because the one client of this fn wanted to specify stacksize. :-) ) +// The function you pass in must have the signature +// void MyFunction(int idx); +// where idx is the index of the thread (which of the X threads this is). +extern "C" void RunManyThreadsWithId(void (*fn)(int), int count, int stacksize); + +// When compiled 64-bit and run on systems with swap several unittests will end +// up trying to consume all of RAM+swap, and that can take quite some time. By +// limiting the address-space size we get sufficient coverage without blowing +// out job limits. +void SetTestResourceLimit(); + +#endif // TCMALLOC_TOOLS_TESTUTIL_H_ diff --git a/third_party/tcmalloc/chromium/src/tests/thread_dealloc_unittest.cc b/third_party/tcmalloc/chromium/src/tests/thread_dealloc_unittest.cc new file mode 100644 index 0000000..32923e1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/tests/thread_dealloc_unittest.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2004, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat +// +// Check that we do not leak memory when cycling through lots of threads. 
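// NOTE (illustrative, not part of the committed file): cycling threads works
// as a leak test because every thread gets its own ThreadCache, so creating
// and destroying ~1000 threads exercises the per-thread cache teardown path
// (DestroyThreadCache/DeleteCache in thread_cache.cc). If that path failed to
// return cached objects, the stats printed every 200 iterations below would
// grow without bound.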
+ +#include "config_for_unittests.h" +#include <stdio.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> // for sleep() +#endif +#include "base/logging.h" +#include <google/malloc_extension.h> +#include "tests/testutil.h" // for RunThread() + +// Size/number of objects to allocate per thread (1 MB per thread) +static const int kObjectSize = 1024; +static const int kNumObjects = 1024; + +// Number of threads to create and destroy +static const int kNumThreads = 1000; + +// Allocate lots of stuff +static void AllocStuff() { + void** objects = new void*[kNumObjects]; + for (int i = 0; i < kNumObjects; i++) { + objects[i] = malloc(kObjectSize); + } + for (int i = 0; i < kNumObjects; i++) { + free(objects[i]); + } + delete[] objects; +} + +int main(int argc, char** argv) { + static const int kDisplaySize = 1048576; + char* display = new char[kDisplaySize]; + + for (int i = 0; i < kNumThreads; i++) { + RunThread(&AllocStuff); + + if (((i+1) % 200) == 0) { + fprintf(stderr, "Iteration: %d of %d\n", (i+1), kNumThreads); + MallocExtension::instance()->GetStats(display, kDisplaySize); + fprintf(stderr, "%s\n", display); + } + } + delete[] display; + + printf("PASS\n"); +#ifdef HAVE_UNISTD_H + sleep(1); // Prevent exit race problem with glibc +#endif + return 0; +} diff --git a/third_party/tcmalloc/chromium/src/thread_cache.cc b/third_party/tcmalloc/chromium/src/thread_cache.cc new file mode 100644 index 0000000..fd44a70 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/thread_cache.cc @@ -0,0 +1,513 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Ken Ashcraft <opensource@google.com> + +#include <config.h> +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif +#include <algorithm> // for min and max +#include "thread_cache.h" +#include "maybe_threads.h" + +using std::min; +using std::max; + +DEFINE_int64(tcmalloc_max_total_thread_cache_bytes, + EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES", 16<<20), + "Bound on the total amount of bytes allocated to " + "thread caches. 
This bound is not strict, so it is possible " + "for the cache to go over this bound in certain circumstances. "); + +namespace tcmalloc { + +static bool phinited = false; + +volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; +size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; +ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; +PageHeapAllocator<ThreadCache> threadcache_allocator; +ThreadCache* ThreadCache::thread_heaps_ = NULL; +int ThreadCache::thread_heap_count_ = 0; +ThreadCache* ThreadCache::next_memory_steal_ = NULL; +#ifdef HAVE_TLS +__thread ThreadCache* ThreadCache::threadlocal_heap_ +# ifdef HAVE___ATTRIBUTE__ + __attribute__ ((tls_model ("initial-exec"))) +# endif + ; +#endif +bool ThreadCache::tsd_inited_ = false; +pthread_key_t ThreadCache::heap_key_; + +#if defined(HAVE_TLS) +bool kernel_supports_tls = false; // be conservative +# if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS + void CheckIfKernelSupportsTLS() { + kernel_supports_tls = false; + } +# else +# include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too + void CheckIfKernelSupportsTLS() { + struct utsname buf; + if (uname(&buf) != 0) { // should be impossible + MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno); + kernel_supports_tls = false; + } else if (strcasecmp(buf.sysname, "linux") == 0) { + // The linux case: the first kernel to support TLS was 2.6.0 + if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x + kernel_supports_tls = false; + else if (buf.release[0] == '2' && buf.release[1] == '.' && + buf.release[2] >= '0' && buf.release[2] < '6' && + buf.release[3] == '.') // 2.0 - 2.5 + kernel_supports_tls = false; + else + kernel_supports_tls = true; + } else { // some other kernel, we'll be optimisitic + kernel_supports_tls = true; + } + // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG + } +# endif // HAVE_DECL_UNAME +#endif // HAVE_TLS + +void ThreadCache::Init(pthread_t tid) { + size_ = 0; + + max_size_ = 0; + IncreaseCacheLimitLocked(); + if (max_size_ == 0) { + // There isn't enough memory to go around. Just give the minimum to + // this thread. + max_size_ = kMinThreadCacheSize; + + // Take unclaimed_cache_space_ negative. + unclaimed_cache_space_ -= kMinThreadCacheSize; + ASSERT(unclaimed_cache_space_ < 0); + } + + next_ = NULL; + prev_ = NULL; + tid_ = tid; + in_setspecific_ = false; + for (size_t cl = 0; cl < kNumClasses; ++cl) { + list_[cl].Init(); + } + + uint32_t sampler_seed; + memcpy(&sampler_seed, &tid, sizeof(sampler_seed)); + sampler_.Init(sampler_seed); +} + +void ThreadCache::Cleanup() { + // Put unused memory back into central cache + for (int cl = 0; cl < kNumClasses; ++cl) { + if (list_[cl].length() > 0) { + ReleaseToCentralCache(&list_[cl], cl, list_[cl].length()); + } + } +} + +// Remove some objects of class "cl" from central cache and add to thread heap. +// On success, return the first object for immediate use; otherwise return NULL. 
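// NOTE (illustrative, not part of the committed file): the free-list
// "slow start" implemented below can be summarized as: max_length starts at
// 1, grows by 1 per fetch until it reaches the class's batch_size, and then
// grows in whole batch_size steps, capped at kMaxDynamicFreeListLength
// rounded down to a multiple of batch_size. With batch_size == 32 the
// sequence is 1, 2, ..., 32, 64, 96, ... A standalone mirror of the rule:
//
//   static int NextMaxLength(int max_length, int batch_size, int cap) {
//     if (max_length < batch_size) return max_length + 1;  // slow start
//     int n = min(max_length + batch_size, cap);           // batch steps
//     return n - (n % batch_size);                         // keep a multiple
//   }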
+void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) { + FreeList* list = &list_[cl]; + ASSERT(list->empty()); + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + + const int num_to_move = min<int>(list->max_length(), batch_size); + void *start, *end; + int fetch_count = Static::central_cache()[cl].RemoveRange( + &start, &end, num_to_move); + + ASSERT((start == NULL) == (fetch_count == 0)); + if (--fetch_count >= 0) { + size_ += byte_size * fetch_count; + list->PushRange(fetch_count, SLL_Next(start), end); + } + + // Increase max length slowly up to batch_size. After that, + // increase by batch_size in one shot so that the length is a + // multiple of batch_size. + if (list->max_length() < batch_size) { + list->set_max_length(list->max_length() + 1); + } else { + // Don't let the list get too long. In 32 bit builds, the length + // is represented by a 16 bit int, so we need to watch out for + // integer overflow. + int new_length = min<int>(list->max_length() + batch_size, + kMaxDynamicFreeListLength); + // The list's max_length must always be a multiple of batch_size, + // and kMaxDynamicFreeListLength is not necessarily a multiple + // of batch_size. + new_length -= new_length % batch_size; + ASSERT(new_length % batch_size == 0); + list->set_max_length(new_length); + } + return start; +} + +void ThreadCache::ListTooLong(FreeList* list, size_t cl) { + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + ReleaseToCentralCache(list, cl, batch_size); + + // If the list is too long, we need to transfer some number of + // objects to the central cache. Ideally, we would transfer + // num_objects_to_move, so the code below tries to make max_length + // converge on num_objects_to_move. + + if (list->max_length() < batch_size) { + // Slow start the max_length so we don't overreserve. + list->set_max_length(list->max_length() + 1); + } else if (list->max_length() > batch_size) { + // If we consistently go over max_length, shrink max_length. If we don't + // shrink it, some amount of memory will always stay in this freelist. + list->set_length_overages(list->length_overages() + 1); + if (list->length_overages() > kMaxOverages) { + ASSERT(list->max_length() > batch_size); + list->set_max_length(list->max_length() - batch_size); + list->set_length_overages(0); + } + } +} + +// Remove some objects of class "cl" from thread heap and add to central cache +void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) { + ASSERT(src == &list_[cl]); + if (N > src->length()) N = src->length(); + size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl); + + // We return prepackaged chains of the correct size to the central cache. + // TODO: Use the same format internally in the thread caches? + int batch_size = Static::sizemap()->num_objects_to_move(cl); + while (N > batch_size) { + void *tail, *head; + src->PopRange(batch_size, &head, &tail); + Static::central_cache()[cl].InsertRange(head, tail, batch_size); + N -= batch_size; + } + void *tail, *head; + src->PopRange(N, &head, &tail); + Static::central_cache()[cl].InsertRange(head, tail, N); + size_ -= delta_bytes; +} + +// Release idle memory to the central cache +void ThreadCache::Scavenge() { + // If the low-water mark for the free list is L, it means we would + // not have had to allocate anything from the central cache even if + // we had reduced the free list size by L. We aim to get closer to + // that situation by dropping L/2 nodes from the free list. 
This + // may not release much memory, but if so we will call scavenge again + // pretty soon and the low-water marks will be high on that call. + //int64 start = CycleClock::Now(); + for (int cl = 0; cl < kNumClasses; cl++) { + FreeList* list = &list_[cl]; + const int lowmark = list->lowwatermark(); + if (lowmark > 0) { + const int drop = (lowmark > 1) ? lowmark/2 : 1; + ReleaseToCentralCache(list, cl, drop); + + // Shrink the max length if it isn't used. Only shrink down to + // batch_size -- if the thread was active enough to get the max_length + // above batch_size, it will likely be that active again. If + // max_length shinks below batch_size, the thread will have to + // go through the slow-start behavior again. The slow-start is useful + // mainly for threads that stay relatively idle for their entire + // lifetime. + const int batch_size = Static::sizemap()->num_objects_to_move(cl); + if (list->max_length() > batch_size) { + list->set_max_length( + max<int>(list->max_length() - batch_size, batch_size)); + } + } + list->clear_lowwatermark(); + } + + IncreaseCacheLimit(); + +// int64 finish = CycleClock::Now(); +// CycleTimer ct; +// MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); +} + +void ThreadCache::IncreaseCacheLimit() { + SpinLockHolder h(Static::pageheap_lock()); + IncreaseCacheLimitLocked(); +} + +void ThreadCache::IncreaseCacheLimitLocked() { + if (unclaimed_cache_space_ > 0) { + // Possibly make unclaimed_cache_space_ negative. + unclaimed_cache_space_ -= kStealAmount; + max_size_ += kStealAmount; + return; + } + // Don't hold pageheap_lock too long. Try to steal from 10 other + // threads before giving up. The i < 10 condition also prevents an + // infinite loop in case none of the existing thread heaps are + // suitable places to steal from. + for (int i = 0; i < 10; + ++i, next_memory_steal_ = next_memory_steal_->next_) { + // Reached the end of the linked list. Start at the beginning. + if (next_memory_steal_ == NULL) { + ASSERT(thread_heaps_ != NULL); + next_memory_steal_ = thread_heaps_; + } + if (next_memory_steal_ == this || + next_memory_steal_->max_size_ <= kMinThreadCacheSize) { + continue; + } + next_memory_steal_->max_size_ -= kStealAmount; + max_size_ += kStealAmount; + + next_memory_steal_ = next_memory_steal_->next_; + return; + } +} + +int ThreadCache::GetSamplePeriod() { + return sampler_.GetSamplePeriod(); +} + +void ThreadCache::InitModule() { + // There is a slight potential race here because of double-checked + // locking idiom. However, as long as the program does a small + // allocation before switching to multi-threaded mode, we will be + // fine. We increase the chances of doing such a small allocation + // by doing one in the constructor of the module_enter_exit_hook + // object declared below. + SpinLockHolder h(Static::pageheap_lock()); + if (!phinited) { + Static::InitStaticVars(); + threadcache_allocator.Init(); + phinited = 1; + } +} + +void ThreadCache::InitTSD() { + ASSERT(!tsd_inited_); + perftools_pthread_key_create(&heap_key_, DestroyThreadCache); + tsd_inited_ = true; + + // We may have used a fake pthread_t for the main thread. Fix it. 
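// NOTE (illustrative, not part of the committed file): the "fake" tid
// referred to here is the zero-filled pthread_t that CreateCacheIfNecessary()
// (below) records when a cache is created before TSD is initialized, because
// it may not even be safe to call pthread_self() that early. Once InitTSD()
// has run we can query the real id and patch any heap registered under the
// zero tid.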
+ pthread_t zero; + memset(&zero, 0, sizeof(zero)); + SpinLockHolder h(Static::pageheap_lock()); + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + if (h->tid_ == zero) { + h->tid_ = pthread_self(); + } + } +} + +ThreadCache* ThreadCache::CreateCacheIfNecessary() { + // Initialize per-thread data if necessary + ThreadCache* heap = NULL; + { + SpinLockHolder h(Static::pageheap_lock()); + + // Early on in glibc's life, we cannot even call pthread_self() + pthread_t me; + if (!tsd_inited_) { + memset(&me, 0, sizeof(me)); + } else { + me = pthread_self(); + } + + // This may be a recursive malloc call from pthread_setspecific() + // In that case, the heap for this thread has already been created + // and added to the linked list. So we search for that first. + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + if (h->tid_ == me) { + heap = h; + break; + } + } + + if (heap == NULL) heap = NewHeap(me); + } + + // We call pthread_setspecific() outside the lock because it may + // call malloc() recursively. We check for the recursive call using + // the "in_setspecific_" flag so that we can avoid calling + // pthread_setspecific() if we are already inside pthread_setspecific(). + if (!heap->in_setspecific_ && tsd_inited_) { + heap->in_setspecific_ = true; + perftools_pthread_setspecific(heap_key_, heap); +#ifdef HAVE_TLS + // Also keep a copy in __thread for faster retrieval + threadlocal_heap_ = heap; +#endif + heap->in_setspecific_ = false; + } + return heap; +} + +ThreadCache* ThreadCache::NewHeap(pthread_t tid) { + // Create the heap and add it to the linked list + ThreadCache *heap = threadcache_allocator.New(); + heap->Init(tid); + heap->next_ = thread_heaps_; + heap->prev_ = NULL; + if (thread_heaps_ != NULL) { + thread_heaps_->prev_ = heap; + } else { + // This is the only thread heap at the momment. + ASSERT(next_memory_steal_ == NULL); + next_memory_steal_ = heap; + } + thread_heaps_ = heap; + thread_heap_count_++; + return heap; +} + +void ThreadCache::BecomeIdle() { + if (!tsd_inited_) return; // No caches yet + ThreadCache* heap = GetThreadHeap(); + if (heap == NULL) return; // No thread cache to remove + if (heap->in_setspecific_) return; // Do not disturb the active caller + + heap->in_setspecific_ = true; + perftools_pthread_setspecific(heap_key_, NULL); +#ifdef HAVE_TLS + // Also update the copy in __thread + threadlocal_heap_ = NULL; +#endif + heap->in_setspecific_ = false; + if (GetThreadHeap() == heap) { + // Somehow heap got reinstated by a recursive call to malloc + // from pthread_setspecific. We give up in this case. + return; + } + + // We can now get rid of the heap + DeleteCache(heap); +} + +void ThreadCache::DestroyThreadCache(void* ptr) { + // Note that "ptr" cannot be NULL since pthread promises not + // to invoke the destructor on NULL values, but for safety, + // we check anyway. + if (ptr == NULL) return; +#ifdef HAVE_TLS + // Prevent fast path of GetThreadHeap() from returning heap. 
+ threadlocal_heap_ = NULL; +#endif + DeleteCache(reinterpret_cast<ThreadCache*>(ptr)); +} + +void ThreadCache::DeleteCache(ThreadCache* heap) { + // Remove all memory from heap + heap->Cleanup(); + + // Remove from linked list + SpinLockHolder h(Static::pageheap_lock()); + if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; + if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; + if (thread_heaps_ == heap) thread_heaps_ = heap->next_; + thread_heap_count_--; + + if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_; + if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_; + unclaimed_cache_space_ += heap->max_size_; + + threadcache_allocator.Delete(heap); +} + +void ThreadCache::RecomputePerThreadCacheSize() { + // Divide available space across threads + int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1; + size_t space = overall_thread_cache_size_ / n; + + // Limit to allowed range + if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; + if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; + + double ratio = space / max<double>(1, per_thread_cache_size_); + size_t claimed = 0; + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + // Increasing the total cache size should not circumvent the + // slow-start growth of max_size_. + if (ratio < 1.0) { + h->max_size_ = static_cast<size_t>(h->max_size_ * ratio); + } + claimed += h->max_size_; + } + unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; + per_thread_cache_size_ = space; + // TCMalloc_MESSAGE(__FILE__, __LINE__, "Threads %d => cache size %8d\n", n, int(space)); +} + +void ThreadCache::Print(TCMalloc_Printer* out) const { + for (int cl = 0; cl < kNumClasses; ++cl) { + out->printf(" %5" PRIuS " : %4" PRIuS " len; %4d lo; %4"PRIuS + " max; %4"PRIuS" overages;\n", + Static::sizemap()->ByteSizeForClass(cl), + list_[cl].length(), + list_[cl].lowwatermark(), + list_[cl].max_length(), + list_[cl].length_overages()); + } +} + +void ThreadCache::PrintThreads(TCMalloc_Printer* out) { + size_t actual_limit = 0; + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + h->Print(out); + actual_limit += h->max_size_; + } + out->printf("ThreadCache overall: %"PRIuS ", unclaimed: %"PRIuS + ", actual: %"PRIuS"\n", + overall_thread_cache_size_, unclaimed_cache_space_, actual_limit); +} + +void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { + for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) { + *total_bytes += h->Size(); + if (class_count) { + for (int cl = 0; cl < kNumClasses; ++cl) { + class_count[cl] += h->freelist_length(cl); + } + } + } +} + +void ThreadCache::set_overall_thread_cache_size(size_t new_size) { + // Clip the value to a reasonable range + if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; + if (new_size > (1<<30)) new_size = (1<<30); // Limit to 1GB + overall_thread_cache_size_ = new_size; + + RecomputePerThreadCacheSize(); +} + +} // namespace tcmalloc diff --git a/third_party/tcmalloc/chromium/src/thread_cache.h b/third_party/tcmalloc/chromium/src/thread_cache.h new file mode 100644 index 0000000..4c6a233 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/thread_cache.h @@ -0,0 +1,396 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// --- +// Author: Sanjay Ghemawat <opensource@google.com> + +#ifndef TCMALLOC_THREAD_CACHE_H_ +#define TCMALLOC_THREAD_CACHE_H_ + +#include <config.h> +#ifdef HAVE_PTHREAD +#include <pthread.h> +#endif +#include "common.h" +#include "linked_list.h" +#include "maybe_threads.h" +#include "page_heap_allocator.h" +#include "sampler.h" +#include "static_vars.h" + +namespace tcmalloc { + +// Even if we have support for thread-local storage in the compiler +// and linker, the OS may not support it. We need to check that at +// runtime. Right now, we have to keep a manual set of "bad" OSes. +#if defined(HAVE_TLS) +extern bool kernel_supports_tls; // defined in thread_cache.cc +void CheckIfKernelSupportsTLS(); +inline bool KernelSupportsTLS() { + return kernel_supports_tls; +} +#endif // HAVE_TLS + +//------------------------------------------------------------------- +// Data kept per thread +//------------------------------------------------------------------- + +class ThreadCache { + public: + // Default bound on the total amount of thread caches. + static const size_t kDefaultOverallThreadCacheSize = 16 << 20; + + // All ThreadCache objects are kept in a linked list (for stats collection) + ThreadCache* next_; + ThreadCache* prev_; + + void Init(pthread_t tid); + void Cleanup(); + + // Accessors (mostly just for printing stats) + int freelist_length(size_t cl) const { return list_[cl].length(); } + + // Total byte size in cache + size_t Size() const { return size_; } + + void* Allocate(size_t size); + void Deallocate(void* ptr, size_t size_class); + + void Scavenge(); + void Print(TCMalloc_Printer* out) const; + + int GetSamplePeriod(); + + // Record allocation of "k" bytes. 
Return true iff allocation + // should be sampled + bool SampleAllocation(size_t k); + + static void InitModule(); + static void InitTSD(); + static ThreadCache* GetThreadHeap(); + static ThreadCache* GetCache(); + static ThreadCache* GetCacheIfPresent(); + static ThreadCache* CreateCacheIfNecessary(); + static void BecomeIdle(); + + // Return the number of thread heaps in use. + static inline int HeapsInUse(); + + // Writes to total_bytes the total number of bytes used by all thread heaps. + // class_count must be an array of size kNumClasses. Writes the number of + // items on the corresponding freelist. class_count may be NULL. + // The storage of both parameters must be zero intialized. + // REQUIRES: Static::pageheap_lock is held. + static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count); + + // Write debugging statistics to 'out'. + // REQUIRES: Static::pageheap_lock is held. + static void PrintThreads(TCMalloc_Printer* out); + + // Sets the total thread cache size to new_size, recomputing the + // individual thread cache sizes as necessary. + // REQUIRES: Static::pageheap lock is held. + static void set_overall_thread_cache_size(size_t new_size); + static size_t overall_thread_cache_size() { + return overall_thread_cache_size_; + } + + private: + class FreeList { + private: + void* list_; // Linked list of nodes + +#ifdef _LP64 + // On 64-bit hardware, manipulating 16-bit values may be slightly slow. + uint32_t length_; // Current length. + uint32_t lowater_; // Low water mark for list length. + uint32_t max_length_; // Dynamic max list length based on usage. + // Tracks the number of times a deallocation has caused + // length_ > max_length_. After the kMaxOverages'th time, max_length_ + // shrinks and length_overages_ is reset to zero. + uint32_t length_overages_; +#else + // If we aren't using 64-bit pointers then pack these into less space. + uint16_t length_; + uint16_t lowater_; + uint16_t max_length_; + uint16_t length_overages_; +#endif + + public: + void Init() { + list_ = NULL; + length_ = 0; + lowater_ = 0; + max_length_ = 1; + length_overages_ = 0; + } + + // Return current length of list + size_t length() const { + return length_; + } + + // Return the maximum length of the list. + size_t max_length() const { + return max_length_; + } + + // Set the maximum length of the list. If 'new_max' > length(), the + // client is responsible for removing objects from the list. + void set_max_length(size_t new_max) { + max_length_ = new_max; + } + + // Return the number of times that length() has gone over max_length(). + size_t length_overages() const { + return length_overages_; + } + + void set_length_overages(size_t new_count) { + length_overages_ = new_count; + } + + // Is list empty? 
+ bool empty() const { + return list_ == NULL; + } + + // Low-water mark management + int lowwatermark() const { return lowater_; } + void clear_lowwatermark() { lowater_ = length_; } + + void Push(void* ptr) { + SLL_Push(&list_, ptr); + length_++; + } + + void* Pop() { + ASSERT(list_ != NULL); + length_--; + if (length_ < lowater_) lowater_ = length_; + return SLL_Pop(&list_); + } + + void PushRange(int N, void *start, void *end) { + SLL_PushRange(&list_, start, end); + length_ += N; + } + + void PopRange(int N, void **start, void **end) { + SLL_PopRange(&list_, N, start, end); + ASSERT(length_ >= N); + length_ -= N; + if (length_ < lowater_) lowater_ = length_; + } + }; + + // The number of bytes one ThreadCache will steal from another when + // the first ThreadCache is forced to Scavenge(), delaying the + // next call to Scavenge for this thread. + static const size_t kStealAmount = 1 << 16; + + // Lower and upper bounds on the per-thread cache sizes + static const size_t kMinThreadCacheSize = kMaxSize * 2; //kStealAmount; + static const size_t kMaxThreadCacheSize = 2 << 20; + + // The number of times that a deallocation can cause a freelist to + // go over its max_length() before shrinking max_length(). + static const int kMaxOverages = 3; + + // Gets and returns an object from the central cache, and, if possible, + // also adds some objects of that size class to this thread cache. + void* FetchFromCentralCache(size_t cl, size_t byte_size); + + // Releases some number of items from src. Adjusts the list's max_length + // to eventually converge on num_objects_to_move(cl). + void ListTooLong(FreeList* src, size_t cl); + + // Releases N items from this thread cache. + void ReleaseToCentralCache(FreeList* src, size_t cl, int N); + + // Increase max_size_ by reducing unclaimed_cache_space_ or by + // reducing the max_size_ of some other thread. In both cases, + // the delta is kStealAmount. + void IncreaseCacheLimit(); + // Same as above but requires Static::pageheap_lock() is held. + void IncreaseCacheLimitLocked(); + + // If TLS is available, we also store a copy of the per-thread object + // in a __thread variable since __thread variables are faster to read + // than pthread_getspecific(). We still need pthread_setspecific() + // because __thread variables provide no way to run cleanup code when + // a thread is destroyed. + // We also give a hint to the compiler to use the "initial exec" TLS + // model. This is faster than the default TLS model, at the cost that + // you cannot dlopen this library. (To see the difference, look at + // the CPU use of __tls_get_addr with and without this attribute.) + // Since we don't really use dlopen in google code -- and using dlopen + // on a malloc replacement is asking for trouble in any case -- that's + // a good tradeoff for us. +#ifdef HAVE_TLS + static __thread ThreadCache* threadlocal_heap_ +# ifdef HAVE___ATTRIBUTE__ + __attribute__ ((tls_model ("initial-exec"))) +# endif + ; +#endif + + // Thread-specific key. Initialization here is somewhat tricky + // because some Linux startup code invokes malloc() before it + // is in a good enough state to handle pthread_keycreate(). + // Therefore, we use TSD keys only after tsd_inited is set to true. + // Until then, we use a slow path to get the heap object. + static bool tsd_inited_; + static pthread_key_t heap_key_; + + // Linked list of heap objects. Protected by Static::pageheap_lock. 
+ static ThreadCache* thread_heaps_; + static int thread_heap_count_; + + // A pointer to one of the objects in thread_heaps_. Represents + // the next ThreadCache from which a thread over its max_size_ should + // steal memory limit. Round-robin through all of the objects in + // thread_heaps_. Protected by Static::pageheap_lock. + static ThreadCache* next_memory_steal_; + + // Overall thread cache size. Protected by Static::pageheap_lock. + static size_t overall_thread_cache_size_; + + // Global per-thread cache size. Writes are protected by + // Static::pageheap_lock. Reads are done without any locking, which should be + // fine as long as size_t can be written atomically and we don't place + // invariants between this variable and other pieces of state. + static volatile size_t per_thread_cache_size_; + + // Represents overall_thread_cache_size_ minus the sum of max_size_ + // across all ThreadCaches. Protected by Static::pageheap_lock. + static ssize_t unclaimed_cache_space_; + + // Warning: the offset of list_ affects performance. On general + // principles, we don't like list_[x] to span multiple L1 cache + // lines. However, merely placing list_ at offset 0 here seems to + // cause cache conflicts. + + size_t size_; // Combined size of data + size_t max_size_; // size_ > max_size_ --> Scavenge() + pthread_t tid_; // Which thread owns it + FreeList list_[kNumClasses]; // Array indexed by size-class + bool in_setspecific_; // In call to pthread_setspecific? + + // Allocate a new heap. REQUIRES: Static::pageheap_lock is held. + static ThreadCache* NewHeap(pthread_t tid); + + // Use only as pthread thread-specific destructor function. + static void DestroyThreadCache(void* ptr); + + static void DeleteCache(ThreadCache* heap); + static void RecomputePerThreadCacheSize(); + + // We sample allocations, biased by the size of the allocation + Sampler sampler_; // A sampler +}; + +// Allocator for thread heaps +// This is logically part of the ThreadCache class, but MSVC, at +// least, does not like using ThreadCache as a template argument +// before the class is fully defined. So we put it outside the class. +extern PageHeapAllocator<ThreadCache> threadcache_allocator; + +inline int ThreadCache::HeapsInUse() { + return threadcache_allocator.inuse(); +} + +inline bool ThreadCache::SampleAllocation(size_t k) { + return sampler_.SampleAllocation(k); +} + +inline void* ThreadCache::Allocate(size_t size) { + ASSERT(size <= kMaxSize); + const size_t cl = Static::sizemap()->SizeClass(size); + const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl); + FreeList* list = &list_[cl]; + if (list->empty()) { + return FetchFromCentralCache(cl, alloc_size); + } + size_ -= alloc_size; + return list->Pop(); +} + +inline void ThreadCache::Deallocate(void* ptr, size_t cl) { + FreeList* list = &list_[cl]; + size_ += Static::sizemap()->ByteSizeForClass(cl); + ssize_t size_headroom = max_size_ - size_ - 1; + list->Push(ptr); + ssize_t list_headroom = + static_cast<ssize_t>(list->max_length()) - list->length(); + + // There are two relatively uncommon things that require further work. + // In the common case we're done, and in that case we need a single branch + // because of the bitwise-or trick that follows. 
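+ // (Why one branch suffices: list_headroom and size_headroom are signed, so + // in two's complement each is negative exactly when its sign bit is set. + // OR-ing them yields a negative value whenever either operand is negative, + // making (list_headroom | size_headroom) < 0 equivalent to testing the two + // conditions separately. For example, list_headroom == 3 and + // size_headroom == -1 give 3 | -1 == -1, which is negative, so the slower + // checks below run.)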
+ if ((list_headroom | size_headroom) < 0) { + if (list_headroom < 0) { + ListTooLong(list, cl); + } + if (size_ >= max_size_) Scavenge(); + } +} + +inline ThreadCache* ThreadCache::GetThreadHeap() { +#ifdef HAVE_TLS + // __thread is faster, but only when the kernel supports it + if (KernelSupportsTLS()) + return threadlocal_heap_; +#endif + return reinterpret_cast<ThreadCache *>( + perftools_pthread_getspecific(heap_key_)); +} + +inline ThreadCache* ThreadCache::GetCache() { + ThreadCache* ptr = NULL; + if (!tsd_inited_) { + InitModule(); + } else { + ptr = GetThreadHeap(); + } + if (ptr == NULL) ptr = CreateCacheIfNecessary(); + return ptr; +} + +// In deletion paths, we do not try to create a thread-cache. This is +// because we may be in the thread destruction code and may have +// already cleaned up the cache for this thread. +inline ThreadCache* ThreadCache::GetCacheIfPresent() { + if (!tsd_inited_) return NULL; + return GetThreadHeap(); +} + +} // namespace tcmalloc + +#endif // TCMALLOC_THREAD_CACHE_H_ diff --git a/third_party/tcmalloc/chromium/src/windows/TODO b/third_party/tcmalloc/chromium/src/windows/TODO new file mode 100644 index 0000000..708ec23 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/TODO @@ -0,0 +1,86 @@ +* Get heap-profile-table.cc using DeleteMatchingFiles +* Get heap-profile-table.cc using FillProcSelfMaps, DumpProcSelfMaps +* Play around with ExperimentalGetStackTrace +* Support the windows-level memory-allocation functions? See + /home/build/googleclient/earth/client/tools/memorytracking/client/memorytrace/src/memorytrace.cpp + /home/build/googleclient/total_recall/common/sitestep/* + http://www.internals.com/articles/apispy/apispy.htm + http://www.wheaty.net/APISPY32.zip +* Verify /proc/xxx/maps: + http://www.geocities.com/wah_java_dotnet/procmap/index.html +* Figure out how to edit the executable IAT so tcmalloc.dll is loaded first +* Use QueryPerformanceCounter instead of GetTickCount() (also for sparsehash) + +---- +More info on windows-level memory-allocation functions: + C runtime malloc + LocalAlloc + GlobalAlloc + HeapAlloc + VirtualAlloc + mmap stuff + +malloc, LocalAlloc and GlobalAlloc call HeapAlloc, which calls +VirtualAlloc when needed, which calls VirtualAllocEx (the __sbrk equiv?) + +siggi sez: If you want to do a generic job, you probably need to +preserve the semantics of all of these Win32 calls: + Heap32First + Heap32ListFirst + Heap32ListNext + Heap32Next + HeapAlloc + HeapCompact + HeapCreate + HeapCreateTagsW + HeapDestroy + HeapExtend + HeapFree + HeapLock + HeapQueryInformation + HeapQueryTagW + HeapReAlloc + HeapSetInformation + HeapSize + HeapSummary + HeapUnlock + HeapUsage + HeapValidate + HeapWalk + +kernel32.dll export functions and nt.dll export functions: + http://www.shorthike.com/svn/trunk/tools_win32/dm/lib/kernel32.def + http://undocumented.ntinternals.net/ + +You can edit the executable IAT to have the patching DLL be the +first one loaded. + +Most complete way to intercept system calls is patch the functions +(not the IAT). 
+ +Microsoft has some built-in routines for heap-checking: + http://support.microsoft.com/kb/268343 + +---- +Itimer replacement: + http://msdn2.microsoft.com/en-us/library/ms712713.aspx + +---- +Changes I've had to make to the project file: + +0) When creating the project file, click on "no autogenerated files" + +--- For each project: +1) Alt-F7 -> General -> [pulldown "all configurations" ] -> Output Directory -> $(SolutionDir)$(ConfigurationName) +2) Alt-F7 -> General -> [pulldown "all configurations" ] -> Intermediate Directory -> $(ConfigurationName) + +--- For each .cc file: +1) Alt-F7 -> C/C++ -> General -> [pulldown "all configurations"] -> Additional Include Directories --> src/windows + src/ +2) Alt-F7 -> C/C++ -> Code Generation -> Runtime Library -> Multi-threaded, debug/release, DLL or not + +--- For DLL: +3) Alt-F7 -> Linker -> Input -> [pulldown "all configurations" ] -> Module Definition File -> src\windows\vc7and8.def +--- For binaries depending on a DLL: +3) Right-click on project -> Project Dependencies -> [add dll] +--- For static binaries (not depending on a DLL): +3) Alt-F7 -> C/C++ -> Command Line -> [pulldown "all configurations"] -> /D PERFTOOLS_DLL_DECL= diff --git a/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c new file mode 100644 index 0000000..97b614b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/addr2line-pdb.c @@ -0,0 +1,156 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: David Vitek + * + * Dump function addresses using Microsoft debug symbols. This works + * on PDB files. Note that this program will download symbols to + * c:\websymbols without asking.
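+ * + * A typical invocation, for illustration only (the module name and the + * address here are made up): build the tool against dbghelp, then feed hex + * addresses on stdin, one per line, e.g. + * + * echo 00401000 | addr2line-pdb -f -e app.exe + * + * With -f this prints the enclosing function name followed by file:line for + * each address, or "??" / "??:0" when no symbol information is found.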
+ */ + +#define WIN32_LEAN_AND_MEAN +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include <stdio.h> +#include <stdlib.h> + +#include <windows.h> +#include <dbghelp.h> + +#define SEARCH_CAP (1024*1024) +#define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols" + +int main(int argc, char *argv[]) { + DWORD error; + HANDLE process; + ULONG64 module_base; + int i; + char* search; + char buf[256]; /* Enough to hold one hex address, I trust! */ + int rv = 0; + /* We may add SYMOPT_UNDNAME if --demangle is specified: */ + DWORD symopts = SYMOPT_DEFERRED_LOADS | SYMOPT_DEBUG | SYMOPT_LOAD_LINES; + char* filename = "a.out"; /* The default if -e isn't specified */ + int print_function_name = 0; /* Set to 1 if -f is specified */ + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--functions") == 0 || strcmp(argv[i], "-f") == 0) { + print_function_name = 1; + } else if (strcmp(argv[i], "--demangle") == 0 || + strcmp(argv[i], "-C") == 0) { + symopts |= SYMOPT_UNDNAME; + } else if (strcmp(argv[i], "-e") == 0) { + if (i + 1 >= argc) { + fprintf(stderr, "FATAL ERROR: -e must be followed by a filename\n"); + return 1; + } + filename = argv[i+1]; + i++; /* to skip over filename too */ + } else { + fprintf(stderr, "usage: " + "addr2line-pdb [-f|--functions] [-C|--demangle] [-e filename]\n"); + fprintf(stderr, "(Then list the hex addresses on stdin, one per line)\n"); + exit(1); + } + } + + process = GetCurrentProcess(); + + if (!SymInitialize(process, NULL, FALSE)) { + error = GetLastError(); + fprintf(stderr, "SymInitialize returned error : %d\n", error); + return 1; + } + + search = malloc(SEARCH_CAP); + if (SymGetSearchPath(process, search, SEARCH_CAP)) { + if (strlen(search) + sizeof(";" WEBSYM) > SEARCH_CAP) { + fprintf(stderr, "Search path too long\n"); + SymCleanup(process); + return 1; + } + strcat(search, ";" WEBSYM); + } else { + error = GetLastError(); + fprintf(stderr, "SymGetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + strcpy(search, WEBSYM); /* Use a default value */ + } + if (!SymSetSearchPath(process, search)) { + error = GetLastError(); + fprintf(stderr, "SymSetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + } + + SymSetOptions(symopts); + module_base = SymLoadModuleEx(process, NULL, filename, NULL, 0, 0, NULL, 0); + if (!module_base) { + /* SymLoadModuleEx failed */ + error = GetLastError(); + fprintf(stderr, "SymLoadModuleEx returned error : %d for %s\n", + error, filename); + SymCleanup(process); + return 1; + } + + buf[sizeof(buf)-1] = '\0'; /* Just to be safe */ + while (fgets(buf, sizeof(buf)-1, stdin)) { + /* GNU addr2line seems to just do a strtol and ignore any + * weird characters it gets, so we will too. 
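+ * (So an illustrative input line such as "0x00401000\n" parses fine: + * _strtoui64 with base 16 skips leading whitespace, accepts an optional + * 0x prefix, and stops at the first non-hex character, including the + * trailing newline left by fgets.)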
+ */ + unsigned __int64 addr = _strtoui64(buf, NULL, 16); + ULONG64 buffer[(sizeof(SYMBOL_INFO) + + MAX_SYM_NAME*sizeof(TCHAR) + + sizeof(ULONG64) - 1) + / sizeof(ULONG64)]; + PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)buffer; + IMAGEHLP_LINE64 line; + DWORD dummy; + pSymbol->SizeOfStruct = sizeof(SYMBOL_INFO); + pSymbol->MaxNameLen = MAX_SYM_NAME; + if (print_function_name) { + if (SymFromAddr(process, (DWORD64)addr, NULL, pSymbol)) { + printf("%s\n", pSymbol->Name); + } else { + printf("??\n"); + } + } + line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); + if (SymGetLineFromAddr64(process, (DWORD64)addr, &dummy, &line)) { + printf("%s:%d\n", line.FileName, (int)line.LineNumber); + } else { + printf("??:0\n"); + } + } + SymUnloadModule64(process, module_base); + SymCleanup(process); + return rv; +} diff --git a/third_party/tcmalloc/chromium/src/windows/config.h b/third_party/tcmalloc/chromium/src/windows/config.h new file mode 100644 index 0000000..b3a6852 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/config.h @@ -0,0 +1,273 @@ +/* A manual version of config.h fit for windows machines. */ + +/* Sometimes we accidentally #include this config.h instead of the one + in .. -- this is particularly true for msys/mingw, which uses the + unix config.h but also runs code in the windows directory. + */ +#ifdef __MINGW32__ +#include "../config.h" +#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +#endif + +#ifndef GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ +#define GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ + +/* define this if you are linking tcmalloc statically and overriding the + * default allocators. + * For instructions on how to use this mode, see + * http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + */ +#undef WIN32_OVERRIDE_ALLOCATORS + +/* Define to 1 if your libc has a snprintf implementation */ +#undef HAVE_SNPRINTF + +/* Define to 1 if compiler supports __builtin_stack_pointer */ +#undef HAVE_BUILTIN_STACK_POINTER + +/* Define to 1 if you have the <conflict-signal.h> header file. */ +#undef HAVE_CONFLICT_SIGNAL_H + +/* Define to 1 if you have the <cygwin/signal.h> header file. */ +#undef HAVE_CYGWIN_SIGNAL_H + +/* Define to 1 if you have the declaration of `cfree', and to 0 if you don't. + */ +#undef HAVE_DECL_CFREE + +/* Define to 1 if you have the declaration of `memalign', and to 0 if you + don't. */ +#undef HAVE_DECL_MEMALIGN + +/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if + you don't. */ +#undef HAVE_DECL_POSIX_MEMALIGN + +/* Define to 1 if you have the declaration of `pvalloc', and to 0 if you + don't. */ +#undef HAVE_DECL_PVALLOC + +/* Define to 1 if you have the declaration of `uname', and to 0 if you don't. + */ +#undef HAVE_DECL_UNAME + +/* Define to 1 if you have the declaration of `valloc', and to 0 if you don't. + */ +#undef HAVE_DECL_VALLOC + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if the system has the type `Elf32_Versym'. */ +#undef HAVE_ELF32_VERSYM + +/* Define to 1 if you have the <execinfo.h> header file. */ +#undef HAVE_EXECINFO_H + +/* Define to 1 if you have the <fcntl.h> header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the <features.h> header file. */ +#undef HAVE_FEATURES_H + +/* Define to 1 if you have the `geteuid' function. */ +#undef HAVE_GETEUID + +/* Define to 1 if you have the `getpagesize' function. */ +#define HAVE_GETPAGESIZE 1 /* we define it in windows/port.cc */ + +/* Define to 1 if you have the <glob.h> header file. 
*/ +#undef HAVE_GLOB_H + +/* Define to 1 if you have the <grp.h> header file. */ +#undef HAVE_GRP_H + +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the <libunwind.h> header file. */ +#undef HAVE_LIBUNWIND_H + +/* Define to 1 if you have the <linux/ptrace.h> header file. */ +#undef HAVE_LINUX_PTRACE_H + +/* Define to 1 if you have the <malloc.h> header file. */ +#undef HAVE_MALLOC_H + +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define to 1 if you have a working `mmap' system call. */ +#undef HAVE_MMAP + +/* define if the compiler implements namespaces */ +#define HAVE_NAMESPACES 1 + +/* Define to 1 if you have the <poll.h> header file. */ +#undef HAVE_POLL_H + +/* define if libc has program_invocation_name */ +#undef HAVE_PROGRAM_INVOCATION_NAME + +/* Define if you have POSIX threads libraries and header files. */ +#undef HAVE_PTHREAD + +/* Define to 1 if you have the <pwd.h> header file. */ +#undef HAVE_PWD_H + +/* Define to 1 if you have the `sbrk' function. */ +#undef HAVE_SBRK + +/* Define to 1 if you have the <sched.h> header file. */ +#undef HAVE_SCHED_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if the system has the type `struct mallinfo'. */ +#undef HAVE_STRUCT_MALLINFO + +/* Define to 1 if you have the <sys/prctl.h> header file. */ +#undef HAVE_SYS_PRCTL_H + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#undef HAVE_SYS_RESOURCE_H + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#undef HAVE_SYS_SOCKET_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/syscall.h> header file. */ +#undef HAVE_SYS_SYSCALL_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <sys/ucontext.h> header file. */ +#undef HAVE_SYS_UCONTEXT_H + +/* Define to 1 if you have the <sys/wait.h> header file. */ +#undef HAVE_SYS_WAIT_H + +/* Define to 1 if compiler supports __thread */ +#define HAVE_TLS 1 + +/* Define to 1 if you have the <ucontext.h> header file. */ +#undef HAVE_UCONTEXT_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the <unwind.h> header file. */ +#undef HAVE_UNWIND_H + +/* define if your compiler has __attribute__ */ +#undef HAVE___ATTRIBUTE__ + +/* Define to 1 if the system has the type `__int64'. */ +#define HAVE___INT64 1 + +/* prefix where we look for installed files */ +#undef INSTALL_PREFIX + +/* Define to 1 if int32_t is equivalent to intptr_t */ +#undef INT32_EQUALS_INTPTR + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +#undef NO_MINUS_C_MINUS_O + +/* Name of package */ +#define PACKAGE "google-perftools" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "opensource@google.com" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "google-perftools" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "google-perftools 1.4" + +/* Define to the one symbol short name of this package. 
*/ +#define PACKAGE_TARNAME "google-perftools" + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.4" + +/* How to access the PC from a struct ucontext */ +#undef PC_FROM_UCONTEXT + +/* Always the empty-string on non-windows systems. On windows, should be + "__declspec(dllexport)". This way, when we compile the dll, we export our + functions/classes. It's safe to define this here because config.h is only + used internally, to compile the DLL, and every DLL source file #includes + "config.h" before anything else. */ +#ifndef PERFTOOLS_DLL_DECL +# define PERFTOOLS_IS_A_DLL 1 /* not set if you're statically linking */ +# define PERFTOOLS_DLL_DECL __declspec(dllexport) +# define PERFTOOLS_DLL_DECL_FOR_UNITTESTS __declspec(dllimport) +#endif + +/* printf format code for printing a size_t and ssize_t */ +#define PRIdS "Id" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIuS "Iu" + +/* printf format code for printing a size_t and ssize_t */ +#define PRIxS "Ix" + +/* Define to necessary symbol if this constant uses a non-standard name on + your system. */ +#undef PTHREAD_CREATE_JOINABLE + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* the namespace where STL code like vector<> is defined */ +#define STL_NAMESPACE std + +/* Version number of package */ +#undef VERSION + +/* C99 says: define this to get the PRI... macros from stdint.h */ +#ifndef __STDC_FORMAT_MACROS +# define __STDC_FORMAT_MACROS 1 +#endif + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +#undef inline +#endif + +// --------------------------------------------------------------------- +// Extra stuff not found in config.h.in + +// This must be defined before the windows.h is included. It's needed +// for mutex.h, to give access to the TryLock method. +#ifndef _WIN32_WINNT +# define _WIN32_WINNT 0x0400 +#endif + +// We want to make sure not to ever try to #include heap-checker.h +#define NO_HEAP_CHECK 1 + +// TODO(csilvers): include windows/port.h in every relevant source file instead? +#include "windows/port.h" + +#endif /* GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ */ diff --git a/third_party/tcmalloc/chromium/src/windows/get_mangled_names.cc b/third_party/tcmalloc/chromium/src/windows/get_mangled_names.cc new file mode 100644 index 0000000..e8a96df --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/get_mangled_names.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Craig Silverstein (opensource@google.com) + +// When you are porting perftools to a new compiler or architecture +// (win64 vs win32) for instance, you'll need to change the mangled +// symbol names for operator new and friends at the top of +// patch_functions.cc. This file helps you do that. +// +// It does this by defining these functions with the proper signature. +// All you need to do is compile this file and then run dumpbin on it. +// (See http://msdn.microsoft.com/en-us/library/5x49w699.aspx for more +// on dumpbin). To do this in MSVC, use the MSVC commandline shell: +// http://msdn.microsoft.com/en-us/library/ms235639(VS.80).aspx) +// +// Then run: +// cl /c get_mangled_names.cc +// dumpbin /symbols get_mangled_names.obj +// +// It will print out the mangled (and associated unmangled) names of +// the 8 symbols you need to put at the top of patch_functions.cc. + +#include <sys/types.h> // for size_t +#include <new> // for nothrow_t + +static char m; // some dummy memory so new doesn't return NULL. + +void* operator new(size_t size) { return &m; } +void operator delete(void* p) throw() { } +void* operator new[](size_t size) { return &m; } +void operator delete[](void* p) throw() { } + +void* operator new(size_t size, const std::nothrow_t&) throw() { return &m; } +void operator delete(void* p, const std::nothrow_t&) throw() { } +void* operator new[](size_t size, const std::nothrow_t&) throw() { return &m; } +void operator delete[](void* p, const std::nothrow_t&) throw() { } diff --git a/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h new file mode 100644 index 0000000..15a2e19 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/google/tcmalloc.h @@ -0,0 +1,109 @@ +/* Copyright (c) 2003, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Sanjay Ghemawat <opensource@google.com> + * .h file by Craig Silverstein <opensource@google.com> + */ + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +// Define the version number so folks can check against it +#define TC_VERSION_MAJOR 1 +#define TC_VERSION_MINOR 4 +#define TC_VERSION_PATCH "" +#define TC_VERSION_STRING "google-perftools 1.4" + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW /* I guess we're not on a glibc system */ +# define __THROW /* __THROW is just an optimization, so ok to make it "" */ +#endif + + +#include <stdlib.h> // for struct mallinfo, if it's defined + +// Annoying stuff for windows -- makes sure clients can import these functions +#ifndef PERFTOOLS_DLL_DECL +# ifdef _WIN32 +# define PERFTOOLS_DLL_DECL __declspec(dllimport) +# else +# define PERFTOOLS_DLL_DECL +# endif +#endif + +#ifdef __cplusplus +#include <new> // for nothrow_t +extern "C" { +#endif + // Returns a human-readable version string. If major, minor, + // and/or patch are not NULL, they are set to the major version, + // minor version, and patch-code (a string, usually ""). 
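+ // + // A minimal call sketch (illustrative only; the local variables are + // hypothetical): + // int major, minor; + // const char* patch; + // const char* full = tc_version(&major, &minor, &patch); + // full receives the human-readable version string and major/minor/patch + // the individual components described above.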
+ PERFTOOLS_DLL_DECL const char* tc_version(int* major, int* minor, + const char** patch) __THROW; + + PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW; + PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW; + PERFTOOLS_DLL_DECL void* tc_realloc(void* ptr, size_t size) __THROW; + PERFTOOLS_DLL_DECL void* tc_calloc(size_t nmemb, size_t size) __THROW; + PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW; + + PERFTOOLS_DLL_DECL void* tc_memalign(size_t __alignment, + size_t __size) __THROW; + PERFTOOLS_DLL_DECL int tc_posix_memalign(void** ptr, + size_t align, size_t size) __THROW; + PERFTOOLS_DLL_DECL void* tc_valloc(size_t __size) __THROW; + PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t __size) __THROW; + + PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW; + PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW; +#if 0 + PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW; +#endif + +#ifdef __cplusplus + PERFTOOLS_DLL_DECL void* tc_new(size_t size); + PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray(size_t size); + PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW; + + PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, + const std::nothrow_t&) __THROW; + PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, + const std::nothrow_t&) __THROW; +} +#endif + +#endif // #ifndef TCMALLOC_TCMALLOC_H_ diff --git a/third_party/tcmalloc/chromium/src/windows/ia32_modrm_map.cc b/third_party/tcmalloc/chromium/src/windows/ia32_modrm_map.cc new file mode 100644 index 0000000..f1f19062 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/ia32_modrm_map.cc @@ -0,0 +1,121 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Table of relevant information about how to decode the ModR/M byte. 
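+ * (Reading the tables below: each ModrmEntry appears to hold { followed by + * a displacement?, followed by a SIB byte?, size of that displacement }. + * For instance, in 32-bit addressing, mod == 00 with r/m == 101 is the + * disp32-only form and matches { true, false, OS_DOUBLE_WORD }, while + * mod == 01 with r/m == 100 takes a SIB byte plus an 8-bit displacement + * and matches { true, true, OS_BYTE }.)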
+ * Based on information in the IA-32 Intel® Architecture + * Software Developer’s Manual Volume 2: Instruction Set Reference. + */ + +#include "mini_disassembler.h" +#include "mini_disassembler_types.h" + +namespace sidestep { + +const ModrmEntry MiniDisassembler::s_ia16_modrm_map_[] = { +// mod == 00 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, OS_ZERO }, + /* r/m == 110 */ { true, false, OS_WORD }, + /* r/m == 111 */ { false, false, OS_ZERO }, +// mod == 01 + /* r/m == 000 */ { true, false, OS_BYTE }, + /* r/m == 001 */ { true, false, OS_BYTE }, + /* r/m == 010 */ { true, false, OS_BYTE }, + /* r/m == 011 */ { true, false, OS_BYTE }, + /* r/m == 100 */ { true, false, OS_BYTE }, + /* r/m == 101 */ { true, false, OS_BYTE }, + /* r/m == 110 */ { true, false, OS_BYTE }, + /* r/m == 111 */ { true, false, OS_BYTE }, +// mod == 10 + /* r/m == 000 */ { true, false, OS_WORD }, + /* r/m == 001 */ { true, false, OS_WORD }, + /* r/m == 010 */ { true, false, OS_WORD }, + /* r/m == 011 */ { true, false, OS_WORD }, + /* r/m == 100 */ { true, false, OS_WORD }, + /* r/m == 101 */ { true, false, OS_WORD }, + /* r/m == 110 */ { true, false, OS_WORD }, + /* r/m == 111 */ { true, false, OS_WORD }, +// mod == 11 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, OS_ZERO }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO } +}; + +const ModrmEntry MiniDisassembler::s_ia32_modrm_map_[] = { +// mod == 00 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, true, OS_ZERO }, + /* r/m == 101 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO }, +// mod == 01 + /* r/m == 000 */ { true, false, OS_BYTE }, + /* r/m == 001 */ { true, false, OS_BYTE }, + /* r/m == 010 */ { true, false, OS_BYTE }, + /* r/m == 011 */ { true, false, OS_BYTE }, + /* r/m == 100 */ { true, true, OS_BYTE }, + /* r/m == 101 */ { true, false, OS_BYTE }, + /* r/m == 110 */ { true, false, OS_BYTE }, + /* r/m == 111 */ { true, false, OS_BYTE }, +// mod == 10 + /* r/m == 000 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 001 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 010 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 011 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 100 */ { true, true, OS_DOUBLE_WORD }, + /* r/m == 101 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 110 */ { true, false, OS_DOUBLE_WORD }, + /* r/m == 111 */ { true, false, OS_DOUBLE_WORD }, +// mod == 11 + /* r/m == 000 */ { false, false, OS_ZERO }, + /* r/m == 001 */ { false, false, OS_ZERO }, + /* r/m == 010 */ { false, false, OS_ZERO }, + /* r/m == 011 */ { false, false, OS_ZERO }, + /* r/m == 100 */ { false, false, OS_ZERO }, + /* r/m == 101 */ { false, false, OS_ZERO }, + /* r/m == 110 */ { false, false, OS_ZERO }, + /* r/m == 111 */ { false, false, OS_ZERO }, +}; + +}; // namespace sidestep diff --git a/third_party/tcmalloc/chromium/src/windows/ia32_opcode_map.cc 
b/third_party/tcmalloc/chromium/src/windows/ia32_opcode_map.cc new file mode 100644 index 0000000..c9ec18b --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/ia32_opcode_map.cc @@ -0,0 +1,1188 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Opcode decoding maps. Based on the IA-32 Intel® Architecture + * Software Developer’s Manual Volume 2: Instruction Set Reference. Idea + * for how to lay out the tables in memory taken from the implementation + * in the Bastard disassembly environment. 
+ */ + +#include "mini_disassembler.h" + +namespace sidestep { + +/* +* This is the first table to be searched; the first field of each +* Opcode in the table is either 0 to indicate you're in the +* right table, or an index to the correct table, in the global +* map g_pentiumOpcodeMap +*/ +const Opcode s_first_opcode_byte[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF */ { 1, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x10 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x11 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x12 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x13 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x14 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x15 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x16 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x17 */ { 0, IT_GENERIC, AM_REGISTER | 
OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x18 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x19 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1E */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1F */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x20 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x21 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x22 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x23 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x24 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x25 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x26 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x27 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "daa", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x28 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x29 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "das", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x30 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 
0x31 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x32 */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x33 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x34 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x35 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x36 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x37 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aaa", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x38 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x39 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3C */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3E */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "aas", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x40 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, 
AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x50 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x51 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x52 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x53 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x54 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x55 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x56 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x57 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x58 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x59 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5A */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5B */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5C */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5D */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5E */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x60 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "pushad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x61 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "popad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x62 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_A, AM_NOT_USED, "bound", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x63 */ { 0, IT_GENERIC, AM_E | OT_W, AM_G 
| OT_W, AM_NOT_USED, "arpl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x64 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x65 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x66 */ { 0, IT_PREFIX_OPERAND, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x67 */ { 0, IT_PREFIX_ADDRESS, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x68 */ { 0, IT_GENERIC, AM_I | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x69 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_I | OT_V, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6A */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_I | OT_B, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6C */ { 0, IT_GENERIC, AM_Y | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "insb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6D */ { 0, IT_GENERIC, AM_Y | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "insd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6E */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_X | OT_B, AM_NOT_USED, "outsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6F */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_X | OT_V, AM_NOT_USED, "outsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x70 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x71 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x72 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x73 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x74 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x75 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x76 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x77 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "ja", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x78 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "js", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x79 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7A */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7B */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7C */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7D */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jge", 
false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7E */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7F */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x80 */ { 2, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x81 */ { 3, IT_REFERENCE, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x82 */ { 4, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x83 */ { 5, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x84 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x85 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x86 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x87 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x88 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x89 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8A */ { 0, IT_GENERIC, AM_G | OT_B, AM_E | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8C */ { 0, IT_GENERIC, AM_E | OT_W, AM_S | OT_W, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8D */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_ADDRESS_MODE_M, AM_NOT_USED, "lea", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8E */ { 0, IT_GENERIC, AM_S | OT_W, AM_E | OT_W, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8F */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x90 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "nop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x91 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x92 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x93 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x94 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x95 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x96 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + 
/* 0x97 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "xchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x98 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cwde", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x99 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cdq", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9A */ { 0, IT_JUMP, AM_A | OT_P, AM_NOT_USED, AM_NOT_USED, "callf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9B */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wait", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9C */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "pushfd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9D */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "popfd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9E */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sahf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9F */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lahf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA0 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_O | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA1 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_O | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA2 */ { 0, IT_GENERIC, AM_O | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA3 */ { 0, IT_GENERIC, AM_O | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA4 */ { 0, IT_GENERIC, AM_X | OT_B, AM_Y | OT_B, AM_NOT_USED, "movsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA5 */ { 0, IT_GENERIC, AM_X | OT_V, AM_Y | OT_V, AM_NOT_USED, "movsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA6 */ { 0, IT_GENERIC, AM_X | OT_B, AM_Y | OT_B, AM_NOT_USED, "cmpsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA7 */ { 0, IT_GENERIC, AM_X | OT_V, AM_Y | OT_V, AM_NOT_USED, "cmpsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA8 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAA */ { 0, IT_GENERIC, AM_Y | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "stosb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAB */ { 0, IT_GENERIC, AM_Y | OT_V, AM_REGISTER | OT_V, AM_NOT_USED, "stosd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_X| OT_B, AM_NOT_USED, "lodsb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_X| OT_V, AM_NOT_USED, "lodsd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAE */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_Y | OT_B, AM_NOT_USED, "scasb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_Y | OT_V, AM_NOT_USED, "scasd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB0 */ { 0, IT_GENERIC, 
AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB1 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB2 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB3 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB8 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC0 */ { 6, IT_REFERENCE, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC1 */ { 7, IT_REFERENCE, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC2 */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC3 */ { 0, IT_RETURN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC4 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_P, AM_NOT_USED, "les", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC5 */ { 0, IT_GENERIC, AM_G | OT_V, AM_M | OT_P, AM_NOT_USED, "lds", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC8 */ { 0, IT_GENERIC, AM_I | OT_W, AM_I | OT_B, AM_NOT_USED, "enter", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 
"leave", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCA */ { 0, IT_RETURN, AM_I | OT_W, AM_NOT_USED, AM_NOT_USED, "retf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCB */ { 0, IT_RETURN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "retf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCC */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "int3", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCD */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "int", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCE */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "into", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCF */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "iret", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD0 */ { 8, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD1 */ { 9, IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD2 */ { 10, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD3 */ { 11, IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD4 */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "aam", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD5 */ { 0, IT_GENERIC, AM_I | OT_B, AM_NOT_USED, AM_NOT_USED, "aad", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD7 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "xlat", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + // The following 8 lines would be references to the FPU tables, but we currently + // do not support the FPU instructions in this disassembler. 
+ + /* 0xD8 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD9 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDA */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDB */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDC */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDD */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDE */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xDF */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + + /* 0xE0 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loopnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE1 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loopz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE2 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "loop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE3 */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jcxz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE4 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE5 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_I | OT_B, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE6 */ { 0, IT_GENERIC, AM_I | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE7 */ { 0, IT_GENERIC, AM_I | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE8 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE9 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEA */ { 0, IT_JUMP, AM_A | OT_P, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEB */ { 0, IT_JUMP, AM_J | OT_B, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEC */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_REGISTER | OT_W, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xED */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_REGISTER | OT_W, AM_NOT_USED, "in", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEE */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_REGISTER | OT_B, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xEF */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_REGISTER | OT_V, AM_NOT_USED, "out", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF0 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lock:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 
66h */ { 0 } }, + /* 0xF2 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "repne:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF3 */ { 0, IT_PREFIX, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rep:", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF4 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "hlt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF5 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cmc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF6 */ { 12, IT_REFERENCE, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF7 */ { 13, IT_REFERENCE, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF8 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "stc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFA */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cli", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFB */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFC */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFD */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "std", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFE */ { 14, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xFF */ { 15, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f[] = { + /* 0x0 */ { 16, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 17, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "lar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "lsl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "invd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wbinvd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ud2", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 
66h */ { 0 } }, + /* 0xC */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xE */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xF */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x10 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "movups", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "movsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "movss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "movupd" } }, + /* 0x11 */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movups", true, + /* F2h */ { 0, IT_GENERIC, AM_W | OT_SD, AM_V | OT_SD, AM_NOT_USED, "movsd" }, + /* F3h */ { 0, IT_GENERIC, AM_W | OT_SS, AM_V | OT_SS, AM_NOT_USED, "movss" }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movupd" } }, + /* 0x12 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhlps" }, // only one of ... + /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhlps" }, // ...these two is correct, Intel doesn't specify which + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_S, AM_NOT_USED, "movlpd" } }, + /* 0x13 */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movlps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movlpd" } }, + /* 0x14 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_Q, AM_NOT_USED, "unpcklps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_Q, AM_NOT_USED, "unpcklpd" } }, + /* 0x15 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_Q, AM_NOT_USED, "unpckhps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_Q, AM_NOT_USED, "unpckhpd" } }, + /* 0x16 */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movhps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlhps" }, // only one of... 
+ /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movlhps" }, // ...these two is correct, Intel doesn't specify which + /* 66h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movhpd" } }, + /* 0x17 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movhpd" } }, + /* 0x18 */ { 18, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x19 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1A */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1B */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1C */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1D */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1E */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1F */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x20 */ { 0, IT_GENERIC, AM_R | OT_D, AM_C | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x21 */ { 0, IT_GENERIC, AM_R | OT_D, AM_D | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x22 */ { 0, IT_GENERIC, AM_C | OT_D, AM_R | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x23 */ { 0, IT_GENERIC, AM_D | OT_D, AM_R | OT_D, AM_NOT_USED, "mov", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x24 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x25 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x26 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x27 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x28 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "movaps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "movapd" } }, + /* 0x29 */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movaps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movapd" } }, + /* 0x2A */ { 0, IT_GENERIC, AM_V | OT_PS, AM_Q | OT_Q, AM_NOT_USED, "cvtpi2ps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_E | OT_D, AM_NOT_USED, "cvtsi2sd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_E | OT_D, AM_NOT_USED, "cvtsi2ss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_Q | OT_DQ, AM_NOT_USED, "cvtpi2pd" } }, + /* 0x2B */ { 0, IT_GENERIC, AM_W | OT_PS, AM_V | OT_PS, AM_NOT_USED, "movntps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_PD, AM_V | OT_PD, AM_NOT_USED, "movntpd" } }, + /* 0x2C */ { 0, IT_GENERIC, AM_Q | 
OT_Q, AM_W | OT_PS, AM_NOT_USED, "cvttps2pi", true, + /* F2h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SD, AM_NOT_USED, "cvttsd2si" }, + /* F3h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SS, AM_NOT_USED, "cvttss2si" }, + /* 66h */ { 0, IT_GENERIC, AM_Q | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvttpd2pi" } }, + /* 0x2D */ { 0, IT_GENERIC, AM_Q | OT_Q, AM_W | OT_PS, AM_NOT_USED, "cvtps2pi", true, + /* F2h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SD, AM_NOT_USED, "cvtsd2si" }, + /* F3h */ { 0, IT_GENERIC, AM_G | OT_D, AM_W | OT_SS, AM_NOT_USED, "cvtss2si" }, + /* 66h */ { 0, IT_GENERIC, AM_Q | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvtpd2pi" } }, + /* 0x2E */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "ucomiss", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "ucomisd" } }, + /* 0x2F */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_SS, AM_NOT_USED, "comiss", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "comisd" } }, + /* 0x30 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "wrmsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x31 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdtsc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x32 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdmsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x33 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rdpmc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x34 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sysenter", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x35 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "sysexit", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x36 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x37 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x38 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x39 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3A */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3B */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3C */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "movnti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3D */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3E */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3F */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x40 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x41 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x42 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, 
AM_NOT_USED, "cmovc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x43 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x44 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x45 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x46 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x47 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmova", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x48 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovs", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x49 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4A */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4B */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4C */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4D */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4E */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4F */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "cmovg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x50 */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_PS, AM_NOT_USED, "movmskps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_PD, AM_NOT_USED, "movmskpd" } }, + /* 0x51 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "sqrtps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "sqrtsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "sqrtss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "sqrtpd" } }, + /* 0x52 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "rsqrtps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "rsqrtss" }, + /* 66h */ { 0 } }, + /* 0x53 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "rcpps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "rcpss" }, + /* 66h */ { 0 } }, + /* 0x54 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "andps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "andpd" } }, + /* 0x55 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "andnps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "andnpd" } }, + /* 0x56 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "orps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "orpd" } }, + /* 0x57 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "xorps", true, 
+ /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "xorpd" } }, + /* 0x58 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "addps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "addsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "addss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "addpd" } }, + /* 0x59 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "mulps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "mulsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "mulss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "mulpd" } }, + /* 0x5A */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PS, AM_NOT_USED, "cvtps2pd", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "cvtsd2ss" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "cvtss2sd" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PD, AM_NOT_USED, "cvtpd2ps" } }, + /* 0x5B */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_DQ, AM_NOT_USED, "cvtdq2ps", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PS, AM_NOT_USED, "cvttps2dq" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PS, AM_NOT_USED, "cvtps2dq" } }, + /* 0x5C */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "subps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "subsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "subss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "subpd" } }, + /* 0x5D */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "minps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "minsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "minss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "minpd" } }, + /* 0x5E */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "divps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "divsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "divss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "divpd" } }, + /* 0x5F */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_NOT_USED, "maxps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_NOT_USED, "maxsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_NOT_USED, "maxss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_NOT_USED, "maxpd" } }, + /* 0x60 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpcklbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklbw" } }, + /* 0x61 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpcklwd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklwd" } }, + /* 0x62 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckldq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpckldq" } }, + /* 0x63 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packsswb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, 
AM_NOT_USED, "packsswb" } }, + /* 0x64 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtb" } }, + /* 0x65 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtw" } }, + /* 0x66 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "pcmpgtd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpgtd" } }, + /* 0x67 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packuswb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "packuswb" } }, + /* 0x68 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhbw" } }, + /* 0x69 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhwd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhwd" } }, + /* 0x6A */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "punpckhdq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "punpckhdq" } }, + /* 0x6B */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "packssdw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_Q | OT_DQ, AM_NOT_USED, "packssdw" } }, + /* 0x6C */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklqdq" } }, + /* 0x6D */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "punpcklqdq" } }, + /* 0x6E */ { 0, IT_GENERIC, AM_P | OT_D, AM_E | OT_D, AM_NOT_USED, "movd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_E | OT_D, AM_NOT_USED, "movd" } }, + /* 0x6F */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_D, AM_NOT_USED, "movq", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "movdqu" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "movdqa" } }, + /* 0x70 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_I | OT_B, "pshuf", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshuflw" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshufhw" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_I | OT_B, "pshufd" } }, + /* 0x71 */ { 19, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x72 */ { 20, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x73 */ { 21, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x74 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W 
| OT_DQ, AM_NOT_USED, "pcmpeqb" } }, + /* 0x75 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpeqw" } }, + /* 0x76 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pcmpeqd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pcmpeqd" } }, + /* 0x77 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "emms", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + // The following six opcodes are escapes into the MMX stuff, which this disassembler does not support. + /* 0x78 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x79 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7A */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7B */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7C */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7D */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + + /* 0x7E */ { 0, IT_GENERIC, AM_E | OT_D, AM_P | OT_D, AM_NOT_USED, "movd", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movq" }, + /* 66h */ { 0, IT_GENERIC, AM_E | OT_D, AM_V | OT_DQ, AM_NOT_USED, "movd" } }, + /* 0x7F */ { 0, IT_GENERIC, AM_Q | OT_Q, AM_P | OT_Q, AM_NOT_USED, "movq", true, + /* F2h */ { 0 }, + /* F3h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movdqu" }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movdqa" } }, + /* 0x80 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x81 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x82 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x83 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x84 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x85 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x86 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x87 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "ja", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x88 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "js", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x89 */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8A */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8B */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jpo", 
false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8C */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8D */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8E */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x8F */ { 0, IT_JUMP, AM_J | OT_V, AM_NOT_USED, AM_NOT_USED, "jg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x90 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "seto", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x91 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setno", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x92 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x93 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setnc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x94 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x95 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setnz", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x96 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setbe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x97 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "seta", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x98 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "sets", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x99 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setns", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9A */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setpe", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9B */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setpo", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9C */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9D */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setge", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9E */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setle", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x9F */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "setg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA0 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA1 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA2 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "cpuid", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "bt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B, "shld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | 
OT_B | AM_REGISTER, "shld", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA6 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA7 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA8 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xA9 */ { 0, IT_GENERIC, AM_REGISTER | OT_W, AM_NOT_USED, AM_NOT_USED, "pop", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAA */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "rsm", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAB */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "bts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAC */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B, "shrd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAD */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_I | OT_B | AM_REGISTER, "shrd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAE */ { 22, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xAF */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "cmpxchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "cmpxchg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB2 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lss", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "btr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB4 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lfs", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB5 */ { 0, IT_GENERIC, AM_M | OT_P, AM_NOT_USED, AM_NOT_USED, "lgs", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB6 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_B, AM_NOT_USED, "movzx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB7 */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_W, AM_NOT_USED, "movzx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB8 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xB9 */ { 0, IT_UNKNOWN, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ud1", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBA */ { 23, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBB */ { 0, IT_GENERIC, AM_E | OT_V, AM_G | OT_V, AM_NOT_USED, "btc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBC */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "bsf", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBD */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_V, AM_NOT_USED, "bsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBE */ { 0, IT_GENERIC, AM_G | OT_V, AM_E | OT_B, AM_NOT_USED, "movsx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xBF */ { 0, IT_GENERIC, AM_G 
| OT_V, AM_E | OT_W, AM_NOT_USED, "movsx", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_G | OT_B, AM_NOT_USED, "xadd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "xadd", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC2 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_I | OT_B, "cmpps", true, + /* F2h */ { 0, IT_GENERIC, AM_V | OT_SD, AM_W | OT_SD, AM_I | OT_B, "cmpsd" }, + /* F3h */ { 0, IT_GENERIC, AM_V | OT_SS, AM_W | OT_SS, AM_I | OT_B, "cmpss" }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_I | OT_B, "cmppd" } }, + /* 0xC3 */ { 0, IT_GENERIC, AM_E | OT_D, AM_G | OT_D, AM_NOT_USED, "movnti", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_E | OT_D, AM_I | OT_B, "pinsrw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_E | OT_D, AM_I | OT_B, "pinsrw" } }, + /* 0xC5 */ { 0, IT_GENERIC, AM_G | OT_D, AM_P | OT_Q, AM_I | OT_B, "pextrw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_G | OT_D, AM_V | OT_DQ, AM_I | OT_B, "pextrw" } }, + /* 0xC6 */ { 0, IT_GENERIC, AM_V | OT_PS, AM_W | OT_PS, AM_I | OT_B, "shufps", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_PD, AM_I | OT_B, "shufpd" } }, + /* 0xC7 */ { 24, IT_REFERENCE, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC8 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xC9 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCA */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCB */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCC */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCD */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCE */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xCF */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "bswap", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0xD1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrlw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrlw" } }, + /* 0xD2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrld" } }, + /* 0xD3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrlq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrlq" } }, + /* 0xD4 */ { 0, IT_GENERIC, AM_P | OT_Q, 
AM_Q | OT_Q, AM_NOT_USED, "paddq", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddq" } },
+ /* 0xD5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmullw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmullw" } },
+ /* 0xD6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true,
+ /* F2h */ { 0, IT_GENERIC, AM_P | OT_Q, AM_W | OT_Q, AM_NOT_USED, "movdq2q" },
+ /* F3h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_Q | OT_Q, AM_NOT_USED, "movq2dq" },
+ /* 66h */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movq" } },
+ /* 0xD7 */ { 0, IT_GENERIC, AM_G | OT_D, AM_P | OT_Q, AM_NOT_USED, "pmovmskb", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_G | OT_D, AM_V | OT_DQ, AM_NOT_USED, "pmovmskb" } },
+ /* 0xD8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubusb", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubusb" } },
+ /* 0xD9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubusw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubusw" } },
+ /* 0xDA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pminub", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pminub" } },
+ /* 0xDB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pand", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pand" } },
+ /* 0xDC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddusb", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddusb" } },
+ /* 0xDD */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddusw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddusw" } },
+ /* 0xDE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaxub", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaxub" } },
+ /* 0xDF */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pandn", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pandn" } },
+ /* 0xE0 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pavgb", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pavgb" } },
+ /* 0xE1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psraw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psraw" } },
+ /* 0xE2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psrad", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psrad" } },
+ /* 0xE3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pavgw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pavgw" } },
+ /* 0xE4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmulhuw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmulhuw" } },
+ /* 0xE5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmulhw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmulhw" } },
+ /* 0xE6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "not used without prefix", true,
+ /* F2h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvtpd2dq" },
+ /* F3h */ { 0, IT_GENERIC, AM_V | OT_PD, AM_W | OT_DQ, AM_NOT_USED, "cvtdq2pd" },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_PD, AM_NOT_USED, "cvttpd2dq" } },
+ /* 0xE7 */ { 0, IT_GENERIC, AM_W | OT_Q, AM_V | OT_Q, AM_NOT_USED, "movntq", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_V | OT_DQ, AM_NOT_USED, "movntdq" } },
+ /* 0xE8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubsb", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubsb" } },
+ /* 0xE9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubsw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubsw" } },
+ /* 0xEA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pminsw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pminsw" } },
+ /* 0xEB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "por", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "por" } },
+ /* 0xEC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddsb", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddsb" } },
+ /* 0xED */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddsw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddsw" } },
+ /* 0xEE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaxsw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaxsw" } },
+ /* 0xEF */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pxor", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pxor" } },
+ /* 0xF0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } },
+ /* 0xF1 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psllw", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psllw" } },
+ /* 0xF2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pslld", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pslld" } },
+ /* 0xF3 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psllq", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psllq" } },
+ /* 0xF4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmuludq", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmuludq" } },
+ /* 0xF5 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "pmaddwd", true,
+ /* F2h */ { 0 },
+ /* F3h */ { 0 },
+ /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "pmaddwd" } }, + /* 0xF6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psadbw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psadbw" } }, + /* 0xF7 */ { 0, IT_GENERIC, AM_P | OT_PI, AM_Q | OT_PI, AM_NOT_USED, "maskmovq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "maskmovdqu" } }, + /* 0xF8 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubb" } }, + /* 0xF9 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubw" } }, + /* 0xFA */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubd" } }, + /* 0xFB */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "psubq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "psubq" } }, + /* 0xFC */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddb", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddb" } }, + /* 0xFD */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddw" } }, + /* 0xFE */ { 0, IT_GENERIC, AM_P | OT_Q, AM_Q | OT_Q, AM_NOT_USED, "paddd", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_V | OT_DQ, AM_W | OT_DQ, AM_NOT_USED, "paddd" } }, + /* 0xFF */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f00[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "sldt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "str", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "lldt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "ltr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "verr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "verw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f01[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "sgdt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "sidt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_M | 
OT_S, AM_NOT_USED, AM_NOT_USED, "lgdt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_M | OT_S, AM_NOT_USED, AM_NOT_USED, "lidt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "smsw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_W, AM_NOT_USED, AM_NOT_USED, "lmsw", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_M | OT_B, AM_NOT_USED, AM_NOT_USED, "invlpg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f18[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_M | OT_ADDRESS_MODE_M, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_REGISTER | OT_D, AM_NOT_USED, AM_NOT_USED, "prefetch", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f71[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrlw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrlw" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psraw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psraw" } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psllw", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_P | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psllw" } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f72[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 
} }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrld" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrad", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrad" } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "pslld", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslld" } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0f73[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psrlq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psrlq" } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_P | OT_Q, AM_I | OT_B, AM_NOT_USED, "psllq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "psllq" } }, + /* 0x7 */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslldq", true, + /* F2h */ { 0 }, + /* F3h */ { 0 }, + /* 66h */ { 0, IT_GENERIC, AM_W | OT_DQ, AM_I | OT_B, AM_NOT_USED, "pslldq" } }, +}; + +const Opcode s_opcode_byte_after_0fae[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "fxsave", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "fxrstor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "ldmxcsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "stmxcsr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "lfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "mfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, "clflush/sfence", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, +}; + +const Opcode s_opcode_byte_after_0fba[] = { + /* 0x0 */ { 0, IT_UNUSED, 
AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "bt", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "bts", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "btr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "btc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_0fc7[] = { + /* 0x0 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_M | OT_Q, AM_NOT_USED, AM_NOT_USED, "cmpxch8b", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_80[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_81[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "cmp", false, /* 
F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_82[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_83[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "add", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "or", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "adc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sbb", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "and", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sub", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "xor", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "cmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_c0[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_c1[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rol", 
false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d0[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_IMPLICIT, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d1[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_IMPLICIT, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d2[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 
}, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_B, AM_REGISTER | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_d3[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rol", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "ror", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rcl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "rcr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "shl", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "shr", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "sal", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_E | OT_V, AM_REGISTER | OT_B, AM_NOT_USED, "sar", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_f6[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_I | OT_B, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "not", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "neg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, OT_B | AM_REGISTER, AM_E | OT_B, AM_NOT_USED, "mul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, OT_B | AM_REGISTER, AM_E | OT_B, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_E | OT_B, AM_NOT_USED, "div", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_B, AM_E | OT_B, AM_NOT_USED, "idiv", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_f7[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_I | OT_V, AM_NOT_USED, "test", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, 
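+ // (As in the f6 table above, both /0 and /1 map to "test"; the /1 encoding is
+ // generally treated as an alias of /0, so the same mnemonic is reported for
+ // either reg-field value.)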
+ /* 0x2 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "not", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "neg", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "mul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "imul", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "div", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_GENERIC, AM_REGISTER | OT_V, AM_E | OT_V, AM_NOT_USED, "idiv", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_fe[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_B, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +const Opcode s_opcode_byte_after_ff[] = { + /* 0x0 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "inc", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x1 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "dec", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x2 */ { 0, IT_JUMP, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x3 */ { 0, IT_JUMP, AM_E | OT_P, AM_NOT_USED, AM_NOT_USED, "call", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x4 */ { 0, IT_JUMP, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x5 */ { 0, IT_JUMP, AM_E | OT_P, AM_NOT_USED, AM_NOT_USED, "jmp", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x6 */ { 0, IT_GENERIC, AM_E | OT_V, AM_NOT_USED, AM_NOT_USED, "push", false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } }, + /* 0x7 */ { 0, IT_UNUSED, AM_NOT_USED, AM_NOT_USED, AM_NOT_USED, 0, false, /* F2h */ { 0 }, /* F3h */ { 0 }, /* 66h */ { 0 } } +}; + +/* +* A table of all the other tables, containing some extra information, e.g. +* how to mask out the byte we're looking at. 
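+* For example, entry 5 below ({s_opcode_byte_after_83, 3, 0x07, 0, 0x07})
+* handles the 83h opcode: the byte after 83h (its ModR/M byte) is shifted
+* right by 3 and masked with 07h, and the resulting reg-field value 0..7
+* selects add/or/adc/sbb/and/sub/xor/cmp from that table.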
+*/ +const OpcodeTable MiniDisassembler::s_ia32_opcode_map_[]={ + // One-byte opcodes and jumps to larger + /* 0 */ {s_first_opcode_byte, 0, 0xff, 0, 0xff}, + // Two-byte opcodes (second byte) + /* 1 */ {s_opcode_byte_after_0f, 0, 0xff, 0, 0xff}, + // Start of tables for opcodes using ModR/M bits as extension + /* 2 */ {s_opcode_byte_after_80, 3, 0x07, 0, 0x07}, + /* 3 */ {s_opcode_byte_after_81, 3, 0x07, 0, 0x07}, + /* 4 */ {s_opcode_byte_after_82, 3, 0x07, 0, 0x07}, + /* 5 */ {s_opcode_byte_after_83, 3, 0x07, 0, 0x07}, + /* 6 */ {s_opcode_byte_after_c0, 3, 0x07, 0, 0x07}, + /* 7 */ {s_opcode_byte_after_c1, 3, 0x07, 0, 0x07}, + /* 8 */ {s_opcode_byte_after_d0, 3, 0x07, 0, 0x07}, + /* 9 */ {s_opcode_byte_after_d1, 3, 0x07, 0, 0x07}, + /* 10 */ {s_opcode_byte_after_d2, 3, 0x07, 0, 0x07}, + /* 11 */ {s_opcode_byte_after_d3, 3, 0x07, 0, 0x07}, + /* 12 */ {s_opcode_byte_after_f6, 3, 0x07, 0, 0x07}, + /* 13 */ {s_opcode_byte_after_f7, 3, 0x07, 0, 0x07}, + /* 14 */ {s_opcode_byte_after_fe, 3, 0x07, 0, 0x01}, + /* 15 */ {s_opcode_byte_after_ff, 3, 0x07, 0, 0x07}, + /* 16 */ {s_opcode_byte_after_0f00, 3, 0x07, 0, 0x07}, + /* 17 */ {s_opcode_byte_after_0f01, 3, 0x07, 0, 0x07}, + /* 18 */ {s_opcode_byte_after_0f18, 3, 0x07, 0, 0x07}, + /* 19 */ {s_opcode_byte_after_0f71, 3, 0x07, 0, 0x07}, + /* 20 */ {s_opcode_byte_after_0f72, 3, 0x07, 0, 0x07}, + /* 21 */ {s_opcode_byte_after_0f73, 3, 0x07, 0, 0x07}, + /* 22 */ {s_opcode_byte_after_0fae, 3, 0x07, 0, 0x07}, + /* 23 */ {s_opcode_byte_after_0fba, 3, 0x07, 0, 0x07}, + /* 24 */ {s_opcode_byte_after_0fc7, 3, 0x07, 0, 0x01} +}; + +}; // namespace sidestep diff --git a/third_party/tcmalloc/chromium/src/windows/mingw.h b/third_party/tcmalloc/chromium/src/windows/mingw.h new file mode 100644 index 0000000..e69b5da --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/mingw.h @@ -0,0 +1,54 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + * + * MinGW is an interesting mix of unix and windows. 
We use a normal + * configure script, but still need the windows port.h to define some + * stuff that MinGW doesn't support, like pthreads. + */ + +#ifndef GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ +#define GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ + +#ifdef __MINGW32__ + +// Older version of the mingw msvcrt don't define _aligned_malloc +#if __MSVCRT_VERSION__ < 0x0700 +# define PERFTOOLS_NO_ALIGNED_MALLOC 1 +#endif + +#include "windows/port.h" + +#define HAVE_SNPRINTF 1 + +#endif /* __MINGW32__ */ + +#endif /* GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_ */ diff --git a/third_party/tcmalloc/chromium/src/windows/mini_disassembler.cc b/third_party/tcmalloc/chromium/src/windows/mini_disassembler.cc new file mode 100644 index 0000000..30bdcc1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/mini_disassembler.cc @@ -0,0 +1,419 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Implementation of MiniDisassembler. + */ + +#include "mini_disassembler.h" + +namespace sidestep { + +MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, + bool address_default_is_32_bits) + : operand_default_is_32_bits_(operand_default_is_32_bits), + address_default_is_32_bits_(address_default_is_32_bits) { + Initialize(); +} + +MiniDisassembler::MiniDisassembler() + : operand_default_is_32_bits_(true), + address_default_is_32_bits_(true) { + Initialize(); +} + +InstructionType MiniDisassembler::Disassemble( + unsigned char* start_byte, + unsigned int& instruction_bytes) { + // Clean up any state from previous invocations. + Initialize(); + + // Start by processing any prefixes. + unsigned char* current_byte = start_byte; + unsigned int size = 0; + InstructionType instruction_type = ProcessPrefixes(current_byte, size); + + if (IT_UNKNOWN == instruction_type) + return instruction_type; + + current_byte += size; + size = 0; + + // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ + // and address_is_32_bits_ flags are correctly set. 
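+  // (For example, for the byte sequence 66 0F E5 C1 -- pmulhw xmm0, xmm1 --
+  // current_byte now points at the 0F byte, got_66_prefix_ is true, and the
+  // opcode lookup below will therefore pick the 66h-dependent table entry.)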
+ + instruction_type = ProcessOpcode(current_byte, 0, size); + + // Check for error processing instruction + if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { + return IT_UNKNOWN; + } + + current_byte += size; + + // Invariant: operand_bytes_ indicates the total size of operands + // specified by the opcode and/or ModR/M byte and/or SIB byte. + // pCurrentByte points to the first byte after the ModR/M byte, or after + // the SIB byte if it is present (i.e. the first byte of any operands + // encoded in the instruction). + + // We get the total length of any prefixes, the opcode, and the ModR/M and + // SIB bytes if present, by taking the difference of the original starting + // address and the current byte (which points to the first byte of the + // operands if present, or to the first byte of the next instruction if + // they are not). Adding the count of bytes in the operands encoded in + // the instruction gives us the full length of the instruction in bytes. + instruction_bytes += operand_bytes_ + (current_byte - start_byte); + + // Return the instruction type, which was set by ProcessOpcode(). + return instruction_type_; +} + +void MiniDisassembler::Initialize() { + operand_is_32_bits_ = operand_default_is_32_bits_; + address_is_32_bits_ = address_default_is_32_bits_; + operand_bytes_ = 0; + have_modrm_ = false; + should_decode_modrm_ = false; + instruction_type_ = IT_UNKNOWN; + got_f2_prefix_ = false; + got_f3_prefix_ = false; + got_66_prefix_ = false; +} + +InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, + unsigned int& size) { + InstructionType instruction_type = IT_GENERIC; + const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; + + switch (opcode.type_) { + case IT_PREFIX_ADDRESS: + address_is_32_bits_ = !address_default_is_32_bits_; + goto nochangeoperand; + case IT_PREFIX_OPERAND: + operand_is_32_bits_ = !operand_default_is_32_bits_; + nochangeoperand: + case IT_PREFIX: + + if (0xF2 == (*start_byte)) + got_f2_prefix_ = true; + else if (0xF3 == (*start_byte)) + got_f3_prefix_ = true; + else if (0x66 == (*start_byte)) + got_66_prefix_ = true; + + instruction_type = opcode.type_; + size ++; + // we got a prefix, so add one and check next byte + ProcessPrefixes(start_byte + 1, size); + default: + break; // not a prefix byte + } + + return instruction_type; +} + +InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, + unsigned int table_index, + unsigned int& size) { + const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table + unsigned char current_byte = (*start_byte) >> table.shift_; + current_byte = current_byte & table.mask_; // Mask out the bits we will use + + // Check whether the byte we have is inside the table we have. + if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { + instruction_type_ = IT_UNKNOWN; + return instruction_type_; + } + + const Opcode& opcode = table.table_[current_byte]; + if (IT_UNUSED == opcode.type_) { + // This instruction is not used by the IA-32 ISA, so we indicate + // this to the user. Probably means that we were pointed to + // a byte in memory that was not the start of an instruction. + instruction_type_ = IT_UNUSED; + return instruction_type_; + } else if (IT_REFERENCE == opcode.type_) { + // We are looking at an opcode that has more bytes (or is continued + // in the ModR/M byte). Recursively find the opcode definition in + // the table for the opcode's next byte. 
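+    // (This is how multi-byte opcodes are resolved: the entry for the 0F escape
+    // byte refers to the two-byte table, and opcodes whose ModR/M reg field acts
+    // as an opcode extension, e.g. 0F AE, chain one level further into tables
+    // such as s_opcode_byte_after_0fae.)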
+ size++; + ProcessOpcode(start_byte + 1, opcode.table_index_, size); + return instruction_type_; + } + + const SpecificOpcode* specific_opcode = (SpecificOpcode*)&opcode; + if (opcode.is_prefix_dependent_) { + if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_f2_prefix_; + } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_f3_prefix_; + } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { + specific_opcode = &opcode.opcode_if_66_prefix_; + } + } + + // Inv: The opcode type is known. + instruction_type_ = specific_opcode->type_; + + // Let's process the operand types to see if we have any immediate + // operands, and/or a ModR/M byte. + + ProcessOperand(specific_opcode->flag_dest_); + ProcessOperand(specific_opcode->flag_source_); + ProcessOperand(specific_opcode->flag_aux_); + + // Inv: We have processed the opcode and incremented operand_bytes_ + // by the number of bytes of any operands specified by the opcode + // that are stored in the instruction (not registers etc.). Now + // we need to return the total number of bytes for the opcode and + // for the ModR/M or SIB bytes if they are present. + + if (table.mask_ != 0xff) { + if (have_modrm_) { + // we're looking at a ModR/M byte so we're not going to + // count that into the opcode size + ProcessModrm(start_byte, size); + return IT_GENERIC; + } else { + // need to count the ModR/M byte even if it's just being + // used for opcode extension + size++; + return IT_GENERIC; + } + } else { + if (have_modrm_) { + // The ModR/M byte is the next byte. + size++; + ProcessModrm(start_byte + 1, size); + return IT_GENERIC; + } else { + size++; + return IT_GENERIC; + } + } +} + +bool MiniDisassembler::ProcessOperand(int flag_operand) { + bool succeeded = true; + if (AM_NOT_USED == flag_operand) + return succeeded; + + // Decide what to do based on the addressing mode. + switch (flag_operand & AM_MASK) { + // No ModR/M byte indicated by these addressing modes, and no + // additional (e.g. immediate) parameters. + case AM_A: // Direct address + case AM_F: // EFLAGS register + case AM_X: // Memory addressed by the DS:SI register pair + case AM_Y: // Memory addressed by the ES:DI register pair + case AM_IMPLICIT: // Parameter is implicit, occupies no space in + // instruction + break; + + // There is a ModR/M byte but it does not necessarily need + // to be decoded. + case AM_C: // reg field of ModR/M selects a control register + case AM_D: // reg field of ModR/M selects a debug register + case AM_G: // reg field of ModR/M selects a general register + case AM_P: // reg field of ModR/M selects an MMX register + case AM_R: // mod field of ModR/M may refer only to a general register + case AM_S: // reg field of ModR/M selects a segment register + case AM_T: // reg field of ModR/M selects a test register + case AM_V: // reg field of ModR/M selects a 128-bit XMM register + have_modrm_ = true; + break; + + // In these addressing modes, there is a ModR/M byte and it needs to be + // decoded. No other (e.g. immediate) params than indicated in ModR/M. 
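+    // ("Decoded" here means ProcessModrm()/ProcessSib() must be consulted,
+    // since the mod and r/m fields may imply displacement bytes encoded after
+    // the ModR/M (and possibly SIB) byte.)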
+ case AM_E: // Operand is either a general-purpose register or memory, + // specified by ModR/M byte + case AM_M: // ModR/M byte will refer only to memory + case AM_Q: // Operand is either an MMX register or memory (complex + // evaluation), specified by ModR/M byte + case AM_W: // Operand is either a 128-bit XMM register or memory (complex + // eval), specified by ModR/M byte + have_modrm_ = true; + should_decode_modrm_ = true; + break; + + // These addressing modes specify an immediate or an offset value + // directly, so we need to look at the operand type to see how many + // bytes. + case AM_I: // Immediate data. + case AM_J: // Jump to offset. + case AM_O: // Operand is at offset. + switch (flag_operand & OT_MASK) { + case OT_B: // Byte regardless of operand-size attribute. + operand_bytes_ += OS_BYTE; + break; + case OT_C: // Byte or word, depending on operand-size attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_WORD; + else + operand_bytes_ += OS_BYTE; + break; + case OT_D: // Doubleword, regardless of operand-size attribute. + operand_bytes_ += OS_DOUBLE_WORD; + break; + case OT_DQ: // Double-quadword, regardless of operand-size attribute. + operand_bytes_ += OS_DOUBLE_QUAD_WORD; + break; + case OT_P: // 32-bit or 48-bit pointer, depending on operand-size + // attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_48_BIT_POINTER; + else + operand_bytes_ += OS_32_BIT_POINTER; + break; + case OT_PS: // 128-bit packed single-precision floating-point data. + operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; + break; + case OT_Q: // Quadword, regardless of operand-size attribute. + operand_bytes_ += OS_QUAD_WORD; + break; + case OT_S: // 6-byte pseudo-descriptor. + operand_bytes_ += OS_PSEUDO_DESCRIPTOR; + break; + case OT_SD: // Scalar Double-Precision Floating-Point Value + case OT_PD: // Unaligned packed double-precision floating point value + operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; + break; + case OT_SS: + // Scalar element of a 128-bit packed single-precision + // floating data. + // We simply return enItUnknown since we don't have to support + // floating point + succeeded = false; + break; + case OT_V: // Word or doubleword, depending on operand-size attribute. + if (operand_is_32_bits_) + operand_bytes_ += OS_DOUBLE_WORD; + else + operand_bytes_ += OS_WORD; + break; + case OT_W: // Word, regardless of operand-size attribute. + operand_bytes_ += OS_WORD; + break; + + // Can safely ignore these. + case OT_A: // Two one-word operands in memory or two double-word + // operands in memory + case OT_PI: // Quadword MMX technology register (e.g. mm0) + case OT_SI: // Doubleword integer register (e.g., eax) + break; + + default: + break; + } + break; + + default: + break; + } + + return succeeded; +} + +bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, + unsigned int& size) { + // If we don't need to decode, we just return the size of the ModR/M + // byte (there is never a SIB byte in this case). + if (!should_decode_modrm_) { + size++; + return true; + } + + // We never care about the reg field, only the combination of the mod + // and r/m fields, so let's start by packing those fields together into + // 5 bits. 
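+  // (Example: ModR/M byte 15h has mod == 00b, reg == 010b, r/m == 101b, so the
+  // packed index is 00101b == 5; in 32-bit addressing that combination means a
+  // four-byte displacement follows and no SIB byte is used.)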
+ unsigned char modrm = (*start_byte); + unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field + modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field + mod = mod >> 3; // shift the mod field to the right place + modrm = mod | modrm; // combine the r/m and mod fields as discussed + mod = mod >> 3; // shift the mod field to bits 2..0 + + // Invariant: modrm contains the mod field in bits 4..3 and the r/m field + // in bits 2..0, and mod contains the mod field in bits 2..0 + + const ModrmEntry* modrm_entry = 0; + if (address_is_32_bits_) + modrm_entry = &s_ia32_modrm_map_[modrm]; + else + modrm_entry = &s_ia16_modrm_map_[modrm]; + + // Invariant: modrm_entry points to information that we need to decode + // the ModR/M byte. + + // Add to the count of operand bytes, if the ModR/M byte indicates + // that some operands are encoded in the instruction. + if (modrm_entry->is_encoded_in_instruction_) + operand_bytes_ += modrm_entry->operand_size_; + + // Process the SIB byte if necessary, and return the count + // of ModR/M and SIB bytes. + if (modrm_entry->use_sib_byte_) { + size++; + return ProcessSib(start_byte + 1, mod, size); + } else { + size++; + return true; + } +} + +bool MiniDisassembler::ProcessSib(unsigned char* start_byte, + unsigned char mod, + unsigned int& size) { + // get the mod field from the 2..0 bits of the SIB byte + unsigned char sib_base = (*start_byte) & 0x07; + if (0x05 == sib_base) { + switch (mod) { + case 0x00: // mod == 00 + case 0x02: // mod == 10 + operand_bytes_ += OS_DOUBLE_WORD; + break; + case 0x01: // mod == 01 + operand_bytes_ += OS_BYTE; + break; + case 0x03: // mod == 11 + // According to the IA-32 docs, there does not seem to be a disp + // value for this value of mod + default: + break; + } + } + + size++; + return true; +} + +}; // namespace sidestep diff --git a/third_party/tcmalloc/chromium/src/windows/mini_disassembler.h b/third_party/tcmalloc/chromium/src/windows/mini_disassembler.h new file mode 100644 index 0000000..e676232 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/mini_disassembler.h @@ -0,0 +1,190 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Definition of MiniDisassembler. + */ + +#ifndef GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_ +#define GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_ + +#include <windows.h> +#include "mini_disassembler_types.h" + +// compatibility shim +#include "base/logging.h" +#define SIDESTEP_ASSERT(cond) RAW_DCHECK(cond, #cond) +#define SIDESTEP_LOG(msg) RAW_VLOG(1, msg) + +namespace sidestep { + +// This small disassembler is very limited +// in its functionality, and in fact does only the bare minimum required by the +// preamble patching utility. It may be useful for other purposes, however. +// +// The limitations include at least the following: +// -# No support for coprocessor opcodes, MMX, etc. +// -# No machine-readable identification of opcodes or decoding of +// assembly parameters. The name of the opcode (as a string) is given, +// however, to aid debugging. +// +// You may ask what this little disassembler actually does, then? The answer is +// that it does the following, which is exactly what the patching utility needs: +// -# Indicates if opcode is a jump (any kind) or a return (any kind) +// because this is important for the patching utility to determine if +// a function is too short or there are jumps too early in it for it +// to be preamble patched. +// -# The opcode length is always calculated, so that the patching utility +// can figure out where the next instruction starts, and whether it +// already has enough instructions to replace with the absolute jump +// to the patching code. +// +// The usage is quite simple; just create a MiniDisassembler and use its +// Disassemble() method. +// +// If you would like to extend this disassembler, please refer to the +// IA-32 Intel® Architecture Software Developer’s Manual Volume 2: +// Instruction Set Reference for information about operand decoding +// etc. +class MiniDisassembler { + public: + + // Creates a new instance and sets defaults. + // + // @param operand_default_32_bits If true, the default operand size is + // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits. + // @param address_default_32_bits If true, the default address size is + // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits. + MiniDisassembler(bool operand_default_32_bits, + bool address_default_32_bits); + + // Equivalent to MiniDisassembler(true, true); + MiniDisassembler(); + + // Attempts to disassemble a single instruction starting from the + // address in memory it is pointed to. + // + // @param start Address where disassembly should start. + // @param instruction_bytes Variable that will be <b>incremented</b> by + // the length in bytes of the instruction. + // @return enItJump, enItReturn or enItGeneric on success. enItUnknown + // if unable to disassemble, enItUnused if this seems to be an unused + // opcode. In the last two (error) cases, cbInstruction will be set + // to 0xffffffff. 
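+  //
+  // A minimal usage sketch (the 'code' pointer is assumed to point at valid
+  // IA-32 instructions):
+  //
+  //   MiniDisassembler disasm;  // 32-bit operand and address defaults
+  //   unsigned int bytes = 0;
+  //   InstructionType type = disasm.Disassemble(code, bytes);
+  //   // 'bytes' has been incremented by the length of the first instruction,
+  //   // and 'type' says whether it was a jump, a return, etc.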
+ // + // @post This instance of the disassembler is ready to be used again, + // with unchanged defaults from creation time. + InstructionType Disassemble(unsigned char* start, unsigned int& instruction_bytes); + + private: + + // Makes the disassembler ready for reuse. + void Initialize(); + + // Sets the flags for address and operand sizes. + // @return Number of prefix bytes. + InstructionType ProcessPrefixes(unsigned char* start, unsigned int& size); + + // Sets the flag for whether we have ModR/M, and increments + // operand_bytes_ if any are specifies by the opcode directly. + // @return Number of opcode bytes. + InstructionType ProcessOpcode(unsigned char* start, + unsigned int table, + unsigned int& size); + + // Checks the type of the supplied operand. Increments + // operand_bytes_ if it directly indicates an immediate etc. + // operand. Asserts have_modrm_ if the operand specifies + // a ModR/M byte. + bool ProcessOperand(int flag_operand); + + // Increments operand_bytes_ by size specified by ModR/M and + // by SIB if present. + // @return 0 in case of error, 1 if there is just a ModR/M byte, + // 2 if there is a ModR/M byte and a SIB byte. + bool ProcessModrm(unsigned char* start, unsigned int& size); + + // Processes the SIB byte that it is pointed to. + // @param start Pointer to the SIB byte. + // @param mod The mod field from the ModR/M byte. + // @return 1 to indicate success (indicates 1 SIB byte) + bool ProcessSib(unsigned char* start, unsigned char mod, unsigned int& size); + + // The instruction type we have decoded from the opcode. + InstructionType instruction_type_; + + // Counts the number of bytes that is occupied by operands in + // the current instruction (note: we don't care about how large + // operands stored in registers etc. are). + unsigned int operand_bytes_; + + // True iff there is a ModR/M byte in this instruction. + bool have_modrm_; + + // True iff we need to decode the ModR/M byte (sometimes it just + // points to a register, we can tell by the addressing mode). + bool should_decode_modrm_; + + // Current operand size is 32 bits if true, 16 bits if false. + bool operand_is_32_bits_; + + // Default operand size is 32 bits if true, 16 bits if false. + bool operand_default_is_32_bits_; + + // Current address size is 32 bits if true, 16 bits if false. + bool address_is_32_bits_; + + // Default address size is 32 bits if true, 16 bits if false. + bool address_default_is_32_bits_; + + // Huge big opcode table based on the IA-32 manual, defined + // in Ia32OpcodeMap.cc + static const OpcodeTable s_ia32_opcode_map_[]; + + // Somewhat smaller table to help with decoding ModR/M bytes + // when 16-bit addressing mode is being used. Defined in + // Ia32ModrmMap.cc + static const ModrmEntry s_ia16_modrm_map_[]; + + // Somewhat smaller table to help with decoding ModR/M bytes + // when 32-bit addressing mode is being used. Defined in + // Ia32ModrmMap.cc + static const ModrmEntry s_ia32_modrm_map_[]; + + // Indicators of whether we got certain prefixes that certain + // silly Intel instructions depend on in nonstandard ways for + // their behaviors. 
+ bool got_f2_prefix_, got_f3_prefix_, got_66_prefix_; +}; + +}; // namespace sidestep + +#endif // GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_H_ diff --git a/third_party/tcmalloc/chromium/src/windows/mini_disassembler_types.h b/third_party/tcmalloc/chromium/src/windows/mini_disassembler_types.h new file mode 100644 index 0000000..7f8e997 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/mini_disassembler_types.h @@ -0,0 +1,226 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Several simple types used by the disassembler and some of the patching + * mechanisms. + */ + +#ifndef GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_ +#define GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_ + +namespace sidestep { + +// Categories of instructions that we care about +enum InstructionType { + // This opcode is not used + IT_UNUSED, + // This disassembler does not recognize this opcode (error) + IT_UNKNOWN, + // This is not an instruction but a reference to another table + IT_REFERENCE, + // This byte is a prefix byte that we can ignore + IT_PREFIX, + // This is a prefix byte that switches to the nondefault address size + IT_PREFIX_ADDRESS, + // This is a prefix byte that switches to the nondefault operand size + IT_PREFIX_OPERAND, + // A jump or call instruction + IT_JUMP, + // A return instruction + IT_RETURN, + // Any other type of instruction (in this case we don't care what it is) + IT_GENERIC, +}; + +// Lists IA-32 operand sizes in multiples of 8 bits +enum OperandSize { + OS_ZERO = 0, + OS_BYTE = 1, + OS_WORD = 2, + OS_DOUBLE_WORD = 4, + OS_QUAD_WORD = 8, + OS_DOUBLE_QUAD_WORD = 16, + OS_32_BIT_POINTER = 32/8, + OS_48_BIT_POINTER = 48/8, + OS_SINGLE_PRECISION_FLOATING = 32/8, + OS_DOUBLE_PRECISION_FLOATING = 64/8, + OS_DOUBLE_EXTENDED_PRECISION_FLOATING = 80/8, + OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING = 128/8, + OS_PSEUDO_DESCRIPTOR = 6 +}; + +// Operand addressing methods from the IA-32 manual. The enAmMask value +// is a mask for the rest. 
The other enumeration values are named for the +// names given to the addressing methods in the manual, e.g. enAm_D is for +// the D addressing method. +// +// The reason we use a full 4 bytes and a mask, is that we need to combine +// these flags with the enOperandType to store the details +// on the operand in a single integer. +enum AddressingMethod { + AM_NOT_USED = 0, // This operand is not used for this instruction + AM_MASK = 0x00FF0000, // Mask for the rest of the values in this enumeration + AM_A = 0x00010000, // A addressing type + AM_C = 0x00020000, // C addressing type + AM_D = 0x00030000, // D addressing type + AM_E = 0x00040000, // E addressing type + AM_F = 0x00050000, // F addressing type + AM_G = 0x00060000, // G addressing type + AM_I = 0x00070000, // I addressing type + AM_J = 0x00080000, // J addressing type + AM_M = 0x00090000, // M addressing type + AM_O = 0x000A0000, // O addressing type + AM_P = 0x000B0000, // P addressing type + AM_Q = 0x000C0000, // Q addressing type + AM_R = 0x000D0000, // R addressing type + AM_S = 0x000E0000, // S addressing type + AM_T = 0x000F0000, // T addressing type + AM_V = 0x00100000, // V addressing type + AM_W = 0x00110000, // W addressing type + AM_X = 0x00120000, // X addressing type + AM_Y = 0x00130000, // Y addressing type + AM_REGISTER = 0x00140000, // Specific register is always used as this op + AM_IMPLICIT = 0x00150000, // An implicit, fixed value is used +}; + +// Operand types from the IA-32 manual. The enOtMask value is +// a mask for the rest. The rest of the values are named for the +// names given to these operand types in the manual, e.g. enOt_ps +// is for the ps operand type in the manual. +// +// The reason we use a full 4 bytes and a mask, is that we need +// to combine these flags with the enAddressingMethod to store the details +// on the operand in a single integer. +enum OperandType { + OT_MASK = 0xFF000000, + OT_A = 0x01000000, + OT_B = 0x02000000, + OT_C = 0x03000000, + OT_D = 0x04000000, + OT_DQ = 0x05000000, + OT_P = 0x06000000, + OT_PI = 0x07000000, + OT_PS = 0x08000000, // actually unsupported for (we don't know its size) + OT_Q = 0x09000000, + OT_S = 0x0A000000, + OT_SS = 0x0B000000, + OT_SI = 0x0C000000, + OT_V = 0x0D000000, + OT_W = 0x0E000000, + OT_SD = 0x0F000000, // scalar double-precision floating-point value + OT_PD = 0x10000000, // double-precision floating point + // dummy "operand type" for address mode M - which doesn't specify + // operand type + OT_ADDRESS_MODE_M = 0x80000000 +}; + +// Everything that's in an Opcode (see below) except the three +// alternative opcode structs for different prefixes. +struct SpecificOpcode { + // Index to continuation table, or 0 if this is the last + // byte in the opcode. + int table_index_; + + // The opcode type + InstructionType type_; + + // Description of the type of the dest, src and aux operands, + // put together from an enOperandType flag and an enAddressingMethod + // flag. + int flag_dest_; + int flag_source_; + int flag_aux_; + + // We indicate the mnemonic for debugging purposes + const char* mnemonic_; +}; + +// The information we keep in our tables about each of the different +// valid instructions recognized by the IA-32 architecture. +struct Opcode { + // Index to continuation table, or 0 if this is the last + // byte in the opcode. 
+ int table_index_; + + // The opcode type + InstructionType type_; + + // Description of the type of the dest, src and aux operands, + // put together from an enOperandType flag and an enAddressingMethod + // flag. + int flag_dest_; + int flag_source_; + int flag_aux_; + + // We indicate the mnemonic for debugging purposes + const char* mnemonic_; + + // Alternative opcode info if certain prefixes are specified. + // In most cases, all of these are zeroed-out. Only used if + // bPrefixDependent is true. + bool is_prefix_dependent_; + SpecificOpcode opcode_if_f2_prefix_; + SpecificOpcode opcode_if_f3_prefix_; + SpecificOpcode opcode_if_66_prefix_; +}; + +// Information about each table entry. +struct OpcodeTable { + // Table of instruction entries + const Opcode* table_; + // How many bytes left to shift ModR/M byte <b>before</b> applying mask + unsigned char shift_; + // Mask to apply to byte being looked at before comparing to table + unsigned char mask_; + // Minimum/maximum indexes in table. + unsigned char min_lim_; + unsigned char max_lim_; +}; + +// Information about each entry in table used to decode ModR/M byte. +struct ModrmEntry { + // Is the operand encoded as bytes in the instruction (rather than + // if it's e.g. a register in which case it's just encoded in the + // ModR/M byte) + bool is_encoded_in_instruction_; + + // Is there a SIB byte? In this case we always need to decode it. + bool use_sib_byte_; + + // What is the size of the operand (only important if it's encoded + // in the instruction)? + OperandSize operand_size_; +}; + +}; // namespace sidestep + +#endif // GOOGLE_PERFTOOLS_MINI_DISASSEMBLER_TYPES_H_ diff --git a/third_party/tcmalloc/chromium/src/windows/nm-pdb.c b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c new file mode 100644 index 0000000..726d345 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/nm-pdb.c @@ -0,0 +1,266 @@ +/* Copyright (c) 2008, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: David Vitek + * + * Dump function addresses using Microsoft debug symbols. This works + * on PDB files. Note that this program will download symbols to + * c:\websymbols without asking. + */ + +#define WIN32_LEAN_AND_MEAN +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> // for _strdup + +#include <windows.h> +#include <dbghelp.h> + +// Unfortunately, there is no versioning info in dbghelp.h so I can +// tell whether it has an old-style (circa VC7.1) IMAGEHLP_MODULE64 +// struct, with only a few fields, or a new-style (circa VC8) +// IMAGEHLP_MODULE64, with lots of fields. These fields are just used +// for debugging, so it's fine to just assume the smaller struct, but +// for most people, using a modern MSVC, the full struct is available. +// If you are one of those people and would like this extra debugging +// info, you can uncomment the line below. +//#define VC8_OR_ABOVE + +#define SEARCH_CAP (1024*1024) +#define WEBSYM "SRV*c:\\websymbols*http://msdl.microsoft.com/download/symbols" + +typedef struct { + char *name; + ULONG64 addr; + ULONG flags; +} SYM; + +typedef struct { + ULONG64 module_base; + SYM *syms; + DWORD syms_len; + DWORD syms_cap; +} SYM_CONTEXT; + +static int sym_cmp(const void *_s1, const void *_s2) { + const SYM *s1 = (const SYM *)_s1; + const SYM *s2 = (const SYM *)_s2; + + if (s1->addr < s2->addr) + return -1; + if (s1->addr > s2->addr) + return 1; + return 0; +} + +static BOOL CALLBACK EnumSymProc(PSYMBOL_INFO symbol_info, + ULONG symbol_size, + PVOID user_context) { + SYM_CONTEXT *ctx = (SYM_CONTEXT*)user_context; + if (symbol_info->Address < ctx->module_base || + (symbol_info->Flags & SYMFLAG_TLSREL)) { + return TRUE; + } + if (ctx->syms_len == ctx->syms_cap) { + if (!ctx->syms_cap) + ctx->syms_cap++; + ctx->syms_cap *= 2; + ctx->syms = realloc(ctx->syms, sizeof(ctx->syms[0]) * ctx->syms_cap); + } + ctx->syms[ctx->syms_len].name = _strdup(symbol_info->Name); + ctx->syms[ctx->syms_len].addr = symbol_info->Address; + ctx->syms[ctx->syms_len].flags = symbol_info->Flags; + ctx->syms_len++; + return TRUE; +} + +static void MaybePrint(const char* var, const char* description) { + if (var[0]) + printf("%s: %s\n", description, var); +} + +static void PrintAvailability(BOOL var, const char *description) { + printf("s: %s\n", description, (var ? "Available" : "Not available")); +} + +static void ShowSymbolInfo(HANDLE process, ULONG64 module_base) { + /* Get module information. */ + IMAGEHLP_MODULE64 module_info; + BOOL getmoduleinfo_rv; + printf("Load Address: %I64x\n", module_base); + memset(&module_info, 0, sizeof(module_info)); + module_info.SizeOfStruct = sizeof(module_info); + getmoduleinfo_rv = SymGetModuleInfo64(process, module_base, &module_info); + if (!getmoduleinfo_rv) { + printf("Error: SymGetModuleInfo64() failed. Error code: %u\n", + GetLastError()); + return; + } + /* Display information about symbols, based on kind of symbol. 
*/ + switch (module_info.SymType) { + case SymNone: + printf(("No symbols available for the module.\n")); + break; + case SymExport: + printf(("Loaded symbols: Exports\n")); + break; + case SymCoff: + printf(("Loaded symbols: COFF\n")); + break; + case SymCv: + printf(("Loaded symbols: CodeView\n")); + break; + case SymSym: + printf(("Loaded symbols: SYM\n")); + break; + case SymVirtual: + printf(("Loaded symbols: Virtual\n")); + break; + case SymPdb: + printf(("Loaded symbols: PDB\n")); + break; + case SymDia: + printf(("Loaded symbols: DIA\n")); + break; + case SymDeferred: + printf(("Loaded symbols: Deferred\n")); /* not actually loaded */ + break; + default: + printf(("Loaded symbols: Unknown format.\n")); + break; + } + + MaybePrint("Image name", module_info.ImageName); + MaybePrint("Loaded image name", module_info.LoadedImageName); +#ifdef VC8_OR_ABOVE /* TODO(csilvers): figure out how to tell */ + MaybePrint("PDB file name", module_info.LoadedPdbName); + if (module_info.PdbUnmatched || module_info.DbgUnmatched) { + /* This can only happen if the debug information is contained in a + * separate file (.DBG or .PDB) + */ + printf(("Warning: Unmatched symbols.\n")); + } +#endif + + /* Contents */ +#ifdef VC8_OR_ABOVE /* TODO(csilvers): figure out how to tell */ + PrintAvailability("Line numbers", module_info.LineNumbers); + PrintAvailability("Global symbols", module_info.GlobalSymbols); + PrintAvailability("Type information", module_info.TypeInfo); +#endif +} + +int main(int argc, char *argv[]) { + DWORD error; + HANDLE process; + ULONG64 module_base; + SYM_CONTEXT ctx; + int i; + char* search; + char* filename = NULL; + int rv = 0; + /* We may add SYMOPT_UNDNAME if --demangle is specified: */ + DWORD symopts = SYMOPT_DEFERRED_LOADS | SYMOPT_DEBUG; + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--demangle") == 0 || strcmp(argv[i], "-C") == 0) { + symopts |= SYMOPT_UNDNAME; + } else { + break; + } + } + if (i != argc - 1) { + fprintf(stderr, "usage: nm-pdb [-C|--demangle] <module or filename>\n"); + exit(1); + } + filename = argv[i]; + + process = GetCurrentProcess(); + + if (!SymInitialize(process, NULL, FALSE)) { + error = GetLastError(); + fprintf(stderr, "SymInitialize returned error : %d\n", error); + return 1; + } + + search = malloc(SEARCH_CAP); + if (SymGetSearchPath(process, search, SEARCH_CAP)) { + if (strlen(search) + sizeof(";" WEBSYM) > SEARCH_CAP) { + fprintf(stderr, "Search path too long\n"); + SymCleanup(process); + return 1; + } + strcat(search, ";" WEBSYM); + } else { + error = GetLastError(); + fprintf(stderr, "SymGetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + strcpy(search, WEBSYM); /* Use a default value */ + } + if (!SymSetSearchPath(process, search)) { + error = GetLastError(); + fprintf(stderr, "SymSetSearchPath returned error : %d\n", error); + rv = 1; /* An error, but not a fatal one */ + } + + SymSetOptions(symopts); + module_base = SymLoadModuleEx(process, NULL, filename, NULL, 0, 0, NULL, 0); + if (!module_base) { + /* SymLoadModuleEx failed */ + error = GetLastError(); + fprintf(stderr, "SymLoadModuleEx returned error : %d for %s\n", + error, filename); + SymCleanup(process); + return 1; + } + + ShowSymbolInfo(process, module_base); + + memset(&ctx, 0, sizeof(ctx)); + ctx.module_base = module_base; + if (!SymEnumSymbols(process, module_base, NULL, EnumSymProc, &ctx)) { + error = GetLastError(); + fprintf(stderr, "SymEnumSymbols returned error: %d\n", error); + rv = 1; + } else { + DWORD j; + 
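+    /* Sort by address so the listing comes out in increasing-address
+     * order, similar to "nm -n" output. */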
qsort(ctx.syms, ctx.syms_len, sizeof(ctx.syms[0]), sym_cmp); + for (j = 0; j < ctx.syms_len; j++) { + printf("%016I64x X %s\n", ctx.syms[j].addr, ctx.syms[j].name); + } + /* In a perfect world, maybe we'd clean up ctx's memory? */ + } + SymUnloadModule64(process, module_base); + SymCleanup(process); + return rv; +} diff --git a/third_party/tcmalloc/chromium/src/windows/override_functions.cc b/third_party/tcmalloc/chromium/src/windows/override_functions.cc new file mode 100644 index 0000000..e634fe2 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/override_functions.cc @@ -0,0 +1,122 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Mike Belshe +// +// To link tcmalloc into a EXE or DLL statically without using the patching +// facility, we can take a stock libcmt and remove all the allocator functions. +// When we relink the EXE/DLL with the modified libcmt and tcmalloc, a few +// functions are missing. This file contains the additional overrides which +// are required in the VS2005 libcmt in order to link the modified libcmt. +// +// See also +// http://groups.google.com/group/google-perftools/browse_thread/thread/41cd3710af85e57b + +#include <config.h> + +#ifndef _WIN32 +# error You should only be including this file in a windows environment! +#endif + +#ifndef WIN32_OVERRIDE_ALLOCATORS +# error This file is intended for use when overriding allocators +#endif + +#include "tcmalloc.cc" + +extern "C" void* _recalloc(void* p, size_t n, size_t size) { + void* result = realloc(p, n * size); + memset(result, 0, n * size); + return result; +} + +extern "C" void* _calloc_impl(size_t n, size_t size) { + return calloc(n, size); +} + +extern "C" size_t _msize(void* p) { + return MallocExtension::instance()->GetAllocatedSize(p); +} + +extern "C" intptr_t _get_heap_handle() { + return 0; +} + +// The CRT heap initialization stub. +extern "C" int _heap_init() { + // We intentionally leak this object. It lasts for the process + // lifetime. 
Trying to teardown at _heap_term() is so late that + // you can't do anything useful anyway. + new TCMallocGuard(); + return 1; +} + +// The CRT heap cleanup stub. +extern "C" void _heap_term() { +} + +extern "C" int _set_new_mode(int flag) { + return tc_set_new_mode(flag); +} + +#ifndef NDEBUG +#undef malloc +#undef free +#undef calloc +int _CrtDbgReport(int, const char*, int, const char*, const char*, ...) { + return 0; +} + +int _CrtDbgReportW(int, const wchar_t*, int, const wchar_t*, const wchar_t*, ...) { + return 0; +} + +int _CrtSetReportMode(int, int) { + return 0; +} + +extern "C" void* _malloc_dbg(size_t size, int , const char*, int) { + return malloc(size); +} + +extern "C" void _free_dbg(void* ptr, int) { + free(ptr); +} + +extern "C" void* _calloc_dbg(size_t n, size_t size, int, const char*, int) { + return calloc(n, size); +} +#endif // NDEBUG + +// We set this to 1 because part of the CRT uses a check of _crtheap != 0 +// to test whether the CRT has been initialized. Once we've ripped out +// the allocators from libcmt, we need to provide this definition so that +// the rest of the CRT is still usable. +extern "C" void* _crtheap = reinterpret_cast<void*>(1); diff --git a/third_party/tcmalloc/chromium/src/windows/patch_functions.cc b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc new file mode 100644 index 0000000..d0e54ff --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/patch_functions.cc @@ -0,0 +1,939 @@ +// Copyright (c) 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// --- +// Author: Craig Silverstein +// +// The main purpose of this file is to patch the libc allocation +// routines (malloc and friends, but also _msize and other +// windows-specific libc-style routines). However, we also patch +// windows routines to do accounting. We do better at the former than +// the latter. 
Here are some comments from Paul Pluzhnikov about what +// it might take to do a really good job patching windows routines to +// keep track of memory usage: +// +// "You should intercept at least the following: +// HeapCreate HeapDestroy HeapAlloc HeapReAlloc HeapFree +// RtlCreateHeap RtlDestroyHeap RtlAllocateHeap RtlFreeHeap +// malloc calloc realloc free +// malloc_dbg calloc_dbg realloc_dbg free_dbg +// Some of these call the other ones (but not always), sometimes +// recursively (i.e. HeapCreate may call HeapAlloc on a different +// heap, IIRC)." +// +// Since Paul didn't mention VirtualAllocEx, he may not have even been +// considering all the mmap-like functions that windows has (or he may +// just be ignoring it because he's seen we already patch it). Of the +// above, we do not patch the *_dbg functions, and of the windows +// functions, we only patch HeapAlloc and HeapFree. +// +// The *_dbg functions come into play with /MDd, /MTd, and /MLd, +// probably. It may be ok to just turn off tcmalloc in those cases -- +// if the user wants the windows debug malloc, they probably don't +// want tcmalloc! We should also test with all of /MD, /MT, and /ML, +// which we're not currently doing. + +// TODO(csilvers): try to do better here? Paul does conclude: +// "Keeping track of all of this was a nightmare." + +#ifndef _WIN32 +# error You should only be including windows/patch_functions.cc in a windows environment! +#endif + +#include <config.h> + +#ifdef WIN32_OVERRIDE_ALLOCATORS +#error This file is intended for patching allocators - use override_functions.cc instead. +#endif + +#include <windows.h> +#include <malloc.h> // for _msize and _expand +#include <tlhelp32.h> // for CreateToolhelp32Snapshot() +#include <vector> +#include <base/logging.h> +#include "base/spinlock.h" +#include "google/malloc_hook.h" +#include "malloc_hook-inl.h" +#include "preamble_patcher.h" + +// MinGW doesn't seem to define this, perhaps some windowsen don't either. +#ifndef TH32CS_SNAPMODULE32 +#define TH32CS_SNAPMODULE32 0 +#endif + +// These are hard-coded, unfortunately. :-( They are also probably +// compiler specific. See get_mangled_names.cc, in this directory, +// for instructions on how to update these names for your compiler. +const char kMangledNew[] = "??2@YAPAXI@Z"; +const char kMangledNewArray[] = "??_U@YAPAXI@Z"; +const char kMangledDelete[] = "??3@YAXPAX@Z"; +const char kMangledDeleteArray[] = "??_V@YAXPAX@Z"; +const char kMangledNewNothrow[] = "??2@YAPAXIABUnothrow_t@std@@@Z"; +const char kMangledNewArrayNothrow[] = "??_U@YAPAXIABUnothrow_t@std@@@Z"; +const char kMangledDeleteNothrow[] = "??3@YAXPAXABUnothrow_t@std@@@Z"; +const char kMangledDeleteArrayNothrow[] = "??_V@YAXPAXABUnothrow_t@std@@@Z"; + +// This is an unused but exported symbol that we can use to tell the +// MSVC linker to bring in libtcmalloc, via the /INCLUDE linker flag. +// Without this, the linker will likely decide that libtcmalloc.dll +// doesn't add anything to the executable (since it does all its work +// through patching, which the linker can't see), and ignore it +// entirely. (The name 'tcmalloc' is already reserved for a +// namespace. I'd rather export a variable named "_tcmalloc", but I +// couldn't figure out how to get that to work. This function exports +// the symbol "__tcmalloc".) 
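+// (So a statically-linking client would typically pass something like
+// "/INCLUDE:__tcmalloc" to its own linker. The leading-underscore decoration
+// shown here is the 32-bit convention; the exact symbol spelling to /INCLUDE
+// may differ on other architectures, so treat this as a sketch.)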
+extern "C" PERFTOOLS_DLL_DECL void _tcmalloc(); +void _tcmalloc() { } + +namespace { // most everything here is in an unnamed namespace + +typedef void (*GenericFnPtr)(); + +using sidestep::PreamblePatcher; + +// These functions are how we override the memory allocation +// functions, just like tcmalloc.cc and malloc_hook.cc do. + +// This is information about the routines we're patching, for a given +// module that implements libc memory routines. A single executable +// can have several libc implementations running about (in different +// .dll's), and we need to patch/unpatch them all. This defines +// everything except the new functions we're patching in, which +// are defined in LibcFunctions, below. +class LibcInfo { + public: + LibcInfo() { + memset(this, 0, sizeof(*this)); // easiest way to initialize the array + } + bool SameAs(const LibcInfo& that) const { + return (is_valid() && + module_base_address_ == that.module_base_address_ && + module_base_size_ == that.module_base_size_); + } + bool SameAsME32(const MODULEENTRY32& me32) const { + return (is_valid() && + module_base_address_ == me32.modBaseAddr && + module_base_size_ == me32.modBaseSize); + } + bool patched() const { return is_valid() && module_name_[0] != '\0'; } + const char* module_name() const { return is_valid() ? module_name_ : ""; } + + void set_is_valid(bool b) { is_valid_ = b; } + + // These shouldn't have to be public, since only subclasses of + // LibcInfo need it, but they do. Maybe something to do with + // templates. Shrug. + bool is_valid() const { return is_valid_; } + GenericFnPtr windows_fn(int ifunction) const { + return windows_fn_[ifunction]; + } + + // Populates all the windows_fn_[] vars based on our module info. + // Returns false if windows_fn_ is all NULL's, because there's + // nothing to patch. Also populates the me32 info. + bool PopulateWindowsFn(const MODULEENTRY32& me32); + + protected: + void CopyFrom(const LibcInfo& that) { + if (this == &that) + return; + this->is_valid_ = that.is_valid_; + memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_)); + this->module_base_address_ = that.module_base_address_; + this->module_base_size_ = that.module_base_size_; + memcpy(this->module_name_, that.module_name_, sizeof(module_name_)); + } + + enum { + kMalloc, kFree, kRealloc, kCalloc, + kNew, kNewArray, kDelete, kDeleteArray, + kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow, + // These are windows-only functions from malloc.h + k_Msize, k_Expand, k_Aligned_malloc, k_Aligned_free, + kNumFunctions + }; + + // I'd like to put these together in a struct (perhaps in the + // subclass, so we can put in perftools_fn_ as well), but vc8 seems + // to have a bug where it doesn't initialize the struct properly if + // we try to take the address of a function that's not yet loaded + // from a dll, as is the common case for static_fn_. So we need + // each to be in its own array. :-( + static const char* const function_name_[kNumFunctions]; + + // This function is only used when statically linking the binary. + // In that case, loading malloc/etc from the dll (via + // PatchOneModule) won't work, since there are no dlls. Instead, + // you just want to be taking the address of malloc/etc directly. + // In the common, non-static-link case, these pointers will all be + // NULL, since this initializer runs before msvcrt.dll is loaded. + static const GenericFnPtr static_fn_[kNumFunctions]; + + // This is the address of the function we are going to patch + // (malloc, etc). 
Other info about the function is in the + // patch-specific subclasses, below. + GenericFnPtr windows_fn_[kNumFunctions]; + + // This is set to true when this structure is initialized (because + // we're patching a new library) and set to false when it's + // uninitialized (because we've freed that library). + bool is_valid_; + + const void *module_base_address_; + size_t module_base_size_; + char module_name_[MAX_MODULE_NAME32 + 1]; +}; + +// Template trickiness: logically, a LibcInfo would include +// Windows_malloc_, origstub_malloc_, and Perftools_malloc_: for a +// given module, these three go together. And in fact, +// Perftools_malloc_ may need to call origstub_malloc_, which means we +// either need to change Perftools_malloc_ to take origstub_malloc_ as +// an arugment -- unfortunately impossible since it needs to keep the +// same API as normal malloc -- or we need to write a different +// version of Perftools_malloc_ for each LibcInfo instance we create. +// We choose the second route, and use templates to implement it (we +// could have also used macros). So to get multiple versions +// of the struct, we say "struct<1> var1; struct<2> var2;". The price +// we pay is some code duplication, and more annoying, each instance +// of this var is a separate type. +template<int> class LibcInfoWithPatchFunctions : public LibcInfo { + public: + // me_info should have had PopulateWindowsFn() called on it, so the + // module_* vars and windows_fn_ are set up. + bool Patch(const LibcInfo& me_info); + void Unpatch(); + + private: + // This holds the original function contents after we patch the function. + // This has to be defined static in the subclass, because the perftools_fns + // reference origstub_fn_. + static GenericFnPtr origstub_fn_[kNumFunctions]; + + // This is the function we want to patch in + static const GenericFnPtr perftools_fn_[kNumFunctions]; + + static void* Perftools_malloc(size_t size) __THROW; + static void Perftools_free(void* ptr) __THROW; + static void* Perftools_realloc(void* ptr, size_t size) __THROW; + static void* Perftools_calloc(size_t nmemb, size_t size) __THROW; + static void* Perftools_new(size_t size); + static void* Perftools_newarray(size_t size); + static void Perftools_delete(void *ptr); + static void Perftools_deletearray(void *ptr); + static void* Perftools_new_nothrow(size_t size, + const std::nothrow_t&) __THROW; + static void* Perftools_newarray_nothrow(size_t size, + const std::nothrow_t&) __THROW; + static void Perftools_delete_nothrow(void *ptr, + const std::nothrow_t&) __THROW; + static void Perftools_deletearray_nothrow(void *ptr, + const std::nothrow_t&) __THROW; + static size_t Perftools__msize(void *ptr) __THROW; + static void* Perftools__expand(void *ptr, size_t size) __THROW; + static void* Perftools__aligned_malloc(size_t size, size_t alignment) __THROW; + static void Perftools__aligned_free(void *ptr) __THROW; + // malloc.h also defines these functions: + // _recalloc, _aligned_offset_malloc, _aligned_realloc, _aligned_recalloc + // _aligned_offset_realloc, _aligned_offset_recalloc, _malloca, _freea + // But they seem pretty obscure, and I'm fine not overriding them for now. +}; + +// This class is easier because there's only one of them. +class WindowsInfo { + public: + void Patch(); + void Unpatch(); + + private: + // TODO(csilvers): should we be patching GlobalAlloc/LocalAlloc instead, + // for pre-XP systems? 
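+  // These constants index into function_info_[], below.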
+ enum { + kHeapAlloc, kHeapFree, kVirtualAllocEx, kVirtualFreeEx, + kMapViewOfFileEx, kUnmapViewOfFile, kLoadLibraryExW, kFreeLibrary, + kNumFunctions + }; + + struct FunctionInfo { + const char* const name; // name of fn in a module (eg "malloc") + GenericFnPtr windows_fn; // the fn whose name we call (&malloc) + GenericFnPtr origstub_fn; // original fn contents after we patch + const GenericFnPtr perftools_fn; // fn we want to patch in + }; + + static FunctionInfo function_info_[kNumFunctions]; + + // A Windows-API equivalent of malloc and free + static LPVOID WINAPI Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags, + DWORD_PTR dwBytes); + static BOOL WINAPI Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags, + LPVOID lpMem); + // A Windows-API equivalent of mmap and munmap, for "anonymous regions" + static LPVOID WINAPI Perftools_VirtualAllocEx(HANDLE process, LPVOID address, + SIZE_T size, DWORD type, + DWORD protect); + static BOOL WINAPI Perftools_VirtualFreeEx(HANDLE process, LPVOID address, + SIZE_T size, DWORD type); + // A Windows-API equivalent of mmap and munmap, for actual files + static LPVOID WINAPI Perftools_MapViewOfFileEx(HANDLE hFileMappingObject, + DWORD dwDesiredAccess, + DWORD dwFileOffsetHigh, + DWORD dwFileOffsetLow, + SIZE_T dwNumberOfBytesToMap, + LPVOID lpBaseAddress); + static BOOL WINAPI Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress); + // We don't need the other 3 variants because they all call this one. */ + static HMODULE WINAPI Perftools_LoadLibraryExW(LPCWSTR lpFileName, + HANDLE hFile, + DWORD dwFlags); + static BOOL WINAPI Perftools_FreeLibrary(HMODULE hLibModule); +}; + +// If you run out, just add a few more to the array. You'll also need +// to update the switch statement in PatchOneModule(), and the list in +// UnpatchWindowsFunctions(). +static LibcInfoWithPatchFunctions<0> main_executable; +static LibcInfoWithPatchFunctions<1> libc1; +static LibcInfoWithPatchFunctions<2> libc2; +static LibcInfoWithPatchFunctions<3> libc3; +static LibcInfoWithPatchFunctions<4> libc4; +static LibcInfoWithPatchFunctions<5> libc5; +static LibcInfoWithPatchFunctions<6> libc6; +static LibcInfoWithPatchFunctions<7> libc7; +static LibcInfoWithPatchFunctions<8> libc8; +static LibcInfo* module_libcs[] = { + &libc1, &libc2, &libc3, &libc4, &libc5, &libc6, &libc7, &libc8 +}; +static WindowsInfo main_executable_windows; + +const char* const LibcInfo::function_name_[] = { + "malloc", "free", "realloc", "calloc", + kMangledNew, kMangledNewArray, kMangledDelete, kMangledDeleteArray, + // Ideally we should patch the nothrow versions of new/delete, but + // at least in msvcrt, nothrow-new machine-code is of a type we + // can't patch. Since these are relatively rare, I'm hoping it's ok + // not to patch them. (NULL name turns off patching.) + NULL, // kMangledNewNothrow, + NULL, // kMangledNewArrayNothrow, + NULL, // kMangledDeleteNothrow, + NULL, // kMangledDeleteArrayNothrow, + "_msize", "_expand", "_aligned_malloc", "_aligned_free", +}; + +// For mingw, I can't patch the new/delete here, because the +// instructions are too small to patch. Luckily, they're so small +// because all they do is call into malloc/free, so they still end up +// calling tcmalloc routines, and we don't actually lose anything +// (except maybe some stacktrace goodness) by not patching. 
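+// Note that the entries below (and in perftools_fn_, further down) must stay
+// in the same order as the kMalloc..k_Aligned_free enum in LibcInfo, since
+// all of these arrays are indexed by those enum values.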
+const GenericFnPtr LibcInfo::static_fn_[] = { + (GenericFnPtr)&::malloc, + (GenericFnPtr)&::free, + (GenericFnPtr)&::realloc, + (GenericFnPtr)&::calloc, +#ifdef __MINGW32__ + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, +#else + (GenericFnPtr)(void*(*)(size_t))&::operator new, + (GenericFnPtr)(void*(*)(size_t))&::operator new[], + (GenericFnPtr)(void(*)(void*))&::operator delete, + (GenericFnPtr)(void(*)(void*))&::operator delete[], + (GenericFnPtr) + (void*(*)(size_t, struct std::nothrow_t const &))&::operator new, + (GenericFnPtr) + (void*(*)(size_t, struct std::nothrow_t const &))&::operator new[], + (GenericFnPtr) + (void(*)(void*, struct std::nothrow_t const &))&::operator delete, + (GenericFnPtr) + (void(*)(void*, struct std::nothrow_t const &))&::operator delete[], +#endif + (GenericFnPtr)&::_msize, + (GenericFnPtr)&::_expand, +#ifdef PERFTOOLS_NO_ALIGNED_MALLOC // for older versions of mingw + // _aligned_malloc isn't always available in mingw, so don't try to patch. + (GenericFnPtr)NULL, + (GenericFnPtr)NULL, +#else + (GenericFnPtr)&::_aligned_malloc, + (GenericFnPtr)&::_aligned_free, +#endif +}; + +template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = { + // This will get filled in at run-time, as patching is done. +}; + +template<int T> +const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = { + (GenericFnPtr)&Perftools_malloc, + (GenericFnPtr)&Perftools_free, + (GenericFnPtr)&Perftools_realloc, + (GenericFnPtr)&Perftools_calloc, + (GenericFnPtr)&Perftools_new, + (GenericFnPtr)&Perftools_newarray, + (GenericFnPtr)&Perftools_delete, + (GenericFnPtr)&Perftools_deletearray, + (GenericFnPtr)&Perftools_new_nothrow, + (GenericFnPtr)&Perftools_newarray_nothrow, + (GenericFnPtr)&Perftools_delete_nothrow, + (GenericFnPtr)&Perftools_deletearray_nothrow, + (GenericFnPtr)&Perftools__msize, + (GenericFnPtr)&Perftools__expand, + (GenericFnPtr)&Perftools__aligned_malloc, + (GenericFnPtr)&Perftools__aligned_free, +}; + +/*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = { + { "HeapAlloc", NULL, NULL, (GenericFnPtr)&Perftools_HeapAlloc }, + { "HeapFree", NULL, NULL, (GenericFnPtr)&Perftools_HeapFree }, + { "VirtualAllocEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualAllocEx }, + { "VirtualFreeEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualFreeEx }, + { "MapViewOfFileEx", NULL, NULL, (GenericFnPtr)&Perftools_MapViewOfFileEx }, + { "UnmapViewOfFile", NULL, NULL, (GenericFnPtr)&Perftools_UnmapViewOfFile }, + { "LoadLibraryExW", NULL, NULL, (GenericFnPtr)&Perftools_LoadLibraryExW }, + { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary }, +}; + +bool LibcInfo::PopulateWindowsFn(const MODULEENTRY32& me32) { + module_base_address_ = me32.modBaseAddr; + module_base_size_ = me32.modBaseSize; + strcpy(module_name_, me32.szModule); + + // First, store the location of the function to patch before + // patching it. If none of these functions are found in the module, + // then this module has no libc in it, and we just return false. + for (int i = 0; i < kNumFunctions; i++) { + if (!function_name_[i]) // we can turn off patching by unsetting name + continue; + GenericFnPtr fn = NULL; + if (me32.hModule == NULL) { // used for the main executable + // This is used only for a statically-linked-in libc. 
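+      // (Some static_fn_ entries can legitimately be NULL -- e.g. new/delete
+      // under mingw -- in which case fn stays NULL and that slot of
+      // windows_fn_[] is simply left unset.)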
+ fn = static_fn_[i]; + } else { + fn = (GenericFnPtr)::GetProcAddress(me32.hModule, function_name_[i]); + } + if (fn) { + windows_fn_[i] = PreamblePatcher::ResolveTarget(fn); + } + } + + // Some modules use the same function pointer for new and new[]. If + // we find that, set one of the pointers to NULL so we don't double- + // patch. Same may happen with new and nothrow-new, or even new[] + // and nothrow-new. It's easiest just to check each fn-ptr against + // every other. + for (int i = 0; i < kNumFunctions; i++) { + for (int j = i+1; j < kNumFunctions; j++) { + if (windows_fn_[i] == windows_fn_[j]) { + // We NULL the later one (j), so as to minimize the chances we + // NULL kFree and kRealloc. See comments below. This is fragile! + windows_fn_[j] = NULL; + } + } + } + + // There's always a chance that our module uses the same function + // as another module that we've already loaded. In that case, we + // need to set our windows_fn to NULL, to avoid double-patching. + for (int ifn = 0; ifn < kNumFunctions; ifn++) { + for (int imod = 0; + imod < sizeof(module_libcs)/sizeof(*module_libcs); imod++) { + if (module_libcs[imod]->is_valid() && + this->windows_fn(ifn) == module_libcs[imod]->windows_fn(ifn)) { + windows_fn_[ifn] = NULL; + } + } + } + + bool found_non_null = false; + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i]) + found_non_null = true; + } + if (!found_non_null) + return false; + + // It's important we didn't NULL out windows_fn_[kFree] or [kRealloc]. + // The reason is, if those are NULL-ed out, we'll never patch them + // and thus never get an origstub_fn_ value for them, and when we + // try to call origstub_fn_[kFree/kRealloc] in Perftools_free and + // Perftools_realloc, below, it will fail. We could work around + // that by adding a pointer from one patch-unit to the other, but we + // haven't needed to yet. + CHECK(windows_fn_[kFree]); + CHECK(windows_fn_[kRealloc]); + return true; +} + +template<int T> +bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) { + CopyFrom(me_info); // copies the me32 and the windows_fn_ array + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i], + &origstub_fn_[i])); + } + set_is_valid(true); + return true; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Unpatch() { + // We have to cast our GenericFnPtrs to void* for unpatch. This is + // contra the C++ spec; we use C-style casts to empahsize that. + for (int i = 0; i < kNumFunctions; i++) { + if (windows_fn_[i]) + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Unpatch((void*)windows_fn_[i], + (void*)perftools_fn_[i], + (void*)origstub_fn_[i])); + } + set_is_valid(false); +} + +void WindowsInfo::Patch() { + HMODULE hkernel32 = ::GetModuleHandleA("kernel32"); + CHECK_NE(hkernel32, NULL); + + // Unlike for libc, we know these exist in our module, so we can get + // and patch at the same time. + for (int i = 0; i < kNumFunctions; i++) { + function_info_[i].windows_fn = (GenericFnPtr) + ::GetProcAddress(hkernel32, function_info_[i].name); + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Patch(function_info_[i].windows_fn, + function_info_[i].perftools_fn, + &function_info_[i].origstub_fn)); + } +} + +void WindowsInfo::Unpatch() { + // We have to cast our GenericFnPtrs to void* for unpatch. This is + // contra the C++ spec; we use C-style casts to empahsize that. 
+ for (int i = 0; i < kNumFunctions; i++) { + CHECK_EQ(sidestep::SIDESTEP_SUCCESS, + PreamblePatcher::Unpatch((void*)function_info_[i].windows_fn, + (void*)function_info_[i].perftools_fn, + (void*)function_info_[i].origstub_fn)); + } +} + +// You should hold the patch_all_modules_lock when calling this. +void PatchOneModuleLocked(const LibcInfo& me_info) { + // Double-check we haven't seen this module before. + for (int i = 0; i < sizeof(module_libcs)/sizeof(*module_libcs); i++) { + if (module_libcs[i]->SameAs(me_info)) { + fprintf(stderr, "%s:%d: FATAL ERROR: %s double-patched somehow.\n", + __FILE__, __LINE__, module_libcs[i]->module_name()); + CHECK(false); + } + } + // If we don't already have info on this module, let's add it. This + // is where we're sad that each libcX has a different type, so we + // can't use an array; instead, we have to use a switch statement. + // Patch() returns false if there were no libc functions in the module. + for (int i = 0; i < sizeof(module_libcs)/sizeof(*module_libcs); i++) { + if (!module_libcs[i]->is_valid()) { // found an empty spot to add! + switch (i) { + case 0: libc1.Patch(me_info); return; + case 1: libc2.Patch(me_info); return; + case 2: libc3.Patch(me_info); return; + case 3: libc4.Patch(me_info); return; + case 4: libc5.Patch(me_info); return; + case 5: libc6.Patch(me_info); return; + case 6: libc7.Patch(me_info); return; + case 7: libc8.Patch(me_info); return; + } + } + } + printf("ERROR: Too many modules containing libc in this executable\n"); +} + +void PatchMainExecutableLocked() { + if (main_executable.patched()) + return; // main executable has already been patched + MODULEENTRY32 fake_me32; // we make a fake one to pass into Patch() + fake_me32.modBaseAddr = NULL; + fake_me32.modBaseSize = 0; + strcpy(fake_me32.szModule, "<executable>"); + fake_me32.hModule = NULL; + main_executable.PopulateWindowsFn(fake_me32); + main_executable.Patch(main_executable); +} + +static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); + +// Iterates over all the modules currently loaded by the executable, +// and makes sure they're all patched. For ones that aren't, we patch +// them in. We also check that every module we had patched in the +// past is still loaded, and update internal data structures if so. +void PatchAllModules() { + std::vector<LibcInfo*> modules; + bool still_loaded[sizeof(module_libcs)/sizeof(*module_libcs)] = {}; + + HANDLE hModuleSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE | + TH32CS_SNAPMODULE32, + GetCurrentProcessId()); + if (hModuleSnap != INVALID_HANDLE_VALUE) { + MODULEENTRY32 me32; + me32.dwSize = sizeof(me32); + if (Module32First(hModuleSnap, &me32)) { + do { + bool module_already_loaded = false; + for (int i = 0; i < sizeof(module_libcs)/sizeof(*module_libcs); i++) { + if (module_libcs[i]->SameAsME32(me32)) { + still_loaded[i] = true; + module_already_loaded = true; + break; + } + } + if (!module_already_loaded) { + LibcInfo* libc_info = new LibcInfo; + if (libc_info->PopulateWindowsFn(me32)) + modules.push_back(libc_info); + else // means module has no libc routines + delete libc_info; + } + } while (Module32Next(hModuleSnap, &me32)); + } + CloseHandle(hModuleSnap); + } + + // Now do the actual patching. + { + SpinLockHolder h(&patch_all_modules_lock); + // First, delete the modules that are no longer loaded. (We go first + // so we can try to open up space for the new modules we need to load.) 
+ for (int i = 0; i < sizeof(module_libcs)/sizeof(*module_libcs); i++) { + if (!still_loaded[i]) { + // We could call Unpatch() here, but why bother? The module + // has gone away, so nobody is going to call into it anyway. + module_libcs[i]->set_is_valid(false); + } + } + + // Now, add in new modules that we need to load. + for (std::vector<LibcInfo*>::iterator it = modules.begin(); + it != modules.end(); ++it) { + PatchOneModuleLocked(**it); // updates num_patched_modules + } + + // Now that we've dealt with the modules (dlls), update the main + // executable. We do this last because PatchMainExecutableLocked + // wants to look at how other modules were patched. + PatchMainExecutableLocked(); + } + + for (std::vector<LibcInfo*>::iterator it = modules.begin(); + it != modules.end(); ++it) { + delete *it; + } +} + + +} // end unnamed namespace + +// --------------------------------------------------------------------- +// PatchWindowsFunctions() +// This is the function that is exposed to the outside world. +// --------------------------------------------------------------------- + +void PatchWindowsFunctions() { + // This does the libc patching in every module, and the main executable. + PatchAllModules(); + main_executable_windows.Patch(); +} + +#if 0 +// It's possible to unpatch all the functions when we are exiting. + +// The idea is to handle properly windows-internal data that is +// allocated before PatchWindowsFunctions is called. If all +// destruction happened in reverse order from construction, then we +// could call UnpatchWindowsFunctions at just the right time, so that +// that early-allocated data would be freed using the windows +// allocation functions rather than tcmalloc. The problem is that +// windows allocates some structures lazily, so it would allocate them +// late (using tcmalloc) and then try to deallocate them late as well. +// So instead of unpatching, we just modify all the tcmalloc routines +// so they call through to the libc rountines if the memory in +// question doesn't seem to have been allocated with tcmalloc. I keep +// this unpatch code around for reference. + +void UnpatchWindowsFunctions() { + // We need to go back to the system malloc/etc at global destruct time, + // so objects that were constructed before tcmalloc, using the system + // malloc, can destroy themselves using the system free. This depends + // on DLLs unloading in the reverse order in which they load! + // + // We also go back to the default HeapAlloc/etc, just for consistency. + // Who knows, it may help avoid weird bugs in some situations. + main_executable_windows.Unpatch(); + main_executable.Unpatch(); + if (libc1.is_valid()) libc1.Unpatch(); + if (libc2.is_valid()) libc2.Unpatch(); + if (libc3.is_valid()) libc3.Unpatch(); + if (libc4.is_valid()) libc4.Unpatch(); + if (libc5.is_valid()) libc5.Unpatch(); + if (libc6.is_valid()) libc6.Unpatch(); + if (libc7.is_valid()) libc7.Unpatch(); + if (libc8.is_valid()) libc8.Unpatch(); +} +#endif + +// --------------------------------------------------------------------- +// Now that we've done all the patching machinery, let's end the file +// by actually defining the functions we're patching in. Mostly these +// are simple wrappers around the do_* routines in tcmalloc.cc. +// +// In fact, we #include tcmalloc.cc to get at the tcmalloc internal +// do_* functions, the better to write our own hook functions. +// U-G-L-Y, I know. But the alternatives are, perhaps, worse. 
This +// also lets us define _msize(), _expand(), and other windows-specific +// functions here, using tcmalloc internals, without polluting +// tcmalloc.cc. +// ------------------------------------------------------------------- + +// TODO(csilvers): refactor tcmalloc.cc into two files, so I can link +// against the file with do_malloc, and ignore the one with malloc. +#include "tcmalloc.cc" + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_malloc(size_t size) __THROW { + void* result = do_malloc(size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_free(void* ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + // This calls the windows free if do_free decides ptr was not + // allocated by tcmalloc. Note it calls the origstub_free from + // *this* templatized instance of LibcInfo. See "template + // trickiness" above. + do_free_with_callback(ptr, (void (*)(void*))origstub_fn_[kFree]); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_realloc( + void* old_ptr, size_t new_size) __THROW { + if (old_ptr == NULL) { + void* result = do_malloc(new_size); + MallocHook::InvokeNewHook(result, new_size); + return result; + } + if (new_size == 0) { + MallocHook::InvokeDeleteHook(old_ptr); + do_free_with_callback(old_ptr, + (void (*)(void*))origstub_fn_[kFree]); + return NULL; + } + return do_realloc_with_callback( + old_ptr, new_size, + (void (*)(void*))origstub_fn_[kFree], + (size_t (*)(void*))origstub_fn_[k_Msize]); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_calloc( + size_t n, size_t elem_size) __THROW { + void* result = do_calloc(n, elem_size); + MallocHook::InvokeNewHook(result, n * elem_size); + return result; +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_new(size_t size) { + void* p = cpp_alloc(size, false); + MallocHook::InvokeNewHook(p, size); + return p; +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_newarray(size_t size) { + void* p = cpp_alloc(size, false); + MallocHook::InvokeNewHook(p, size); + return p; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_delete(void *p) { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree]); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_deletearray(void *p) { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree]); +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_new_nothrow( + size_t size, const std::nothrow_t&) __THROW { + void* p = cpp_alloc(size, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools_newarray_nothrow( + size_t size, const std::nothrow_t&) __THROW { + void* p = cpp_alloc(size, true); + MallocHook::InvokeNewHook(p, size); + return p; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_delete_nothrow( + void *p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree]); +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow( + void *p, const std::nothrow_t&) __THROW { + MallocHook::InvokeDeleteHook(p); + do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree]); +} + + +// _msize() lets you figure out how much space is reserved for a +// pointer, in Windows. 
Even if applications don't call it, any DLL +// with global constructors will call (transitively) something called +// __dllonexit_lk in order to make sure the destructors get called +// when the dll unloads. And that will call msize -- horrible things +// can ensue if this is not hooked. Other parts of libc may also call +// this internally. + +template<int T> +size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW { + return GetSizeWithCallback(ptr, (size_t (*)(void*))origstub_fn_[k_Msize]); +} + +// We need to define this because internal windows functions like to +// call into it(?). _expand() is like realloc but doesn't move the +// pointer. We punt, which will cause callers to fall back on realloc. +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools__expand(void *ptr, + size_t size) __THROW { + return NULL; +} + +template<int T> +void* LibcInfoWithPatchFunctions<T>::Perftools__aligned_malloc(size_t size, + size_t alignment) + __THROW { + void* result = do_memalign(alignment, size); + MallocHook::InvokeNewHook(result, size); + return result; +} + +template<int T> +void LibcInfoWithPatchFunctions<T>::Perftools__aligned_free(void *ptr) __THROW { + MallocHook::InvokeDeleteHook(ptr); + do_free_with_callback(ptr, (void (*)(void*))origstub_fn_[k_Aligned_free]); +} + +LPVOID WINAPI WindowsInfo::Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags, + DWORD_PTR dwBytes) { + LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD_PTR)) + function_info_[kHeapAlloc].origstub_fn)( + hHeap, dwFlags, dwBytes); + MallocHook::InvokeNewHook(result, dwBytes); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags, + LPVOID lpMem) { + MallocHook::InvokeDeleteHook(lpMem); + return ((BOOL (WINAPI *)(HANDLE, DWORD, LPVOID)) + function_info_[kHeapFree].origstub_fn)( + hHeap, dwFlags, lpMem); +} + +LPVOID WINAPI WindowsInfo::Perftools_VirtualAllocEx(HANDLE process, + LPVOID address, + SIZE_T size, DWORD type, + DWORD protect) { + LPVOID result = ((LPVOID (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD)) + function_info_[kVirtualAllocEx].origstub_fn)( + process, address, size, type, protect); + // VirtualAllocEx() seems to be the Windows equivalent of mmap() + MallocHook::InvokeMmapHook(result, address, size, protect, type, -1, 0); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_VirtualFreeEx(HANDLE process, LPVOID address, + SIZE_T size, DWORD type) { + MallocHook::InvokeMunmapHook(address, size); + return ((BOOL (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD)) + function_info_[kVirtualFreeEx].origstub_fn)( + process, address, size, type); +} + +LPVOID WINAPI WindowsInfo::Perftools_MapViewOfFileEx( + HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh, + DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap, LPVOID lpBaseAddress) { + // For this function pair, you always deallocate the full block of + // data that you allocate, so NewHook/DeleteHook is the right API. 
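+  // (Contrast with VirtualAllocEx/VirtualFreeEx above, which go through the
+  // Mmap/Munmap hooks instead; those regions aren't necessarily released as
+  // a single whole block.)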
+ LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD, DWORD, + SIZE_T, LPVOID)) + function_info_[kMapViewOfFileEx].origstub_fn)( + hFileMappingObject, dwDesiredAccess, dwFileOffsetHigh, + dwFileOffsetLow, dwNumberOfBytesToMap, lpBaseAddress); + MallocHook::InvokeNewHook(result, dwNumberOfBytesToMap); + return result; +} + +BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) { + MallocHook::InvokeDeleteHook(lpBaseAddress); + return ((BOOL (WINAPI *)(LPCVOID)) + function_info_[kUnmapViewOfFile].origstub_fn)( + lpBaseAddress); +} + +HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, + HANDLE hFile, + DWORD dwFlags) { + HMODULE rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) + function_info_[kLoadLibraryExW].origstub_fn)( + lpFileName, hFile, dwFlags); + PatchAllModules(); // this will patch any newly loaded libraries + return rv; +} + +BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) { + BOOL rv = ((BOOL (WINAPI *)(HMODULE)) + function_info_[kFreeLibrary].origstub_fn)(hLibModule); + PatchAllModules(); // this will fix up the list of patched libraries + return rv; +} diff --git a/third_party/tcmalloc/chromium/src/windows/port.cc b/third_party/tcmalloc/chromium/src/windows/port.cc new file mode 100644 index 0000000..76a9e38 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/port.cc @@ -0,0 +1,255 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + */ + +#ifndef _WIN32 +# error You should only be including windows/port.cc in a windows environment! 
+#endif + +#include <config.h> +#include <string.h> // for strlen(), memset(), memcmp() +#include <assert.h> +#include <stdarg.h> // for va_list, va_start, va_end +#include <windows.h> +#include "port.h" +#include "base/logging.h" +#include "base/spinlock.h" +#include "system-alloc.h" + +// ----------------------------------------------------------------------- +// Basic libraries + +// These call the windows _vsnprintf, but always NUL-terminate. +int safe_vsnprintf(char *str, size_t size, const char *format, va_list ap) { + if (size == 0) // not even room for a \0? + return -1; // not what C99 says to do, but what windows does + str[size-1] = '\0'; + return _vsnprintf(str, size-1, format, ap); +} + +#ifndef HAVE_SNPRINTF +int snprintf(char *str, size_t size, const char *format, ...) { + va_list ap; + va_start(ap, format); + const int r = vsnprintf(str, size, format, ap); + va_end(ap); + return r; +} +#endif + +int getpagesize() { + static int pagesize = 0; + if (pagesize == 0) { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + pagesize = system_info.dwPageSize; + } + return pagesize; +} + +extern "C" PERFTOOLS_DLL_DECL void* __sbrk(ptrdiff_t increment) { + LOG(FATAL, "Windows doesn't implement sbrk!\n"); + return NULL; +} + +// ----------------------------------------------------------------------- +// Threads code + +bool CheckIfKernelSupportsTLS() { + // TODO(csilvers): return true (all win's since win95, at least, support this) + return false; +} + +// Windows doesn't support pthread_key_create's destr_function, and in +// fact it's a bit tricky to get code to run when a thread exits. This +// is cargo-cult magic from http://www.codeproject.com/threads/tls.asp. +// This code is for VC++ 7.1 and later; VC++ 6.0 support is possible +// but more busy-work -- see the webpage for how to do it. If all +// this fails, we could use DllMain instead. The big problem with +// DllMain is it doesn't run if this code is statically linked into a +// binary (it also doesn't run if the thread is terminated via +// TerminateThread, which if we're lucky this routine does). + +// This makes the linker create the TLS directory if it's not already +// there (that is, even if __declspec(thead) is not used). +#ifdef _MSC_VER +#pragma comment(linker, "/INCLUDE:__tls_used") +#endif + +// When destr_fn eventually runs, it's supposed to take as its +// argument the tls-value associated with key that pthread_key_create +// creates. (Yeah, it sounds confusing but it's really not.) We +// store the destr_fn/key pair in this data structure. Because we +// store this in a single var, this implies we can only have one +// destr_fn in a program! That's enough in practice. If asserts +// trigger because we end up needing more, we'll have to turn this +// into an array. +struct DestrFnClosure { + void (*destr_fn)(void*); + pthread_key_t key_for_destr_fn_arg; +}; + +static DestrFnClosure destr_fn_info; // initted to all NULL/0. + +static int on_process_term(void) { + if (destr_fn_info.destr_fn) { + void *ptr = TlsGetValue(destr_fn_info.key_for_destr_fn_arg); + // This shouldn't be necessary, but in Release mode, Windows + // sometimes trashes the pointer in the TLS slot, so we need to + // remove the pointer from the TLS slot before the thread dies. 
+ TlsSetValue(destr_fn_info.key_for_destr_fn_arg, NULL); + if (ptr) // pthread semantics say not to call if ptr is NULL + (*destr_fn_info.destr_fn)(ptr); + } + return 0; +} + +static void NTAPI on_tls_callback(HINSTANCE h, DWORD dwReason, PVOID pv) { + if (dwReason == DLL_THREAD_DETACH) { // thread is being destroyed! + on_process_term(); + } +} + +#ifdef _MSC_VER + +// This tells the linker to run these functions. +#pragma data_seg(push, old_seg) +#pragma data_seg(".CRT$XLB") +static void (NTAPI *p_thread_callback)(HINSTANCE h, DWORD dwReason, PVOID pv) + = on_tls_callback; +#pragma data_seg(".CRT$XTU") +static int (*p_process_term)(void) = on_process_term; +#pragma data_seg(pop, old_seg) + +#else // #ifdef _MSC_VER [probably msys/mingw] + +// We have to try the DllMain solution here, because we can't use the +// msvc-specific pragmas. +BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) { + if (dwReason == DLL_THREAD_DETACH) + on_tls_callback(h, dwReason, pv); + else if (dwReason == DLL_PROCESS_DETACH) + on_process_term(); + return TRUE; +} + +#endif // #ifdef _MSC_VER + +pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)) { + // Semantics are: we create a new key, and then promise to call + // destr_fn with TlsGetValue(key) when the thread is destroyed + // (as long as TlsGetValue(key) is not NULL). + pthread_key_t key = TlsAlloc(); + if (destr_fn) { // register it + // If this assert fails, we'll need to support an array of destr_fn_infos + assert(destr_fn_info.destr_fn == NULL); + destr_fn_info.destr_fn = destr_fn; + destr_fn_info.key_for_destr_fn_arg = key; + } + return key; +} + + +// ----------------------------------------------------------------------- +// These functions replace system-alloc.cc + +// This is mostly like MmapSysAllocator::Alloc, except it does these weird +// munmap's in the middle of the page, which is forbidden in windows. +extern void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size, + size_t alignment) { + // Safest is to make actual_size same as input-size. + if (actual_size) { + *actual_size = size; + } + + // Align on the pagesize boundary + const int pagesize = getpagesize(); + if (alignment < pagesize) alignment = pagesize; + size = ((size + alignment - 1) / alignment) * alignment; + + // Ask for extra memory if alignment > pagesize + size_t extra = 0; + if (alignment > pagesize) { + extra = alignment - pagesize; + } + + void* result = VirtualAlloc(0, size + extra, + MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); + if (result == NULL) + return NULL; + + // Adjust the return memory so it is aligned + uintptr_t ptr = reinterpret_cast<uintptr_t>(result); + size_t adjust = 0; + if ((ptr & (alignment - 1)) != 0) { + adjust = alignment - (ptr & (alignment - 1)); + } + + ptr += adjust; + return reinterpret_cast<void*>(ptr); +} + +void TCMalloc_SystemRelease(void* start, size_t length) { + // TODO(csilvers): should I be calling VirtualFree here? +} + +bool RegisterSystemAllocator(SysAllocator *allocator, int priority) { + return false; // we don't allow registration on windows, right now +} + +void DumpSystemAllocatorStats(TCMalloc_Printer* printer) { + // We don't dump stats on windows, right now +} + + +// ----------------------------------------------------------------------- +// These functions rework existing functions of the same name in the +// Google codebase. + +// A replacement for HeapProfiler::CleanupOldProfiles. 
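+// (Hedged usage sketch; the profile prefix below is invented for
+// illustration:
+//    DeleteMatchingFiles("myprog.hprof", "myprog.hprof.*.heap");
+// removes any leftover "myprog.hprof.<N>.heap" dumps from the current
+// directory, roughly what CleanupOldProfiles does elsewhere.)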
+void DeleteMatchingFiles(const char* prefix, const char* full_glob) { + WIN32_FIND_DATAA found; // that final A is for Ansi (as opposed to Unicode) + HANDLE hFind = FindFirstFileA(full_glob, &found); // A is for Ansi + if (hFind != INVALID_HANDLE_VALUE) { + const int prefix_length = strlen(prefix); + do { + const char *fname = found.cFileName; + if ((strlen(fname) >= prefix_length) && + (memcmp(fname, prefix, prefix_length) == 0)) { + RAW_VLOG(0, "Removing old heap profile %s\n", fname); + // TODO(csilvers): we really need to unlink dirname + fname + _unlink(fname); + } + } while (FindNextFileA(hFind, &found) != FALSE); // A is for Ansi + FindClose(hFind); + } +} diff --git a/third_party/tcmalloc/chromium/src/windows/port.h b/third_party/tcmalloc/chromium/src/windows/port.h new file mode 100644 index 0000000..66745d1 --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/port.h @@ -0,0 +1,334 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Craig Silverstein + * + * These are some portability typedefs and defines to make it a bit + * easier to compile this code under VC++. + * + * Several of these are taken from glib: + * http://developer.gnome.org/doc/API/glib/glib-windows-compatability-functions.html + */ + +#ifndef GOOGLE_BASE_WINDOWS_H_ +#define GOOGLE_BASE_WINDOWS_H_ + +// You should never include this file directly, but always include it +// from either config.h (MSVC) or mingw.h (MinGW/msys). 
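+// (Illustrative assumption: config.h defines
+// GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_ and then does
+//    #include "windows/port.h"
+// which is what satisfies the guard check below.)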
+#if !defined(GOOGLE_PERFTOOLS_WINDOWS_CONFIG_H_) && \ + !defined(GOOGLE_PERFTOOLS_WINDOWS_MINGW_H_) +# error "port.h should only be included from config.h or mingw.h" +#endif + +#ifdef _WIN32 + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN /* We always want minimal includes */ +#endif +#include <windows.h> +#include <io.h> /* because we so often use open/close/etc */ +#include <process.h> /* for _getpid */ +#include <stdarg.h> /* for va_list */ +#include <stdio.h> /* need this to override stdio's (v)snprintf */ + +// 4018: signed/unsigned mismatch is common (and ok for signed_i < unsigned_i) +// 4244: otherwise we get problems when substracting two size_t's to an int +// 4288: VC++7 gets confused when a var is defined in a loop and then after it +// 4267: too many false positives for "conversion gives possible data loss" +// 4290: it's ok windows ignores the "throw" directive +// 4996: Yes, we're ok using "unsafe" functions like vsnprintf and getenv() +#ifdef _MSC_VER +#pragma warning(disable:4018 4244 4288 4267 4290 4996) +#endif + +// ----------------------------------- BASIC TYPES + +#ifndef HAVE_STDINT_H +#ifndef HAVE___INT64 /* we need to have all the __intX names */ +# error Do not know how to set up type aliases. Edit port.h for your system. +#endif + +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; +#endif // #ifndef HAVE_STDINT_H + +// I guess MSVC's <types.h> doesn't include ssize_t by default? +#ifdef _MSC_VER +typedef intptr_t ssize_t; +#endif + +// ----------------------------------- THREADS + +#ifndef HAVE_PTHREAD // not true for MSVC, but may be true for MSYS +typedef DWORD pthread_t; +typedef DWORD pthread_key_t; +typedef LONG pthread_once_t; +enum { PTHREAD_ONCE_INIT = 0 }; // important that this be 0! for SpinLock +#define pthread_self GetCurrentThreadId +#define pthread_equal(pthread_t_1, pthread_t_2) ((pthread_t_1)==(pthread_t_2)) + +#ifdef __cplusplus +// This replaces maybe_threads.{h,cc} +extern pthread_key_t PthreadKeyCreate(void (*destr_fn)(void*)); // in port.cc +#define perftools_pthread_key_create(pkey, destr_fn) \ + *(pkey) = PthreadKeyCreate(destr_fn) +inline void* perftools_pthread_getspecific(DWORD key) { + DWORD err = GetLastError(); + void* rv = TlsGetValue(key); + if (err) SetLastError(err); + return rv; +} +#define perftools_pthread_setspecific(key, val) \ + TlsSetValue((key), (val)) +// NOTE: this is Win2K and later. For Win98 we could use a CRITICAL_SECTION... +#define perftools_pthread_once(once, init) do { \ + if (InterlockedCompareExchange(once, 1, 0) == 0) (init)(); \ +} while (0) +#endif // __cplusplus +#endif // HAVE_PTHREAD + +// __declspec(thread) isn't usable in a dll opened via LoadLibrary(). +// But it doesn't work to LoadLibrary() us anyway, because of all the +// things we need to do before main()! So this kind of TLS is safe for us. +#define __thread __declspec(thread) + +// This code is obsolete, but I keep it around in case we are ever in +// an environment where we can't or don't want to use google spinlocks +// (from base/spinlock.{h,cc}). In that case, uncommenting this out, +// and removing spinlock.cc from the build, should be enough to revert +// back to using native spinlocks. +#if 0 +// Windows uses a spinlock internally for its mutexes, making our life easy! 
+// However, the Windows spinlock must always be initialized, making life hard, +// since we want LINKER_INITIALIZED. We work around this by having the +// linker initialize a bool to 0, and check that before accessing the mutex. +// This replaces spinlock.{h,cc}, and all the stuff it depends on (atomicops) +#ifdef __cplusplus +class SpinLock { + public: + SpinLock() : initialize_token_(PTHREAD_ONCE_INIT) {} + // Used for global SpinLock vars (see base/spinlock.h for more details). + enum StaticInitializer { LINKER_INITIALIZED }; + explicit SpinLock(StaticInitializer) : initialize_token_(PTHREAD_ONCE_INIT) { + perftools_pthread_once(&initialize_token_, InitializeMutex); + } + + // It's important SpinLock not have a destructor: otherwise we run + // into problems when the main thread has exited, but other threads + // are still running and try to access a main-thread spinlock. This + // means we leak mutex_ (we should call DeleteCriticalSection() + // here). However, I've verified that all SpinLocks used in + // perftools have program-long scope anyway, so the leak is + // perfectly fine. But be aware of this for the future! + + void Lock() { + // You'd thionk this would be unnecessary, since we call + // InitializeMutex() in our constructor. But sometimes Lock() can + // be called before our constructor is! This can only happen in + // global constructors, when this is a global. If we live in + // bar.cc, and some global constructor in foo.cc calls a routine + // in bar.cc that calls this->Lock(), then Lock() may well run + // before our global constructor does. To protect against that, + // we do this check. For SpinLock objects created after main() + // has started, this pthread_once call will always be a noop. + perftools_pthread_once(&initialize_token_, InitializeMutex); + EnterCriticalSection(&mutex_); + } + void Unlock() { + LeaveCriticalSection(&mutex_); + } + + // Used in assertion checks: assert(lock.IsHeld()) (see base/spinlock.h). + inline bool IsHeld() const { + // This works, but probes undocumented internals, so I've commented it out. + // c.f. http://msdn.microsoft.com/msdnmag/issues/03/12/CriticalSections/ + //return mutex_.LockCount>=0 && mutex_.OwningThread==GetCurrentThreadId(); + return true; + } + private: + void InitializeMutex() { InitializeCriticalSection(&mutex_); } + + pthread_once_t initialize_token_; + CRITICAL_SECTION mutex_; +}; + +class SpinLockHolder { // Acquires a spinlock for as long as the scope lasts + private: + SpinLock* lock_; + public: + inline explicit SpinLockHolder(SpinLock* l) : lock_(l) { l->Lock(); } + inline ~SpinLockHolder() { lock_->Unlock(); } +}; +#endif // #ifdef __cplusplus + +// This keeps us from using base/spinlock.h's implementation of SpinLock. 
+#define BASE_SPINLOCK_H_ 1 + +#endif // #if 0 + +// This replaces testutil.{h,cc} +extern PERFTOOLS_DLL_DECL void RunInThread(void (*fn)()); +extern PERFTOOLS_DLL_DECL void RunManyInThread(void (*fn)(), int count); +extern PERFTOOLS_DLL_DECL void RunManyInThreadWithId(void (*fn)(int), int count, + int stacksize); + + +// ----------------------------------- MMAP and other memory allocation + +#ifndef HAVE_MMAP // not true for MSVC, but may be true for msys +#define MAP_FAILED 0 +#define MREMAP_FIXED 2 // the value in linux, though it doesn't really matter +// These, when combined with the mmap invariants below, yield the proper action +#define PROT_READ PAGE_READWRITE +#define PROT_WRITE PAGE_READWRITE +#define MAP_ANONYMOUS MEM_RESERVE +#define MAP_PRIVATE MEM_COMMIT +#define MAP_SHARED MEM_RESERVE // value of this #define is 100% arbitrary + +// VirtualAlloc is only a replacement for mmap when certain invariants are kept +#define mmap(start, length, prot, flags, fd, offset) \ + ( (start) == NULL && (fd) == -1 && (offset) == 0 && \ + (prot) == (PROT_READ|PROT_WRITE) && (flags) == (MAP_PRIVATE|MAP_ANONYMOUS)\ + ? VirtualAlloc(0, length, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE) \ + : NULL ) + +#define munmap(start, length) (VirtualFree(start, 0, MEM_RELEASE) ? 0 : -1) +#endif // HAVE_MMAP + +// We could maybe use VirtualAlloc for sbrk as well, but no need +#define sbrk(increment) ( (void*)-1 ) // sbrk returns -1 on failure + + +// ----------------------------------- STRING ROUTINES + +// We can't just use _vsnprintf and _snprintf as drop-in-replacements, +// because they don't always NUL-terminate. :-( We also can't use the +// name vsnprintf, since windows defines that (but not snprintf (!)). +extern PERFTOOLS_DLL_DECL int snprintf(char *str, size_t size, + const char *format, ...); +extern PERFTOOLS_DLL_DECL int safe_vsnprintf(char *str, size_t size, + const char *format, va_list ap); +#define vsnprintf(str, size, format, ap) safe_vsnprintf(str, size, format, ap) + +#define PRIx64 "I64x" +#define SCNx64 "I64x" +#define PRId64 "I64d" +#define SCNd64 "I64d" +#define PRIu64 "I64u" +#ifdef _WIN64 +# define PRIuPTR "llu" +# define PRIxPTR "llx" +#else +# define PRIuPTR "lu" +# define PRIxPTR "lx" +#endif + +// ----------------------------------- FILE IO +#ifndef PATH_MAX +#define PATH_MAX 1024 +#endif +#ifndef __MINGW32__ +enum { STDIN_FILENO = 0, STDOUT_FILENO = 1, STDERR_FILENO = 2 }; +#endif +#define getcwd _getcwd +#define access _access +#define open _open +#define read _read +#define write _write +#define lseek _lseek +#define close _close +#define popen _popen +#define pclose _pclose +#define mkdir(dirname, mode) _mkdir(dirname) +#ifndef O_RDONLY +#define O_RDONLY _O_RDONLY +#endif + +// ----------------------------------- SYSTEM/PROCESS +typedef int pid_t; +#define getpid _getpid +#define getppid() (0) + +// Handle case when poll is used to simulate sleep. 
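+// (For example, a call such as "poll(NULL, 0, 500);", used elsewhere purely
+// as a portable half-second sleep, becomes Sleep(500) here; polling real
+// descriptors is not supported by this shim.)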
+#define poll(r, w, t) \ + do { \ + assert(r == 0); \ + assert(w == 0); \ + Sleep(t); \ + } while(0) + +extern PERFTOOLS_DLL_DECL int getpagesize(); // in port.cc + +// ----------------------------------- OTHER + +#define srandom srand +#define random rand +#define sleep(t) Sleep(t * 1000) + +struct timespec { + int tv_sec; + int tv_nsec; +}; + +#define nanosleep(tm_ptr, ignored) \ + Sleep((tm_ptr)->tv_sec * 1000 + (tm_ptr)->tv_nsec / 1000000) + +#ifndef __MINGW32__ +#define strtoq _strtoi64 +#define strtouq _strtoui64 +#define strtoll _strtoi64 +#define strtoull _strtoui64 +#define atoll _atoi64 +#endif + +#define __THROW throw() + +// ----------------------------------- TCMALLOC-SPECIFIC + +// tcmalloc.cc calls this so we can patch VirtualAlloc() et al. +extern PERFTOOLS_DLL_DECL void PatchWindowsFunctions(); + +// ----------------------------------- BUILD-SPECIFIC + +// windows/port.h defines compatibility APIs for several .h files, which +// we therefore shouldn't be #including directly. This hack keeps us from +// doing so. TODO(csilvers): do something more principled. +#define GOOGLE_MAYBE_THREADS_H_ 1 + + +#endif /* _WIN32 */ + +#endif /* GOOGLE_BASE_WINDOWS_H_ */ diff --git a/third_party/tcmalloc/chromium/src/windows/preamble_patcher.cc b/third_party/tcmalloc/chromium/src/windows/preamble_patcher.cc new file mode 100644 index 0000000..62b6e7f --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/preamble_patcher.cc @@ -0,0 +1,344 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Joi Sigurdsson + * + * Implementation of PreamblePatcher + */ + +#include "preamble_patcher.h" + +#include "mini_disassembler.h" + +// compatibility shims +#include "base/logging.h" + +// Definitions of assembly statements we need +#define ASM_JMP32REL 0xE9 +#define ASM_INT3 0xCC +#define ASM_JMP32ABS_0 0xFF +#define ASM_JMP32ABS_1 0x25 +#define ASM_JMP8REL 0xEB + +namespace sidestep { + +// Handle a special case that we see with functions that point into an +// IAT table (including functions linked statically into the +// application): these function already starts with ASM_JMP32*. For +// instance, malloc() might be implemented as a JMP to __malloc(). +// This function follows the initial JMPs for us, until we get to the +// place where the actual code is defined. If we get to STOP_BEFORE, +// we return the address before stop_before. +void* PreamblePatcher::ResolveTargetImpl(unsigned char* target, + unsigned char* stop_before) { + if (target == NULL) + return NULL; + while (1) { + unsigned char* new_target; + if (target[0] == ASM_JMP32REL) { + // target[1-4] holds the place the jmp goes to, but it's + // relative to the next instruction. + int relative_offset; // Windows guarantees int is 4 bytes + SIDESTEP_ASSERT(sizeof(relative_offset) == 4); + memcpy(reinterpret_cast<void*>(&relative_offset), + reinterpret_cast<void*>(target + 1), 4); + new_target = target + 5 + relative_offset; + } else if (target[0] == ASM_JMP8REL) { + // Visual Studio 7.1 implements new[] as an 8 bit jump to new + signed char relative_offset; + memcpy(reinterpret_cast<void*>(&relative_offset), + reinterpret_cast<void*>(target + 1), 1); + new_target = target + 2 + relative_offset; + } else if (target[0] == ASM_JMP32ABS_0 && + target[1] == ASM_JMP32ABS_1) { + // Visual studio seems to sometimes do it this way instead of the + // previous way. Not sure what the rules are, but it was happening + // with operator new in some binaries. + void **new_target_v; + SIDESTEP_ASSERT(sizeof(new_target) == 4); + memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4); + new_target = reinterpret_cast<unsigned char*>(*new_target_v); + } else { + break; + } + if (new_target == stop_before) + break; + target = new_target; + } + return target; +} + +// Special case scoped_ptr to avoid dependency on scoped_ptr below. 
+class DeleteUnsignedCharArray { + public: + DeleteUnsignedCharArray(unsigned char* array) : array_(array) { + } + + ~DeleteUnsignedCharArray() { + if (array_) { + delete [] array_; + } + } + + unsigned char* Release() { + unsigned char* temp = array_; + array_ = NULL; + return temp; + } + + private: + unsigned char* array_; +}; + +SideStepError PreamblePatcher::RawPatchWithStubAndProtections( + void* target_function, void *replacement_function, + unsigned char* preamble_stub, unsigned long stub_size, + unsigned long* bytes_needed) { + // We need to be able to write to a process-local copy of the first + // MAX_PREAMBLE_STUB_SIZE bytes of target_function + DWORD old_target_function_protect = 0; + BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function), + MAX_PREAMBLE_STUB_SIZE, + PAGE_EXECUTE_READWRITE, + &old_target_function_protect); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to make page containing target function " + "copy-on-write."); + return SIDESTEP_ACCESS_DENIED; + } + + SideStepError error_code = RawPatchWithStub(target_function, + replacement_function, + preamble_stub, + stub_size, + bytes_needed); + + // Restore the protection of the first MAX_PREAMBLE_STUB_SIZE bytes of + // pTargetFunction to what they were before we started goofing around. + // We do this regardless of whether the patch succeeded or not. + succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function), + MAX_PREAMBLE_STUB_SIZE, + old_target_function_protect, + &old_target_function_protect); + if (!succeeded) { + SIDESTEP_ASSERT(false && + "Failed to restore protection to target function."); + // We must not return an error here because the function has + // likely actually been patched, and returning an error might + // cause our client code not to unpatch it. So we just keep + // going. + } + + if (SIDESTEP_SUCCESS != error_code) { // Testing RawPatchWithStub, above + SIDESTEP_ASSERT(false); + return error_code; + } + + // Flush the instruction cache to make sure the processor doesn't execute the + // old version of the instructions (before our patch). + // + // FlushInstructionCache is actually a no-op at least on + // single-processor XP machines. I'm not sure why this is so, but + // it is, yet I want to keep the call to the API here for + // correctness in case there is a difference in some variants of + // Windows/hardware. + succeeded = ::FlushInstructionCache(::GetCurrentProcess(), + target_function, + MAX_PREAMBLE_STUB_SIZE); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to flush instruction cache."); + // We must not return an error here because the function has actually + // been patched, and returning an error would likely cause our client + // code not to unpatch it. So we just keep going. + } + + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::RawPatch(void* target_function, + void* replacement_function, + void** original_function_stub) { + if (!target_function || !replacement_function || !original_function_stub || + (*original_function_stub) || target_function == replacement_function) { + SIDESTEP_ASSERT(false && "Preconditions not met"); + return SIDESTEP_INVALID_PARAMETER; + } + + // @see MAX_PREAMBLE_STUB_SIZE for an explanation of how we arrives at + // this size + unsigned char* preamble_stub = new unsigned char[MAX_PREAMBLE_STUB_SIZE]; + if (!preamble_stub) { + SIDESTEP_ASSERT(false && "Unable to allocate preamble-stub."); + return SIDESTEP_INSUFFICIENT_BUFFER; + } + + // Frees the array at end of scope. 
+ DeleteUnsignedCharArray guard_preamble_stub(preamble_stub); + + // Change the protection of the newly allocated preamble stub to + // PAGE_EXECUTE_READWRITE. This is required to work with DEP (Data + // Execution Prevention) which will cause an exception if code is executed + // from a page on which you do not have read access. + DWORD old_stub_protect = 0; + BOOL succeeded = ::VirtualProtect(preamble_stub, MAX_PREAMBLE_STUB_SIZE, + PAGE_EXECUTE_READWRITE, &old_stub_protect); + if (!succeeded) { + SIDESTEP_ASSERT(false && + "Failed to make page preamble stub read-write-execute."); + return SIDESTEP_ACCESS_DENIED; + } + + SideStepError error_code = RawPatchWithStubAndProtections( + target_function, replacement_function, preamble_stub, + MAX_PREAMBLE_STUB_SIZE, NULL); + + if (SIDESTEP_SUCCESS != error_code) { + SIDESTEP_ASSERT(false); + return error_code; + } + + // Flush the instruction cache to make sure the processor doesn't execute the + // old version of the instructions (before our patch). + // + // FlushInstructionCache is actually a no-op at least on + // single-processor XP machines. I'm not sure why this is so, but + // it is, yet I want to keep the call to the API here for + // correctness in case there is a difference in some variants of + // Windows/hardware. + succeeded = ::FlushInstructionCache(::GetCurrentProcess(), + target_function, + MAX_PREAMBLE_STUB_SIZE); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to flush instruction cache."); + // We must not return an error here because the function has actually + // been patched, and returning an error would likely cause our client + // code not to unpatch it. So we just keep going. + } + + SIDESTEP_LOG("PreamblePatcher::RawPatch successfully patched."); + + // detach the scoped pointer so the memory is not freed + *original_function_stub = + reinterpret_cast<void*>(guard_preamble_stub.Release()); + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::Unpatch(void* target_function, + void* replacement_function, + void* original_function_stub) { + SIDESTEP_ASSERT(target_function && replacement_function && + original_function_stub); + if (!target_function || !replacement_function || + !original_function_stub) { + return SIDESTEP_INVALID_PARAMETER; + } + + // We disassemble the preamble of the _stub_ to see how many bytes we + // originally copied to the stub. + MiniDisassembler disassembler; + unsigned int preamble_bytes = 0; + while (preamble_bytes < 5) { + InstructionType instruction_type = + disassembler.Disassemble( + reinterpret_cast<unsigned char*>(original_function_stub) + + preamble_bytes, + preamble_bytes); + if (IT_GENERIC != instruction_type) { + SIDESTEP_ASSERT(false && + "Should only have generic instructions in stub!!"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + } + + // Before unpatching, target_function should be a JMP to + // replacement_function. If it's not, then either it's an error, or + // we're falling into the case where the original instruction was a + // JMP, and we patched the jumped_to address rather than the JMP + // itself. (For instance, if malloc() is just a JMP to __malloc(), + // we patched __malloc() and not malloc().) + unsigned char* target = reinterpret_cast<unsigned char*>(target_function); + target = reinterpret_cast<unsigned char*>( + ResolveTargetImpl( + target, reinterpret_cast<unsigned char*>(replacement_function))); + // We should end at the function we patched. When we patch, we insert + // a ASM_JMP32REL instruction, so look for that as a sanity check. 
+ if (target[0] != ASM_JMP32REL) { + SIDESTEP_ASSERT(false && + "target_function does not look like it was patched."); + return SIDESTEP_INVALID_PARAMETER; + } + + // We need to be able to write to a process-local copy of the first + // MAX_PREAMBLE_STUB_SIZE bytes of target_function + DWORD old_target_function_protect = 0; + BOOL succeeded = ::VirtualProtect(reinterpret_cast<void*>(target_function), + MAX_PREAMBLE_STUB_SIZE, + PAGE_EXECUTE_READWRITE, + &old_target_function_protect); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to make page containing target function " + "copy-on-write."); + return SIDESTEP_ACCESS_DENIED; + } + + // Replace the first few bytes of the original function with the bytes we + // previously moved to the preamble stub. + memcpy(reinterpret_cast<void*>(target), + original_function_stub, preamble_bytes); + + // Stub is now useless so delete it. + // [csilvers: Commented out for perftools because it causes big problems + // when we're unpatching malloc. We just let this live on as a leak.] + //delete original_function_stub; + + // Restore the protection of the first MAX_PREAMBLE_STUB_SIZE bytes of + // target to what they were before we started goofing around. + succeeded = ::VirtualProtect(reinterpret_cast<void*>(target), + MAX_PREAMBLE_STUB_SIZE, + old_target_function_protect, + &old_target_function_protect); + + // Flush the instruction cache to make sure the processor doesn't execute the + // old version of the instructions (before our patch). + // + // See comment on FlushInstructionCache elsewhere in this file. + succeeded = ::FlushInstructionCache(::GetCurrentProcess(), + target, + MAX_PREAMBLE_STUB_SIZE); + if (!succeeded) { + SIDESTEP_ASSERT(false && "Failed to flush instruction cache."); + return SIDESTEP_UNEXPECTED; + } + + SIDESTEP_LOG("PreamblePatcher::Unpatch successfully unpatched."); + return SIDESTEP_SUCCESS; +} + +}; // namespace sidestep diff --git a/third_party/tcmalloc/chromium/src/windows/preamble_patcher.h b/third_party/tcmalloc/chromium/src/windows/preamble_patcher.h new file mode 100644 index 0000000..0028e4e --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/preamble_patcher.h @@ -0,0 +1,378 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Joi Sigurdsson + * + * Definition of PreamblePatcher + */ + +#ifndef GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_ +#define GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_ + +#include <windows.h> + +// compatibility shim +#include "base/logging.h" +#define SIDESTEP_ASSERT(cond) RAW_DCHECK(cond, #cond) +#define SIDESTEP_LOG(msg) RAW_VLOG(1, msg) + +// Maximum size of the preamble stub. We overwrite at least the first 5 +// bytes of the function. Considering the worst case scenario, we need 4 +// bytes + the max instruction size + 5 more bytes for our jump back to +// the original code. With that in mind, 32 is a good number :) +#define MAX_PREAMBLE_STUB_SIZE (32) + +namespace sidestep { + +// Possible results of patching/unpatching +enum SideStepError { + SIDESTEP_SUCCESS = 0, + SIDESTEP_INVALID_PARAMETER, + SIDESTEP_INSUFFICIENT_BUFFER, + SIDESTEP_JUMP_INSTRUCTION, + SIDESTEP_FUNCTION_TOO_SMALL, + SIDESTEP_UNSUPPORTED_INSTRUCTION, + SIDESTEP_NO_SUCH_MODULE, + SIDESTEP_NO_SUCH_FUNCTION, + SIDESTEP_ACCESS_DENIED, + SIDESTEP_UNEXPECTED, +}; + +#define SIDESTEP_TO_HRESULT(error) \ + MAKE_HRESULT(SEVERITY_ERROR, FACILITY_NULL, error) + +// Implements a patching mechanism that overwrites the first few bytes of +// a function preamble with a jump to our hook function, which is then +// able to call the original function via a specially-made preamble-stub +// that imitates the action of the original preamble. +// +// NOTE: This patching mechanism should currently only be used for +// non-production code, e.g. unit tests, because it is not threadsafe. +// See the TODO in preamble_patcher_with_stub.cc for instructions on what +// we need to do before using it in production code; it's fairly simple +// but unnecessary for now since we only intend to use it in unit tests. +// +// To patch a function, use either of the typesafe Patch() methods. You +// can unpatch a function using Unpatch(). +// +// Typical usage goes something like this: +// @code +// typedef int (*MyTypesafeFuncPtr)(int x); +// MyTypesafeFuncPtr original_func_stub; +// int MyTypesafeFunc(int x) { return x + 1; } +// int HookMyTypesafeFunc(int x) { return 1 + original_func_stub(x); } +// +// void MyPatchInitializingFunction() { +// original_func_stub = PreamblePatcher::Patch( +// MyTypesafeFunc, HookMyTypesafeFunc); +// if (!original_func_stub) { +// // ... error handling ... +// } +// +// // ... continue - you have patched the function successfully ... +// } +// @endcode +// +// Note that there are a number of ways that this method of patching can +// fail. The most common are: +// - If there is a jump (jxx) instruction in the first 5 bytes of +// the function being patched, we cannot patch it because in the +// current implementation we do not know how to rewrite relative +// jumps after relocating them to the preamble-stub. Note that +// if you really really need to patch a function like this, it +// would be possible to add this functionality (but at some cost). 
+// - If there is a return (ret) instruction in the first 5 bytes +// we cannot patch the function because it may not be long enough +// for the jmp instruction we use to inject our patch. +// - If there is another thread currently executing within the bytes +// that are copied to the preamble stub, it will crash in an undefined +// way. +// +// If you get any other error than the above, you're either pointing the +// patcher at an invalid instruction (e.g. into the middle of a multi- +// byte instruction, or not at memory containing executable instructions) +// or, there may be a bug in the disassembler we use to find +// instruction boundaries. +// +// NOTE: In optimized builds, when you have very trivial functions that +// the compiler can reason do not have side effects, the compiler may +// reuse the result of calling the function with a given parameter, which +// may mean if you patch the function in between your patch will never get +// invoked. See preamble_patcher_test.cc for an example. +class PreamblePatcher { + public: + + // This is a typesafe version of RawPatch(), identical in all other + // ways than it takes a template parameter indicating the type of the + // function being patched. + // + // @param T The type of the function you are patching. Usually + // you will establish this type using a typedef, as in the following + // example: + // @code + // typedef BOOL (WINAPI *MessageBoxPtr)(HWND, LPCTSTR, LPCTSTR, UINT); + // MessageBoxPtr original = NULL; + // PreamblePatcher::Patch(MessageBox, Hook_MessageBox, &original); + // @endcode + template <class T> + static SideStepError Patch(T target_function, + T replacement_function, + T* original_function_stub) { + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + return RawPatch((void*)(target_function), + (void*)(replacement_function), + (void**)(original_function_stub)); + } + + // Patches a named function imported from the named module using + // preamble patching. Uses RawPatch() to do the actual patching + // work. + // + // @param T The type of the function you are patching. Must + // exactly match the function you specify using module_name and + // function_name. + // + // @param module_name The name of the module from which the function + // is being imported. Note that the patch will fail if this module + // has not already been loaded into the current process. + // + // @param function_name The name of the function you wish to patch. + // + // @param replacement_function Your replacement function which + // will be called whenever code tries to call the original function. + // + // @param original_function_stub Pointer to memory that should receive a + // pointer that can be used (e.g. in the replacement function) to call the + // original function, or NULL to indicate failure. + // + // @return One of the EnSideStepError error codes; only SIDESTEP_SUCCESS + // indicates success. 
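+  //
+  // Example (illustrative only, mirroring the example above):
+  // @code
+  //   typedef BOOL (WINAPI *MessageBoxPtr)(HWND, LPCTSTR, LPCTSTR, UINT);
+  //   MessageBoxPtr original = NULL;
+  //   PreamblePatcher::Patch(TEXT("user32.dll"), "MessageBoxA",
+  //                          Hook_MessageBox, &original);
+  // @endcode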
+ template <class T> + static SideStepError Patch(LPCTSTR module_name, + LPCSTR function_name, + T replacement_function, + T* original_function_stub) { + SIDESTEP_ASSERT(module_name && function_name); + if (!module_name || !function_name) { + SIDESTEP_ASSERT(false && + "You must specify a module name and function name."); + return SIDESTEP_INVALID_PARAMETER; + } + HMODULE module = ::GetModuleHandle(module_name); + SIDESTEP_ASSERT(module != NULL); + if (!module) { + SIDESTEP_ASSERT(false && "Invalid module name."); + return SIDESTEP_NO_SUCH_MODULE; + } + FARPROC existing_function = ::GetProcAddress(module, function_name); + if (!existing_function) { + SIDESTEP_ASSERT( + false && "Did not find any function with that name in the module."); + return SIDESTEP_NO_SUCH_FUNCTION; + } + // NOTE: casting from a function to a pointer is contra the C++ + // spec. It's not safe on IA64, but is on i386. We use + // a C-style cast here to emphasize this is not legal C++. + return RawPatch((void*)existing_function, (void*)replacement_function, + (void**)(original_function_stub)); + } + + // Patches a function by overwriting its first few bytes with + // a jump to a different function. This is the "worker" function + // for each of the typesafe Patch() functions. In most cases, + // it is preferable to use the Patch() functions rather than + // this one as they do more checking at compile time. + // + // @param target_function A pointer to the function that should be + // patched. + // + // @param replacement_function A pointer to the function that should + // replace the target function. The replacement function must have + // exactly the same calling convention and parameters as the original + // function. + // + // @param original_function_stub Pointer to memory that should receive a + // pointer that can be used (e.g. in the replacement function) to call the + // original function, or NULL to indicate failure. + // + // @param original_function_stub Pointer to memory that should receive a + // pointer that can be used (e.g. in the replacement function) to call the + // original function, or NULL to indicate failure. + // + // @return One of the EnSideStepError error codes; only SIDESTEP_SUCCESS + // indicates success. + // + // @note The preamble-stub (the memory pointed to by + // *original_function_stub) is allocated on the heap, and (in + // production binaries) never destroyed, resulting in a memory leak. This + // will be the case until we implement safe unpatching of a method. + // However, it is quite difficult to unpatch a method (because other + // threads in the process may be using it) so we are leaving it for now. + // See however UnsafeUnpatch, which can be used for binaries where you + // know only one thread is running, e.g. unit tests. + static SideStepError RawPatch(void* target_function, + void* replacement_function, + void** original_function_stub); + + // Unpatches target_function and deletes the stub that previously could be + // used to call the original version of the function. + // + // DELETES the stub that is passed to the function. + // + // @param target_function Pointer to the target function which was + // previously patched, i.e. a pointer which value should match the value + // of the symbol prior to patching it. + // + // @param replacement_function Pointer to the function target_function + // was patched to. 
+ // + // @param original_function_stub Pointer to the stub returned when + // patching, that could be used to call the original version of the + // patched function. This function will also delete the stub, which after + // unpatching is useless. + // + // If your original call was + // Patch(VirtualAlloc, MyVirtualAlloc, &origptr) + // then to undo it you would call + // Unpatch(VirtualAlloc, MyVirtualAlloc, origptr); + // + // @return One of the EnSideStepError error codes; only SIDESTEP_SUCCESS + // indicates success. + static SideStepError Unpatch(void* target_function, + void* replacement_function, + void* original_function_stub); + + // A helper routine when patching, which follows jmp instructions at + // function addresses, to get to the "actual" function contents. + // This allows us to identify two functions that are at different + // addresses but actually resolve to the same code. + // + // @param target_function Pointer to a function. + // + // @return Either target_function (the input parameter), or if + // target_function's body consists entirely of a JMP instruction, + // the address it JMPs to (or more precisely, the address at the end + // of a chain of JMPs). + template <class T> + static T ResolveTarget(T target_function) { + return (T)ResolveTargetImpl((unsigned char*)target_function, NULL); + } + + private: + // Patches a function by overwriting its first few bytes with + // a jump to a different function. This is similar to the RawPatch + // function except that it uses the stub allocated by the caller + // instead of allocating it. + // + // We call VirtualProtect to make the + // target function writable at least for the duration of the call. + // + // @param target_function A pointer to the function that should be + // patched. + // + // @param replacement_function A pointer to the function that should + // replace the target function. The replacement function must have + // exactly the same calling convention and parameters as the original + // function. + // + // @param preamble_stub A pointer to a buffer where the preamble stub + // should be copied. The size of the buffer should be sufficient to + // hold the preamble bytes. + // + // @param stub_size Size in bytes of the buffer allocated for the + // preamble_stub + // + // @param bytes_needed Pointer to a variable that receives the minimum + // number of bytes required for the stub. Can be set to NULL if you're + // not interested. + // + // @return An error code indicating the result of patching. + static SideStepError RawPatchWithStubAndProtections( + void* target_function, + void *replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed); + + // A helper function used by RawPatchWithStubAndProtections -- it + // does everything but the VirtualProtect work. Defined in + // preamble_patcher_with_stub.cc. + // + // @param target_function A pointer to the function that should be + // patched. + // + // @param replacement_function A pointer to the function that should + // replace the target function. The replacement function must have + // exactly the same calling convention and parameters as the original + // function. + // + // @param preamble_stub A pointer to a buffer where the preamble stub + // should be copied. The size of the buffer should be sufficient to + // hold the preamble bytes. 
+ // + // @param stub_size Size in bytes of the buffer allocated for the + // preamble_stub + // + // @param bytes_needed Pointer to a variable that receives the minimum + // number of bytes required for the stub. Can be set to NULL if you're + // not interested. + // + // @return An error code indicating the result of patching. + static SideStepError RawPatchWithStub(void* target_function, + void *replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed); + + + // A helper routine when patching, which follows jmp instructions at + // function addresses, to get to the "actual" function contents. + // This allows us to identify two functions that are at different + // addresses but actually resolve to the same code. + // + // @param target_function Pointer to a function. + // + // @param stop_before If, when following JMP instructions from + // target_function, we get to the address stop, we return + // immediately, the address that jumps to stop_before. + // + // @return Either target_function (the input parameter), or if + // target_function's body consists entirely of a JMP instruction, + // the address it JMPs to (or more precisely, the address at the end + // of a chain of JMPs). + static void* ResolveTargetImpl(unsigned char* target_function, + unsigned char* stop_before); +}; + +}; // namespace sidestep + +#endif // GOOGLE_PERFTOOLS_PREAMBLE_PATCHER_H_ diff --git a/third_party/tcmalloc/chromium/src/windows/preamble_patcher_with_stub.cc b/third_party/tcmalloc/chromium/src/windows/preamble_patcher_with_stub.cc new file mode 100644 index 0000000..4eb391d --- /dev/null +++ b/third_party/tcmalloc/chromium/src/windows/preamble_patcher_with_stub.cc @@ -0,0 +1,200 @@ +/* Copyright (c) 2007, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * --- + * Author: Joi Sigurdsson + * + * Implementation of PreamblePatcher + */ + +#include "preamble_patcher.h" + +#include "mini_disassembler.h" + +// Definitions of assembly statements we need +#define ASM_JMP32REL 0xE9 +#define ASM_INT3 0xCC + +namespace sidestep { + +SideStepError PreamblePatcher::RawPatchWithStub( + void* target_function, + void *replacement_function, + unsigned char* preamble_stub, + unsigned long stub_size, + unsigned long* bytes_needed) { + if ((NULL == target_function) || + (NULL == replacement_function) || + (NULL == preamble_stub)) { + SIDESTEP_ASSERT(false && + "Invalid parameters - either pTargetFunction or " + "pReplacementFunction or pPreambleStub were NULL."); + return SIDESTEP_INVALID_PARAMETER; + } + + // TODO(V7:joi) Siggi and I just had a discussion and decided that both + // patching and unpatching are actually unsafe. We also discussed a + // method of making it safe, which is to freeze all other threads in the + // process, check their thread context to see if their eip is currently + // inside the block of instructions we need to copy to the stub, and if so + // wait a bit and try again, then unfreeze all threads once we've patched. + // Not implementing this for now since we're only using SideStep for unit + // testing, but if we ever use it for production code this is what we + // should do. + // + // NOTE: Stoyan suggests we can write 8 or even 10 bytes atomically using + // FPU instructions, and on newer processors we could use cmpxchg8b or + // cmpxchg16b. So it might be possible to do the patching/unpatching + // atomically and avoid having to freeze other threads. Note though, that + // doing it atomically does not help if one of the other threads happens + // to have its eip in the middle of the bytes you change while you change + // them. + + // First, deal with a special case that we see with functions that + // point into an IAT table (including functions linked statically + // into the application): these function already starts with + // ASM_JMP32REL. For instance, malloc() might be implemented as a + // JMP to __malloc(). In that case, we replace the destination of + // the JMP (__malloc), rather than the JMP itself (malloc). This + // way we get the correct behavior no matter how malloc gets called. + void *new_target = ResolveTarget(target_function); + if (new_target != target_function) { // we're in the IAT case + // I'd like to just say "target = new_target", but I can't, + // because the new target will need to have its protections set. + return RawPatchWithStubAndProtections(new_target, replacement_function, + preamble_stub, stub_size, + bytes_needed); + } + unsigned char* target = reinterpret_cast<unsigned char*>(new_target); + + // Let's disassemble the preamble of the target function to see if we can + // patch, and to see how much of the preamble we need to take. We need 5 + // bytes for our jmp instruction, so let's find the minimum number of + // instructions to get 5 bytes. 
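+  // (Worked example, assuming a typical prologue purely for illustration:
+  // "push ebp" is 1 byte and "mov ebp, esp" is 2 bytes, which covers only
+  // 3 bytes, so the loop below also takes "sub esp, 0x10" (3 bytes);
+  // preamble_bytes then becomes 6, and the stub will hold those 6 bytes
+  // followed by a 5-byte jmp back to target + 6.)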
+ MiniDisassembler disassembler; + unsigned int preamble_bytes = 0; + while (preamble_bytes < 5) { + InstructionType instruction_type = + disassembler.Disassemble(target + preamble_bytes, preamble_bytes); + if (IT_JUMP == instruction_type) { + SIDESTEP_ASSERT(false && + "Unable to patch because there is a jump instruction " + "in the first 5 bytes."); + return SIDESTEP_JUMP_INSTRUCTION; + } else if (IT_RETURN == instruction_type) { + SIDESTEP_ASSERT(false && + "Unable to patch because function is too short"); + return SIDESTEP_FUNCTION_TOO_SMALL; + } else if (IT_GENERIC != instruction_type) { + SIDESTEP_ASSERT(false && + "Disassembler encountered unsupported instruction " + "(either unused or unknown"); + return SIDESTEP_UNSUPPORTED_INSTRUCTION; + } + } + + if (NULL != bytes_needed) + *bytes_needed = preamble_bytes + 5; + + // Inv: cbPreamble is the number of bytes (at least 5) that we need to take + // from the preamble to have whole instructions that are 5 bytes or more + // in size total. The size of the stub required is cbPreamble + size of + // jmp (5) + if (preamble_bytes + 5 > stub_size) { + SIDESTEP_ASSERT(false); + return SIDESTEP_INSUFFICIENT_BUFFER; + } + + // First, copy the preamble that we will overwrite. + memcpy(reinterpret_cast<void*>(preamble_stub), + reinterpret_cast<void*>(target), preamble_bytes); + + // Now, make a jmp instruction to the rest of the target function (minus the + // preamble bytes we moved into the stub) and copy it into our preamble-stub. + // find address to jump to, relative to next address after jmp instruction +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4244) +#endif + int relative_offset_to_target_rest + = ((reinterpret_cast<unsigned char*>(target) + preamble_bytes) - + (preamble_stub + preamble_bytes + 5)); +#ifdef _MSC_VER +#pragma warning(pop) +#endif + // jmp (Jump near, relative, displacement relative to next instruction) + preamble_stub[preamble_bytes] = ASM_JMP32REL; + // copy the address + memcpy(reinterpret_cast<void*>(preamble_stub + preamble_bytes + 1), + reinterpret_cast<void*>(&relative_offset_to_target_rest), 4); + + // Inv: preamble_stub points to assembly code that will execute the + // original function by first executing the first cbPreamble bytes of the + // preamble, then jumping to the rest of the function. + + // Overwrite the first 5 bytes of the target function with a jump to our + // replacement function. + // (Jump near, relative, displacement relative to next instruction) + target[0] = ASM_JMP32REL; + + // Find offset from instruction after jmp, to the replacement function. +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4244) +#endif + int offset_to_replacement_function = + reinterpret_cast<unsigned char*>(replacement_function) - + reinterpret_cast<unsigned char*>(target) - 5; +#ifdef _MSC_VER +#pragma warning(pop) +#endif + // complete the jmp instruction + memcpy(reinterpret_cast<void*>(target + 1), + reinterpret_cast<void*>(&offset_to_replacement_function), 4); + // Set any remaining bytes that were moved to the preamble-stub to INT3 so + // as not to cause confusion (otherwise you might see some strange + // instructions if you look at the disassembly, or even invalid + // instructions). Also, by doing this, we will break into the debugger if + // some code calls into this portion of the code. If this happens, it + // means that this function cannot be patched using this patcher without + // further thought. 
+  if (preamble_bytes > 5) { +    memset(reinterpret_cast<void*>(target + 5), ASM_INT3, preamble_bytes - 5); +  } + +  // Inv: The memory pointed to by target_function now points to a relative + // jump instruction that jumps over to the preamble_stub. The preamble + // stub contains the first stub_size bytes of the original target + // function's preamble code, followed by a relative jump back to the next + // instruction after the first cbPreamble bytes. + + return SIDESTEP_SUCCESS; +} + +}; // namespace sidestep
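A minimal end-to-end sketch of how a caller (for instance a unit test) might drive the patcher above. The hooked API, names, and error handling are invented for illustration and are not taken from the perftools sources:

  #include <windows.h>
  #include "preamble_patcher.h"

  typedef DWORD (WINAPI *GetTickCountPtr)(void);
  static GetTickCountPtr g_original_get_tick_count = NULL;

  // Replacement with exactly the same signature and calling convention.
  static DWORD WINAPI Hook_GetTickCount(void) {
    // Call through the preamble stub to reach the original implementation.
    return g_original_get_tick_count();
  }

  void PatchAndUnpatchOnce() {
    sidestep::SideStepError err = sidestep::PreamblePatcher::Patch(
        TEXT("kernel32.dll"), "GetTickCount",
        Hook_GetTickCount, &g_original_get_tick_count);
    if (err != sidestep::SIDESTEP_SUCCESS)
      return;
    DWORD ticks = ::GetTickCount();  // now routed through Hook_GetTickCount
    (void)ticks;
    sidestep::PreamblePatcher::Unpatch(
        (void*)::GetProcAddress(::GetModuleHandle(TEXT("kernel32.dll")),
                                "GetTickCount"),
        (void*)Hook_GetTickCount,
        (void*)g_original_get_tick_count);
  }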