author     ruuda <ruuda@google.com>              2015-12-10 11:54:44 -0800
committer  Commit bot <commit-bot@chromium.org>  2015-12-10 19:55:59 +0000
commit     3314e2e367138427f3aaccd3da030413c045ab56 (patch)
tree       a34c20a95f7bac6e7445212c31c538d4e9eff1aa /base/trace_event
parent     4a36e67ea9694e6c01a3df6a1f6b967453c506c2 (diff)
[Tracing] Enable heap dumps with both type info and backtraces
This is a major change to |HeapDumpWriter| which enables it to dump the format
described in https://goo.gl/KY7zVE. This will allow the heap profiler to break
down the heap by type or backtrace, but also by combinations of those.

This changes the interface of |HeapDumpWriter| slightly, from a two-step
process (insert allocations, dump json) to a three-step process (insert
allocations, compute what to dump, convert to json). The main reason for doing
this is testability. Aggregation and json conversion are now completely
orthogonal: the aggregation algorithm can be tested without having to parse
the json first, and the json conversion is simpler to test too.

This is part of the heap profiler in chrome://tracing.

BUG=524631

Review URL: https://codereview.chromium.org/1494293005

Cr-Commit-Position: refs/heads/master@{#364444}
Diffstat (limited to 'base/trace_event')
-rw-r--r--  base/trace_event/heap_profiler_allocation_context.h           4
-rw-r--r--  base/trace_event/heap_profiler_heap_dump_writer.cc          325
-rw-r--r--  base/trace_event/heap_profiler_heap_dump_writer.h            82
-rw-r--r--  base/trace_event/heap_profiler_heap_dump_writer_unittest.cc 299
4 files changed, 446 insertions, 264 deletions
diff --git a/base/trace_event/heap_profiler_allocation_context.h b/base/trace_event/heap_profiler_allocation_context.h
index 6a46d03..a7f1e9d 100644
--- a/base/trace_event/heap_profiler_allocation_context.h
+++ b/base/trace_event/heap_profiler_allocation_context.h
@@ -82,12 +82,12 @@ bool BASE_EXPORT operator==(const AllocationContext& lhs,
namespace BASE_HASH_NAMESPACE {
template <>
-struct hash<base::trace_event::Backtrace> {
+struct BASE_EXPORT hash<base::trace_event::Backtrace> {
size_t operator()(const base::trace_event::Backtrace& backtrace) const;
};
template <>
-struct hash<base::trace_event::AllocationContext> {
+struct BASE_EXPORT hash<base::trace_event::AllocationContext> {
size_t operator()(const base::trace_event::AllocationContext& context) const;
};
diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc
index b0ed86f..e37c540 100644
--- a/base/trace_event/heap_profiler_heap_dump_writer.cc
+++ b/base/trace_event/heap_profiler_heap_dump_writer.cc
@@ -6,128 +6,291 @@
#include <algorithm>
#include <iterator>
+#include <tuple>
#include <utility>
#include <vector>
#include "base/format_macros.h"
+#include "base/logging.h"
#include "base/strings/stringprintf.h"
#include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
#include "base/trace_event/heap_profiler_type_name_deduplicator.h"
#include "base/trace_event/trace_event_argument.h"
+// Most of what the |HeapDumpWriter| does is aggregating detailed information
+// about the heap and deciding what to dump. The input to this process is a list
+// of |AllocationContext|s and size pairs.
+//
+// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
+// pairs where the properties of the contexts share a prefix. (Type name is
+// considered a list of length one here.) First all pairs are put into one
+// bucket that represents the entire heap. Then this bucket is recursively
+// broken down into smaller buckets. Each bucket keeps track of whether further
+// breakdown is possible.
+
namespace base {
namespace trace_event {
-
+namespace internal {
namespace {
-template <typename T>
-bool PairSizeGt(const std::pair<T, size_t>& lhs,
- const std::pair<T, size_t>& rhs) {
- return lhs.second > rhs.second;
+// Denotes a property of |AllocationContext| to break down by.
+enum class BreakDownMode { kByBacktrace, kByTypeName };
+
+// A group of bytes for which the context shares a prefix.
+struct Bucket {
+ Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {}
+
+ std::vector<std::pair<const AllocationContext*, size_t>> bytes_by_context;
+
+ // The sum of the sizes of |bytes_by_context|.
+ size_t size;
+
+  // The index of the first stack frame that has not yet been broken down by.
+  // For all elements in this bucket, stack frames 0 up to (but not including)
+  // the cursor must be equal.
+ size_t backtrace_cursor;
+
+ // When true, the type name for all elements in this bucket must be equal.
+ bool is_broken_down_by_type_name;
+};
+
+// Comparison operator to order buckets by their size.
+bool operator<(const Bucket& lhs, const Bucket& rhs) {
+ return lhs.size < rhs.size;
+}
+
+// Groups the allocations in the bucket by |breakBy|. The buckets in the
+// returned list will have |backtrace_cursor| advanced or
+// |is_broken_down_by_type_name| set depending on the property to group by.
+std::vector<Bucket> GetSubbuckets(const Bucket& bucket, BreakDownMode breakBy) {
+ base::hash_map<const char*, Bucket> breakdown;
+
+ if (breakBy == BreakDownMode::kByBacktrace) {
+ for (const auto& context_and_size : bucket.bytes_by_context) {
+ const Backtrace& backtrace = context_and_size.first->backtrace;
+ const char* const* begin = std::begin(backtrace.frames);
+ const char* const* end = std::end(backtrace.frames);
+ const char* const* cursor = begin + bucket.backtrace_cursor;
+
+ // The backtrace in the context is padded with null pointers, but these
+ // should not be considered for breakdown. Adjust end to point past the
+ // last non-null frame.
+ while (begin != end && *(end - 1) == nullptr)
+ end--;
+
+ DCHECK_LE(cursor, end);
+
+ if (cursor != end) {
+ Bucket& subbucket = breakdown[*cursor];
+ subbucket.size += context_and_size.second;
+ subbucket.bytes_by_context.push_back(context_and_size);
+ subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
+ subbucket.is_broken_down_by_type_name =
+ bucket.is_broken_down_by_type_name;
+ DCHECK_GT(subbucket.size, 0u);
+ }
+ }
+ } else if (breakBy == BreakDownMode::kByTypeName) {
+ if (!bucket.is_broken_down_by_type_name) {
+ for (const auto& context_and_size : bucket.bytes_by_context) {
+ const AllocationContext* context = context_and_size.first;
+ Bucket& subbucket = breakdown[context->type_name];
+ subbucket.size += context_and_size.second;
+ subbucket.bytes_by_context.push_back(context_and_size);
+ subbucket.backtrace_cursor = bucket.backtrace_cursor;
+ subbucket.is_broken_down_by_type_name = true;
+ DCHECK_GT(subbucket.size, 0u);
+ }
+ }
+ }
+
+ std::vector<Bucket> buckets;
+ buckets.reserve(breakdown.size());
+ for (auto key_bucket : breakdown)
+ buckets.push_back(key_bucket.second);
+
+ return buckets;
}
-// Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is
-// ordered from high |size_t| to low |size_t|.
-template <typename T>
-std::vector<std::pair<T, size_t>> SortBySizeDescending(
- const hash_map<T, size_t>& grouped) {
- std::vector<std::pair<T, size_t>> sorted;
- sorted.reserve(grouped.size());
- std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted));
- std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>);
- return sorted;
+// Breaks down the bucket by |breakBy|. Returns only buckets that contribute
+// significantly to the total size. The long tail is omitted.
+std::vector<Bucket> BreakDownBy(const Bucket& bucket, BreakDownMode breakBy) {
+ std::vector<Bucket> buckets = GetSubbuckets(bucket, breakBy);
+
+ // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
+ // so its front contains the largest bucket. Buckets should be iterated
+ // ordered by size, but sorting the vector is overkill because the long tail
+ // of small buckets will be discarded. By using a max-heap, the optimal case
+ // where all but the first bucket are discarded is O(n). The worst case where
+ // no bucket is discarded is doing a heap sort, which is O(n log n).
+ std::make_heap(buckets.begin(), buckets.end());
+
+ // Keep including buckets until adding one would increase the number of
+ // bytes accounted for by less than a percent. The large buckets end up in
+ // [it, end()), [begin(), it) is the part that contains the max-heap of small
+ // buckets. TODO(ruuda): tweak the heuristic.
+ size_t accounted_for = 0;
+ std::vector<Bucket>::iterator it;
+ for (it = buckets.end(); it != buckets.begin(); --it) {
+ // Compute contribution to number of bytes accounted for in percent, rounded
+ // down due to integer division. Buckets are iterated by descending size,
+ // so later buckets cannot have a larger contribution than this one.
+ accounted_for += buckets.front().size;
+ size_t contribution = buckets.front().size * 100 / accounted_for;
+ if (contribution == 0)
+ break;
+
+ // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
+ // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
+ std::pop_heap(buckets.begin(), it);
+ }
+
+ // At this point, |buckets| looks like this (numbers are bucket sizes):
+ //
+ // <-- max-heap of small buckets --->
+ // <-- large buckets by ascending size -->
+ // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
+ // ^ ^ ^
+ // | | |
+ // begin() it end()
+
+ // Discard the long tail of buckets that contribute less than a percent.
+ buckets.erase(buckets.begin(), it);
+
+ return buckets;
}
} // namespace
+bool operator<(Entry lhs, Entry rhs) {
+ // There is no need to compare |size|. If the backtrace and type name are
+ // equal then the sizes must be equal as well.
+ return std::tie(lhs.stack_frame_id, lhs.type_id) <
+ std::tie(rhs.stack_frame_id, rhs.type_id);
+}
+
HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
TypeNameDeduplicator* type_name_deduplicator)
- : traced_value_(new TracedValue()),
- stack_frame_deduplicator_(stack_frame_deduplicator),
+ : stack_frame_deduplicator_(stack_frame_deduplicator),
type_name_deduplicator_(type_name_deduplicator) {}
HeapDumpWriter::~HeapDumpWriter() {}
-void HeapDumpWriter::InsertAllocation(const AllocationContext& context,
- size_t size) {
- bytes_by_context_[context] += size;
+bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
+ // The contexts in the bucket are all different, but the [begin, cursor) range
+ // is equal for all contexts in the bucket, and the type names are the same if
+ // |is_broken_down_by_type_name| is set.
+ DCHECK(!bucket.bytes_by_context.empty());
+
+ const AllocationContext* context = bucket.bytes_by_context.front().first;
+
+ const char* const* backtrace_begin = std::begin(context->backtrace.frames);
+ const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
+ DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));
+
+ Entry entry;
+ entry.stack_frame_id =
+ stack_frame_deduplicator_->Insert(backtrace_begin, backtrace_end);
+
+  // Deduplicate the type name, or use ID -1 when not breaking down by type.
+ entry.type_id = bucket.is_broken_down_by_type_name
+ ? type_name_deduplicator_->Insert(context->type_name)
+ : -1;
+
+ entry.size = bucket.size;
+
+ auto position_and_inserted = entries_.insert(entry);
+ return position_and_inserted.second;
}
-scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() {
- // Group by backtrace and by type ID, and compute the total heap size while
- // iterating anyway.
- size_t total_size = 0;
- hash_map<Backtrace, size_t> bytes_by_backtrace;
- hash_map<const char*, size_t> bytes_by_type;
-
- for (auto context_size : bytes_by_context_) {
- total_size += context_size.second;
- bytes_by_backtrace[context_size.first.backtrace] += context_size.second;
- bytes_by_type[context_size.first.type_name] += context_size.second;
- }
+void HeapDumpWriter::BreakDown(const Bucket& bucket) {
+ auto by_backtrace = BreakDownBy(bucket, BreakDownMode::kByBacktrace);
+ auto by_type_name = BreakDownBy(bucket, BreakDownMode::kByTypeName);
- auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace);
- auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type);
+ // Insert entries for the buckets. If a bucket was not present before, it has
+ // not been broken down before, so recursively continue breaking down in that
+ // case. There might be multiple routes to the same entry (first break down
+ // by type name, then by backtrace, or first by backtrace and then by type),
+ // so a set is used to avoid dumping and breaking down entries more than once.
- traced_value_->BeginArray("entries");
+ for (const Bucket& subbucket : by_backtrace)
+ if (AddEntryForBucket(subbucket))
+ BreakDown(subbucket);
- // The global size, no column specified.
- {
- traced_value_->BeginDictionary();
- WriteSize(total_size);
- traced_value_->EndDictionary();
- }
+ for (const Bucket& subbucket : by_type_name)
+ if (AddEntryForBucket(subbucket))
+ BreakDown(subbucket);
+}
- // Entries with the size per backtrace.
- for (const auto& entry : sorted_bytes_by_backtrace) {
- traced_value_->BeginDictionary();
- // Insert a forward reference to the backtrace that will be written to the
- // |stackFrames| dictionary later on.
- int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames),
- std::end(entry.first.frames));
- WriteStackFrameIndex(idx);
- WriteSize(entry.second);
- traced_value_->EndDictionary();
+const std::set<Entry>& HeapDumpWriter::Summarize(
+ const hash_map<AllocationContext, size_t>& bytes_by_context) {
+  // Start with one bucket that represents the entire heap. Iterate by
+  // reference, because the buckets store pointers to the allocation contexts
+  // in |bytes_by_context|, so the map elements must not be copied.
+ Bucket root_bucket;
+ for (const auto& context_and_size : bytes_by_context) {
+ const AllocationContext* context = &context_and_size.first;
+ const size_t size = context_and_size.second;
+ root_bucket.bytes_by_context.push_back(std::make_pair(context, size));
+ root_bucket.size += size;
}
- // Entries with the size per type.
- for (const auto& entry : sorted_bytes_by_type) {
- traced_value_->BeginDictionary();
- // Insert a forward reference to the type name that will be written to the
- // trace when it is flushed.
- WriteTypeId(type_name_deduplicator_->Insert(entry.first));
- WriteSize(entry.second);
- traced_value_->EndDictionary();
- }
+ AddEntryForBucket(root_bucket);
- traced_value_->EndArray(); // "entries"
+ // Recursively break down the heap and fill |entries_| with entries to dump.
+ BreakDown(root_bucket);
- return traced_value_;
+ return entries_;
}
-void HeapDumpWriter::WriteStackFrameIndex(int index) {
- if (index == -1) {
- // An empty backtrace (which will have index -1) is represented by the empty
- // string, because there is no leaf frame to reference in |stackFrames|.
- traced_value_->SetString("bt", "");
- } else {
- // Format index of the leaf frame as a string, because |stackFrames| is a
- // dictionary, not an array.
- SStringPrintf(&buffer_, "%i", index);
- traced_value_->SetString("bt", buffer_);
+scoped_refptr<TracedValue> Serialize(const std::set<Entry>& entries) {
+ std::string buffer;
+ scoped_refptr<TracedValue> traced_value = new TracedValue;
+
+ traced_value->BeginArray("entries");
+
+ for (const Entry& entry : entries) {
+ traced_value->BeginDictionary();
+
+ // Format size as hexadecimal string into |buffer|.
+ SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
+ traced_value->SetString("size", buffer);
+
+ if (entry.stack_frame_id == -1) {
+ // An empty backtrace (which will have ID -1) is represented by the empty
+ // string, because there is no leaf frame to reference in |stackFrames|.
+ traced_value->SetString("bt", "");
+ } else {
+ // Format index of the leaf frame as a string, because |stackFrames| is a
+ // dictionary, not an array.
+ SStringPrintf(&buffer, "%i", entry.stack_frame_id);
+ traced_value->SetString("bt", buffer);
+ }
+
+ // Type ID -1 (cumulative size for all types) is represented by the absence
+ // of the "type" key in the dictionary.
+ if (entry.type_id != -1) {
+ // Format the type ID as a string.
+ SStringPrintf(&buffer, "%i", entry.type_id);
+ traced_value->SetString("type", buffer);
+ }
+
+ traced_value->EndDictionary();
}
-}
-void HeapDumpWriter::WriteTypeId(int type_id) {
- // Format the type ID as a string.
- SStringPrintf(&buffer_, "%i", type_id);
- traced_value_->SetString("type", buffer_);
+ traced_value->EndArray(); // "entries"
+ return traced_value;
}
-void HeapDumpWriter::WriteSize(size_t size) {
- // Format size as hexadecimal string into |buffer_|.
- SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size));
- traced_value_->SetString("size", buffer_);
+} // namespace internal
+
+scoped_refptr<TracedValue> ExportHeapDump(
+    const hash_map<AllocationContext, size_t>& bytes_by_context,
+ StackFrameDeduplicator* stack_frame_deduplicator,
+ TypeNameDeduplicator* type_name_deduplicator) {
+ internal::HeapDumpWriter writer(stack_frame_deduplicator,
+ type_name_deduplicator);
+  return Serialize(writer.Summarize(bytes_by_context));
}
} // namespace trace_event
diff --git a/base/trace_event/heap_profiler_heap_dump_writer.h b/base/trace_event/heap_profiler_heap_dump_writer.h
index c3a7767..71377cc 100644
--- a/base/trace_event/heap_profiler_heap_dump_writer.h
+++ b/base/trace_event/heap_profiler_heap_dump_writer.h
@@ -5,7 +5,7 @@
#ifndef BASE_TRACE_EVENT_HEAP_PROFILER_HEAP_DUMP_WRITER_H_
#define BASE_TRACE_EVENT_HEAP_PROFILER_HEAP_DUMP_WRITER_H_
-#include <string>
+#include <set>
#include "base/base_export.h"
#include "base/containers/hash_tables.h"
@@ -20,11 +20,43 @@ class StackFrameDeduplicator;
class TracedValue;
class TypeNameDeduplicator;
+// Aggregates |bytes_by_context|, recursively breaks down the heap, and returns
+// a traced value with an "entries" array that can be dumped in the trace log,
+// following the format described in https://goo.gl/KY7zVE. The number of
+// entries is kept reasonable because long tails are not included.
+BASE_EXPORT scoped_refptr<TracedValue> ExportHeapDump(
+ const hash_map<AllocationContext, size_t>& bytes_by_context,
+ StackFrameDeduplicator* stack_frame_deduplicator,
+ TypeNameDeduplicator* type_name_deduplicator);
+
+namespace internal {
+
+namespace {
+struct Bucket;
+}
+
+// An entry in the "entries" array as described in https://goo.gl/KY7zVE.
+struct BASE_EXPORT Entry {
+ size_t size;
+
+ // References a backtrace in the stack frame deduplicator. -1 means empty
+ // backtrace (the root of the tree).
+ int stack_frame_id;
+
+ // References a type name in the type name deduplicator. -1 indicates that
+ // the size is the cumulative size for all types (the root of the tree).
+ int type_id;
+};
+
+// Comparison operator to enable putting |Entry| in a |std::set|.
+BASE_EXPORT bool operator<(Entry lhs, Entry rhs);
+
+// Serializes entries to an "entries" array in a traced value.
+BASE_EXPORT scoped_refptr<TracedValue> Serialize(const std::set<Entry>& dump);
+
// Helper class to dump a snapshot of an |AllocationRegister| or other heap
// bookkeeping structure into a |TracedValue|. This class is intended to be
-// used as a one-shot local instance on the stack. To write heap dumps, call
-// |InsertAllocation| for every captured allocation, then call |WriteHeapDump|
-// to do the processing and generate a heap dump value for the trace log.
+// used as a one-shot local instance on the stack.
class BASE_EXPORT HeapDumpWriter {
public:
// The |StackFrameDeduplicator| and |TypeNameDeduplicator| are not owned. The
@@ -32,30 +64,27 @@ class BASE_EXPORT HeapDumpWriter {
// the dump writer.
HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
TypeNameDeduplicator* type_name_deduplicator);
- ~HeapDumpWriter();
- // Inserts information from which the heap dump will be generated. This method
- // does minimal processing, so it can be called when a lock is held.
- void InsertAllocation(const AllocationContext& context, size_t size);
+ ~HeapDumpWriter();
- // Aggregates allocations and writes an "entries" array to a traced value. See
- // https://goo.gl/jYN4Zn for a description of the format.
- scoped_refptr<TracedValue> WriteHeapDump();
+ // Aggregates allocations to compute the total size of the heap, then breaks
+ // down the heap recursively. This produces the values that should be dumped
+ // in the "entries" array. The number of entries is kept reasonable because
+ // long tails are not included. Use |Serialize| to convert to a traced value.
+ const std::set<Entry>& Summarize(
+ const hash_map<AllocationContext, size_t>& bytes_by_context);
private:
- // Writes a "bt" key that references a stack frame in the |stackFrames|
- // dictionary.
- void WriteStackFrameIndex(int index);
+ // Inserts an |Entry| for |Bucket| into |entries_|. Returns false if the
+ // entry was present before, true if it was not.
+ bool AddEntryForBucket(const Bucket& bucket);
- // Writes a "type" key with the stringified type ID.
- void WriteTypeId(int type_id);
+ // Recursively breaks down a bucket into smaller buckets and adds entries for
+ // the buckets worth dumping to |entries_|.
+ void BreakDown(const Bucket& bucket);
- // Writes a "size" key with value |size| as a hexidecimal string to the traced
- // value.
- void WriteSize(size_t size);
-
- // The value that this heap dumper writes to.
- const scoped_refptr<TracedValue> traced_value_;
+ // The collection of entries that is filled by |Summarize|.
+ std::set<Entry> entries_;
// Helper for generating the |stackFrames| dictionary. Not owned, must outlive
// this heap dump writer instance.
@@ -65,17 +94,10 @@ class BASE_EXPORT HeapDumpWriter {
// dump writer instance.
TypeNameDeduplicator* const type_name_deduplicator_;
- // A map of allocation context to the number of bytes allocated for that
- // context.
- hash_map<AllocationContext, size_t> bytes_by_context_;
-
- // Buffer for converting integers into strings, that is re-used throughout the
- // dump.
- std::string buffer_;
-
DISALLOW_COPY_AND_ASSIGN(HeapDumpWriter);
};
+} // namespace internal
} // namespace trace_event
} // namespace base
diff --git a/base/trace_event/heap_profiler_heap_dump_writer_unittest.cc b/base/trace_event/heap_profiler_heap_dump_writer_unittest.cc
index 841611a..ba632da 100644
--- a/base/trace_event/heap_profiler_heap_dump_writer_unittest.cc
+++ b/base/trace_event/heap_profiler_heap_dump_writer_unittest.cc
@@ -2,12 +2,11 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include <stdlib.h>
-
-#include <iterator>
+#include <set>
#include <string>
#include "base/json/json_reader.h"
+#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "base/memory/scoped_ptr.h"
#include "base/trace_event/heap_profiler_allocation_context.h"
@@ -35,27 +34,116 @@ const char kString[] = "string";
namespace base {
namespace trace_event {
+namespace internal {
-// Asserts that an integer stored in the json as a string has the correct value.
-void AssertIntEq(const DictionaryValue* entry,
- const char* key,
- int expected_value) {
- std::string str;
- ASSERT_TRUE(entry->GetString(key, &str));
- ASSERT_EQ(expected_value, atoi(str.c_str()));
-}
-
-scoped_ptr<Value> DumpAndReadBack(HeapDumpWriter* writer) {
- scoped_refptr<TracedValue> traced_value = writer->WriteHeapDump();
+scoped_ptr<const Value> WriteAndReadBack(const std::set<Entry>& entries) {
+ scoped_refptr<TracedValue> traced_value = Serialize(entries);
std::string json;
traced_value->AppendAsTraceFormat(&json);
return JSONReader::Read(json);
}
+scoped_ptr<const DictionaryValue> WriteAndReadBackEntry(Entry entry) {
+ std::set<Entry> input_entries;
+ input_entries.insert(entry);
+
+ scoped_ptr<const Value> json_dict = WriteAndReadBack(input_entries);
+
+ // Note: Ideally these should use |ASSERT_TRUE| instead of |EXPECT_TRUE|, but
+ // |ASSERT_TRUE| can only be used in void functions.
+ const DictionaryValue* dictionary;
+ EXPECT_TRUE(json_dict->GetAsDictionary(&dictionary));
+
+ const ListValue* json_entries;
+ EXPECT_TRUE(dictionary->GetList("entries", &json_entries));
+
+ const DictionaryValue* json_entry;
+ EXPECT_TRUE(json_entries->GetDictionary(0, &json_entry));
+
+ return json_entry->CreateDeepCopy();
+}
+
+// Given a desired stack frame ID and type ID, looks up the entry in the set and
+// asserts that it is present and has the expected size.
+void AssertSizeEq(const std::set<Entry>& entries,
+ int stack_frame_id,
+ int type_id,
+ size_t expected_size) {
+ // The comparison operator for |Entry| does not take size into account, so by
+ // setting only stack frame ID and type ID, the real entry can be found.
+ Entry entry;
+ entry.stack_frame_id = stack_frame_id;
+ entry.type_id = type_id;
+ auto it = entries.find(entry);
+
+ ASSERT_NE(entries.end(), it) << "No entry found for sf = " << stack_frame_id
+ << ", type = " << type_id << ".";
+ ASSERT_EQ(expected_size, it->size) << "Wrong size for sf = " << stack_frame_id
+ << ", type = " << type_id << ".";
+}
+
+TEST(HeapDumpWriterTest, BacktraceIndex) {
+ Entry entry;
+ entry.stack_frame_id = -1; // -1 means empty backtrace.
+ entry.type_id = 0;
+ entry.size = 1;
+
+ scoped_ptr<const DictionaryValue> json_entry = WriteAndReadBackEntry(entry);
+
+ // For an empty backtrace, the "bt" key cannot reference a stack frame.
+ // Instead it should be set to the empty string.
+ std::string backtrace_index;
+ ASSERT_TRUE(json_entry->GetString("bt", &backtrace_index));
+ ASSERT_EQ("", backtrace_index);
+
+ // Also verify that a non-negative backtrace index is dumped properly.
+ entry.stack_frame_id = 2;
+ json_entry = WriteAndReadBackEntry(entry);
+ ASSERT_TRUE(json_entry->GetString("bt", &backtrace_index));
+ ASSERT_EQ("2", backtrace_index);
+}
+
+TEST(HeapDumpWriterTest, TypeId) {
+ Entry entry;
+ entry.type_id = -1; // -1 means sum over all types.
+ entry.stack_frame_id = 0;
+ entry.size = 1;
+
+ scoped_ptr<const DictionaryValue> json_entry = WriteAndReadBackEntry(entry);
+
+ // Entries for the cumulative size of all types should not have the "type"
+ // key set.
+ ASSERT_FALSE(json_entry->HasKey("type"));
+
+ // Also verify that a non-negative type ID is dumped properly.
+ entry.type_id = 2;
+ json_entry = WriteAndReadBackEntry(entry);
+ std::string type_id;
+ ASSERT_TRUE(json_entry->GetString("type", &type_id));
+ ASSERT_EQ("2", type_id);
+}
+
+TEST(HeapDumpWriterTest, SizeIsHexadecimalString) {
+ // Take a number between 2^63 and 2^64 (or between 2^31 and 2^32 if |size_t|
+ // is not 64 bits).
+ const size_t large_value =
+ sizeof(size_t) == 8 ? 0xffffffffffffffc5 : 0xffffff9d;
+ const char* large_value_str =
+ sizeof(size_t) == 8 ? "ffffffffffffffc5" : "ffffff9d";
+ Entry entry;
+ entry.type_id = 0;
+ entry.stack_frame_id = 0;
+ entry.size = large_value;
+
+ scoped_ptr<const DictionaryValue> json_entry = WriteAndReadBackEntry(entry);
+
+ std::string size;
+ ASSERT_TRUE(json_entry->GetString("size", &size));
+ ASSERT_EQ(large_value_str, size);
+}
+
TEST(HeapDumpWriterTest, BacktraceTypeNameTable) {
- auto sf_deduplicator = make_scoped_refptr(new StackFrameDeduplicator);
- auto tn_deduplicator = make_scoped_refptr(new TypeNameDeduplicator);
- HeapDumpWriter writer(sf_deduplicator.get(), tn_deduplicator.get());
+ hash_map<AllocationContext, size_t> bytes_by_context;
AllocationContext ctx = AllocationContext::Empty();
ctx.backtrace.frames[0] = kBrowserMain;
@@ -63,27 +151,23 @@ TEST(HeapDumpWriterTest, BacktraceTypeNameTable) {
ctx.type_name = kInt;
// 10 bytes with context { type: int, bt: [BrowserMain, CreateWidget] }.
- writer.InsertAllocation(ctx, 2);
- writer.InsertAllocation(ctx, 3);
- writer.InsertAllocation(ctx, 5);
+ bytes_by_context[ctx] = 10;
ctx.type_name = kBool;
// 18 bytes with context { type: bool, bt: [BrowserMain, CreateWidget] }.
- writer.InsertAllocation(ctx, 7);
- writer.InsertAllocation(ctx, 11);
+ bytes_by_context[ctx] = 18;
ctx.backtrace.frames[0] = kRendererMain;
ctx.backtrace.frames[1] = kInitialize;
// 30 bytes with context { type: bool, bt: [RendererMain, Initialize] }.
- writer.InsertAllocation(ctx, 13);
- writer.InsertAllocation(ctx, 17);
+ bytes_by_context[ctx] = 30;
ctx.type_name = kString;
// 19 bytes with context { type: string, bt: [RendererMain, Initialize] }.
- writer.InsertAllocation(ctx, 19);
+ bytes_by_context[ctx] = 19;
// At this point the heap looks like this:
//
@@ -95,143 +179,56 @@ TEST(HeapDumpWriterTest, BacktraceTypeNameTable) {
// +--------+--------------------+-----------------+-----+
// | Sum | 28 | 49 | 77 |
- scoped_ptr<Value> heap_dump = DumpAndReadBack(&writer);
-
- // The json heap dump should at least include this:
- // {
- // "entries": [
- // { "size": "4d" }, // 77 = 0x4d.
- // { "size": "31", "bt": "id_of(Init <- RenMain)" }, // 49 = 0x31.
- // { "size": "1c", "bt": "id_of(CrWidget <- BrMain)" }, // 28 = 0x1c.
- // { "size": "30", "type": "id_of(bool)" }, // 48 = 0x30.
- // { "size": "13", "type": "id_of(string)" }, // 19 = 0x13.
- // { "size": "a", "type": "id_of(int)" } // 10 = 0xa.
- // ]
- // }
+ auto sf_deduplicator = make_scoped_refptr(new StackFrameDeduplicator);
+ auto tn_deduplicator = make_scoped_refptr(new TypeNameDeduplicator);
+ HeapDumpWriter writer(sf_deduplicator.get(), tn_deduplicator.get());
+ const std::set<Entry>& dump = writer.Summarize(bytes_by_context);
// Get the indices of the backtraces and types by adding them again to the
// deduplicator. Because they were added before, the same number is returned.
StackFrame bt0[] = {kRendererMain, kInitialize};
StackFrame bt1[] = {kBrowserMain, kCreateWidget};
- int bt_renderer_main_initialize =
- sf_deduplicator->Insert(std::begin(bt0), std::end(bt0));
- int bt_browser_main_create_widget =
- sf_deduplicator->Insert(std::begin(bt1), std::end(bt1));
+ int bt_renderer_main = sf_deduplicator->Insert(bt0, bt0 + 1);
+ int bt_browser_main = sf_deduplicator->Insert(bt1, bt1 + 1);
+ int bt_renderer_main_initialize = sf_deduplicator->Insert(bt0, bt0 + 2);
+ int bt_browser_main_create_widget = sf_deduplicator->Insert(bt1, bt1 + 2);
int type_id_int = tn_deduplicator->Insert(kInt);
int type_id_bool = tn_deduplicator->Insert(kBool);
int type_id_string = tn_deduplicator->Insert(kString);
- const DictionaryValue* dictionary;
- ASSERT_TRUE(heap_dump->GetAsDictionary(&dictionary));
-
- const ListValue* entries;
- ASSERT_TRUE(dictionary->GetList("entries", &entries));
-
- // Keep counters to verify that every entry is present exactly once.
- int x4d_seen = 0;
- int x31_seen = 0;
- int x1c_seen = 0;
- int x30_seen = 0;
- int x13_seen = 0;
- int xa_seen = 0;
-
- for (const Value* entry_as_value : *entries) {
- const DictionaryValue* entry;
- ASSERT_TRUE(entry_as_value->GetAsDictionary(&entry));
-
- // The "size" field, not to be confused with |entry->size()| which is the
- // number of elements in the dictionary.
- std::string size;
- ASSERT_TRUE(entry->GetString("size", &size));
-
- if (size == "4d") {
- // Total size, should not include any other field.
- ASSERT_EQ(1u, entry->size()); // Dictionary must have one element.
- x4d_seen++;
- } else if (size == "31") {
- // Entry for backtrace "Initialize <- RendererMain".
- ASSERT_EQ(2u, entry->size()); // Dictionary must have two elements.
- AssertIntEq(entry, "bt", bt_renderer_main_initialize);
- x31_seen++;
- } else if (size == "1c") {
- // Entry for backtrace "CreateWidget <- BrowserMain".
- ASSERT_EQ(2u, entry->size()); // Dictionary must have two elements.
- AssertIntEq(entry, "bt", bt_browser_main_create_widget);
- x1c_seen++;
- } else if (size == "30") {
- // Entry for type bool.
- ASSERT_EQ(2u, entry->size()); // Dictionary must have two elements.
- AssertIntEq(entry, "type", type_id_bool);
- x30_seen++;
- } else if (size == "13") {
- // Entry for type string.
- ASSERT_EQ(2u, entry->size()); // Dictionary must have two elements.
- AssertIntEq(entry, "type", type_id_string);
- x13_seen++;
- } else if (size == "a") {
- // Entry for type int.
- ASSERT_EQ(2u, entry->size()); // Dictionary must have two elements.
- AssertIntEq(entry, "type", type_id_int);
- xa_seen++;
- }
- }
-
- ASSERT_EQ(1, x4d_seen);
- ASSERT_EQ(1, x31_seen);
- ASSERT_EQ(1, x1c_seen);
- ASSERT_EQ(1, x30_seen);
- ASSERT_EQ(1, x13_seen);
- ASSERT_EQ(1, xa_seen);
+ // Full heap should have size 77.
+ AssertSizeEq(dump, -1, -1, 77);
+
+ // 49 bytes were allocated in RendererMain and children. Also check the type
+ // breakdown.
+ AssertSizeEq(dump, bt_renderer_main, -1, 49);
+ AssertSizeEq(dump, bt_renderer_main, type_id_bool, 30);
+ AssertSizeEq(dump, bt_renderer_main, type_id_string, 19);
+
+ // 28 bytes were allocated in BrowserMain and children. Also check the type
+ // breakdown.
+ AssertSizeEq(dump, bt_browser_main, -1, 28);
+ AssertSizeEq(dump, bt_browser_main, type_id_int, 10);
+ AssertSizeEq(dump, bt_browser_main, type_id_bool, 18);
+
+ // In this test all bytes are allocated in leaf nodes, so check again one
+ // level deeper.
+ AssertSizeEq(dump, bt_renderer_main_initialize, -1, 49);
+ AssertSizeEq(dump, bt_renderer_main_initialize, type_id_bool, 30);
+ AssertSizeEq(dump, bt_renderer_main_initialize, type_id_string, 19);
+ AssertSizeEq(dump, bt_browser_main_create_widget, -1, 28);
+ AssertSizeEq(dump, bt_browser_main_create_widget, type_id_int, 10);
+ AssertSizeEq(dump, bt_browser_main_create_widget, type_id_bool, 18);
+
+  // The type breakdown of the entire heap should have been dumped as well.
+ AssertSizeEq(dump, -1, type_id_int, 10);
+ AssertSizeEq(dump, -1, type_id_bool, 48);
+ AssertSizeEq(dump, -1, type_id_string, 19);
}
-// Test that the entry for the empty backtrace ends up in the json with the
-// "bt" field set to the empty string. Also test that an entry for "unknown
-// type" (nullptr type name) does not dereference the null pointer when writing
-// the type names, and that the type ID is 0.
-TEST(HeapDumpWriterTest, EmptyBacktraceIndexIsEmptyString) {
- auto sf_deduplicator = make_scoped_refptr(new StackFrameDeduplicator);
- auto tn_deduplicator = make_scoped_refptr(new TypeNameDeduplicator);
- HeapDumpWriter writer(sf_deduplicator.get(), tn_deduplicator.get());
-
- // A context with empty backtrace and unknown type (nullptr).
- AllocationContext ctx = AllocationContext::Empty();
-
- writer.InsertAllocation(ctx, 1);
-
- scoped_ptr<Value> heap_dump = DumpAndReadBack(&writer);
-
- const DictionaryValue* dictionary;
- ASSERT_TRUE(heap_dump->GetAsDictionary(&dictionary));
-
- const ListValue* entries;
- ASSERT_TRUE(dictionary->GetList("entries", &entries));
-
- int empty_backtrace_seen = 0;
- int unknown_type_seen = 0;
-
- for (const Value* entry_as_value : *entries) {
- const DictionaryValue* entry;
- ASSERT_TRUE(entry_as_value->GetAsDictionary(&entry));
-
- // Note that |entry->size()| is the number of elements in the dictionary.
- if (entry->HasKey("bt") && entry->size() == 2) {
- std::string backtrace;
- ASSERT_TRUE(entry->GetString("bt", &backtrace));
- ASSERT_EQ("", backtrace);
- empty_backtrace_seen++;
- }
-
- if (entry->HasKey("type") && entry->size() == 2) {
- std::string type_id;
- ASSERT_TRUE(entry->GetString("type", &type_id));
- ASSERT_EQ("0", type_id);
- unknown_type_seen++;
- }
- }
-
- ASSERT_EQ(1, unknown_type_seen);
- ASSERT_EQ(1, empty_backtrace_seen);
-}
+// TODO(ruuda): Verify that cumulative sizes are computed correctly.
+// TODO(ruuda): Verify that insignificant values are not dumped.
+} // namespace internal
} // namespace trace_event
} // namespace base