4 files changed, 446 insertions, 264 deletions
diff --git a/base/trace_event/heap_profiler_allocation_context.h b/base/trace_event/heap_profiler_allocation_context.h
index 6a46d03..a7f1e9d 100644
--- a/base/trace_event/heap_profiler_allocation_context.h
+++ b/base/trace_event/heap_profiler_allocation_context.h
@@ -82,12 +82,12 @@ bool BASE_EXPORT operator==(const AllocationContext& lhs,
 namespace BASE_HASH_NAMESPACE {
 
 template <>
-struct hash<base::trace_event::Backtrace> {
+struct BASE_EXPORT hash<base::trace_event::Backtrace> {
   size_t operator()(const base::trace_event::Backtrace& backtrace) const;
 };
 
 template <>
-struct hash<base::trace_event::AllocationContext> {
+struct BASE_EXPORT hash<base::trace_event::AllocationContext> {
   size_t operator()(const base::trace_event::AllocationContext& context) const;
 };
 
diff --git a/base/trace_event/heap_profiler_heap_dump_writer.cc b/base/trace_event/heap_profiler_heap_dump_writer.cc
index b0ed86f..e37c540 100644
--- a/base/trace_event/heap_profiler_heap_dump_writer.cc
+++ b/base/trace_event/heap_profiler_heap_dump_writer.cc
@@ -6,128 +6,291 @@
 
 #include <algorithm>
 #include <iterator>
+#include <tuple>
 #include <utility>
 #include <vector>
 
 #include "base/format_macros.h"
+#include "base/logging.h"
 #include "base/strings/stringprintf.h"
 #include "base/trace_event/heap_profiler_stack_frame_deduplicator.h"
 #include "base/trace_event/heap_profiler_type_name_deduplicator.h"
 #include "base/trace_event/trace_event_argument.h"
 
+// Most of what the |HeapDumpWriter| does is aggregating detailed information
+// about the heap and deciding what to dump. The Input to this process is a list
+// of |AllocationContext|s and size pairs.
+//
+// The pairs are grouped into |Bucket|s. A bucket is a group of (context, size)
+// pairs where the properties of the contexts share a prefix. (Type name is
+// considered a list of length one here.) First all pairs are put into one
+// bucket that represents the entire heap. Then this bucket is recursively
+// broken down into smaller buckets. Each bucket keeps track of whether further
+// breakdown is possible.
+
 namespace base {
 namespace trace_event {
-
+namespace internal {
 namespace {
 
-template <typename T>
-bool PairSizeGt(const std::pair<T, size_t>& lhs,
-                const std::pair<T, size_t>& rhs) {
-  return lhs.second > rhs.second;
+// Denotes a property of |AllocationContext| to break down by.
+enum class BreakDownMode { kByBacktrace, kByTypeName };
+
+// A group of bytes for which the context shares a prefix.
+struct Bucket {
+  Bucket() : size(0), backtrace_cursor(0), is_broken_down_by_type_name(false) {}
+
+  std::vector<std::pair<const AllocationContext*, size_t>> bytes_by_context;
+
+  // The sum of the sizes of |bytes_by_context|.
+  size_t size;
+
+  // The index of the stack frame that has not yet been broken down by. For all
+  // elements in this bucket, the stack frames 0 up to (but not including) the
+  // cursor, must be equal.
+  size_t backtrace_cursor;
+
+  // When true, the type name for all elements in this bucket must be equal.
+  bool is_broken_down_by_type_name;
+};
+
+// Comparison operator to order buckets by their size.
+bool operator<(const Bucket& lhs, const Bucket& rhs) {
+  return lhs.size < rhs.size;
+}
+
+// Groups the allocations in the bucket by |breakBy|. The buckets in the
+// returned list will have |backtrace_cursor| advanced or
+// |is_broken_down_by_type_name| set depending on the property to group by.
+std::vector<Bucket> GetSubbuckets(const Bucket& bucket, BreakDownMode breakBy) {
+  base::hash_map<const char*, Bucket> breakdown;
+
+  if (breakBy == BreakDownMode::kByBacktrace) {
+    for (const auto& context_and_size : bucket.bytes_by_context) {
+      const Backtrace& backtrace = context_and_size.first->backtrace;
+      const char* const* begin = std::begin(backtrace.frames);
+      const char* const* end = std::end(backtrace.frames);
+      const char* const* cursor = begin + bucket.backtrace_cursor;
+
+      // The backtrace in the context is padded with null pointers, but these
+      // should not be considered for breakdown. Adjust end to point past the
+      // last non-null frame.
+      while (begin != end && *(end - 1) == nullptr)
+        end--;
+
+      DCHECK_LE(cursor, end);
+
+      if (cursor != end) {
+        Bucket& subbucket = breakdown[*cursor];
+        subbucket.size += context_and_size.second;
+        subbucket.bytes_by_context.push_back(context_and_size);
+        subbucket.backtrace_cursor = bucket.backtrace_cursor + 1;
+        subbucket.is_broken_down_by_type_name =
+            bucket.is_broken_down_by_type_name;
+        DCHECK_GT(subbucket.size, 0u);
+      }
+    }
+  } else if (breakBy == BreakDownMode::kByTypeName) {
+    if (!bucket.is_broken_down_by_type_name) {
+      for (const auto& context_and_size : bucket.bytes_by_context) {
+        const AllocationContext* context = context_and_size.first;
+        Bucket& subbucket = breakdown[context->type_name];
+        subbucket.size += context_and_size.second;
+        subbucket.bytes_by_context.push_back(context_and_size);
+        subbucket.backtrace_cursor = bucket.backtrace_cursor;
+        subbucket.is_broken_down_by_type_name = true;
+        DCHECK_GT(subbucket.size, 0u);
+      }
+    }
+  }
+
+  std::vector<Bucket> buckets;
+  buckets.reserve(breakdown.size());
+  for (auto key_bucket : breakdown)
+    buckets.push_back(key_bucket.second);
+
+  return buckets;
 }
 
-// Converts a |hash_map<T, size_t>| into a vector of (T, size_t) pairs that is
-// ordered from high |size_t| to low |size_t|.
-template <typename T>
-std::vector<std::pair<T, size_t>> SortBySizeDescending(
-    const hash_map<T, size_t>& grouped) {
-  std::vector<std::pair<T, size_t>> sorted;
-  sorted.reserve(grouped.size());
-  std::copy(grouped.begin(), grouped.end(), std::back_inserter(sorted));
-  std::sort(sorted.begin(), sorted.end(), PairSizeGt<T>);
-  return sorted;
+// Breaks down the bucket by |breakBy|. Returns only buckets that contribute
+// significantly to the total size. The long tail is omitted.
+std::vector<Bucket> BreakDownBy(const Bucket& bucket, BreakDownMode breakBy) {
+  std::vector<Bucket> buckets = GetSubbuckets(bucket, breakBy);
+
+  // Ensure that |buckets| is a max-heap (the data structure, not memory heap),
+  // so its front contains the largest bucket. Buckets should be iterated
+  // ordered by size, but sorting the vector is overkill because the long tail
+  // of small buckets will be discarded. By using a max-heap, the optimal case
+  // where all but the first bucket are discarded is O(n). The worst case where
+  // no bucket is discarded is doing a heap sort, which is O(n log n).
+  std::make_heap(buckets.begin(), buckets.end());
+
+  // Keep including buckets until adding one would increase the number of
+  // bytes accounted for by less than a percent. The large buckets end up in
+  // [it, end()), [begin(), it) is the part that contains the max-heap of small
+  // buckets. TODO(ruuda): tweak the heuristic.
+  size_t accounted_for = 0;
+  std::vector<Bucket>::iterator it;
+  for (it = buckets.end(); it != buckets.begin(); --it) {
+    // Compute contribution to number of bytes accounted for in percent, rounded
+    // down due to integer division. Buckets are iterated by descending size,
+    // so later buckets cannot have a larger contribution than this one.
+    accounted_for += buckets.front().size;
+    size_t contribution = buckets.front().size * 100 / accounted_for;
+    if (contribution == 0)
+      break;
+
+    // Put the largest bucket in [begin, it) at |it - 1| and max-heapify
+    // [begin, it - 1). This puts the next largest bucket at |buckets.front()|.
+    std::pop_heap(buckets.begin(), it);
+  }
+
+  // At this point, |buckets| looks like this (numbers are bucket sizes):
+  //
+  // <-- max-heap of small buckets --->
+  //                                  <-- large buckets by ascending size -->
+  // [ 19 | 11 | 13 | 7 | 2 | 5 | ... | 83 | 89 | 97 ]
+  //   ^                                ^              ^
+  //   |                                |              |
+  //   begin()                          it             end()
+
+  // Discard the long tail of buckets that contribute less than a percent.
+  buckets.erase(buckets.begin(), it);
+
+  return buckets;
 }
 
 }  // namespace
 
+bool operator<(Entry lhs, Entry rhs) {
+  // There is no need to compare |size|. If the backtrace and type name are
+  // equal then the sizes must be equal as well.
+  return std::tie(lhs.stack_frame_id, lhs.type_id) <
+         std::tie(rhs.stack_frame_id, rhs.type_id);
+}
+
 HeapDumpWriter::HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
                                TypeNameDeduplicator* type_name_deduplicator)
-    : traced_value_(new TracedValue()),
-      stack_frame_deduplicator_(stack_frame_deduplicator),
+    : stack_frame_deduplicator_(stack_frame_deduplicator),
       type_name_deduplicator_(type_name_deduplicator) {}
 
 HeapDumpWriter::~HeapDumpWriter() {}
 
-void HeapDumpWriter::InsertAllocation(const AllocationContext& context,
-                                      size_t size) {
-  bytes_by_context_[context] += size;
+bool HeapDumpWriter::AddEntryForBucket(const Bucket& bucket) {
+  // The contexts in the bucket are all different, but the [begin, cursor) range
+  // is equal for all contexts in the bucket, and the type names are the same if
+  // |is_broken_down_by_type_name| is set.
+  DCHECK(!bucket.bytes_by_context.empty());
+
+  const AllocationContext* context = bucket.bytes_by_context.front().first;
+
+  const char* const* backtrace_begin = std::begin(context->backtrace.frames);
+  const char* const* backtrace_end = backtrace_begin + bucket.backtrace_cursor;
+  DCHECK_LE(bucket.backtrace_cursor, arraysize(context->backtrace.frames));
+
+  Entry entry;
+  entry.stack_frame_id =
+      stack_frame_deduplicator_->Insert(backtrace_begin, backtrace_end);
+
+  // Deduplicate the type name, or use ID -1 if type name is not set.
+  entry.type_id = bucket.is_broken_down_by_type_name
+                      ? type_name_deduplicator_->Insert(context->type_name)
+                      : -1;
+
+  entry.size = bucket.size;
+
+  auto position_and_inserted = entries_.insert(entry);
+  return position_and_inserted.second;
 }
 
-scoped_refptr<TracedValue> HeapDumpWriter::WriteHeapDump() {
-  // Group by backtrace and by type ID, and compute the total heap size while
-  // iterating anyway.
-  size_t total_size = 0;
-  hash_map<Backtrace, size_t> bytes_by_backtrace;
-  hash_map<const char*, size_t> bytes_by_type;
-
-  for (auto context_size : bytes_by_context_) {
-    total_size += context_size.second;
-    bytes_by_backtrace[context_size.first.backtrace] += context_size.second;
-    bytes_by_type[context_size.first.type_name] += context_size.second;
-  }
+void HeapDumpWriter::BreakDown(const Bucket& bucket) {
+  auto by_backtrace = BreakDownBy(bucket, BreakDownMode::kByBacktrace);
+  auto by_type_name = BreakDownBy(bucket, BreakDownMode::kByTypeName);
 
-  auto sorted_bytes_by_backtrace = SortBySizeDescending(bytes_by_backtrace);
-  auto sorted_bytes_by_type = SortBySizeDescending(bytes_by_type);
+  // Insert entries for the buckets. If a bucket was not present before, it has
+  // not been broken down before, so recursively continue breaking down in that
+  // case. There might be multiple routes to the same entry (first break down
+  // by type name, then by backtrace, or first by backtrace and then by type),
+  // so a set is used to avoid dumping and breaking down entries more than once.
 
-  traced_value_->BeginArray("entries");
+  for (const Bucket& subbucket : by_backtrace)
+    if (AddEntryForBucket(subbucket))
+      BreakDown(subbucket);
 
-  // The global size, no column specified.
-  {
-    traced_value_->BeginDictionary();
-    WriteSize(total_size);
-    traced_value_->EndDictionary();
-  }
+  for (const Bucket& subbucket : by_type_name)
+    if (AddEntryForBucket(subbucket))
+      BreakDown(subbucket);
+}
 
-  // Entries with the size per backtrace.
-  for (const auto& entry : sorted_bytes_by_backtrace) {
-    traced_value_->BeginDictionary();
-    // Insert a forward reference to the backtrace that will be written to the
-    // |stackFrames| dictionary later on.
-    int idx = stack_frame_deduplicator_->Insert(std::begin(entry.first.frames),
-                                                std::end(entry.first.frames));
-    WriteStackFrameIndex(idx);
-    WriteSize(entry.second);
-    traced_value_->EndDictionary();
+const std::set<Entry>& HeapDumpWriter::Summarize(
+    const hash_map<AllocationContext, size_t>& bytes_by_context) {
+  // Start with one bucket that represents the entire heap. Iterate by
+  // reference, because the allocation contexts are going to point to allocation
+  // contexts stored in |bytes_by_context|.
+  Bucket root_bucket;
+  for (const auto& context_and_size : bytes_by_context) {
+    const AllocationContext* context = &context_and_size.first;
+    const size_t size = context_and_size.second;
+    root_bucket.bytes_by_context.push_back(std::make_pair(context, size));
+    root_bucket.size += size;
   }
 
-  // Entries with the size per type.
-  for (const auto& entry : sorted_bytes_by_type) {
-    traced_value_->BeginDictionary();
-    // Insert a forward reference to the type name that will be written to the
-    // trace when it is flushed.
-    WriteTypeId(type_name_deduplicator_->Insert(entry.first));
-    WriteSize(entry.second);
-    traced_value_->EndDictionary();
-  }
+  AddEntryForBucket(root_bucket);
 
-  traced_value_->EndArray();  // "entries"
+  // Recursively break down the heap and fill |entries_| with entries to dump.
+  BreakDown(root_bucket);
 
-  return traced_value_;
+  return entries_;
 }
 
-void HeapDumpWriter::WriteStackFrameIndex(int index) {
-  if (index == -1) {
-    // An empty backtrace (which will have index -1) is represented by the empty
-    // string, because there is no leaf frame to reference in |stackFrames|.
-    traced_value_->SetString("bt", "");
-  } else {
-    // Format index of the leaf frame as a string, because |stackFrames| is a
-    // dictionary, not an array.
-    SStringPrintf(&buffer_, "%i", index);
-    traced_value_->SetString("bt", buffer_);
+scoped_refptr<TracedValue> Serialize(const std::set<Entry>& entries) {
+  std::string buffer;
+  scoped_refptr<TracedValue> traced_value = new TracedValue;
+
+  traced_value->BeginArray("entries");
+
+  for (const Entry& entry : entries) {
+    traced_value->BeginDictionary();
+
+    // Format size as hexadecimal string into |buffer|.
+    SStringPrintf(&buffer, "%" PRIx64, static_cast<uint64_t>(entry.size));
+    traced_value->SetString("size", buffer);
+
+    if (entry.stack_frame_id == -1) {
+      // An empty backtrace (which will have ID -1) is represented by the empty
+      // string, because there is no leaf frame to reference in |stackFrames|.
+      traced_value->SetString("bt", "");
+    } else {
+      // Format index of the leaf frame as a string, because |stackFrames| is a
+      // dictionary, not an array.
+      SStringPrintf(&buffer, "%i", entry.stack_frame_id);
+      traced_value->SetString("bt", buffer);
+    }
+
+    // Type ID -1 (cumulative size for all types) is represented by the absence
+    // of the "type" key in the dictionary.
+    if (entry.type_id != -1) {
+      // Format the type ID as a string.
+      SStringPrintf(&buffer, "%i", entry.type_id);
+      traced_value->SetString("type", buffer);
+    }
+
+    traced_value->EndDictionary();
   }
-}
 
-void HeapDumpWriter::WriteTypeId(int type_id) {
-  // Format the type ID as a string.
-  SStringPrintf(&buffer_, "%i", type_id);
-  traced_value_->SetString("type", buffer_);
+  traced_value->EndArray();  // "entries"
+  return traced_value;
 }
 
-void HeapDumpWriter::WriteSize(size_t size) {
-  // Format size as hexadecimal string into |buffer_|.
-  SStringPrintf(&buffer_, "%" PRIx64, static_cast<uint64_t>(size));
-  traced_value_->SetString("size", buffer_);
+}  // namespace internal
+
+scoped_refptr<TracedValue> ExportHeapDump(
+    const hash_map<AllocationContext, size_t>& bytes_by_size,
+    StackFrameDeduplicator* stack_frame_deduplicator,
+    TypeNameDeduplicator* type_name_deduplicator) {
+  internal::HeapDumpWriter writer(stack_frame_deduplicator,
+                                  type_name_deduplicator);
+  return Serialize(writer.Summarize(bytes_by_size));
 }
 
 }  // namespace trace_event
diff --git a/base/trace_event/heap_profiler_heap_dump_writer.h b/base/trace_event/heap_profiler_heap_dump_writer.h
index c3a7767..71377cc 100644
--- a/base/trace_event/heap_profiler_heap_dump_writer.h
+++ b/base/trace_event/heap_profiler_heap_dump_writer.h
@@ -5,7 +5,7 @@
 #ifndef BASE_TRACE_EVENT_HEAP_PROFILER_HEAP_DUMP_WRITER_H_
 #define BASE_TRACE_EVENT_HEAP_PROFILER_HEAP_DUMP_WRITER_H_
 
-#include <string>
+#include <set>
 
 #include "base/base_export.h"
 #include "base/containers/hash_tables.h"
@@ -20,11 +20,43 @@ class StackFrameDeduplicator;
 class TracedValue;
 class TypeNameDeduplicator;
 
+// Aggregates |bytes_by_context|, recursively breaks down the heap, and returns
+// a traced value with an "entries" array that can be dumped in the trace log,
+// following the format described in https://goo.gl/KY7zVE. The number of
+// entries is kept reasonable because long tails are not included.
+BASE_EXPORT scoped_refptr<TracedValue> ExportHeapDump(
+    const hash_map<AllocationContext, size_t>& bytes_by_context,
+    StackFrameDeduplicator* stack_frame_deduplicator,
+    TypeNameDeduplicator* type_name_deduplicator);
+
+namespace internal {
+
+namespace {
+struct Bucket;
+}
+
+// An entry in the "entries" array as described in https://goo.gl/KY7zVE.
+struct BASE_EXPORT Entry {
+  size_t size;
+
+  // References a backtrace in the stack frame deduplicator. -1 means empty
+  // backtrace (the root of the tree).
+  int stack_frame_id;
+
+  // References a type name in the type name deduplicator. -1 indicates that
+  // the size is the cumulative size for all types (the root of the tree).
+  int type_id;
+};
+
+// Comparison operator to enable putting |Entry| in a |std::set|.
+BASE_EXPORT bool operator<(Entry lhs, Entry rhs);
+
+// Serializes entries to an "entries" array in a traced value.
+BASE_EXPORT scoped_refptr<TracedValue> Serialize(const std::set<Entry>& dump);
+
 // Helper class to dump a snapshot of an |AllocationRegister| or other heap
 // bookkeeping structure into a |TracedValue|. This class is intended to be
-// used as a one-shot local instance on the stack. To write heap dumps, call
-// |InsertAllocation| for every captured allocation, then call |WriteHeapDump|
-// to do the processing and generate a heap dump value for the trace log.
+// used as a one-shot local instance on the stack.
 class BASE_EXPORT HeapDumpWriter {
  public:
   // The |StackFrameDeduplicator| and |TypeNameDeduplicator| are not owned. The
@@ -32,30 +64,27 @@ class BASE_EXPORT HeapDumpWriter {
   // the dump writer.
   HeapDumpWriter(StackFrameDeduplicator* stack_frame_deduplicator,
                  TypeNameDeduplicator* type_name_deduplicator);
-  ~HeapDumpWriter();
 
-  // Inserts information from which the heap dump will be generated. This method
-  // does minimal processing, so it can be called when a lock is held.
-  void InsertAllocation(const AllocationContext& context, size_t size);
+  ~HeapDumpWriter();
 
-  // Aggregates allocations and writes an "entries" array to a traced value. See
-  // https://goo.gl/jYN4Zn for a description of the format.
-  scoped_refptr<TracedValue> WriteHeapDump();
+  // Aggregates allocations to compute the total size of the heap, then breaks
+  // down the heap recursively. This produces the values that should be dumped
+  // in the "entries" array. The number of entries is kept reasonable because
+  // long tails are not included. Use |Serialize| to convert to a traced value.
+  const std::set<Entry>& Summarize(
+      const hash_map<AllocationContext, size_t>& bytes_by_context);
 
  private:
-  // Writes a "bt" key that references a stack frame in the |stackFrames|
-  // dictionary.
-  void WriteStackFrameIndex(int index);
+  // Inserts an |Entry| for |Bucket| into |entries_|. Returns false if the
+  // entry was present before, true if it was not.
+  bool AddEntryForBucket(const Bucket& bucket);
 
-  // Writes a "type" key with the stringified type ID.
-  void WriteTypeId(int type_id);
+  // Recursively breaks down a bucket into smaller buckets and adds entries for
+  // the buckets worth dumping to |entries_|.
+  void BreakDown(const Bucket& bucket);
 
-  // Writes a "size" key with value |size| as a hexidecimal string to the traced
-  // value.
-  void WriteSize(size_t size);
-
-  // The value that this heap dumper writes to.
-  const scoped_refptr<TracedValue> traced_value_;
+  // The collection of entries that is filled by |Summarize|.
+  std::set<Entry> entries_;
 
   // Helper for generating the |stackFrames| dictionary. Not owned, must outlive
   // this heap dump writer instance.
@@ -65,17 +94,10 @@ class BASE_EXPORT HeapDumpWriter {
   // dump writer instance.
   TypeNameDeduplicator* const type_name_deduplicator_;
 
-  // A map of allocation context to the number of bytes allocated for that
-  // context.
-  hash_map<AllocationContext, size_t> bytes_by_context_;
-
-  // Buffer for converting integers into strings, that is re-used throughout the
-  // dump.
-  std::string buffer_;
-
   DISALLOW_COPY_AND_ASSIGN(HeapDumpWriter);
 };
 
+}  // namespace internal
 }  // namespace trace_event
 }  // namespace base
 
diff --git a/base/trace_event/heap_profiler_heap_dump_writer_unittest.cc b/base/trace_event/heap_profiler_heap_dump_writer_unittest.cc
index 841611a..ba632da 100644
--- a/base/trace_event/heap_profiler_heap_dump_writer_unittest.cc
+++ b/base/trace_event/heap_profiler_heap_dump_writer_unittest.cc
@@ -2,12 +2,11 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-#include <stdlib.h>
-
-#include <iterator>
+#include <set>
 #include <string>
 
 #include "base/json/json_reader.h"
+#include "base/macros.h"
 #include "base/memory/ref_counted.h"
 #include "base/memory/scoped_ptr.h"
 #include "base/trace_event/heap_profiler_allocation_context.h"
@@ -35,27 +34,116 @@ const char kString[] = "string";
 
 namespace base {
 namespace trace_event {
+namespace internal {
 
-// Asserts that an integer stored in the json as a string has the correct value.
-void AssertIntEq(const DictionaryValue* entry,
-                 const char* key,
-                 int expected_value) {
-  std::string str;
-  ASSERT_TRUE(entry->GetString(key, &str));
-  ASSERT_EQ(expected_value, atoi(str.c_str()));
-}
-
-scoped_ptr<Value> DumpAndReadBack(HeapDumpWriter* writer) {
-  scoped_refptr<TracedValue> traced_value = writer->WriteHeapDump();
+scoped_ptr<const Value> WriteAndReadBack(const std::set<Entry>& entries) {
+  scoped_refptr<TracedValue> traced_value = Serialize(entries);
   std::string json;
   traced_value->AppendAsTraceFormat(&json);
   return JSONReader::Read(json);
 }
 
+scoped_ptr<const DictionaryValue> WriteAndReadBackEntry(Entry entry) {
+  std::set<Entry> input_entries;
+  input_entries.insert(entry);
+
+  scoped_ptr<const Value> json_dict = WriteAndReadBack(input_entries);
+
+  // Note: Ideally these should use |ASSERT_TRUE| instead of |EXPECT_TRUE|, but
+  // |ASSERT_TRUE| can only be used in void functions.
+  const DictionaryValue* dictionary;
+  EXPECT_TRUE(json_dict->GetAsDictionary(&dictionary));
+
+  const ListValue* json_entries;
+  EXPECT_TRUE(dictionary->GetList("entries", &json_entries));
+
+  const DictionaryValue* json_entry;
+  EXPECT_TRUE(json_entries->GetDictionary(0, &json_entry));
+
+  return json_entry->CreateDeepCopy();
+}
+
+// Given a desired stack frame ID and type ID, looks up the entry in the set and
+// asserts that it is present and has the expected size.
+void AssertSizeEq(const std::set<Entry>& entries,
+                  int stack_frame_id,
+                  int type_id,
+                  size_t expected_size) {
+  // The comparison operator for |Entry| does not take size into account, so by
+  // setting only stack frame ID and type ID, the real entry can be found.
+  Entry entry;
+  entry.stack_frame_id = stack_frame_id;
+  entry.type_id = type_id;
+  auto it = entries.find(entry);
+
+  ASSERT_NE(entries.end(), it) << "No entry found for sf = " << stack_frame_id
+                               << ", type = " << type_id << ".";
+  ASSERT_EQ(expected_size, it->size) << "Wrong size for sf = " << stack_frame_id
+                                     << ", type = " << type_id << ".";
+}
+
+TEST(HeapDumpWriterTest, BacktraceIndex) {
+  Entry entry;
+  entry.stack_frame_id = -1;  // -1 means empty backtrace.
+  entry.type_id = 0;
+  entry.size = 1;
+
+  scoped_ptr<const DictionaryValue> json_entry = WriteAndReadBackEntry(entry);
+
+  // For an empty backtrace, the "bt" key cannot reference a stack frame.
+  // Instead it should be set to the empty string.
+  std::string backtrace_index;
+  ASSERT_TRUE(json_entry->GetString("bt", &backtrace_index));
+  ASSERT_EQ("", backtrace_index);
+
+  // Also verify that a non-negative backtrace index is dumped properly.
+  entry.stack_frame_id = 2;
+  json_entry = WriteAndReadBackEntry(entry);
+  ASSERT_TRUE(json_entry->GetString("bt", &backtrace_index));
+  ASSERT_EQ("2", backtrace_index);
+}
+
+TEST(HeapDumpWriterTest, TypeId) {
+  Entry entry;
+  entry.type_id = -1;  // -1 means sum over all types.
+  entry.stack_frame_id = 0;
+  entry.size = 1;
+
+  scoped_ptr<const DictionaryValue> json_entry = WriteAndReadBackEntry(entry);
+
+  // Entries for the cumulative size of all types should not have the "type"
+  // key set.
+  ASSERT_FALSE(json_entry->HasKey("type"));
+
+  // Also verify that a non-negative type ID is dumped properly.
+  entry.type_id = 2;
+  json_entry = WriteAndReadBackEntry(entry);
+  std::string type_id;
+  ASSERT_TRUE(json_entry->GetString("type", &type_id));
+  ASSERT_EQ("2", type_id);
+}
+
+TEST(HeapDumpWriterTest, SizeIsHexadecimalString) {
+  // Take a number between 2^63 and 2^64 (or between 2^31 and 2^32 if |size_t|
+  // is not 64 bits).
+  const size_t large_value =
+      sizeof(size_t) == 8 ? 0xffffffffffffffc5 : 0xffffff9d;
+  const char* large_value_str =
+      sizeof(size_t) == 8 ? "ffffffffffffffc5" : "ffffff9d";
+  Entry entry;
+  entry.type_id = 0;
+  entry.stack_frame_id = 0;
+  entry.size = large_value;
+
+  scoped_ptr<const DictionaryValue> json_entry = WriteAndReadBackEntry(entry);
+
+  std::string size;
+  ASSERT_TRUE(json_entry->GetString("size", &size));
+  ASSERT_EQ(large_value_str, size);
+}
+
 TEST(HeapDumpWriterTest, BacktraceTypeNameTable) {
-  auto sf_deduplicator = make_scoped_refptr(new StackFrameDeduplicator);
-  auto tn_deduplicator = make_scoped_refptr(new TypeNameDeduplicator);
-  HeapDumpWriter writer(sf_deduplicator.get(), tn_deduplicator.get());
+  hash_map<AllocationContext, size_t> bytes_by_context;
 
   AllocationContext ctx = AllocationContext::Empty();
   ctx.backtrace.frames[0] = kBrowserMain;
@@ -63,27 +151,23 @@ TEST(HeapDumpWriterTest, BacktraceTypeNameTable) {
   ctx.type_name = kInt;
 
   // 10 bytes with context { type: int, bt: [BrowserMain, CreateWidget] }.
-  writer.InsertAllocation(ctx, 2);
-  writer.InsertAllocation(ctx, 3);
-  writer.InsertAllocation(ctx, 5);
+  bytes_by_context[ctx] = 10;
 
   ctx.type_name = kBool;
 
   // 18 bytes with context { type: bool, bt: [BrowserMain, CreateWidget] }.
-  writer.InsertAllocation(ctx, 7);
-  writer.InsertAllocation(ctx, 11);
+  bytes_by_context[ctx] = 18;
 
   ctx.backtrace.frames[0] = kRendererMain;
   ctx.backtrace.frames[1] = kInitialize;
 
   // 30 bytes with context { type: bool, bt: [RendererMain, Initialize] }.
-  writer.InsertAllocation(ctx, 13);
-  writer.InsertAllocation(ctx, 17);
+  bytes_by_context[ctx] = 30;
 
   ctx.type_name = kString;
 
   // 19 bytes with context { type: string, bt: [RendererMain, Initialize] }.
-  writer.InsertAllocation(ctx, 19);
+  bytes_by_context[ctx] = 19;
 
   // At this point the heap looks like this:
   //
@@ -95,143 +179,56 @@ TEST(HeapDumpWriterTest, BacktraceTypeNameTable) {
   // +--------+--------------------+-----------------+-----+
   // | Sum    |                 28 |              49 |  77 |
 
-  scoped_ptr<Value> heap_dump = DumpAndReadBack(&writer);
-
-  // The json heap dump should at least include this:
-  // {
-  //   "entries": [
-  //     { "size": "4d" },                                    // 77 = 0x4d.
-  //     { "size": "31", "bt": "id_of(Init <- RenMain)" },    // 49 = 0x31.
-  //     { "size": "1c", "bt": "id_of(CrWidget <- BrMain)" }, // 28 = 0x1c.
-  //     { "size": "30", "type": "id_of(bool)" },             // 48 = 0x30.
-  //     { "size": "13", "type": "id_of(string)" },           // 19 = 0x13.
-  //     { "size": "a", "type": "id_of(int)" }                // 10 = 0xa.
-  //   ]
-  // }
+  auto sf_deduplicator = make_scoped_refptr(new StackFrameDeduplicator);
+  auto tn_deduplicator = make_scoped_refptr(new TypeNameDeduplicator);
+  HeapDumpWriter writer(sf_deduplicator.get(), tn_deduplicator.get());
+  const std::set<Entry>& dump = writer.Summarize(bytes_by_context);
 
   // Get the indices of the backtraces and types by adding them again to the
   // deduplicator. Because they were added before, the same number is returned.
   StackFrame bt0[] = {kRendererMain, kInitialize};
   StackFrame bt1[] = {kBrowserMain, kCreateWidget};
-  int bt_renderer_main_initialize =
-      sf_deduplicator->Insert(std::begin(bt0), std::end(bt0));
-  int bt_browser_main_create_widget =
-      sf_deduplicator->Insert(std::begin(bt1), std::end(bt1));
+  int bt_renderer_main = sf_deduplicator->Insert(bt0, bt0 + 1);
+  int bt_browser_main = sf_deduplicator->Insert(bt1, bt1 + 1);
+  int bt_renderer_main_initialize = sf_deduplicator->Insert(bt0, bt0 + 2);
+  int bt_browser_main_create_widget = sf_deduplicator->Insert(bt1, bt1 + 2);
   int type_id_int = tn_deduplicator->Insert(kInt);
   int type_id_bool = tn_deduplicator->Insert(kBool);
   int type_id_string = tn_deduplicator->Insert(kString);
 
-  const DictionaryValue* dictionary;
-  ASSERT_TRUE(heap_dump->GetAsDictionary(&dictionary));
-
-  const ListValue* entries;
-  ASSERT_TRUE(dictionary->GetList("entries", &entries));
-
-  // Keep counters to verify that every entry is present exactly once.
-  int x4d_seen = 0;
-  int x31_seen = 0;
-  int x1c_seen = 0;
-  int x30_seen = 0;
-  int x13_seen = 0;
-  int xa_seen = 0;
-
-  for (const Value* entry_as_value : *entries) {
-    const DictionaryValue* entry;
-    ASSERT_TRUE(entry_as_value->GetAsDictionary(&entry));
-
-    // The "size" field, not to be confused with |entry->size()| which is the
-    // number of elements in the dictionary.
-    std::string size;
-    ASSERT_TRUE(entry->GetString("size", &size));
-
-    if (size == "4d") {
-      // Total size, should not include any other field.
-      ASSERT_EQ(1u, entry->size());  // Dictionary must have one element.
-      x4d_seen++;
-    } else if (size == "31") {
-      // Entry for backtrace "Initialize <- RendererMain".
-      ASSERT_EQ(2u, entry->size());  // Dictionary must have two elements.
-      AssertIntEq(entry, "bt", bt_renderer_main_initialize);
-      x31_seen++;
-    } else if (size == "1c") {
-      // Entry for backtrace "CreateWidget <- BrowserMain".
-      ASSERT_EQ(2u, entry->size());  // Dictionary must have two elements.
-      AssertIntEq(entry, "bt", bt_browser_main_create_widget);
-      x1c_seen++;
-    } else if (size == "30") {
-      // Entry for type bool.
-      ASSERT_EQ(2u, entry->size());  // Dictionary must have two elements.
-      AssertIntEq(entry, "type", type_id_bool);
-      x30_seen++;
-    } else if (size == "13") {
-      // Entry for type string.
-      ASSERT_EQ(2u, entry->size());  // Dictionary must have two elements.
-      AssertIntEq(entry, "type", type_id_string);
-      x13_seen++;
-    } else if (size == "a") {
-      // Entry for type int.
-      ASSERT_EQ(2u, entry->size());  // Dictionary must have two elements.
-      AssertIntEq(entry, "type", type_id_int);
-      xa_seen++;
-    }
-  }
-
-  ASSERT_EQ(1, x4d_seen);
-  ASSERT_EQ(1, x31_seen);
-  ASSERT_EQ(1, x1c_seen);
-  ASSERT_EQ(1, x30_seen);
-  ASSERT_EQ(1, x13_seen);
-  ASSERT_EQ(1, xa_seen);
+  // Full heap should have size 77.
+  AssertSizeEq(dump, -1, -1, 77);
+
+  // 49 bytes were allocated in RendererMain and children. Also check the type
+  // breakdown.
+  AssertSizeEq(dump, bt_renderer_main, -1, 49);
+  AssertSizeEq(dump, bt_renderer_main, type_id_bool, 30);
+  AssertSizeEq(dump, bt_renderer_main, type_id_string, 19);
+
+  // 28 bytes were allocated in BrowserMain and children. Also check the type
+  // breakdown.
+  AssertSizeEq(dump, bt_browser_main, -1, 28);
+  AssertSizeEq(dump, bt_browser_main, type_id_int, 10);
+  AssertSizeEq(dump, bt_browser_main, type_id_bool, 18);
+
+  // In this test all bytes are allocated in leaf nodes, so check again one
+  // level deeper.
+  AssertSizeEq(dump, bt_renderer_main_initialize, -1, 49);
+  AssertSizeEq(dump, bt_renderer_main_initialize, type_id_bool, 30);
+  AssertSizeEq(dump, bt_renderer_main_initialize, type_id_string, 19);
+  AssertSizeEq(dump, bt_browser_main_create_widget, -1, 28);
+  AssertSizeEq(dump, bt_browser_main_create_widget, type_id_int, 10);
+  AssertSizeEq(dump, bt_browser_main_create_widget, type_id_bool, 18);
+
+  // The type breakdown of the entrie heap should have been dumped as well.
+  AssertSizeEq(dump, -1, type_id_int, 10);
+  AssertSizeEq(dump, -1, type_id_bool, 48);
+  AssertSizeEq(dump, -1, type_id_string, 19);
 }
 
-// Test that the entry for the empty backtrace ends up in the json with the
-// "bt" field set to the empty string. Also test that an entry for "unknown
-// type" (nullptr type name) does not dereference the null pointer when writing
-// the type names, and that the type ID is 0.
-TEST(HeapDumpWriterTest, EmptyBacktraceIndexIsEmptyString) {
-  auto sf_deduplicator = make_scoped_refptr(new StackFrameDeduplicator);
-  auto tn_deduplicator = make_scoped_refptr(new TypeNameDeduplicator);
-  HeapDumpWriter writer(sf_deduplicator.get(), tn_deduplicator.get());
-
-  // A context with empty backtrace and unknown type (nullptr).
-  AllocationContext ctx = AllocationContext::Empty();
-
-  writer.InsertAllocation(ctx, 1);
-
-  scoped_ptr<Value> heap_dump = DumpAndReadBack(&writer);
-
-  const DictionaryValue* dictionary;
-  ASSERT_TRUE(heap_dump->GetAsDictionary(&dictionary));
-
-  const ListValue* entries;
-  ASSERT_TRUE(dictionary->GetList("entries", &entries));
-
-  int empty_backtrace_seen = 0;
-  int unknown_type_seen = 0;
-
-  for (const Value* entry_as_value : *entries) {
-    const DictionaryValue* entry;
-    ASSERT_TRUE(entry_as_value->GetAsDictionary(&entry));
-
-    // Note that |entry->size()| is the number of elements in the dictionary.
-    if (entry->HasKey("bt") && entry->size() == 2) {
-      std::string backtrace;
-      ASSERT_TRUE(entry->GetString("bt", &backtrace));
-      ASSERT_EQ("", backtrace);
-      empty_backtrace_seen++;
-    }
-
-    if (entry->HasKey("type") && entry->size() == 2) {
-      std::string type_id;
-      ASSERT_TRUE(entry->GetString("type", &type_id));
-      ASSERT_EQ("0", type_id);
-      unknown_type_seen++;
-    }
-  }
-
-  ASSERT_EQ(1, unknown_type_seen);
-  ASSERT_EQ(1, empty_backtrace_seen);
-}
+// TODO(ruuda): Verify that cumulative sizes are computed correctly.
+// TODO(ruuda): Verify that insignificant values are not dumped.
 
+}  // namespace internal
 }  // namespace trace_event
 }  // namespace base