Diffstat (limited to 'runtime/gc')
 -rw-r--r--  runtime/gc/heap-inl.h |  9
 -rw-r--r--  runtime/gc/heap.cc    | 95
 -rw-r--r--  runtime/gc/heap.h     | 17
 3 files changed, 114 insertions, 7 deletions
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 2d54330..2ec9c86 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -174,6 +174,13 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas
   } else {
     DCHECK(!Dbg::IsAllocTrackingEnabled());
   }
+  if (kInstrumented) {
+    if (gc_stress_mode_) {
+      CheckGcStressMode(self, &obj);
+    }
+  } else {
+    DCHECK(!gc_stress_mode_);
+  }
   // IsConcurrentGc() isn't known at compile time so we can optimize by not checking it for
   // the BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be
   // optimized out. And for the other allocators, AllocatorMayHaveConcurrentGC is a constant since
@@ -391,7 +398,7 @@ inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) co
   // Zygote resulting in it being prematurely freed.
   // We can only do this for primitive objects since large objects will not be within the card table
   // range. This also means that we rely on SetClass not dirtying the object's card.
-  return byte_count >= large_object_threshold_ && c->IsPrimitiveArray();
+  return byte_count >= large_object_threshold_ && (c->IsPrimitiveArray() || c->IsStringClass());
 }
 
 template <bool kGrow>
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 9a70d69..57557e2 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -21,6 +21,7 @@
 
 #include <limits>
 #include <memory>
+#include <unwind.h>  // For GC verification.
 #include <vector>
 
 #include "art_field-inl.h"
@@ -125,7 +126,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max
            bool ignore_max_footprint, bool use_tlab,
            bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
            bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
-           bool verify_post_gc_rosalloc, bool use_homogeneous_space_compaction_for_oom,
+           bool verify_post_gc_rosalloc, bool gc_stress_mode,
+           bool use_homogeneous_space_compaction_for_oom,
            uint64_t min_interval_homogeneous_space_compaction_by_oom)
     : non_moving_space_(nullptr),
       rosalloc_space_(nullptr),
@@ -170,6 +172,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max
       verify_pre_gc_rosalloc_(verify_pre_gc_rosalloc),
       verify_pre_sweeping_rosalloc_(verify_pre_sweeping_rosalloc),
       verify_post_gc_rosalloc_(verify_post_gc_rosalloc),
+      gc_stress_mode_(gc_stress_mode),
       /* For GC a lot mode, we limit the allocations stacks to be kGcAlotInterval allocations. This
        * causes a lot of GC since we do a GC for alloc whenever the stack is full. When heap
        * verification is enabled, we limit the size of allocation stacks to speed up their
@@ -209,13 +212,17 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max
       blocking_gc_count_last_window_(0U),
       gc_count_rate_histogram_("gc count rate histogram", 1U, kGcCountRateMaxBucketCount),
       blocking_gc_count_rate_histogram_("blocking gc count rate histogram", 1U,
-                                        kGcCountRateMaxBucketCount) {
+                                        kGcCountRateMaxBucketCount),
+      backtrace_lock_(nullptr),
+      seen_backtrace_count_(0u),
+      unique_backtrace_count_(0u) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
+  Runtime* const runtime = Runtime::Current();
   // If we aren't the zygote, switch to the default non zygote allocator. This may update the
   // entrypoints.
-  const bool is_zygote = Runtime::Current()->IsZygote();
+  const bool is_zygote = runtime->IsZygote();
   if (!is_zygote) {
     // Background compaction is currently not supported for command line runs.
     if (background_collector_type_ != foreground_collector_type_) {
@@ -507,8 +514,12 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max
       LOG(FATAL) << "There's a gap between the image space and the non-moving space";
     }
   }
-  if (running_on_valgrind_) {
-    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
+  instrumentation::Instrumentation* const instrumentation = runtime->GetInstrumentation();
+  if (gc_stress_mode_) {
+    backtrace_lock_ = new Mutex("GC complete lock");
+  }
+  if (running_on_valgrind_ || gc_stress_mode_) {
+    instrumentation->InstrumentQuickAllocEntryPoints();
   }
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() exiting";
@@ -1072,6 +1083,12 @@ Heap::~Heap() {
   STLDeleteElements(&discontinuous_spaces_);
   delete gc_complete_lock_;
   delete pending_task_lock_;
+  delete backtrace_lock_;
+  if (unique_backtrace_count_.LoadRelaxed() != 0 || seen_backtrace_count_.LoadRelaxed() != 0) {
+    LOG(INFO) << "gc stress unique=" << unique_backtrace_count_.LoadRelaxed()
+              << " total=" << seen_backtrace_count_.LoadRelaxed() +
+                  unique_backtrace_count_.LoadRelaxed();
+  }
   VLOG(heap) << "Finished ~Heap()";
 }
 
@@ -3675,5 +3692,73 @@ void Heap::ClearMarkedObjects() {
   }
 }
 
+// Based on debug malloc logic from libc/bionic/debug_stacktrace.cpp.
+class StackCrawlState {
+ public:
+  StackCrawlState(uintptr_t* frames, size_t max_depth, size_t skip_count)
+      : frames_(frames), frame_count_(0), max_depth_(max_depth), skip_count_(skip_count) {
+  }
+  size_t GetFrameCount() const {
+    return frame_count_;
+  }
+  static _Unwind_Reason_Code Callback(_Unwind_Context* context, void* arg) {
+    auto* const state = reinterpret_cast<StackCrawlState*>(arg);
+    const uintptr_t ip = _Unwind_GetIP(context);
+    // The first stack frame is get_backtrace itself. Skip it.
+    if (ip != 0 && state->skip_count_ > 0) {
+      --state->skip_count_;
+      return _URC_NO_REASON;
+    }
+    // ip may be off for ARM but it shouldn't matter since we only use it for hashing.
+    state->frames_[state->frame_count_] = ip;
+    state->frame_count_++;
+    return state->frame_count_ >= state->max_depth_ ? _URC_END_OF_STACK : _URC_NO_REASON;
+  }
+
+ private:
+  uintptr_t* const frames_;
+  size_t frame_count_;
+  const size_t max_depth_;
+  size_t skip_count_;
+};
+
+static size_t get_backtrace(uintptr_t* frames, size_t max_depth) {
+  StackCrawlState state(frames, max_depth, 0u);
+  _Unwind_Backtrace(&StackCrawlState::Callback, &state);
+  return state.GetFrameCount();
+}
+
+void Heap::CheckGcStressMode(Thread* self, mirror::Object** obj) {
+  auto* const runtime = Runtime::Current();
+  if (gc_stress_mode_ && runtime->GetClassLinker()->IsInitialized() &&
+      !runtime->IsActiveTransaction() && mirror::Class::HasJavaLangClass()) {
+    // Check if we should GC.
+    bool new_backtrace = false;
+    {
+      static constexpr size_t kMaxFrames = 16u;
+      uintptr_t backtrace[kMaxFrames];
+      const size_t frames = get_backtrace(backtrace, kMaxFrames);
+      uint64_t hash = 0;
+      for (size_t i = 0; i < frames; ++i) {
+        hash = hash * 2654435761 + backtrace[i];
+        hash += (hash >> 13) ^ (hash << 6);
+      }
+      MutexLock mu(self, *backtrace_lock_);
+      new_backtrace = seen_backtraces_.find(hash) == seen_backtraces_.end();
+      if (new_backtrace) {
+        seen_backtraces_.insert(hash);
+      }
+    }
+    if (new_backtrace) {
+      StackHandleScope<1> hs(self);
+      auto h = hs.NewHandleWrapper(obj);
+      CollectGarbage(false);
+      unique_backtrace_count_.FetchAndAddSequentiallyConsistent(1);
+    } else {
+      seen_backtrace_count_.FetchAndAddSequentiallyConsistent(1);
+    }
+  }
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index dac747b..81476a4 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -19,6 +19,7 @@
 
 #include <iosfwd>
 #include <string>
+#include <unordered_set>
 #include <vector>
 
 #include "allocator_type.h"
@@ -180,7 +181,8 @@ class Heap {
                 bool ignore_max_footprint, bool use_tlab,
                 bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, bool verify_post_gc_heap,
                 bool verify_pre_gc_rosalloc, bool verify_pre_sweeping_rosalloc,
-                bool verify_post_gc_rosalloc, bool use_homogeneous_space_compaction,
+                bool verify_post_gc_rosalloc, bool gc_stress_mode,
+                bool use_homogeneous_space_compaction,
                 uint64_t min_interval_homogeneous_space_compaction_by_oom);
 
   ~Heap();
 
@@ -887,6 +889,10 @@ class Heap {
 
   void UpdateGcCountRateHistograms() EXCLUSIVE_LOCKS_REQUIRED(gc_complete_lock_);
 
+  // GC stress mode attempts to do one GC per unique backtrace.
+  void CheckGcStressMode(Thread* self, mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // All-known continuous spaces, where objects lie within fixed bounds.
   std::vector<space::ContinuousSpace*> continuous_spaces_;
 
@@ -1042,6 +1048,7 @@ class Heap {
   bool verify_pre_gc_rosalloc_;
   bool verify_pre_sweeping_rosalloc_;
   bool verify_post_gc_rosalloc_;
+  const bool gc_stress_mode_;
 
   // RAII that temporarily disables the rosalloc verification during
   // the zygote fork.
@@ -1192,6 +1199,14 @@ class Heap {
   // The histogram of the number of blocking GC invocations per window duration.
   Histogram<uint64_t> blocking_gc_count_rate_histogram_ GUARDED_BY(gc_complete_lock_);
 
+  // GC stress related data structures.
+  Mutex* backtrace_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+  // Debugging variables, seen backtraces vs unique backtraces.
+  Atomic<uint64_t> seen_backtrace_count_;
+  Atomic<uint64_t> unique_backtrace_count_;
+  // Stack trace hashes that we already saw,
+  std::unordered_set<uint64_t> seen_backtraces_ GUARDED_BY(backtrace_lock_);
+
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
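
Note on the approach: CheckGcStressMode hashes the current native backtrace and forces a collection only the first time a given hash is seen, so each unique allocation call stack triggers roughly one stress GC. The standalone sketch below is not part of the patch; it illustrates the same hash-and-dedupe idea using std::mutex and std::unordered_set instead of the runtime's Mutex, Atomic, and handle types, and all names in it (IsNewBacktrace, BacktraceState, g_seen_hashes) are illustrative.

// Minimal sketch, assuming an unwinder that provides <unwind.h> (GCC/Clang).
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <unordered_set>
#include <unwind.h>

namespace {

struct BacktraceState {
  uintptr_t* frames;
  size_t count;
  size_t max_depth;
};

// Collects one return address per frame until max_depth frames are recorded.
_Unwind_Reason_Code UnwindCallback(_Unwind_Context* context, void* arg) {
  auto* state = static_cast<BacktraceState*>(arg);
  const uintptr_t ip = _Unwind_GetIP(context);
  if (ip != 0) {
    state->frames[state->count++] = ip;
  }
  return state->count >= state->max_depth ? _URC_END_OF_STACK : _URC_NO_REASON;
}

std::mutex g_seen_lock;
std::unordered_set<uint64_t> g_seen_hashes;  // Guarded by g_seen_lock.

}  // namespace

// Returns true only the first time the current backtrace hash is observed.
bool IsNewBacktrace() {
  constexpr size_t kMaxFrames = 16;
  uintptr_t frames[kMaxFrames];
  BacktraceState state{frames, 0, kMaxFrames};
  _Unwind_Backtrace(UnwindCallback, &state);
  // Same mixing scheme as the patch: multiply by 2654435761 (Knuth's
  // multiplicative constant) and fold in a shift/xor step per frame.
  uint64_t hash = 0;
  for (size_t i = 0; i < state.count; ++i) {
    hash = hash * 2654435761u + frames[i];
    hash += (hash >> 13) ^ (hash << 6);
  }
  std::lock_guard<std::mutex> lock(g_seen_lock);
  return g_seen_hashes.insert(hash).second;
}

In the patch itself the hash set lives on the Heap, is guarded by backtrace_lock_, and a new backtrace triggers CollectGarbage(false) while the freshly allocated object is protected by a StackHandleScope so the GC can move it safely.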