diff options
author | Hiroshi Yamauchi <yamauchi@google.com> | 2014-09-25 11:46:46 -0700 |
---|---|---|
committer | Hiroshi Yamauchi <yamauchi@google.com> | 2014-09-25 15:53:54 -0700 |
commit | f0edfc355893d53d1104b05501c99ad5ccf305c4 (patch) | |
tree | 7e1fa49875759512f5d02b1c45435d3e3366b920 /runtime/mirror | |
parent | 1ed5b27ee329208fd8ae22b8a9a61d708e2c1ffb (diff) | |
download | art-f0edfc355893d53d1104b05501c99ad5ccf305c4.zip art-f0edfc355893d53d1104b05501c99ad5ccf305c4.tar.gz art-f0edfc355893d53d1104b05501c99ad5ccf305c4.tar.bz2 |
Some optimizations for the array alloc path.
- Force Array::Alloc() to be inlined.
- Simplify the array size overflow check.
- Turn fill_usable into a template parameter.
- Remove a branch in Array::DataOffset() and avoid
  Primitive::ComponentSize(), which has a switch, in the array alloc
  path.
- Strength reductions in the array size computation by using component
  size shifts instead of component sizes. Store the component size shift
  in the upper 16 bits of the primitive_type_ field.
- Speedup: ~4% (3435->3284) in MemAllocTest on N4.
Bug: 9986565
Change-Id: I4b142ffac4ab8b5b915836f1660a949d6442344c
Diffstat (limited to 'runtime/mirror')
-rw-r--r-- | runtime/mirror/array-inl.h | 75 | ||||
-rw-r--r-- | runtime/mirror/array.cc | 3 | ||||
-rw-r--r-- | runtime/mirror/array.h | 20 | ||||
-rw-r--r-- | runtime/mirror/class-inl.h | 15 | ||||
-rw-r--r-- | runtime/mirror/class.h | 22 | ||||
-rw-r--r-- | runtime/mirror/object_array-inl.h | 5 | ||||
-rw-r--r-- | runtime/mirror/object_test.cc | 22 |
7 files changed, 106 insertions, 56 deletions
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h index 213dbc2..6582226 100644 --- a/runtime/mirror/array-inl.h +++ b/runtime/mirror/array-inl.h @@ -35,13 +35,13 @@ inline uint32_t Array::ClassSize() { template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption> inline size_t Array::SizeOf() { // This is safe from overflow because the array was already allocated, so we know it's sane. - size_t component_size = - GetClass<kVerifyFlags, kReadBarrierOption>()->template GetComponentSize<kReadBarrierOption>(); + size_t component_size_shift = GetClass<kVerifyFlags, kReadBarrierOption>()-> + template GetComponentSizeShift<kReadBarrierOption>(); // Don't need to check this since we already check this in GetClass. int32_t component_count = GetLength<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>(); - size_t header_size = DataOffset(component_size).SizeValue(); - size_t data_size = component_count * component_size; + size_t header_size = DataOffset(1U << component_size_shift).SizeValue(); + size_t data_size = component_count << component_size_shift; return header_size + data_size; } @@ -56,24 +56,36 @@ inline bool Array::CheckIsValidIndex(int32_t index) { } static inline size_t ComputeArraySize(Thread* self, Class* array_class, int32_t component_count, - size_t component_size) + size_t component_size_shift) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK(array_class != NULL); DCHECK_GE(component_count, 0); DCHECK(array_class->IsArrayClass()); + size_t component_size = 1U << component_size_shift; size_t header_size = Array::DataOffset(component_size).SizeValue(); - size_t data_size = component_count * component_size; + size_t data_size = static_cast<size_t>(component_count) << component_size_shift; size_t size = header_size + data_size; - // Check for overflow and throw OutOfMemoryError if this was an unreasonable request. 
- size_t component_shift = sizeof(size_t) * 8 - 1 - CLZ(component_size); - if (UNLIKELY(data_size >> component_shift != size_t(component_count) || size < data_size)) { + // Check for size_t overflow and throw OutOfMemoryError if this was + // an unreasonable request. +#ifdef __LP64__ + // 64-bit. No overflow as component_count is 32-bit and the maximum + // component size is 8. + DCHECK_LE((1U << component_size_shift), 8U); +#else + // 32-bit. + DCHECK_NE(header_size, 0U); + DCHECK_EQ(RoundUp(header_size, component_size), header_size); + // The array length limit (exclusive). + const size_t length_limit = (0U - header_size) >> component_size_shift; + if (UNLIKELY(length_limit <= static_cast<size_t>(component_count))) { self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow", PrettyDescriptor(array_class).c_str(), component_count).c_str()); return 0; // failure } +#endif return size; } @@ -103,8 +115,10 @@ class SetLengthVisitor { // array. class SetLengthToUsableSizeVisitor { public: - SetLengthToUsableSizeVisitor(int32_t min_length, size_t header_size, size_t component_size) : - minimum_length_(min_length), header_size_(header_size), component_size_(component_size) { + SetLengthToUsableSizeVisitor(int32_t min_length, size_t header_size, + size_t component_size_shift) : + minimum_length_(min_length), header_size_(header_size), + component_size_shift_(component_size_shift) { } void operator()(Object* obj, size_t usable_size) const @@ -112,10 +126,12 @@ class SetLengthToUsableSizeVisitor { // Avoid AsArray as object is not yet in live bitmap or allocation stack. 
Array* array = down_cast<Array*>(obj); // DCHECK(array->IsArrayInstance()); - int32_t length = (usable_size - header_size_) / component_size_; + int32_t length = (usable_size - header_size_) >> component_size_shift_; DCHECK_GE(length, minimum_length_); - byte* old_end = reinterpret_cast<byte*>(array->GetRawData(component_size_, minimum_length_)); - byte* new_end = reinterpret_cast<byte*>(array->GetRawData(component_size_, length)); + byte* old_end = reinterpret_cast<byte*>(array->GetRawData(1U << component_size_shift_, + minimum_length_)); + byte* new_end = reinterpret_cast<byte*>(array->GetRawData(1U << component_size_shift_, + length)); // Ensure space beyond original allocation is zeroed. memset(old_end, 0, new_end - old_end); array->SetLength(length); @@ -124,38 +140,46 @@ class SetLengthToUsableSizeVisitor { private: const int32_t minimum_length_; const size_t header_size_; - const size_t component_size_; + const size_t component_size_shift_; DISALLOW_COPY_AND_ASSIGN(SetLengthToUsableSizeVisitor); }; -template <bool kIsInstrumented> +template <bool kIsInstrumented, bool kFillUsable> inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count, - size_t component_size, gc::AllocatorType allocator_type, - bool fill_usable) { + size_t component_size_shift, gc::AllocatorType allocator_type) { DCHECK(allocator_type != gc::kAllocatorTypeLOS); - size_t size = ComputeArraySize(self, array_class, component_count, component_size); + DCHECK_EQ(array_class->GetComponentSizeShift(), component_size_shift); + DCHECK_EQ(array_class->GetComponentSize(), (1U << component_size_shift)); + size_t size = ComputeArraySize(self, array_class, component_count, component_size_shift); +#ifdef __LP64__ + // 64-bit. No size_t overflow. + DCHECK_NE(size, 0U); +#else + // 32-bit. 
if (UNLIKELY(size == 0)) { return nullptr; } +#endif gc::Heap* heap = Runtime::Current()->GetHeap(); Array* result; - if (!fill_usable) { + if (!kFillUsable) { SetLengthVisitor visitor(component_count); result = down_cast<Array*>( heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size, allocator_type, visitor)); } else { - SetLengthToUsableSizeVisitor visitor(component_count, DataOffset(component_size).SizeValue(), - component_size); + SetLengthToUsableSizeVisitor visitor(component_count, + DataOffset(1U << component_size_shift).SizeValue(), + component_size_shift); result = down_cast<Array*>( heap->AllocObjectWithAllocator<kIsInstrumented, true>(self, array_class, size, allocator_type, visitor)); } if (kIsDebugBuild && result != nullptr && Runtime::Current()->IsStarted()) { array_class = result->GetClass(); // In case the array class moved. - CHECK_EQ(array_class->GetComponentSize(), component_size); - if (!fill_usable) { + CHECK_EQ(array_class->GetComponentSize(), 1U << component_size_shift); + if (!kFillUsable) { CHECK_EQ(result->SizeOf(), size); } else { CHECK_GE(result->SizeOf(), size); @@ -173,7 +197,8 @@ inline void PrimitiveArray<T>::VisitRoots(RootCallback* callback, void* arg) { template<typename T> inline PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) { - Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length, sizeof(T), + Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length, + ComponentSizeShiftWidth<sizeof(T)>(), Runtime::Current()->GetHeap()->GetCurrentAllocator()); return down_cast<PrimitiveArray<T>*>(raw_array); } diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc index 4535f6c..636be33 100644 --- a/runtime/mirror/array.cc +++ b/runtime/mirror/array.cc @@ -48,7 +48,8 @@ static Array* RecursiveCreateMultiArray(Thread* self, StackHandleScope<1> hs(self); Handle<Array> new_array( hs.NewHandle( - Array::Alloc<true>(self, array_class.Get(), array_length, 
array_class->GetComponentSize(), + Array::Alloc<true>(self, array_class.Get(), array_length, + array_class->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator()))); if (UNLIKELY(new_array.Get() == nullptr)) { CHECK(self->IsExceptionPending()); diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h index 7af88d6..521d7e7 100644 --- a/runtime/mirror/array.h +++ b/runtime/mirror/array.h @@ -33,13 +33,12 @@ class MANAGED Array : public Object { // The size of a java.lang.Class representing an array. static uint32_t ClassSize(); - // Allocates an array with the given properties, if fill_usable is true the array will be of at + // Allocates an array with the given properties, if kFillUsable is true the array will be of at // least component_count size, however, if there's usable space at the end of the allocation the // array will fill it. - template <bool kIsInstrumented> - static Array* Alloc(Thread* self, Class* array_class, int32_t component_count, - size_t component_size, gc::AllocatorType allocator_type, - bool fill_usable = false) + template <bool kIsInstrumented, bool kFillUsable = false> + ALWAYS_INLINE static Array* Alloc(Thread* self, Class* array_class, int32_t component_count, + size_t component_size_shift, gc::AllocatorType allocator_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static Array* CreateMultiArray(Thread* self, Handle<Class> element_class, @@ -66,12 +65,11 @@ class MANAGED Array : public Object { } static MemberOffset DataOffset(size_t component_size) { - if (component_size != sizeof(int64_t)) { - return OFFSET_OF_OBJECT_MEMBER(Array, first_element_); - } else { - // Align longs and doubles. 
- return MemberOffset(OFFSETOF_MEMBER(Array, first_element_) + 4); - } + DCHECK(IsPowerOfTwo(component_size)) << component_size; + size_t data_offset = RoundUp(OFFSETOF_MEMBER(Array, first_element_), component_size); + DCHECK_EQ(RoundUp(data_offset, component_size), data_offset) + << "Array data offset isn't aligned with component size"; + return MemberOffset(data_offset); } void* GetRawData(size_t component_size, int32_t index) diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h index 3f67468..3d3ae16 100644 --- a/runtime/mirror/class-inl.h +++ b/runtime/mirror/class-inl.h @@ -510,8 +510,19 @@ inline void Class::SetName(String* name) { template<VerifyObjectFlags kVerifyFlags> inline Primitive::Type Class::GetPrimitiveType() { DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t)); - return static_cast<Primitive::Type>( - GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_))); + int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_)); + Primitive::Type type = static_cast<Primitive::Type>(v32 & 0xFFFF); + DCHECK_EQ(static_cast<size_t>(v32 >> 16), Primitive::ComponentSizeShift(type)); + return type; +} + +template<VerifyObjectFlags kVerifyFlags> +inline size_t Class::GetPrimitiveTypeSizeShift() { + DCHECK_EQ(sizeof(Primitive::Type), sizeof(int32_t)); + int32_t v32 = GetField32<kVerifyFlags>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_)); + size_t size_shift = static_cast<Primitive::Type>(v32 >> 16); + DCHECK_EQ(size_shift, Primitive::ComponentSizeShift(static_cast<Primitive::Type>(v32 & 0xFFFF))); + return size_shift; } inline void Class::CheckObjectAlloc() { diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index 4a8d6dc..0acf695 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -345,9 +345,16 @@ class MANAGED Class FINAL : public Object { void SetPrimitiveType(Primitive::Type new_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK_EQ(sizeof(Primitive::Type), 
sizeof(int32_t)); - SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), new_type); + int32_t v32 = static_cast<int32_t>(new_type); + DCHECK_EQ(v32 & 0xFFFF, v32) << "upper 16 bits aren't zero"; + // Store the component size shift in the upper 16 bits. + v32 |= Primitive::ComponentSizeShift(new_type) << 16; + SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, primitive_type_), v32); } + template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> + size_t GetPrimitiveTypeSizeShift() ALWAYS_INLINE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Returns true if the class is a primitive type. template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> bool IsPrimitive() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -457,8 +464,12 @@ class MANAGED Class FINAL : public Object { template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> size_t GetComponentSize() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return Primitive::ComponentSize( - GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveType()); + return 1U << GetComponentSizeShift(); + } + + template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> + size_t GetComponentSizeShift() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return GetComponentType<kDefaultVerifyFlags, kReadBarrierOption>()->GetPrimitiveTypeSizeShift(); } bool IsObjectClass() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -1149,8 +1160,9 @@ class MANAGED Class FINAL : public Object { // See also class_size_. uint32_t object_size_; - // Primitive type value, or Primitive::kPrimNot (0); set for generated primitive classes. - Primitive::Type primitive_type_; + // The lower 16 bits contains a Primitive::Type value. The upper 16 + // bits contains the size shift of the primitive type. + uint32_t primitive_type_; // Bitmap of offsets of ifields. 
uint32_t reference_instance_offsets_; diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h index c7540dc..0ca44f8 100644 --- a/runtime/mirror/object_array-inl.h +++ b/runtime/mirror/object_array-inl.h @@ -35,10 +35,13 @@ template<class T> inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class, int32_t length, gc::AllocatorType allocator_type) { Array* array = Array::Alloc<true>(self, object_array_class, length, - sizeof(HeapReference<Object>), allocator_type); + ComponentSizeShiftWidth<sizeof(HeapReference<Object>)>(), + allocator_type); if (UNLIKELY(array == nullptr)) { return nullptr; } else { + DCHECK_EQ(array->GetClass()->GetComponentSizeShift(), + ComponentSizeShiftWidth<sizeof(HeapReference<Object>)>()); return array->AsObjectArray<T>(); } } diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc index 1290a3d..7fa664d 100644 --- a/runtime/mirror/object_test.cc +++ b/runtime/mirror/object_test.cc @@ -162,19 +162,19 @@ TEST_F(ObjectTest, AllocArray) { Class* c = class_linker_->FindSystemClass(soa.Self(), "[I"); StackHandleScope<1> hs(soa.Self()); MutableHandle<Array> a( - hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(), + hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator()))); EXPECT_TRUE(c == a->GetClass()); EXPECT_EQ(1, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(), + a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_EQ(1, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(), + a.Assign(Array::Alloc<true>(soa.Self(), c, 1, 
c->GetComponentSizeShift(), Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_EQ(1, a->GetLength()); @@ -185,26 +185,26 @@ TEST_F(ObjectTest, AllocArray_FillUsable) { Class* c = class_linker_->FindSystemClass(soa.Self(), "[B"); StackHandleScope<1> hs(soa.Self()); MutableHandle<Array> a( - hs.NewHandle(Array::Alloc<true>(soa.Self(), c, 1, c->GetComponentSize(), - Runtime::Current()->GetHeap()->GetCurrentAllocator(), true))); + hs.NewHandle(Array::Alloc<true, true>(soa.Self(), c, 1, c->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator()))); EXPECT_TRUE(c == a->GetClass()); EXPECT_LE(1, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[I"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(), - Runtime::Current()->GetHeap()->GetCurrentAllocator(), true)); + a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_LE(2, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(), - Runtime::Current()->GetHeap()->GetCurrentAllocator(), true)); + a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_LE(2, a->GetLength()); c = class_linker_->FindSystemClass(soa.Self(), "[[Ljava/lang/Object;"); - a.Assign(Array::Alloc<true>(soa.Self(), c, 2, c->GetComponentSize(), - Runtime::Current()->GetHeap()->GetCurrentAllocator(), true)); + a.Assign(Array::Alloc<true, true>(soa.Self(), c, 2, c->GetComponentSizeShift(), + Runtime::Current()->GetHeap()->GetCurrentAllocator())); EXPECT_TRUE(c == a->GetClass()); EXPECT_LE(2, a->GetLength()); } |