author | Nicolas Geoffray <ngeoffray@google.com> | 2015-02-04 09:45:14 +0000
---|---|---
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2015-02-04 09:45:14 +0000
commit | f4b24f7ea507ff41db39154a1e6a681a582e224a |
tree | ed70e8d35e91dd93d9dd5130ab88d6453dd4b6f3 |
parent | 6bc17805d2df2678de186e3b4c3e2f959a48555f |
parent | 3e6a3bf797e49b7f449256455c7e522e888687d8 |
Merge "ART: Change x86 long param ABI (Quick/JNI/Opt)"
mode | path | lines changed
---|---|---
-rwxr-xr-x | compiler/dex/quick/x86/target_x86.cc | 16
-rw-r--r-- | compiler/jni/quick/calling_convention.h | 2
-rw-r--r-- | compiler/jni/quick/x86/calling_convention_x86.cc | 43
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 42
-rw-r--r-- | compiler/optimizing/locations.h | 28
-rw-r--r-- | runtime/arch/x86/quick_entrypoints_x86.S | 9
-rw-r--r-- | runtime/entrypoints/quick/quick_trampoline_entrypoints.cc | 10
-rw-r--r-- | runtime/oat.h | 2
8 files changed, 60 insertions, 92 deletions
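
The change below stops 32-bit x86 managed code from splitting a `long` argument between the last core argument register and the stack. Core arguments still go to ECX, EDX and EBX, and the first four float/double arguments to XMM0..XMM3, but a `long` that no longer fits in two core registers is now passed entirely on the stack and the last core register is left unused. The standalone sketch below models that assignment rule; `AssignArgs`, its shorty-style input and the printed layout are invented for illustration and are not ART code.

```cpp
// Standalone model (not ART code) of managed x86 argument assignment after
// this change. The ECX/EDX/EBX and XMM0..XMM3 limits mirror the diff;
// everything else is illustrative.
#include <cstdio>
#include <string>

std::string AssignArgs(const char* shorty_args) {
  static const char* kGprs[] = {"ECX", "EDX", "EBX"};
  const int kNumGprs = 3;
  const int kNumXmms = 4;
  int gpr = 0, xmm = 0;
  std::string out;
  for (const char* p = shorty_args; *p != '\0'; ++p) {
    char c = *p;
    if (c == 'F' || c == 'D') {
      out += (xmm < kNumXmms) ? std::string("XMM") + std::to_string(xmm++) : "stack";
    } else if (c == 'J') {  // long: needs two GPRs or goes fully to the stack
      if (gpr + 1 < kNumGprs) {
        out += std::string(kGprs[gpr]) + "/" + kGprs[gpr + 1];
        gpr += 2;
      } else {
        // New behavior: never put the low half in EBX and the high half on the
        // stack. Skip the remaining GPR and pass the whole long on the stack.
        gpr = kNumGprs;
        out += "stack";
      }
    } else {  // int, reference, etc.
      out += (gpr < kNumGprs) ? kGprs[gpr++] : "stack";
    }
    out += " ";
  }
  return out;
}

int main() {
  // "IIJ" = (int, int, long): the long's low half would previously have gone
  // into EBX with the high half on the stack; now both halves go to the stack.
  std::printf("%s\n", AssignArgs("IIJ").c_str());  // ECX EDX stack
  std::printf("%s\n", AssignArgs("IJ").c_str());   // ECX EDX/EBX
  std::printf("%s\n", AssignArgs("JJ").c_str());   // ECX/EDX stack
  return 0;
}
```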
```diff
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 43ebf55..c4adb09 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -2469,11 +2469,17 @@ RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) {
       return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++],
                              arg.IsWide() ? kWide : kNotWide);
     }
-  } else {
-    if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
-      result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
-                               arg.IsRef() ? kRef : kNotWide);
-      if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+  } else if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+    result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+                             arg.IsRef() ? kRef : kNotWide);
+    if (arg.IsWide()) {
+      // This must be a long, as double is handled above.
+      // Ensure that we don't split a long across the last register and the stack.
+      if (cur_core_reg_ == coreArgMappingToPhysicalRegSize) {
+        // Leave the last core register unused and force the whole long to the stack.
+        cur_core_reg_++;
+        result = RegStorage::InvalidReg();
+      } else if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
         result = RegStorage::MakeRegPair(
             result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
       }
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 6db0c3b..0c64a36 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -141,7 +141,7 @@ class CallingConvention {
     if (IsStatic()) {
       param++;  // 0th argument must skip return value at start of the shorty
     } else if (param == 0) {
-      return true;  // this argument
+      return false;  // this argument
     }
     return shorty_[param] == 'J';
   }
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index fc72e88..8a45f0c 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -85,9 +85,19 @@ ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamRegister() {
   ManagedRegister res = ManagedRegister::NoRegister();
   if (!IsCurrentParamAFloatOrDouble()) {
     switch (gpr_arg_count_) {
-      case 0: res = X86ManagedRegister::FromCpuRegister(ECX); break;
-      case 1: res = X86ManagedRegister::FromCpuRegister(EDX); break;
-      case 2: res = X86ManagedRegister::FromCpuRegister(EBX); break;
+      case 0:
+        res = X86ManagedRegister::FromCpuRegister(ECX);
+        break;
+      case 1:
+        res = X86ManagedRegister::FromCpuRegister(EDX);
+        break;
+      case 2:
+        // Don't split a long between the last register and the stack.
+        if (IsCurrentParamALong()) {
+          return ManagedRegister::NoRegister();
+        }
+        res = X86ManagedRegister::FromCpuRegister(EBX);
+        break;
     }
   } else if (itr_float_and_doubles_ < 4) {
     // First four float parameters are passed via XMM0..XMM3
@@ -120,27 +130,34 @@ const ManagedRegisterEntrySpills& X86ManagedRuntimeCallingConvention::EntrySpill
     ResetIterator(FrameOffset(0));
     while (HasNext()) {
       ManagedRegister in_reg = CurrentParamRegister();
+      bool is_long = IsCurrentParamALong();
      if (!in_reg.IsNoRegister()) {
         int32_t size = IsParamADouble(itr_args_) ? 8 : 4;
         int32_t spill_offset = CurrentParamStackOffset().Uint32Value();
         ManagedRegisterSpill spill(in_reg, size, spill_offset);
         entry_spills_.push_back(spill);
-        if (IsCurrentParamALong() && !IsCurrentParamAReference()) { // Long.
-          // special case, as we may need a second register here.
+        if (is_long) {
+          // special case, as we need a second register here.
           in_reg = CurrentParamHighLongRegister();
-          if (!in_reg.IsNoRegister()) {
-            // We have to spill the second half of the long.
-            ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4);
-            entry_spills_.push_back(spill2);
-            // Long was allocated in 2 registers.
-            gpr_arg_count_++;
-          }
+          DCHECK(!in_reg.IsNoRegister());
+          // We have to spill the second half of the long.
+          ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4);
+          entry_spills_.push_back(spill2);
         }
         // Keep track of the number of GPRs allocated.
         if (!IsCurrentParamAFloatOrDouble()) {
-          gpr_arg_count_++;
+          if (is_long) {
+            // Long was allocated in 2 registers.
+            gpr_arg_count_ += 2;
+          } else {
+            gpr_arg_count_++;
+          }
         }
+      } else if (is_long) {
+        // We need to skip the unused last register, which is empty.
+        // If we are already out of registers, this is harmless.
+        gpr_arg_count_ += 2;
       }
       Next();
     }
```
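
Besides the register mapping, the `calling_convention.h` hunk above fixes `IsParamALong()` for the implicit `this` parameter: parameter 0 of an instance method is a reference and must report `false`, while a static method instead skips the return type at index 0 of the shorty. Here is a minimal standalone model of that index arithmetic (the free function and the shorty strings are illustrative, not ART code):

```cpp
// Mirrors the fixed shorty check: for instance methods, parameter 0 is the
// implicit `this` reference; for static methods, shorty[0] is the return type.
#include <cassert>
#include <cstddef>

bool IsParamALong(bool is_static, const char* shorty, size_t param) {
  if (is_static) {
    param++;  // Skip the return type at shorty[0].
  } else if (param == 0) {
    return false;  // The implicit `this` argument is a reference, never a long.
  }
  return shorty[param] == 'J';
}

int main() {
  // Instance method `long foo(long)`: shorty "JJ".
  assert(!IsParamALong(/*is_static=*/false, "JJ", 0));  // `this`
  assert(IsParamALong(/*is_static=*/false, "JJ", 1));   // the long argument
  // Static method `long bar(int, long)`: shorty "JIJ".
  assert(!IsParamALong(/*is_static=*/true, "JIJ", 0));  // the int
  assert(IsParamALong(/*is_static=*/true, "JIJ", 1));   // the long
  return 0;
}
```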
```diff
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c840793..98f93a4 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -535,9 +535,6 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
         X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
             calling_convention.GetRegisterPairAt(index));
         return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
-      } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
-        // stack_index_ is the right offset for the memory.
-        return Location::QuickParameter(index, stack_index_ - 2);
       } else {
         return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
       }
@@ -629,16 +626,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
         Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
   } else if (source.IsFpuRegister()) {
     LOG(FATAL) << "Unimplemented";
-  } else if (source.IsQuickParameter()) {
-    uint16_t register_index = source.GetQuickParameterRegisterIndex();
-    uint16_t stack_index = source.GetQuickParameterStackIndex();
-    InvokeDexCallingConvention calling_convention;
-    EmitParallelMoves(
-        Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
-        Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
-        Location::StackSlot(
-            calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
-        Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()));
   } else {
     // No conflict possible, so just do the moves.
     DCHECK(source.IsDoubleStackSlot());
@@ -646,23 +633,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
     __ movl(destination.AsRegisterPairHigh<Register>(),
             Address(ESP, source.GetHighStackIndex(kX86WordSize)));
     }
-  } else if (destination.IsQuickParameter()) {
-    InvokeDexCallingConvention calling_convention;
-    uint16_t register_index = destination.GetQuickParameterRegisterIndex();
-    uint16_t stack_index = destination.GetQuickParameterStackIndex();
-    if (source.IsRegisterPair()) {
-      LOG(FATAL) << "Unimplemented";
-    } else if (source.IsFpuRegister()) {
-      LOG(FATAL) << "Unimplemented";
-    } else {
-      DCHECK(source.IsDoubleStackSlot());
-      EmitParallelMoves(
-          Location::StackSlot(source.GetStackIndex()),
-          Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)),
-          Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
-          Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
-      __ movl(calling_convention.GetRegisterAt(register_index), Address(ESP, source.GetStackIndex()));
-    }
   } else if (destination.IsFpuRegister()) {
     if (source.IsFpuRegister()) {
       __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
@@ -678,18 +648,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
     __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
     __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
             source.AsRegisterPairHigh<Register>());
-  } else if (source.IsQuickParameter()) {
-    // No conflict possible, so just do the move.
-    InvokeDexCallingConvention calling_convention;
-    uint16_t register_index = source.GetQuickParameterRegisterIndex();
-    uint16_t stack_index = source.GetQuickParameterStackIndex();
-    // Just move the low part. The only time a source is a quick parameter is
-    // when moving the parameter to its stack locations. And the (Java) caller
-    // of this method has already done that.
-    __ movl(Address(ESP, destination.GetStackIndex()),
-            calling_convention.GetRegisterAt(register_index));
-    DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
-              static_cast<size_t>(destination.GetHighStackIndex(kX86WordSize)));
   } else if (source.IsFpuRegister()) {
     __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
   } else {
```
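
With longs never split, the Optimizing backend no longer needs the `kQuickParameter` location: `GetNextLocation()` now maps a wide core argument either to a full register pair or to a double stack slot, and the `Move64()` branches that shuttled a half-in-register, half-on-stack value are gone. Below is a rough standalone model of the new mapping, assuming the three core argument registers used elsewhere in the diff (`NextLongLocation` and its helper types are invented for illustration):

```cpp
// Model of where a long argument lives for the Optimizing backend: either a
// register pair or a double stack slot, never a register/stack split.
#include <cstdio>
#include <string>

struct LongLocation {
  bool in_registers;   // true: low/high register pair; false: two stack slots
  std::string low;
  std::string high;
};

// index: how many core argument registers (ECX, EDX, EBX) are already used.
LongLocation NextLongLocation(int index, int stack_offset_bytes) {
  static const char* kRegs[] = {"ECX", "EDX", "EBX"};
  const int kNumRegs = 3;
  if (index + 1 < kNumRegs) {
    // Both halves fit: use a register pair.
    return {true, kRegs[index], kRegs[index + 1]};
  }
  // Before this change a "quick parameter" location could put the low half in
  // the last register and the high half on the stack. Now the whole long gets
  // a double stack slot instead.
  return {false, "[sp+" + std::to_string(stack_offset_bytes) + "]",
                 "[sp+" + std::to_string(stack_offset_bytes + 4) + "]"};
}

int main() {
  LongLocation a = NextLongLocation(/*index=*/0, /*stack_offset_bytes=*/0);
  LongLocation b = NextLongLocation(/*index=*/2, /*stack_offset_bytes=*/8);
  std::printf("%s:%s\n", a.low.c_str(), a.high.c_str());  // ECX:EDX
  std::printf("%s:%s\n", b.low.c_str(), b.high.c_str());  // [sp+8]:[sp+12]
  return 0;
}
```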
```diff
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 68e3a30..bf27c5c 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -62,17 +62,11 @@ class Location : public ValueObject {
     // We do not use the value 9 because it conflicts with kLocationConstantMask.
     kDoNotUse9 = 9,
 
-    // On 32bits architectures, quick can pass a long where the
-    // low bits are in the last parameter register, and the high
-    // bits are in a stack slot. The kQuickParameter kind is for
-    // handling this special case.
-    kQuickParameter = 10,
-
     // Unallocated location represents a location that is not fixed and can be
     // allocated by a register allocator. Each unallocated location has
     // a policy that specifies what kind of location is suitable. Payload
     // contains register allocation policy.
-    kUnallocated = 11,
+    kUnallocated = 10,
   };
 
   Location() : value_(kInvalid) {
@@ -82,7 +76,6 @@ class Location : public ValueObject {
     static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError");
-    static_assert((kQuickParameter & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kFpuRegisterPair & kLocationConstantMask) != kConstant, "TagError");
@@ -267,24 +260,6 @@ class Location : public ValueObject {
     return GetPayload() - kStackIndexBias + word_size;
   }
 
-  static Location QuickParameter(uint16_t register_index, uint16_t stack_index) {
-    return Location(kQuickParameter, register_index << 16 | stack_index);
-  }
-
-  uint32_t GetQuickParameterRegisterIndex() const {
-    DCHECK(IsQuickParameter());
-    return GetPayload() >> 16;
-  }
-
-  uint32_t GetQuickParameterStackIndex() const {
-    DCHECK(IsQuickParameter());
-    return GetPayload() & 0xFFFF;
-  }
-
-  bool IsQuickParameter() const {
-    return GetKind() == kQuickParameter;
-  }
-
   Kind GetKind() const {
     return IsConstant() ? kConstant : KindField::Decode(value_);
   }
@@ -299,7 +274,6 @@ class Location : public ValueObject {
       case kRegister: return "R";
       case kStackSlot: return "S";
       case kDoubleStackSlot: return "DS";
-      case kQuickParameter: return "Q";
       case kUnallocated: return "U";
       case kConstant: return "C";
       case kFpuRegister: return "F";
```
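
For reference, the removed `Location::QuickParameter` factory packed its two 16-bit indices into the single 32-bit location payload exactly as the deleted getters decoded them. A tiny standalone sketch of that packing, with hypothetical helper names rather than ART code:

```cpp
// Two 16-bit indices (register index, stack index) folded into one 32-bit
// payload, matching the removed encode/decode expressions.
#include <cassert>
#include <cstdint>

uint32_t PackQuickParameter(uint16_t register_index, uint16_t stack_index) {
  return static_cast<uint32_t>(register_index) << 16 | stack_index;
}

uint16_t RegisterIndexOf(uint32_t payload) { return payload >> 16; }
uint16_t StackIndexOf(uint32_t payload) { return payload & 0xFFFF; }

int main() {
  uint32_t payload = PackQuickParameter(/*register_index=*/2, /*stack_index=*/5);
  assert(RegisterIndexOf(payload) == 2);
  assert(StackIndexOf(payload) == 5);
  return 0;
}
```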
```diff
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index fd3a1cf..beacd49 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -467,7 +467,8 @@ DEFINE_FUNCTION art_quick_invoke_stub
 
     // Now check ebx
     SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished
-    // Must be first word of a long, or an integer.
+    // Must be first word of a long, or an integer. First word of long doesn't
+    // go into EBX, but can be loaded there anyways, as it is harmless.
     movl (%edi), %ebx
     jmp .Lgpr_setup_finished
 .LfirstLong:
@@ -569,7 +570,8 @@ DEFINE_FUNCTION art_quick_invoke_static_stub
 
     // Is there anything for ebx?
     SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished2
-    // First word of long or integer. Load into EBX.
+    // Must be first word of a long, or an integer. First word of long doesn't
+    // go into EBX, but can be loaded there anyways, as it is harmless.
     movl (%edi), %ebx
     jmp .Lgpr_setup_finished2
 .LSecondLong2:
@@ -585,7 +587,8 @@ DEFINE_FUNCTION art_quick_invoke_static_stub
 
     // Anything for EBX?
     SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished2
-    // First word of long or integer. Load into EBX.
+    // Must be first word of a long, or an integer. First word of long doesn't
+    // go into EBX, but can be loaded there anyways, as it is harmless.
     movl (%edi), %ebx
     jmp .Lgpr_setup_finished2
     // Nothing left to load.
```
```diff
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index a67ebca..98f1684 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -59,6 +59,7 @@ class QuickArgumentVisitor {
   // | S0         |
   // |            |    4x2 bytes padding
   // | Method*    |  <- sp
+  static constexpr bool kSplitPairAcrossRegisterAndStack = kArm32QuickCodeUseSoftFloat;
   static constexpr bool kAlignPairRegister = !kArm32QuickCodeUseSoftFloat;
   static constexpr bool kQuickSoftFloatAbi = kArm32QuickCodeUseSoftFloat;
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = !kArm32QuickCodeUseSoftFloat;
@@ -95,6 +96,7 @@ class QuickArgumentVisitor {
   // | D0         |
   // |            |    padding
   // | Method*    |  <- sp
+  static constexpr bool kSplitPairAcrossRegisterAndStack = false;
   static constexpr bool kAlignPairRegister = false;
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
@@ -125,6 +127,7 @@ class QuickArgumentVisitor {
   // | A2         |    arg2
   // | A1         |    arg1
   // | A0/Method* |  <- sp
+  static constexpr bool kSplitPairAcrossRegisterAndStack = true;
   static constexpr bool kAlignPairRegister = false;
   static constexpr bool kQuickSoftFloatAbi = true;  // This is a soft float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
@@ -203,6 +206,7 @@ class QuickArgumentVisitor {
   // | XMM1        |    float arg 2
   // | XMM0        |    float arg 1
   // | EAX/Method* |  <- sp
+  static constexpr bool kSplitPairAcrossRegisterAndStack = false;
   static constexpr bool kAlignPairRegister = false;
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
@@ -243,6 +247,7 @@ class QuickArgumentVisitor {
   // | XMM0        |    float arg 1
   // | Padding     |
   // | RDI/Method* |  <- sp
+  static constexpr bool kSplitPairAcrossRegisterAndStack = false;
   static constexpr bool kAlignPairRegister = false;
   static constexpr bool kQuickSoftFloatAbi = false;  // This is a hard float ABI.
   static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
@@ -452,6 +457,11 @@ class QuickArgumentVisitor {
           }
           is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) &&
               ((gpr_index_ + 1) == kNumQuickGprArgs);
+          if (!kSplitPairAcrossRegisterAndStack && is_split_long_or_double_) {
+            // We don't want to split this. Pass over this register.
+            gpr_index_++;
+            is_split_long_or_double_ = false;
+          }
           Visit();
           if (kBytesStackArgLocation == 4) {
             stack_index_+= 2;
diff --git a/runtime/oat.h b/runtime/oat.h
index 3e28606..7faf33b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '5', '4', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '5', '5', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
```
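
The trampoline change mirrors the compiler side at runtime: `QuickArgumentVisitor` now consults a per-ISA `kSplitPairAcrossRegisterAndStack` flag and, when splitting is not allowed, skips the last GPR so both halves of the wide value are read from the stack. A standalone model of that decision follows; the struct, the function name and the GPR counts are assumptions for illustration, and only the skip logic mirrors the added lines.

```cpp
// Flag values follow the diff (x86: no splitting, MIPS: splitting allowed);
// the 3-GPR count is an assumption used only for this 32-bit illustration.
#include <cstdio>

struct GprCursor {
  int gpr_index;                          // next free GPR
  int num_gprs;                           // kNumQuickGprArgs for the ISA
  bool split_pair_across_reg_and_stack;   // kSplitPairAcrossRegisterAndStack
};

// Returns true if the 64-bit argument ends up split between a GPR and the stack.
bool VisitLongArg(GprCursor* c) {
  bool is_split = (c->gpr_index + 1 == c->num_gprs);
  if (!c->split_pair_across_reg_and_stack && is_split) {
    // Don't split: pass over the last register; both halves come from the stack.
    c->gpr_index++;
    is_split = false;
  }
  return is_split;
}

int main() {
  GprCursor x86 = {/*gpr_index=*/2, /*num_gprs=*/3, /*split=*/false};
  GprCursor mips = {/*gpr_index=*/2, /*num_gprs=*/3, /*split=*/true};
  std::printf("x86 split: %d, gpr now %d\n", VisitLongArg(&x86), x86.gpr_index);    // 0, 3
  std::printf("mips split: %d, gpr now %d\n", VisitLongArg(&mips), mips.gpr_index);  // 1, 2
  return 0;
}
```

With the flag false on x86, this matches the invoke stubs above, which may load a stale value into EBX that the callee never reads for such an argument, and the `kOatVersion` bump to 055 ensures oat files compiled with the old split convention are not reused.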