summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicolas Geoffray <ngeoffray@google.com>2015-02-04 09:45:14 +0000
committerGerrit Code Review <noreply-gerritcodereview@google.com>2015-02-04 09:45:14 +0000
commitf4b24f7ea507ff41db39154a1e6a681a582e224a (patch)
treeed70e8d35e91dd93d9dd5130ab88d6453dd4b6f3
parent6bc17805d2df2678de186e3b4c3e2f959a48555f (diff)
parent3e6a3bf797e49b7f449256455c7e522e888687d8 (diff)
downloadart-f4b24f7ea507ff41db39154a1e6a681a582e224a.zip
art-f4b24f7ea507ff41db39154a1e6a681a582e224a.tar.gz
art-f4b24f7ea507ff41db39154a1e6a681a582e224a.tar.bz2
Merge "ART: Change x86 long param ABI (Quick/JNI/Opt)"
-rwxr-xr-xcompiler/dex/quick/x86/target_x86.cc16
-rw-r--r--compiler/jni/quick/calling_convention.h2
-rw-r--r--compiler/jni/quick/x86/calling_convention_x86.cc43
-rw-r--r--compiler/optimizing/code_generator_x86.cc42
-rw-r--r--compiler/optimizing/locations.h28
-rw-r--r--runtime/arch/x86/quick_entrypoints_x86.S9
-rw-r--r--runtime/entrypoints/quick/quick_trampoline_entrypoints.cc10
-rw-r--r--runtime/oat.h2
8 files changed, 60 insertions, 92 deletions
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 43ebf55..c4adb09 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -2469,11 +2469,17 @@ RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) {
return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++],
arg.IsWide() ? kWide : kNotWide);
}
- } else {
- if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
- result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
- arg.IsRef() ? kRef : kNotWide);
- if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+ } else if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+ result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++],
+ arg.IsRef() ? kRef : kNotWide);
+ if (arg.IsWide()) {
+ // This must be a long, as double is handled above.
+ // Ensure that we don't split a long across the last register and the stack.
+ if (cur_core_reg_ == coreArgMappingToPhysicalRegSize) {
+ // Leave the last core register unused and force the whole long to the stack.
+ cur_core_reg_++;
+ result = RegStorage::InvalidReg();
+ } else if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
result = RegStorage::MakeRegPair(
result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide));
}
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 6db0c3b..0c64a36 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -141,7 +141,7 @@ class CallingConvention {
if (IsStatic()) {
param++; // 0th argument must skip return value at start of the shorty
} else if (param == 0) {
- return true; // this argument
+ return false; // this argument
}
return shorty_[param] == 'J';
}
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index fc72e88..8a45f0c 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -85,9 +85,19 @@ ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamRegister() {
ManagedRegister res = ManagedRegister::NoRegister();
if (!IsCurrentParamAFloatOrDouble()) {
switch (gpr_arg_count_) {
- case 0: res = X86ManagedRegister::FromCpuRegister(ECX); break;
- case 1: res = X86ManagedRegister::FromCpuRegister(EDX); break;
- case 2: res = X86ManagedRegister::FromCpuRegister(EBX); break;
+ case 0:
+ res = X86ManagedRegister::FromCpuRegister(ECX);
+ break;
+ case 1:
+ res = X86ManagedRegister::FromCpuRegister(EDX);
+ break;
+ case 2:
+ // Don't split a long between the last register and the stack.
+ if (IsCurrentParamALong()) {
+ return ManagedRegister::NoRegister();
+ }
+ res = X86ManagedRegister::FromCpuRegister(EBX);
+ break;
}
} else if (itr_float_and_doubles_ < 4) {
// First four float parameters are passed via XMM0..XMM3
@@ -120,27 +130,34 @@ const ManagedRegisterEntrySpills& X86ManagedRuntimeCallingConvention::EntrySpill
ResetIterator(FrameOffset(0));
while (HasNext()) {
ManagedRegister in_reg = CurrentParamRegister();
+ bool is_long = IsCurrentParamALong();
if (!in_reg.IsNoRegister()) {
int32_t size = IsParamADouble(itr_args_) ? 8 : 4;
int32_t spill_offset = CurrentParamStackOffset().Uint32Value();
ManagedRegisterSpill spill(in_reg, size, spill_offset);
entry_spills_.push_back(spill);
- if (IsCurrentParamALong() && !IsCurrentParamAReference()) { // Long.
- // special case, as we may need a second register here.
+ if (is_long) {
+ // special case, as we need a second register here.
in_reg = CurrentParamHighLongRegister();
- if (!in_reg.IsNoRegister()) {
- // We have to spill the second half of the long.
- ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4);
- entry_spills_.push_back(spill2);
- // Long was allocated in 2 registers.
- gpr_arg_count_++;
- }
+ DCHECK(!in_reg.IsNoRegister());
+ // We have to spill the second half of the long.
+ ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4);
+ entry_spills_.push_back(spill2);
}
// Keep track of the number of GPRs allocated.
if (!IsCurrentParamAFloatOrDouble()) {
- gpr_arg_count_++;
+ if (is_long) {
+ // Long was allocated in 2 registers.
+ gpr_arg_count_ += 2;
+ } else {
+ gpr_arg_count_++;
+ }
}
+ } else if (is_long) {
+ // We need to skip the unused last register, which is empty.
+ // If we are already out of registers, this is harmless.
+ gpr_arg_count_ += 2;
}
Next();
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c840793..98f93a4 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -535,9 +535,6 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
X86ManagedRegister pair = X86ManagedRegister::FromRegisterPair(
calling_convention.GetRegisterPairAt(index));
return Location::RegisterPairLocation(pair.AsRegisterPairLow(), pair.AsRegisterPairHigh());
- } else if (index + 1 == calling_convention.GetNumberOfRegisters()) {
- // stack_index_ is the right offset for the memory.
- return Location::QuickParameter(index, stack_index_ - 2);
} else {
return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
}
@@ -629,16 +626,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
} else if (source.IsFpuRegister()) {
LOG(FATAL) << "Unimplemented";
- } else if (source.IsQuickParameter()) {
- uint16_t register_index = source.GetQuickParameterRegisterIndex();
- uint16_t stack_index = source.GetQuickParameterStackIndex();
- InvokeDexCallingConvention calling_convention;
- EmitParallelMoves(
- Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
- Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
- Location::StackSlot(
- calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
- Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()));
} else {
// No conflict possible, so just do the moves.
DCHECK(source.IsDoubleStackSlot());
@@ -646,23 +633,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
__ movl(destination.AsRegisterPairHigh<Register>(),
Address(ESP, source.GetHighStackIndex(kX86WordSize)));
}
- } else if (destination.IsQuickParameter()) {
- InvokeDexCallingConvention calling_convention;
- uint16_t register_index = destination.GetQuickParameterRegisterIndex();
- uint16_t stack_index = destination.GetQuickParameterStackIndex();
- if (source.IsRegisterPair()) {
- LOG(FATAL) << "Unimplemented";
- } else if (source.IsFpuRegister()) {
- LOG(FATAL) << "Unimplemented";
- } else {
- DCHECK(source.IsDoubleStackSlot());
- EmitParallelMoves(
- Location::StackSlot(source.GetStackIndex()),
- Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)),
- Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
- Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
- __ movl(calling_convention.GetRegisterAt(register_index), Address(ESP, source.GetStackIndex()));
- }
} else if (destination.IsFpuRegister()) {
if (source.IsFpuRegister()) {
__ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
@@ -678,18 +648,6 @@ void CodeGeneratorX86::Move64(Location destination, Location source) {
__ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
__ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
source.AsRegisterPairHigh<Register>());
- } else if (source.IsQuickParameter()) {
- // No conflict possible, so just do the move.
- InvokeDexCallingConvention calling_convention;
- uint16_t register_index = source.GetQuickParameterRegisterIndex();
- uint16_t stack_index = source.GetQuickParameterStackIndex();
- // Just move the low part. The only time a source is a quick parameter is
- // when moving the parameter to its stack locations. And the (Java) caller
- // of this method has already done that.
- __ movl(Address(ESP, destination.GetStackIndex()),
- calling_convention.GetRegisterAt(register_index));
- DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
- static_cast<size_t>(destination.GetHighStackIndex(kX86WordSize)));
} else if (source.IsFpuRegister()) {
__ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
} else {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 68e3a30..bf27c5c 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -62,17 +62,11 @@ class Location : public ValueObject {
// We do not use the value 9 because it conflicts with kLocationConstantMask.
kDoNotUse9 = 9,
- // On 32bits architectures, quick can pass a long where the
- // low bits are in the last parameter register, and the high
- // bits are in a stack slot. The kQuickParameter kind is for
- // handling this special case.
- kQuickParameter = 10,
-
// Unallocated location represents a location that is not fixed and can be
// allocated by a register allocator. Each unallocated location has
// a policy that specifies what kind of location is suitable. Payload
// contains register allocation policy.
- kUnallocated = 11,
+ kUnallocated = 10,
};
Location() : value_(kInvalid) {
@@ -82,7 +76,6 @@ class Location : public ValueObject {
static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError");
- static_assert((kQuickParameter & kLocationConstantMask) != kConstant, "TagError");
static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError");
static_assert((kFpuRegisterPair & kLocationConstantMask) != kConstant, "TagError");
@@ -267,24 +260,6 @@ class Location : public ValueObject {
return GetPayload() - kStackIndexBias + word_size;
}
- static Location QuickParameter(uint16_t register_index, uint16_t stack_index) {
- return Location(kQuickParameter, register_index << 16 | stack_index);
- }
-
- uint32_t GetQuickParameterRegisterIndex() const {
- DCHECK(IsQuickParameter());
- return GetPayload() >> 16;
- }
-
- uint32_t GetQuickParameterStackIndex() const {
- DCHECK(IsQuickParameter());
- return GetPayload() & 0xFFFF;
- }
-
- bool IsQuickParameter() const {
- return GetKind() == kQuickParameter;
- }
-
Kind GetKind() const {
return IsConstant() ? kConstant : KindField::Decode(value_);
}
@@ -299,7 +274,6 @@ class Location : public ValueObject {
case kRegister: return "R";
case kStackSlot: return "S";
case kDoubleStackSlot: return "DS";
- case kQuickParameter: return "Q";
case kUnallocated: return "U";
case kConstant: return "C";
case kFpuRegister: return "F";
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index fd3a1cf..beacd49 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -467,7 +467,8 @@ DEFINE_FUNCTION art_quick_invoke_stub
// Now check ebx
SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished
- // Must be first word of a long, or an integer.
+ // Must be first word of a long, or an integer. First word of long doesn't
+ // go into EBX, but can be loaded there anyways, as it is harmless.
movl (%edi), %ebx
jmp .Lgpr_setup_finished
.LfirstLong:
@@ -569,7 +570,8 @@ DEFINE_FUNCTION art_quick_invoke_static_stub
// Is there anything for ebx?
SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished2
- // First word of long or integer. Load into EBX.
+ // Must be first word of a long, or an integer. First word of long doesn't
+ // go into EBX, but can be loaded there anyways, as it is harmless.
movl (%edi), %ebx
jmp .Lgpr_setup_finished2
.LSecondLong2:
@@ -585,7 +587,8 @@ DEFINE_FUNCTION art_quick_invoke_static_stub
// Anything for EBX?
SKIP_OVER_FLOATS esi, edi, al, .Lgpr_setup_finished2
- // First word of long or integer. Load into EBX.
+ // Must be first word of a long, or an integer. First word of long doesn't
+ // go into EBX, but can be loaded there anyways, as it is harmless.
movl (%edi), %ebx
jmp .Lgpr_setup_finished2
// Nothing left to load.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index a67ebca..98f1684 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -59,6 +59,7 @@ class QuickArgumentVisitor {
// | S0 |
// | | 4x2 bytes padding
// | Method* | <- sp
+ static constexpr bool kSplitPairAcrossRegisterAndStack = kArm32QuickCodeUseSoftFloat;
static constexpr bool kAlignPairRegister = !kArm32QuickCodeUseSoftFloat;
static constexpr bool kQuickSoftFloatAbi = kArm32QuickCodeUseSoftFloat;
static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = !kArm32QuickCodeUseSoftFloat;
@@ -95,6 +96,7 @@ class QuickArgumentVisitor {
// | D0 |
// | | padding
// | Method* | <- sp
+ static constexpr bool kSplitPairAcrossRegisterAndStack = false;
static constexpr bool kAlignPairRegister = false;
static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI.
static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
@@ -125,6 +127,7 @@ class QuickArgumentVisitor {
// | A2 | arg2
// | A1 | arg1
// | A0/Method* | <- sp
+ static constexpr bool kSplitPairAcrossRegisterAndStack = true;
static constexpr bool kAlignPairRegister = false;
static constexpr bool kQuickSoftFloatAbi = true; // This is a soft float ABI.
static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
@@ -203,6 +206,7 @@ class QuickArgumentVisitor {
// | XMM1 | float arg 2
// | XMM0 | float arg 1
// | EAX/Method* | <- sp
+ static constexpr bool kSplitPairAcrossRegisterAndStack = false;
static constexpr bool kAlignPairRegister = false;
static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI.
static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
@@ -243,6 +247,7 @@ class QuickArgumentVisitor {
// | XMM0 | float arg 1
// | Padding |
// | RDI/Method* | <- sp
+ static constexpr bool kSplitPairAcrossRegisterAndStack = false;
static constexpr bool kAlignPairRegister = false;
static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI.
static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
@@ -452,6 +457,11 @@ class QuickArgumentVisitor {
}
is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) &&
((gpr_index_ + 1) == kNumQuickGprArgs);
+ if (!kSplitPairAcrossRegisterAndStack && is_split_long_or_double_) {
+ // We don't want to split this. Pass over this register.
+ gpr_index_++;
+ is_split_long_or_double_ = false;
+ }
Visit();
if (kBytesStackArgLocation == 4) {
stack_index_+= 2;
diff --git a/runtime/oat.h b/runtime/oat.h
index 3e28606..7faf33b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@ class InstructionSetFeatures;
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '5', '4', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '5', '5', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";