summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Roland Levillain <rpl@google.com>, 2015-04-21 16:09:29 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com>, 2015-04-21 16:09:30 +0000
commit: 4bb014fd8e0aa45b012d56bc4813f18fa295d2b0 (patch)
tree: ba5fa5beef6e2c4a91ec5bbe7b0ed81b168b4958
parent: d677de20906067061f262bdd434536a02e7f0dd0 (diff)
parent: 232ade0b9401404ad4b61b1003551b58b96195a8 (diff)
download: art-4bb014fd8e0aa45b012d56bc4813f18fa295d2b0.zip
download: art-4bb014fd8e0aa45b012d56bc4813f18fa295d2b0.tar.gz
download: art-4bb014fd8e0aa45b012d56bc4813f18fa295d2b0.tar.bz2
Merge "Revert "Revert "Optimizing: Fix long-to-fp conversion on x86."""
-rw-r--r-- compiler/optimizing/code_generator_x86.cc 158
-rw-r--r-- compiler/optimizing/code_generator_x86.h 4
-rw-r--r-- test/477-long-to-float-conversion-precision/expected.txt 0
-rw-r--r-- test/477-long-to-float-conversion-precision/info.txt 1
-rw-r--r-- test/477-long-to-float-conversion-precision/src/Main.java 41
-rw-r--r-- test/Android.run-test.mk 13
6 files changed, 143 insertions, 74 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 86e84ac..3dcfca6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1556,10 +1556,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
case Primitive::kPrimLong:
// Processing a Dex `long-to-float' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::Any());
break;
case Primitive::kPrimDouble:
@@ -1589,10 +1587,8 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
case Primitive::kPrimLong:
// Processing a Dex `long-to-double' instruction.
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
- locations->AddTemp(Location::RequiresFpuRegister());
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::Any());
break;
case Primitive::kPrimFloat:
@@ -1813,37 +1809,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
case Primitive::kPrimLong: {
// Processing a Dex `long-to-float' instruction.
- Register low = in.AsRegisterPairLow<Register>();
- Register high = in.AsRegisterPairHigh<Register>();
- XmmRegister result = out.AsFpuRegister<XmmRegister>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-
- // Operations use doubles for precision reasons (each 32-bit
- // half of a long fits in the 53-bit mantissa of a double,
- // but not in the 24-bit mantissa of a float). This is
- // especially important for the low bits. The result is
- // eventually converted to float.
-
- // low = low - 2^31 (to prevent bit 31 of `low` to be
- // interpreted as a sign bit)
- __ subl(low, Immediate(0x80000000));
- // temp = int-to-double(high)
- __ cvtsi2sd(temp, high);
- // temp = temp * 2^32
- __ LoadLongConstant(constant, k2Pow32EncodingForDouble);
- __ mulsd(temp, constant);
- // result = int-to-double(low)
- __ cvtsi2sd(result, low);
- // result = result + 2^31 (restore the original value of `low`)
- __ LoadLongConstant(constant, k2Pow31EncodingForDouble);
- __ addsd(result, constant);
- // result = result + temp
- __ addsd(result, temp);
- // result = double-to-float(result)
- __ cvtsd2ss(result, result);
- // Restore low.
- __ addl(low, Immediate(0x80000000));
+ size_t adjustment = 0;
+
+ // Create stack space for the call to
+ // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below.
+ // TODO: enhance register allocator to ask for stack temporaries.
+ if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) {
+ adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
+ __ subl(ESP, Immediate(adjustment));
+ }
+
+ // Load the value to the FP stack, using temporaries if needed.
+ PushOntoFPStack(in, 0, adjustment, false, true);
+
+ if (out.IsStackSlot()) {
+ __ fstps(Address(ESP, out.GetStackIndex() + adjustment));
+ } else {
+ __ fstps(Address(ESP, 0));
+ Location stack_temp = Location::StackSlot(0);
+ codegen_->Move32(out, stack_temp);
+ }
+
+ // Remove the temporary stack space we allocated.
+ if (adjustment != 0) {
+ __ addl(ESP, Immediate(adjustment));
+ }
break;
}
@@ -1872,29 +1862,31 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
case Primitive::kPrimLong: {
// Processing a Dex `long-to-double' instruction.
- Register low = in.AsRegisterPairLow<Register>();
- Register high = in.AsRegisterPairHigh<Register>();
- XmmRegister result = out.AsFpuRegister<XmmRegister>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- XmmRegister constant = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
-
- // low = low - 2^31 (to prevent bit 31 of `low` to be
- // interpreted as a sign bit)
- __ subl(low, Immediate(0x80000000));
- // temp = int-to-double(high)
- __ cvtsi2sd(temp, high);
- // temp = temp * 2^32
- __ LoadLongConstant(constant, k2Pow32EncodingForDouble);
- __ mulsd(temp, constant);
- // result = int-to-double(low)
- __ cvtsi2sd(result, low);
- // result = result + 2^31 (restore the original value of `low`)
- __ LoadLongConstant(constant, k2Pow31EncodingForDouble);
- __ addsd(result, constant);
- // result = result + temp
- __ addsd(result, temp);
- // Restore low.
- __ addl(low, Immediate(0x80000000));
+ size_t adjustment = 0;
+
+ // Create stack space for the call to
+ // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below.
+ // TODO: enhance register allocator to ask for stack temporaries.
+ if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) {
+ adjustment = Primitive::ComponentSize(Primitive::kPrimLong);
+ __ subl(ESP, Immediate(adjustment));
+ }
+
+ // Load the value to the FP stack, using temporaries if needed.
+ PushOntoFPStack(in, 0, adjustment, false, true);
+
+ if (out.IsDoubleStackSlot()) {
+ __ fstpl(Address(ESP, out.GetStackIndex() + adjustment));
+ } else {
+ __ fstpl(Address(ESP, 0));
+ Location stack_temp = Location::DoubleStackSlot(0);
+ codegen_->Move64(out, stack_temp);
+ }
+
+ // Remove the temporary stack space we allocated.
+ if (adjustment != 0) {
+ __ addl(ESP, Immediate(adjustment));
+ }
break;
}
@@ -2234,24 +2226,43 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
}
}
-void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, uint32_t temp_offset,
- uint32_t stack_adjustment, bool is_float) {
+void InstructionCodeGeneratorX86::PushOntoFPStack(Location source,
+ uint32_t temp_offset,
+ uint32_t stack_adjustment,
+ bool is_fp,
+ bool is_wide) {
if (source.IsStackSlot()) {
- DCHECK(is_float);
- __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ DCHECK(!is_wide);
+ if (is_fp) {
+ __ flds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ } else {
+ __ filds(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ }
} else if (source.IsDoubleStackSlot()) {
- DCHECK(!is_float);
- __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ DCHECK(is_wide);
+ if (is_fp) {
+ __ fldl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ } else {
+ __ fildl(Address(ESP, source.GetStackIndex() + stack_adjustment));
+ }
} else {
// Write the value to the temporary location on the stack and load to FP stack.
- if (is_float) {
+ if (!is_wide) {
Location stack_temp = Location::StackSlot(temp_offset);
codegen_->Move32(stack_temp, source);
- __ flds(Address(ESP, temp_offset));
+ if (is_fp) {
+ __ flds(Address(ESP, temp_offset));
+ } else {
+ __ filds(Address(ESP, temp_offset));
+ }
} else {
Location stack_temp = Location::DoubleStackSlot(temp_offset);
codegen_->Move64(stack_temp, source);
- __ fldl(Address(ESP, temp_offset));
+ if (is_fp) {
+ __ fldl(Address(ESP, temp_offset));
+ } else {
+ __ fildl(Address(ESP, temp_offset));
+ }
}
}
}
@@ -2270,8 +2281,9 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
__ subl(ESP, Immediate(2 * elem_size));
// Load the values to the FP stack in reverse order, using temporaries if needed.
- PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
- PushOntoFPStack(first, 0, 2 * elem_size, is_float);
+ const bool is_wide = !is_float;
+ PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide);
+ PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide);
// Loop doing FPREM until we stabilize.
Label retry;
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 07476c6..8bd3cd3 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -174,8 +174,10 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateMemoryBarrier(MemBarrierKind kind);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
+ // `is_wide` specifies whether it is long/double or not.
void PushOntoFPStack(Location source, uint32_t temp_offset,
- uint32_t stack_adjustment, bool is_float);
+ uint32_t stack_adjustment, bool is_fp, bool is_wide);
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
diff --git a/test/477-long-to-float-conversion-precision/expected.txt b/test/477-long-to-float-conversion-precision/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/477-long-to-float-conversion-precision/expected.txt
diff --git a/test/477-long-to-float-conversion-precision/info.txt b/test/477-long-to-float-conversion-precision/info.txt
new file mode 100644
index 0000000..d9d41d7
--- /dev/null
+++ b/test/477-long-to-float-conversion-precision/info.txt
@@ -0,0 +1 @@
+Tests for type conversions precision.
diff --git a/test/477-long-to-float-conversion-precision/src/Main.java b/test/477-long-to-float-conversion-precision/src/Main.java
new file mode 100644
index 0000000..bc17053
--- /dev/null
+++ b/test/477-long-to-float-conversion-precision/src/Main.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$ is a marker for the optimizing compiler to ensure
+// it does compile the method.
+public class Main {
+
+ public static void assertFloatEquals(float expected, float result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ public static void main(String[] args) {
+ // Generate, compile and check long-to-float Dex instructions.
+ longToFloat();
+ }
+
+ private static void longToFloat() {
+ // The result for this test case is slightly less accurate on ARM,
+ // due to the implementation of long-to-float type conversions for
+ // this architecture (both in Quick and Optimizing).
+ assertFloatEquals(Float.intBitsToFloat(-555858671), $opt$LongToFloat(-8008112895877447681L));
+ }
+
+ // This method produces a long-to-float Dex instruction.
+ static float $opt$LongToFloat(long a) { return (float)a; }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 39afc67..731c040 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -335,6 +335,19 @@ endif
TEST_ART_BROKEN_DEFAULT_RUN_TESTS :=
+# Known broken tests for Quick's and Optimizing's ARM back ends.
+TEST_ART_BROKEN_ARM_RUN_TESTS := 477-long-to-float-conversion-precision # b/20413424
+
+ifeq ($(TARGET_ARCH),arm)
+ ifneq (,$(filter 32,$(ALL_ADDRESS_SIZES)))
+ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+ $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+ $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_ARM_RUN_TESTS),32)
+ endif
+endif
+
+TEST_ART_BROKEN_ARM_RUN_TESTS :=
+
# Known broken tests for the arm64 optimizing compiler backend.
TEST_ART_BROKEN_OPTIMIZING_ARM64_RUN_TESTS :=