author     Zheng Xu <zheng.xu@arm.com>      2014-10-23 18:29:55 +0800
committer  Ian Rogers <irogers@google.com>  2014-10-24 16:27:22 -0700
commit     5667fdbb6e441dee7534ade18b628ed396daf593
tree       a06fe0a79b3465571556d13f509daf1f664fc614 /runtime/arch/arm
parent     b62ff579cd870b0bf213765b07d7b404d15ece7b
ARM: Use hardfp calling convention for Java-to-Java calls.

This patch defaults to the hardfp calling convention; softfp can be
re-enabled by setting kArm32QuickCodeUseSoftFloat to true. We measured
roughly -1% to +5% performance changes on different benchmarks, and more
should be attainable by addressing the remaining TODOs, since some of the
code still relies on the original, non-optimal assumptions.

DONE:
1. Interpreter to quick code
2. Quick code to interpreter
3. Transition assembly and callee-saves
4. Trampolines (generic JNI, resolution, invoke with access check, etc.)
5. Pass FP arg regs following AAPCS (GPRs and stack do not follow AAPCS)
6. Quick helper assembly routines to handle ABI differences
7. Quick code method entry
8. Quick code method invocation
9. JNI compiler

TODO:
10. Rework ArgMap, FlushIn, GenDalvikArgs and affected common code.
11. Rework CallRuntimeHelperXXX().

Change-Id: I9965d8a007f4829f2560b63bcbbde271bdcf6ec2
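To make the convention change concrete, here is an illustrative register
assignment (editorial example, not part of the patch) for a static method
with shorty "DIDF", i.e. returning double and taking (int, double, float),
consistent with the routing implemented in quick_entrypoints_cc_arm.cc below:

    softfp quick ABI:  r0 = ArtMethod*, r1 = int, r2/r3 = double, [sp] = float;
                       the double result comes back in r0/r1.
    hardfp quick ABI:  r0 = ArtMethod*, r1 = int, d0 = double, s2 = float;
                       the double result comes back in d0.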
Diffstat (limited to 'runtime/arch/arm')
-rw-r--r--runtime/arch/arm/asm_support_arm.h4
-rw-r--r--runtime/arch/arm/context_arm.cc17
-rw-r--r--runtime/arch/arm/entrypoints_init_arm.cc47
-rw-r--r--runtime/arch/arm/quick_entrypoints_arm.S245
-rw-r--r--runtime/arch/arm/quick_entrypoints_cc_arm.cc110
-rw-r--r--runtime/arch/arm/quick_method_frame_info_arm.h19
6 files changed, 317 insertions, 125 deletions
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index 5388cc0..8cd2a27 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -19,9 +19,9 @@
#include "asm_support.h"
-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 112
#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 48
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 112
// Flag for enabling R4 optimization in arm runtime
#define ARM_R4_SUSPEND_FLAG
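As a sanity check (editorial note, not part of the patch), the new frame
sizes match the compile-time asserts added to quick_entrypoints_arm.S below:

    SAVE_ALL_CALLEE_SAVE:       9 core words (r4-r11, lr)                 =  36 bytes
                                16 FP words (s16-s31)                     =  64 bytes
                                2 words padding + Method* slot            =  12 bytes
                                                                    total = 112 bytes
    REFS_AND_ARGS_CALLEE_SAVE:  10 core words (r1-r3, r5-r8, r10-r11, lr) =  40 bytes
                                16 FP words (s0-s15)                      =  64 bytes
                                1 word padding + Method* slot             =   8 bytes
                                                                    total = 112 bytes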
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 96ffc93..fd9c626 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -97,6 +97,23 @@ void ArmContext::SmashCallerSaves() {
gprs_[R1] = const_cast<uint32_t*>(&gZero);
gprs_[R2] = nullptr;
gprs_[R3] = nullptr;
+
+ fprs_[S0] = nullptr;
+ fprs_[S1] = nullptr;
+ fprs_[S2] = nullptr;
+ fprs_[S3] = nullptr;
+ fprs_[S4] = nullptr;
+ fprs_[S5] = nullptr;
+ fprs_[S6] = nullptr;
+ fprs_[S7] = nullptr;
+ fprs_[S8] = nullptr;
+ fprs_[S9] = nullptr;
+ fprs_[S10] = nullptr;
+ fprs_[S11] = nullptr;
+ fprs_[S12] = nullptr;
+ fprs_[S13] = nullptr;
+ fprs_[S14] = nullptr;
+ fprs_[S15] = nullptr;
}
extern "C" void art_quick_do_long_jump(uint32_t*, uint32_t*);
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index ff0eb4a..24e9b1d 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -77,23 +77,17 @@ extern "C" void art_quick_handle_fill_data(void*, void*);
extern "C" void art_quick_lock_object(void*);
extern "C" void art_quick_unlock_object(void*);
-// Math entrypoints.
-extern int32_t CmpgDouble(double a, double b);
-extern int32_t CmplDouble(double a, double b);
-extern int32_t CmpgFloat(float a, float b);
-extern int32_t CmplFloat(float a, float b);
-
-// Math conversions.
-extern "C" int32_t __aeabi_f2iz(float op1); // FLOAT_TO_INT
-extern "C" int32_t __aeabi_d2iz(double op1); // DOUBLE_TO_INT
-extern "C" float __aeabi_l2f(int64_t op1); // LONG_TO_FLOAT
-extern "C" double __aeabi_l2d(int64_t op1); // LONG_TO_DOUBLE
-
+// Used by soft float.
// Single-precision FP arithmetics.
-extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR]
-
+extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR]
// Double-precision FP arithmetics.
-extern "C" double fmod(double a, double b); // REM_DOUBLE[_2ADDR]
+extern "C" double fmod(double a, double b); // REM_DOUBLE[_2ADDR]
+
+// Used by hard float.
+extern "C" int64_t art_quick_f2l(float f); // FLOAT_TO_LONG
+extern "C" int64_t art_quick_d2l(double d); // DOUBLE_TO_LONG
+extern "C" float art_quick_fmodf(float a, float b); // REM_FLOAT[_2ADDR]
+extern "C" double art_quick_fmod(double a, double b); // REM_DOUBLE[_2ADDR]
// Integer arithmetics.
extern "C" int __aeabi_idivmod(int32_t, int32_t); // [DIV|REM]_INT[_2ADDR|_LIT8|_LIT16]
@@ -205,25 +199,24 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
qpoints->pUnlockObject = art_quick_unlock_object;
// Math
- qpoints->pCmpgDouble = CmpgDouble;
- qpoints->pCmpgFloat = CmpgFloat;
- qpoints->pCmplDouble = CmplDouble;
- qpoints->pCmplFloat = CmplFloat;
- qpoints->pFmod = fmod;
- qpoints->pL2d = __aeabi_l2d;
- qpoints->pFmodf = fmodf;
- qpoints->pL2f = __aeabi_l2f;
- qpoints->pD2iz = __aeabi_d2iz;
- qpoints->pF2iz = __aeabi_f2iz;
qpoints->pIdivmod = __aeabi_idivmod;
- qpoints->pD2l = art_d2l;
- qpoints->pF2l = art_f2l;
qpoints->pLdiv = __aeabi_ldivmod;
qpoints->pLmod = __aeabi_ldivmod; // result returned in r2:r3
qpoints->pLmul = art_quick_mul_long;
qpoints->pShlLong = art_quick_shl_long;
qpoints->pShrLong = art_quick_shr_long;
qpoints->pUshrLong = art_quick_ushr_long;
+ if (kArm32QuickCodeUseSoftFloat) {
+ qpoints->pFmod = fmod;
+ qpoints->pFmodf = fmodf;
+ qpoints->pD2l = art_d2l;
+ qpoints->pF2l = art_f2l;
+ } else {
+ qpoints->pFmod = art_quick_fmod;
+ qpoints->pFmodf = art_quick_fmodf;
+ qpoints->pD2l = art_quick_d2l;
+ qpoints->pF2l = art_quick_f2l;
+ }
// Intrinsics
qpoints->pIndexOf = art_quick_indexof;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index aae0c94..632b414 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -40,10 +40,10 @@
.cfi_rel_offset r10, 24
.cfi_rel_offset r11, 28
.cfi_rel_offset lr, 32
- vpush {s0-s31} @ 32 words (128 bytes) of floats.
- .pad #128
- .cfi_adjust_cfa_offset 128
- sub sp, #12 @ 3 words of space, bottom word will hold Method*.
+ vpush {s16-s31} @ 16 words (64 bytes) of floats.
+ .pad #64
+ .cfi_adjust_cfa_offset 64
+ sub sp, #12 @ 3 words of space, bottom word will hold Method*
.pad #12
.cfi_adjust_cfa_offset 12
RUNTIME_CURRENT1 \rTemp1, \rTemp2 @ Load Runtime::Current into rTemp1.
@@ -53,7 +53,7 @@
str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame.
// Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 128 + 12)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 64 + 12)
#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM) size not as expected."
#endif
.endm
@@ -101,15 +101,7 @@
.endm
.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
- add sp, #4 @ bottom word holds Method*
- pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
- .cfi_restore r5
- .cfi_restore r6
- .cfi_restore r7
- .cfi_restore r8
- .cfi_restore r10
- .cfi_restore r11
- .cfi_adjust_cfa_offset -FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+ RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
bx lr @ return
.endm
@@ -117,9 +109,10 @@
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
*/
-.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
- push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
+ push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves and args.
.save {r1-r3, r5-r8, r10-r11, lr}
+ .cfi_adjust_cfa_offset 40
.cfi_rel_offset r1, 0
.cfi_rel_offset r2, 4
.cfi_rel_offset r3, 8
@@ -130,47 +123,39 @@
.cfi_rel_offset r10, 28
.cfi_rel_offset r11, 32
.cfi_rel_offset lr, 36
- .cfi_adjust_cfa_offset 40
+ vpush {s0-s15} @ 16 words of float args.
+ .pad #64
+ .cfi_adjust_cfa_offset 64
sub sp, #8 @ 2 words of space, bottom word will hold Method*
.pad #8
.cfi_adjust_cfa_offset 8
+ // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 64 + 8)
+#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
+#endif
+.endm
+
+.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
+ SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
RUNTIME_CURRENT3 \rTemp1, \rTemp2 @ Load Runtime::Current into rTemp1.
THIS_LOAD_REQUIRES_READ_BARRIER
@ rTemp1 is kRefsAndArgs Method*.
ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
str \rTemp1, [sp, #0] @ Place Method* at bottom of stack.
str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame.
-
- // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
-#endif
.endm
.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
- push {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves
- .save {r1-r3, r5-r8, r10-r11, lr}
- .cfi_rel_offset r1, 0
- .cfi_rel_offset r2, 4
- .cfi_rel_offset r3, 8
- .cfi_rel_offset r5, 12
- .cfi_rel_offset r6, 16
- .cfi_rel_offset r7, 20
- .cfi_rel_offset r8, 24
- .cfi_rel_offset r10, 28
- .cfi_rel_offset r11, 32
- .cfi_rel_offset lr, 36
- .cfi_adjust_cfa_offset 40
- sub sp, #8 @ 2 words of space, bottom word will hold Method*
- .pad #8
- .cfi_adjust_cfa_offset 8
-
+ SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
str r0, [sp, #0] @ Store ArtMethod* to bottom of stack.
str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame.
.endm
.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
add sp, #8 @ rewind sp
+ .cfi_adjust_cfa_offset -8
+ vpop {s0-s15}
+ .cfi_adjust_cfa_offset -64
pop {r1-r3, r5-r8, r10-r11, lr} @ 10 words of callee saves
.cfi_restore r1
.cfi_restore r2
@@ -181,7 +166,7 @@
.cfi_restore r8
.cfi_restore r10
.cfi_restore r11
- .cfi_adjust_cfa_offset -48
+ .cfi_adjust_cfa_offset -40
.endm
@@ -373,60 +358,91 @@ INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvoke
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
/*
- * Quick invocation stub.
+ * Quick invocation stub internal.
* On entry:
* r0 = method pointer
* r1 = argument array or NULL for no argument methods
* r2 = size of argument array in bytes
* r3 = (managed) thread pointer
* [sp] = JValue* result
- * [sp + 4] = shorty
+ * [sp + 4] = result_in_float
+ * [sp + 8] = core register argument array
+ * [sp + 12] = fp register argument array
+ * +-------------------------+
+ * | uint32_t* fp_reg_args |
+ * | uint32_t* core_reg_args |
+ * | result_in_float | <- Caller frame
+ * | Jvalue* result |
+ * +-------------------------+
+ * | lr |
+ * | r11 |
+ * | r9 |
+ * | r4 | <- r11
+ * +-------------------------+
+ * | uint32_t out[n-1] |
+ * | : : | Outs
+ * | uint32_t out[0] |
+ * | StackRef<ArtMethod> | <- SP value=null
+ * +-------------------------+
*/
-ENTRY art_quick_invoke_stub
- push {r0, r4, r5, r9, r11, lr} @ spill regs
- .save {r0, r4, r5, r9, r11, lr}
- .pad #24
- .cfi_adjust_cfa_offset 24
- .cfi_rel_offset r0, 0
- .cfi_rel_offset r4, 4
- .cfi_rel_offset r5, 8
- .cfi_rel_offset r9, 12
- .cfi_rel_offset r11, 16
- .cfi_rel_offset lr, 20
+ENTRY art_quick_invoke_stub_internal
+ push {r4, r9, r11, lr} @ spill regs
+ .save {r4, r9, r11, lr}
+ .pad #16
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset r4, 0
+ .cfi_rel_offset r9, 4
+ .cfi_rel_offset r11, 8
+ .cfi_rel_offset lr, 12
mov r11, sp @ save the stack pointer
.cfi_def_cfa_register r11
+
mov r9, r3 @ move managed thread pointer into r9
-#ifdef ARM_R4_SUSPEND_FLAG
- mov r4, #SUSPEND_CHECK_INTERVAL @ reset r4 to suspend check interval
-#endif
- add r5, r2, #4 @ create space for method pointer in frame
- sub r5, sp, r5 @ reserve & align *stack* to 16 bytes: native calling
- and r5, #0xFFFFFFF0 @ convention only aligns to 8B, so we have to ensure ART
- mov sp, r5 @ 16B alignment ourselves.
+ add r4, r2, #4 @ create space for method pointer in frame
+ sub r4, sp, r4 @ reserve & align *stack* to 16 bytes: native calling
+ and r4, #0xFFFFFFF0 @ convention only aligns to 8B, so we have to ensure ART
+ mov sp, r4 @ 16B alignment ourselves.
+ mov r4, r0 @ save method*
add r0, sp, #4 @ pass stack pointer + method ptr as dest for memcpy
bl memcpy @ memcpy (dest, src, bytes)
- ldr r0, [r11] @ restore method*
- ldr r1, [sp, #4] @ copy arg value for r1
- ldr r2, [sp, #8] @ copy arg value for r2
- ldr r3, [sp, #12] @ copy arg value for r3
mov ip, #0 @ set ip to 0
str ip, [sp] @ store NULL for method* at bottom of frame
+
+ ldr ip, [r11, #28] @ load fp register argument array pointer
+ vldm ip, {s0-s15} @ copy s0 - s15
+
+ ldr ip, [r11, #24] @ load core register argument array pointer
+ mov r0, r4 @ restore method*
+ add ip, ip, #4 @ skip r0
+ ldm ip, {r1-r3} @ copy r1 - r3
+
+#ifdef ARM_R4_SUSPEND_FLAG
+ mov r4, #SUSPEND_CHECK_INTERVAL @ reset r4 to suspend check interval
+#endif
+
ldr ip, [r0, #MIRROR_ART_METHOD_QUICK_CODE_OFFSET] @ get pointer to the code
blx ip @ call the method
+
mov sp, r11 @ restore the stack pointer
- ldr ip, [sp, #24] @ load the result pointer
- strd r0, [ip] @ store r0/r1 into result pointer
- pop {r0, r4, r5, r9, r11, lr} @ restore spill regs
- .cfi_restore r0
+ .cfi_def_cfa_register sp
+
+ ldr r4, [sp, #20] @ load result_in_float
+ ldr r9, [sp, #16] @ load the result pointer
+ cmp r4, #0
+ ite eq
+ strdeq r0, [r9] @ store r0/r1 into result pointer
+ vstrne d0, [r9] @ store s0-s1/d0 into result pointer
+
+ pop {r4, r9, r11, lr} @ restore spill regs
.cfi_restore r4
- .cfi_restore r5
.cfi_restore r9
+ .cfi_restore r11
.cfi_restore lr
- .cfi_adjust_cfa_offset -24
+ .cfi_adjust_cfa_offset -16
bx lr
-END art_quick_invoke_stub
+END art_quick_invoke_stub_internal
/*
* On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
@@ -869,13 +885,14 @@ ENTRY art_quick_proxy_invoke_handler
mov r3, sp @ pass SP
blx artQuickProxyInvokeHandler @ (Method* proxy method, receiver, Thread*, SP)
ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_
- add sp, #16 @ skip r1-r3, 4 bytes padding.
- .cfi_adjust_cfa_offset -16
- cbnz r2, 1f @ success if no exception is pending
+ // Tear down the callee-save frame. Skip arg registers.
+ add sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+ .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+ cbnz r2, 1f @ success if no exception is pending
+ vmov d0, r0, r1 @ store into fpr, for when it's a fpr return...
bx lr @ return on success
1:
- RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
DELIVER_PENDING_EXCEPTION
END art_quick_proxy_invoke_handler
@@ -977,20 +994,13 @@ ENTRY art_quick_generic_jni_trampoline
ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_
cbnz r2, .Lexception_in_native
- // Tear down the callee-save frame.
- add sp, #12 @ rewind sp
- // Do not pop r0 and r1, they contain the return value.
- pop {r2-r3, r5-r8, r10-r11, lr} @ 9 words of callee saves
- .cfi_restore r2
- .cfi_restore r3
- .cfi_restore r5
- .cfi_restore r6
- .cfi_restore r7
- .cfi_restore r8
- .cfi_restore r10
- .cfi_restore r11
- .cfi_adjust_cfa_offset -48
+ // Tear down the callee-save frame. Skip arg registers.
+ add sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
+ .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+ RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
+ // store into fpr, for when it's a fpr return...
+ vmov d0, r0, r1
bx lr // ret
.Lentry_error:
@@ -1010,11 +1020,13 @@ ENTRY art_quick_to_interpreter_bridge
mov r2, sp @ pass SP
blx artQuickToInterpreterBridge @ (Method* method, Thread*, SP)
ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_
- add sp, #16 @ skip r1-r3, 4 bytes padding.
- .cfi_adjust_cfa_offset -16
+ // Tear down the callee-save frame. Skip arg registers.
+ add sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
+ .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
cbnz r2, 1f @ success if no exception is pending
- bx lr @ return on success
+ vmov d0, r0, r1 @ store into fpr, for when it's a fpr return...
+ bx lr @ return on success
1:
DELIVER_PENDING_EXCEPTION
END art_quick_to_interpreter_bridge
@@ -1435,3 +1447,54 @@ ENTRY art_quick_string_compareto
.Ldone:
pop {r4, r7-r12, pc}
END art_quick_string_compareto
+
+ /* Assembly routines used to handle ABI differences. */
+
+ /* double fmod(double a, double b) */
+ .extern fmod
+ENTRY art_quick_fmod
+ push {lr}
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset lr, 0
+ sub sp, #4
+ .cfi_adjust_cfa_offset 4
+ vmov r0, r1, d0
+ vmov r2, r3, d1
+ bl fmod
+ vmov d0, r0, r1
+ add sp, #4
+ .cfi_adjust_cfa_offset -4
+ pop {pc}
+ .cfi_adjust_cfa_offset -4
+END art_quick_fmod
+
+ /* float fmodf(float a, float b) */
+ .extern fmodf
+ENTRY art_quick_fmodf
+ push {lr}
+ .cfi_adjust_cfa_offset 4
+ .cfi_rel_offset lr, 0
+ sub sp, #4
+ .cfi_adjust_cfa_offset 4
+ vmov r0, r1, d0
+ bl fmodf
+ vmov s0, r0
+ add sp, #4
+ .cfi_adjust_cfa_offset -4
+ pop {pc}
+ .cfi_adjust_cfa_offset -4
+END art_quick_fmodf
+
+ /* int64_t art_d2l(double d) */
+ .extern art_d2l
+ENTRY art_quick_d2l
+ vmov r0, r1, d0
+ b art_d2l
+END art_quick_d2l
+
+ /* int64_t art_f2l(float f) */
+ .extern art_f2l
+ENTRY art_quick_f2l
+ vmov r0, s0
+ b art_f2l
+END art_quick_f2l
diff --git a/runtime/arch/arm/quick_entrypoints_cc_arm.cc b/runtime/arch/arm/quick_entrypoints_cc_arm.cc
new file mode 100644
index 0000000..e21e6c1
--- /dev/null
+++ b/runtime/arch/arm/quick_entrypoints_cc_arm.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "mirror/art_method.h"
+#include "utils.h" // For RoundUp().
+
+namespace art {
+
+// Assembly stub that does the final part of the up-call into Java.
+extern "C" void art_quick_invoke_stub_internal(mirror::ArtMethod*, uint32_t*, uint32_t,
+ Thread* self, JValue* result, uint32_t, uint32_t*,
+ uint32_t*);
+
+template <bool kIsStatic>
+static void quick_invoke_reg_setup(mirror::ArtMethod* method, uint32_t* args, uint32_t args_size,
+ Thread* self, JValue* result, const char* shorty) {
+ // Note: Quick code does not fully follow the AAPCS ABI, under either softfp or hardfp.
+ uint32_t core_reg_args[4]; // r0 ~ r3
+ uint32_t fp_reg_args[16]; // s0 ~ s15 (d0 ~ d7)
+ uint32_t gpr_index = 1; // Index into core registers. Reserve r0 for mirror::ArtMethod*.
+ uint32_t fpr_index = 0; // Index into float registers.
+ uint32_t fpr_double_index = 0; // Index into float registers for doubles.
+ uint32_t arg_index = 0; // Index into argument array.
+ const uint32_t result_in_float = kArm32QuickCodeUseSoftFloat ? 0 :
+ (shorty[0] == 'F' || shorty[0] == 'D') ? 1 : 0;
+
+ if (!kIsStatic) {
+ // Copy receiver for non-static methods.
+ core_reg_args[gpr_index++] = args[arg_index++];
+ }
+
+ for (uint32_t shorty_index = 1; shorty[shorty_index] != '\0'; ++shorty_index, ++arg_index) {
+ char arg_type = shorty[shorty_index];
+ if (kArm32QuickCodeUseSoftFloat) {
+ arg_type = (arg_type == 'D') ? 'J' : arg_type; // Regard double as long.
+ arg_type = (arg_type == 'F') ? 'I' : arg_type; // Regard float as int.
+ }
+ switch (arg_type) {
+ case 'D': {
+ // Copy double argument into fp_reg_args if there are still floating point reg arguments.
+ // Double should not overlap with float.
+ fpr_double_index = std::max(fpr_double_index, RoundUp(fpr_index, 2));
+ if (fpr_double_index < arraysize(fp_reg_args)) {
+ fp_reg_args[fpr_double_index++] = args[arg_index];
+ fp_reg_args[fpr_double_index++] = args[arg_index + 1];
+ }
+ ++arg_index;
+ break;
+ }
+ case 'F':
+ // Copy float argument into fp_reg_args if there are still floating point reg arguments.
+ // If fpr_index is odd then it's pointing at a hole next to an existing float argument; fill
+ // this float into that hole. If fpr_index is even, make sure we don't pick a slot that
+ // overlaps with a double already placed via fpr_double_index. In either case, take care
+ // not to go beyond the maximum number of floating point register arguments.
+ if (fpr_index % 2 == 0) {
+ fpr_index = std::max(fpr_double_index, fpr_index);
+ }
+ if (fpr_index < arraysize(fp_reg_args)) {
+ fp_reg_args[fpr_index++] = args[arg_index];
+ }
+ break;
+ case 'J':
+ if (gpr_index < arraysize(core_reg_args)) {
+ core_reg_args[gpr_index++] = args[arg_index];
+ }
+ ++arg_index;
+ FALLTHROUGH_INTENDED; // Fall-through to take care of the high part.
+ default:
+ if (gpr_index < arraysize(core_reg_args)) {
+ core_reg_args[gpr_index++] = args[arg_index];
+ }
+ break;
+ }
+ }
+
+ art_quick_invoke_stub_internal(method, args, args_size, self, result, result_in_float,
+ core_reg_args, fp_reg_args);
+}
+
+// Called by art::mirror::ArtMethod::Invoke to do entry into a non-static method.
+// TODO: migrate into an assembly implementation as with ARM64.
+extern "C" void art_quick_invoke_stub(mirror::ArtMethod* method, uint32_t* args, uint32_t args_size,
+ Thread* self, JValue* result, const char* shorty) {
+ quick_invoke_reg_setup<false>(method, args, args_size, self, result, shorty);
+}
+
+// Called by art::mirror::ArtMethod::Invoke to do entry into a static method.
+// TODO: migrate into an assembly implementation as with ARM64.
+extern "C" void art_quick_invoke_static_stub(mirror::ArtMethod* method, uint32_t* args,
+ uint32_t args_size, Thread* self, JValue* result,
+ const char* shorty) {
+ quick_invoke_reg_setup<true>(method, args, args_size, self, result, shorty);
+}
+
+} // namespace art
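To see the back-filling behaviour of quick_invoke_reg_setup() in action, here
is a small standalone C++ sketch (hypothetical, editorial; it mirrors only the
register-routing part of the function above and ignores the receiver and stack
spilling):

#include <algorithm>
#include <cstdio>

// Mirrors the hardfp routing above: r0 is reserved for the ArtMethod*,
// r1-r3 carry core args, s0-s15 carry FP args. Doubles take an aligned
// register pair; floats back-fill the single-precision holes doubles skip.
static void DescribeHardfpArgs(const char* shorty) {
  unsigned gpr = 1, fpr = 0, fpr_double = 0;
  for (const char* p = shorty + 1; *p != '\0'; ++p) {
    switch (*p) {
      case 'D':
        fpr_double = std::max(fpr_double, (fpr + 1u) & ~1u);  // RoundUp(fpr, 2)
        if (fpr_double < 16u) {
          std::printf("D -> d%u\n", fpr_double / 2u);
          fpr_double += 2u;
        }
        break;
      case 'F':
        if (fpr % 2u == 0u) fpr = std::max(fpr, fpr_double);  // Skip over doubles.
        if (fpr < 16u) std::printf("F -> s%u\n", fpr++);
        break;
      case 'J':
        if (gpr < 4u) std::printf("J lo -> r%u\n", gpr++);
        [[fallthrough]];  // High word takes the next core register.
      default:
        if (gpr < 4u) std::printf("%c -> r%u\n", *p, gpr++);
        break;
    }
  }
}

int main() {
  DescribeHardfpArgs("VFDF");  // F -> s0, D -> d1, F -> s1 (hole back-filled).
  return 0;
}

Note how the second float lands in s1, the hole left behind when the double
claimed the aligned pair d1 (s2/s3); this is exactly what the
fpr_index/fpr_double_index bookkeeping in the function above achieves.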
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 7595e94..c1f3fc2 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -25,6 +25,8 @@
namespace art {
namespace arm {
+static constexpr uint32_t kArmCalleeSaveAlwaysSpills =
+ (1 << art::arm::LR);
static constexpr uint32_t kArmCalleeSaveRefSpills =
(1 << art::arm::R5) | (1 << art::arm::R6) | (1 << art::arm::R7) | (1 << art::arm::R8) |
(1 << art::arm::R10) | (1 << art::arm::R11);
@@ -32,23 +34,30 @@ static constexpr uint32_t kArmCalleeSaveArgSpills =
(1 << art::arm::R1) | (1 << art::arm::R2) | (1 << art::arm::R3);
static constexpr uint32_t kArmCalleeSaveAllSpills =
(1 << art::arm::R4) | (1 << art::arm::R9);
-static constexpr uint32_t kArmCalleeSaveFpAllSpills =
+
+static constexpr uint32_t kArmCalleeSaveFpAlwaysSpills = 0;
+static constexpr uint32_t kArmCalleeSaveFpRefSpills = 0;
+static constexpr uint32_t kArmCalleeSaveFpArgSpills =
(1 << art::arm::S0) | (1 << art::arm::S1) | (1 << art::arm::S2) | (1 << art::arm::S3) |
(1 << art::arm::S4) | (1 << art::arm::S5) | (1 << art::arm::S6) | (1 << art::arm::S7) |
(1 << art::arm::S8) | (1 << art::arm::S9) | (1 << art::arm::S10) | (1 << art::arm::S11) |
- (1 << art::arm::S12) | (1 << art::arm::S13) | (1 << art::arm::S14) | (1 << art::arm::S15) |
+ (1 << art::arm::S12) | (1 << art::arm::S13) | (1 << art::arm::S14) | (1 << art::arm::S15);
+static constexpr uint32_t kArmCalleeSaveFpAllSpills =
(1 << art::arm::S16) | (1 << art::arm::S17) | (1 << art::arm::S18) | (1 << art::arm::S19) |
(1 << art::arm::S20) | (1 << art::arm::S21) | (1 << art::arm::S22) | (1 << art::arm::S23) |
(1 << art::arm::S24) | (1 << art::arm::S25) | (1 << art::arm::S26) | (1 << art::arm::S27) |
(1 << art::arm::S28) | (1 << art::arm::S29) | (1 << art::arm::S30) | (1 << art::arm::S31);
constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
- return kArmCalleeSaveRefSpills | (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
- (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0) | (1 << art::arm::LR);
+ return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
+ (type == Runtime::kRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
+ (type == Runtime::kSaveAll ? kArmCalleeSaveAllSpills : 0);
}
constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
- return type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0;
+ return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
+ (type == Runtime::kRefsAndArgs ? kArmCalleeSaveFpArgSpills : 0) |
+ (type == Runtime::kSaveAll ? kArmCalleeSaveFpAllSpills : 0);
}
constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {