summary refs log tree commit diff stats
path: root/runtime/arch
diff options
context:
space:
mode:
author Andreas Gampe <agampe@google.com> 2014-06-16 18:39:09 -0700
committer Andreas Gampe <agampe@google.com> 2014-07-01 11:06:24 -0700
commit c200a4abeca91e19969f5b35543f17f812ba32b9 (patch)
tree 5912a08310e7ddb4b3907c0dc687006669c0cedd /runtime/arch
parent 73904fed884bf216b51acdc64402c427cc34725b (diff)
download art-c200a4abeca91e19969f5b35543f17f812ba32b9.zip
art-c200a4abeca91e19969f5b35543f17f812ba32b9.tar.gz
art-c200a4abeca91e19969f5b35543f17f812ba32b9.tar.bz2
ART: Rework Generic JNI, add ARM version
Refactors and optimizes Generic JNI. This version uses TwoWordReturn to avoid writing to / loading from the bottom of the alloca. Change-Id: I3287007c976f79c9fd32d3b3a43f2d1371bf4cd3
Diffstat (limited to 'runtime/arch')
-rw-r--r-- runtime/arch/arm/quick_entrypoints_arm.S 89
-rw-r--r-- runtime/arch/arm64/quick_entrypoints_arm64.S 47
-rw-r--r-- runtime/arch/x86/quick_entrypoints_x86.S 38
-rw-r--r-- runtime/arch/x86_64/quick_entrypoints_x86_64.S 43
4 files changed, 146 insertions, 71 deletions
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 83a683d..4939610 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -127,7 +127,7 @@
// Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 8)
-#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected."
+#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
#endif
.endm
@@ -1007,7 +1007,92 @@ ENTRY art_quick_resolution_trampoline
DELIVER_PENDING_EXCEPTION
END art_quick_resolution_trampoline
-UNIMPLEMENTED art_quick_generic_jni_trampoline
+ /*
+ * Called to do a generic JNI down-call: sets up the frame, asks artQuickGenericJniTrampoline for the native code pointer, calls it, then passes the raw result to artQuickGenericJniEndTrampoline.
+ */
+ENTRY art_quick_generic_jni_trampoline
+ SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+ str r0, [sp, #0] // Store native ArtMethod* to bottom of stack.
+
+ // Save rSELF in r11 so it can be recovered after the native call.
+ mov r11, rSELF
+ // Save SP, so we can have static CFI info. r10 is saved in ref_and_args.
+ mov r10, sp
+ .cfi_def_cfa_register r10
+
+ sub sp, sp, #5120 // Reserve the alloca below the frame; the unused part is released after the C call.
+
+ // prepare for artQuickGenericJniTrampoline call
+ // (Thread*, SP)
+ // r0 r1 <= C calling convention
+ // rSELF r10 <= where they are
+
+ mov r0, rSELF // Thread*
+ mov r1, r10 // SP as saved above (bottom of the callee-save frame).
+ blx artQuickGenericJniTrampoline // (Thread*, sp)
+
+ // The C call will have registered the complete save-frame on success.
+ // The result of the call is:
+ // r0: pointer to native code, 0 on error.
+ // r1: pointer to the bottom of the used area of the alloca, can restore stack till there.
+
+ // Check for error = 0.
+ cbz r0, .Lentry_error
+
+ // Release part of the alloca.
+ mov sp, r1
+
+ // Save the code pointer in r12; r0-r3 are about to be loaded with the arguments.
+ mov r12, r0
+
+ // Load parameters from frame into registers.
+ pop {r0-r3}
+
+ // Softfloat.
+ // TODO: Change to hardfloat when supported.
+
+ blx r12 // native call.
+
+ // result sign extension is handled in C code
+ // prepare for artQuickGenericJniEndTrampoline call
+ // (Thread*, result, result_f)
+ // r0 r2,r3 stack <= C calling convention
+ // r11 r0,r1 r0,r1 <= where they are
+ // AAPCS puts a 64-bit argument in an even/odd register pair (r1 is skipped) or an 8-byte stack slot. NOTE(review): assumes result and result_f are 8-byte-aligned 64-bit types - confirm against the C declaration.
+ sub sp, sp, #8 // Stack alignment: together with the 8-byte push below, SP moves by 16.
+
+ push {r0-r1} // result_f: both words go on the stack.
+ mov r3, r1 // result -> r2:r3
+ mov r2, r0
+ mov r0, r11 // Thread*
+
+ blx artQuickGenericJniEndTrampoline
+
+ // Tear down the alloca.
+ mov sp, r10
+ .cfi_def_cfa_register sp
+
+ // Restore self pointer.
+ mov r9, r11
+
+ // Pending exceptions possible.
+ ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_
+ cbnz r2, .Lexception_in_native
+
+ // Tear down the callee-save frame. NOTE(review): if this macro reloads r1, the second word of a 64-bit result is overwritten before the return - confirm.
+ RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+
+ bx lr // ret
+
+.Lentry_error:
+ mov sp, r10
+ .cfi_def_cfa_register sp
+ mov r9, r11
+.Lexception_in_native:
+ RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+ DELIVER_PENDING_EXCEPTION
+
+END art_quick_generic_jni_trampoline
.extern artQuickToInterpreterBridge
ENTRY art_quick_to_interpreter_bridge
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index e088751..7907b6e 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1485,33 +1485,34 @@ ENTRY art_quick_generic_jni_trampoline
mov x1, xFP
bl artQuickGenericJniTrampoline // (Thread*, sp)
- // Get the updated pointer. This is the bottom of the frame _with_ handle scope.
- ldr xFP, [sp]
- add x9, sp, #8
+ // The C call will have registered the complete save-frame on success.
+ // The result of the call is:
+ // x0: pointer to native code, 0 on error.
+ // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.
- cmp x0, #0
- b.mi .Lentry_error // Check for error, negative value.
+ // Check for error = 0.
+ cbz x0, .Lentry_error
- // release part of the alloca.
- add x9, x9, x0
+ // Release part of the alloca.
+ mov sp, x1
- // Get the code pointer
- ldr xIP0, [x9, #0]
+ // Save the code pointer
+ mov xIP0, x0
// Load parameters from frame into registers.
// TODO Check with artQuickGenericJniTrampoline.
// Also, check again AAPCS64 - the stack arguments are interleaved.
- ldp x0, x1, [x9, #8]
- ldp x2, x3, [x9, #24]
- ldp x4, x5, [x9, #40]
- ldp x6, x7, [x9, #56]
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #16]
+ ldp x4, x5, [sp, #32]
+ ldp x6, x7, [sp, #48]
- ldp d0, d1, [x9, #72]
- ldp d2, d3, [x9, #88]
- ldp d4, d5, [x9, #104]
- ldp d6, d7, [x9, #120]
+ ldp d0, d1, [sp, #64]
+ ldp d2, d3, [sp, #80]
+ ldp d4, d5, [sp, #96]
+ ldp d6, d7, [sp, #112]
- add sp, x9, #136
+ add sp, sp, #128
blr xIP0 // native call.
@@ -1520,13 +1521,11 @@ ENTRY art_quick_generic_jni_trampoline
// result sign extension is handled in C code
// prepare for artQuickGenericJniEndTrampoline call
- // (Thread*, SP, result, result_f)
- // x0 x1 x2 x3 <= C calling convention
- mov x5, x0 // Save return value
+ // (Thread*, result, result_f)
+ // x0 x1 x2 <= C calling convention
+ mov x1, x0 // Result (from saved)
mov x0, xSELF // Thread register
- mov x1, xFP // Stack pointer
- mov x2, x5 // Result (from saved)
- fmov x3, d0 // d0 will contain floating point result, but needs to go into x3
+ fmov x2, d0 // d0 will contain floating point result, but needs to go into x2
bl artQuickGenericJniEndTrampoline
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 0326f9e..24b9e46 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1127,8 +1127,7 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
// This also stores the native ArtMethod reference at the bottom of the stack.
movl %esp, %ebp // save SP at callee-save frame
- movl %esp, %edi
- CFI_DEF_CFA_REGISTER(edi)
+ CFI_DEF_CFA_REGISTER(ebp)
subl LITERAL(5120), %esp
// prepare for artQuickGenericJniTrampoline call
// (Thread*, SP)
@@ -1141,46 +1140,39 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
SETUP_GOT_NOSAVE // Clobbers ebx.
call PLT_SYMBOL(artQuickGenericJniTrampoline) // (Thread*, sp)
- // Drop call stack.
- addl LITERAL(16), %esp
- // At the bottom of the alloca we now have the name pointer to the method=bottom of callee-save
- // get the adjusted frame pointer
- popl %ebp
+ // The C call will have registered the complete save-frame on success.
+ // The result of the call is:
+ // eax: pointer to native code, 0 on error.
+ // edx: pointer to the bottom of the used area of the alloca, can restore stack till there.
- // Check for error, negative value.
+ // Check for error = 0.
test %eax, %eax
- js .Lentry_error
+ jz .Lentry_error
- // release part of the alloca, get the code pointer
- addl %eax, %esp
- popl %eax
+ // Release part of the alloca.
+ movl %edx, %esp
// On x86 there are no registers passed, so nothing to pop here.
// Native call.
call *%eax
- // Pop native stack, but keep the space that was reserved cookie.
- movl %ebp, %esp
- subl LITERAL(16), %esp // Alignment.
-
// result sign extension is handled in C code
// prepare for artQuickGenericJniEndTrampoline call
- // (Thread*, SP, result, result_f)
- // (esp) 4(esp) 8(esp) 16(esp) <= C calling convention
- // fs:... ebp eax:edx xmm0 <= where they are
+ // (Thread*, result, result_f)
+ // (esp) 4(esp) 12(esp) <= C calling convention
+ // fs:... eax:edx xmm0 <= where they are
- subl LITERAL(8), %esp // Pass float result.
+ subl LITERAL(20), %esp // Padding & pass float result.
movsd %xmm0, (%esp)
pushl %edx // Pass int result.
pushl %eax
- pushl %ebp // Pass SP (to ArtMethod).
pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
// Tear down the alloca.
- movl %edi, %esp
+ movl %ebp, %esp
CFI_DEF_CFA_REGISTER(esp)
// Pending exceptions possible.
@@ -1204,7 +1196,7 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
punpckldq %xmm1, %xmm0
ret
.Lentry_error:
- movl %edi, %esp
+ movl %ebp, %esp
CFI_DEF_CFA_REGISTER(esp)
.Lexception_in_native:
RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 668fb88..8fa947c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1167,11 +1167,9 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
movq %xmm5, 56(%rsp)
movq %xmm6, 64(%rsp)
movq %xmm7, 72(%rsp)
- // Store native ArtMethod* to bottom of stack.
- movq %rdi, 0(%rsp)
- movq %rsp, %rbp // save SP at callee-save frame
- movq %rsp, %rbx
- CFI_DEF_CFA_REGISTER(rbx)
+ movq %rdi, 0(%rsp) // Store native ArtMethod* to bottom of stack.
+ movq %rsp, %rbp // save SP at (old) callee-save frame
+ CFI_DEF_CFA_REGISTER(rbp)
//
// reserve a lot of space
//
@@ -1198,17 +1196,17 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
movq %rbp, %rsi
call PLT_SYMBOL(artQuickGenericJniTrampoline) // (Thread*, sp)
- // At the bottom of the alloca we now have the name pointer to the method=bottom of callee-save
- // get the adjusted frame pointer
- popq %rbp
+ // The C call will have registered the complete save-frame on success.
+ // The result of the call is:
+ // %rax: pointer to native code, 0 on error.
+ // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.
- // Check for error, negative value.
+ // Check for error = 0.
test %rax, %rax
- js .Lentry_error
+ jz .Lentry_error
- // release part of the alloca, get the code pointer
- addq %rax, %rsp
- popq %rax
+ // Release part of the alloca.
+ movq %rdx, %rsp
// pop from the register-passing alloca region
// what's the right layout?
@@ -1228,21 +1226,22 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
movq 48(%rsp), %xmm6
movq 56(%rsp), %xmm7
addq LITERAL(64), %rsp // floating-point done
+
// native call
- call *%rax // Stack should be aligned 16B without the return addr?
+ call *%rax
+
// result sign extension is handled in C code
// prepare for artQuickGenericJniEndTrampoline call
- // (Thread*, SP, result, result_f)
- // rdi rsi rdx rcx <= C calling convention
- // gs:... rbp rax xmm0 <= where they are
+ // (Thread*, result, result_f)
+ // rdi rsi rdx <= C calling convention
+ // gs:... rax xmm0 <= where they are
movq %gs:THREAD_SELF_OFFSET, %rdi
- movq %rbp, %rsi
- movq %rax, %rdx
- movq %xmm0, %rcx
+ movq %rax, %rsi
+ movq %xmm0, %rdx
call PLT_SYMBOL(artQuickGenericJniEndTrampoline)
// Tear down the alloca.
- movq %rbx, %rsp
+ movq %rbp, %rsp
CFI_DEF_CFA_REGISTER(rsp)
// Pending exceptions possible.
@@ -1280,7 +1279,7 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
movq %rax, %xmm0
ret
.Lentry_error:
- movq %rbx, %rsp
+ movq %rbp, %rsp
CFI_DEF_CFA_REGISTER(rsp)
.Lexception_in_native:
// TODO: the handle scope contains the this pointer which is used by the debugger for exception