diff options
author | Andreas Gampe <agampe@google.com> | 2014-06-16 18:39:09 -0700 |
---|---|---|
committer | Andreas Gampe <agampe@google.com> | 2014-07-01 11:06:24 -0700 |
commit | c200a4abeca91e19969f5b35543f17f812ba32b9 (patch) | |
tree | 5912a08310e7ddb4b3907c0dc687006669c0cedd /runtime/arch | |
parent | 73904fed884bf216b51acdc64402c427cc34725b (diff) | |
download | art-c200a4abeca91e19969f5b35543f17f812ba32b9.zip art-c200a4abeca91e19969f5b35543f17f812ba32b9.tar.gz art-c200a4abeca91e19969f5b35543f17f812ba32b9.tar.bz2 |
ART: Rework Generic JNI, add ARM version
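Refactors and optimizes Generic JNI. In this version
artQuickGenericJniTrampoline returns the native code pointer (0 on
error) and the bottom of the used alloca area as a TwoWordReturn, to
avoid writing them to / loading them from the bottom of the alloca.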
Change-Id: I3287007c976f79c9fd32d3b3a43f2d1371bf4cd3
Diffstat (limited to 'runtime/arch')
-rw-r--r-- | runtime/arch/arm/quick_entrypoints_arm.S | 89 | ||||
-rw-r--r-- | runtime/arch/arm64/quick_entrypoints_arm64.S | 47 | ||||
-rw-r--r-- | runtime/arch/x86/quick_entrypoints_x86.S | 38 | ||||
-rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 43 |
4 files changed, 146 insertions, 71 deletions
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 83a683d..4939610 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -127,7 +127,7 @@ // Ugly compile-time check, but we only have the preprocessor. #if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 8) -#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM64) size not as expected." +#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected." #endif .endm @@ -1007,7 +1007,92 @@ ENTRY art_quick_resolution_trampoline DELIVER_PENDING_EXCEPTION END art_quick_resolution_trampoline -UNIMPLEMENTED art_quick_generic_jni_trampoline + /* + * Called to do a generic JNI down-call + */ +ENTRY art_quick_generic_jni_trampoline + SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME + str r0, [sp, #0] // Store native ArtMethod* to bottom of stack. + + // Save rSELF + mov r11, rSELF + // Save SP , so we can have static CFI info. r10 is saved in ref_and_args. + mov r10, sp + .cfi_def_cfa_register r10 + + sub sp, sp, #5120 + + // prepare for artQuickGenericJniTrampoline call + // (Thread*, SP) + // r0 r1 <= C calling convention + // rSELF r10 <= where they are + + mov r0, rSELF // Thread* + mov r1, r10 + blx artQuickGenericJniTrampoline // (Thread*, sp) + + // The C call will have registered the complete save-frame on success. + // The result of the call is: + // r0: pointer to native code, 0 on error. + // r1: pointer to the bottom of the used area of the alloca, can restore stack till there. + + // Check for error = 0. + cbz r0, .Lentry_error + + // Release part of the alloca. + mov sp, r1 + + // Save the code pointer + mov r12, r0 + + // Load parameters from frame into registers. + pop {r0-r3} + + // Softfloat. + // TODO: Change to hardfloat when supported. + + blx r12 // native call. 
+ + // result sign extension is handled in C code + // prepare for artQuickGenericJniEndTrampoline call + // (Thread*, result, result_f) + // r0 r1,r2 r3,stack <= C calling convention + // r11 r0,r1 r0,r1 <= where they are + sub sp, sp, #12 // Stack alignment. + + push {r1} + mov r3, r0 + mov r2, r1 + mov r1, r0 + mov r0, r11 + + blx artQuickGenericJniEndTrampoline + + // Tear down the alloca. + mov sp, r10 + .cfi_def_cfa_register sp + + // Restore self pointer. + mov r9, r11 + + // Pending exceptions possible. + ldr r2, [r9, #THREAD_EXCEPTION_OFFSET] @ load Thread::Current()->exception_ + cbnz r2, .Lexception_in_native + + // Tear down the callee-save frame. + RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME + + bx lr // ret + +.Lentry_error: + mov sp, r10 + .cfi_def_cfa_register sp + mov r9, r11 +.Lexception_in_native: + RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME + DELIVER_PENDING_EXCEPTION + +END art_quick_generic_jni_trampoline .extern artQuickToInterpreterBridge ENTRY art_quick_to_interpreter_bridge diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index e088751..7907b6e 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1485,33 +1485,34 @@ ENTRY art_quick_generic_jni_trampoline mov x1, xFP bl artQuickGenericJniTrampoline // (Thread*, sp) - // Get the updated pointer. This is the bottom of the frame _with_ handle scope. - ldr xFP, [sp] - add x9, sp, #8 + // The C call will have registered the complete save-frame on success. + // The result of the call is: + // x0: pointer to native code, 0 on error. + // x1: pointer to the bottom of the used area of the alloca, can restore stack till there. - cmp x0, #0 - b.mi .Lentry_error // Check for error, negative value. + // Check for error = 0. + cbz x0, .Lentry_error - // release part of the alloca. - add x9, x9, x0 + // Release part of the alloca. 
+ mov sp, x1 - // Get the code pointer - ldr xIP0, [x9, #0] + // Save the code pointer + mov xIP0, x0 // Load parameters from frame into registers. // TODO Check with artQuickGenericJniTrampoline. // Also, check again APPCS64 - the stack arguments are interleaved. - ldp x0, x1, [x9, #8] - ldp x2, x3, [x9, #24] - ldp x4, x5, [x9, #40] - ldp x6, x7, [x9, #56] + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] - ldp d0, d1, [x9, #72] - ldp d2, d3, [x9, #88] - ldp d4, d5, [x9, #104] - ldp d6, d7, [x9, #120] + ldp d0, d1, [sp, #64] + ldp d2, d3, [sp, #80] + ldp d4, d5, [sp, #96] + ldp d6, d7, [sp, #112] - add sp, x9, #136 + add sp, sp, #128 blr xIP0 // native call. @@ -1520,13 +1521,11 @@ ENTRY art_quick_generic_jni_trampoline // result sign extension is handled in C code // prepare for artQuickGenericJniEndTrampoline call - // (Thread*, SP, result, result_f) - // x0 x1 x2 x3 <= C calling convention - mov x5, x0 // Save return value + // (Thread*, result, result_f) + // x0 x1 x2 <= C calling convention + mov x1, x0 // Result (from saved) mov x0, xSELF // Thread register - mov x1, xFP // Stack pointer - mov x2, x5 // Result (from saved) - fmov x3, d0 // d0 will contain floating point result, but needs to go into x3 + fmov x2, d0 // d0 will contain floating point result, but needs to go into x2 bl artQuickGenericJniEndTrampoline diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 0326f9e..24b9e46 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1127,8 +1127,7 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline // This also stores the native ArtMethod reference at the bottom of the stack. 
movl %esp, %ebp // save SP at callee-save frame - movl %esp, %edi - CFI_DEF_CFA_REGISTER(edi) + CFI_DEF_CFA_REGISTER(ebp) subl LITERAL(5120), %esp // prepare for artQuickGenericJniTrampoline call // (Thread*, SP) @@ -1141,46 +1140,39 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). SETUP_GOT_NOSAVE // Clobbers ebx. call PLT_SYMBOL(artQuickGenericJniTrampoline) // (Thread*, sp) - // Drop call stack. - addl LITERAL(16), %esp - // At the bottom of the alloca we now have the name pointer to the method=bottom of callee-save - // get the adjusted frame pointer - popl %ebp + // The C call will have registered the complete save-frame on success. + // The result of the call is: + // eax: pointer to native code, 0 on error. + // edx: pointer to the bottom of the used area of the alloca, can restore stack till there. - // Check for error, negative value. + // Check for error = 0. test %eax, %eax - js .Lentry_error + jz .Lentry_error - // release part of the alloca, get the code pointer - addl %eax, %esp - popl %eax + // Release part of the alloca. + movl %edx, %esp // On x86 there are no registers passed, so nothing to pop here. // Native call. call *%eax - // Pop native stack, but keep the space that was reserved cookie. - movl %ebp, %esp - subl LITERAL(16), %esp // Alignment. - // result sign extension is handled in C code // prepare for artQuickGenericJniEndTrampoline call - // (Thread*, SP, result, result_f) - // (esp) 4(esp) 8(esp) 16(esp) <= C calling convention - // fs:... ebp eax:edx xmm0 <= where they are + // (Thread*, result, result_f) + // (esp) 4(esp) 12(esp) <= C calling convention + // fs:... eax:edx xmm0 <= where they are - subl LITERAL(8), %esp // Pass float result. + subl LITERAL(20), %esp // Padding & pass float result. movsd %xmm0, (%esp) pushl %edx // Pass int result. pushl %eax - pushl %ebp // Pass SP (to ArtMethod). pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). 
call PLT_SYMBOL(artQuickGenericJniEndTrampoline) // Tear down the alloca. - movl %edi, %esp + movl %ebp, %esp CFI_DEF_CFA_REGISTER(esp) // Pending exceptions possible. @@ -1204,7 +1196,7 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline punpckldq %xmm1, %xmm0 ret .Lentry_error: - movl %edi, %esp + movl %ebp, %esp CFI_DEF_CFA_REGISTER(esp) .Lexception_in_native: RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 668fb88..8fa947c 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1167,11 +1167,9 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq %xmm5, 56(%rsp) movq %xmm6, 64(%rsp) movq %xmm7, 72(%rsp) - // Store native ArtMethod* to bottom of stack. - movq %rdi, 0(%rsp) - movq %rsp, %rbp // save SP at callee-save frame - movq %rsp, %rbx - CFI_DEF_CFA_REGISTER(rbx) + movq %rdi, 0(%rsp) // Store native ArtMethod* to bottom of stack. + movq %rsp, %rbp // save SP at (old) callee-save frame + CFI_DEF_CFA_REGISTER(rbp) // // reserve a lot of space // @@ -1198,17 +1196,17 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq %rbp, %rsi call PLT_SYMBOL(artQuickGenericJniTrampoline) // (Thread*, sp) - // At the bottom of the alloca we now have the name pointer to the method=bottom of callee-save - // get the adjusted frame pointer - popq %rbp + // The C call will have registered the complete save-frame on success. + // The result of the call is: + // %rax: pointer to native code, 0 on error. + // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there. - // Check for error, negative value. + // Check for error = 0. test %rax, %rax - js .Lentry_error + jz .Lentry_error - // release part of the alloca, get the code pointer - addq %rax, %rsp - popq %rax + // Release part of the alloca. 
+ movq %rdx, %rsp // pop from the register-passing alloca region // what's the right layout? @@ -1228,21 +1226,22 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq 48(%rsp), %xmm6 movq 56(%rsp), %xmm7 addq LITERAL(64), %rsp // floating-point done + // native call - call *%rax // Stack should be aligned 16B without the return addr? + call *%rax + // result sign extension is handled in C code // prepare for artQuickGenericJniEndTrampoline call - // (Thread*, SP, result, result_f) - // rdi rsi rdx rcx <= C calling convention - // gs:... rbp rax xmm0 <= where they are + // (Thread*, result, result_f) + // rdi rsi rdx <= C calling convention + // gs:... rax xmm0 <= where they are movq %gs:THREAD_SELF_OFFSET, %rdi - movq %rbp, %rsi - movq %rax, %rdx - movq %xmm0, %rcx + movq %rax, %rsi + movq %xmm0, %rdx call PLT_SYMBOL(artQuickGenericJniEndTrampoline) // Tear down the alloca. - movq %rbx, %rsp + movq %rbp, %rsp CFI_DEF_CFA_REGISTER(rsp) // Pending exceptions possible. @@ -1280,7 +1279,7 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline movq %rax, %xmm0 ret .Lentry_error: - movq %rbx, %rsp + movq %rbp, %rsp CFI_DEF_CFA_REGISTER(rsp) .Lexception_in_native: // TODO: the handle scope contains the this pointer which is used by the debugger for exception |