diff options
author | Andreas Gampe <agampe@google.com> | 2014-03-05 16:11:04 -0800 |
---|---|---|
committer | Ian Rogers <irogers@google.com> | 2014-03-05 22:32:53 -0800 |
commit | bf6b92a158053c98b15f4393abb3b86344ec9a20 (patch) | |
tree | 93a28d1e01b045a8df23fe5279601496b62af3dd | |
parent | 359597ee8c0667a7d2c0b74dcbb0f90bb6059bae (diff) | |
download | art-bf6b92a158053c98b15f4393abb3b86344ec9a20.zip art-bf6b92a158053c98b15f4393abb3b86344ec9a20.tar.gz art-bf6b92a158053c98b15f4393abb3b86344ec9a20.tar.bz2 |
Generic JNI implementation for x86_64
Starting implementation for generic JNI on x86_64. Frames currently have a
large static size (>4K) and should be compacted later. Passes the whole of
jni_compiler_test.
Change-Id: I88ac3e13a534afe7568d62a1ef97cb766e8260e4
-rw-r--r-- | compiler/common_compiler_test.h | 38 | ||||
-rw-r--r-- | compiler/jni/jni_compiler_test.cc | 12 | ||||
-rw-r--r-- | compiler/jni/quick/calling_convention.cc | 8 | ||||
-rw-r--r-- | runtime/arch/x86/jni_entrypoints_x86.S | 4 | ||||
-rw-r--r-- | runtime/arch/x86/quick_entrypoints_x86.S | 2 | ||||
-rw-r--r-- | runtime/arch/x86_64/context_x86_64.cc | 67 | ||||
-rw-r--r-- | runtime/arch/x86_64/context_x86_64.h | 4 | ||||
-rw-r--r-- | runtime/arch/x86_64/jni_entrypoints_x86_64.S | 48 | ||||
-rw-r--r-- | runtime/arch/x86_64/quick_entrypoints_x86_64.S | 218 | ||||
-rw-r--r-- | runtime/arch/x86_64/registers_x86_64.h | 1 | ||||
-rw-r--r-- | runtime/class_linker.cc | 13 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_trampoline_entrypoints.cc | 474 | ||||
-rw-r--r-- | runtime/exception_test.cc | 2 | ||||
-rw-r--r-- | runtime/mirror/art_method.h | 2 | ||||
-rw-r--r-- | runtime/stack.h | 2 | ||||
-rw-r--r-- | runtime/stack_indirect_reference_table.h | 21 | ||||
-rw-r--r-- | runtime/thread.h | 3 |
17 files changed, 807 insertions, 112 deletions
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 3bdc95e..bca72b8 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -204,19 +204,31 @@ class CommonCompilerTest : public CommonRuntimeTest { } else { // No code? You must mean to go into the interpreter. // Or the generic JNI... - const void* method_code = method->IsNative() ? GetQuickGenericJniTrampoline() - : (kUsePortableCompiler - ? GetPortableToInterpreterBridge() - : GetQuickToInterpreterBridge()); - OatFile::OatMethod oat_method = CreateOatMethod(method_code, - kStackAlignment, - 0, - 0, - nullptr, - nullptr, - nullptr); - oat_method.LinkMethod(method); - method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge); + if (!method->IsNative()) { + const void* method_code = kUsePortableCompiler ? GetPortableToInterpreterBridge() + : GetQuickToInterpreterBridge(); + OatFile::OatMethod oat_method = CreateOatMethod(method_code, + kStackAlignment, + 0, + 0, + nullptr, + nullptr, + nullptr); + oat_method.LinkMethod(method); + method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge); + } else { + const void* method_code = GetQuickGenericJniTrampoline(); + mirror::ArtMethod* callee_save_method = runtime_->GetCalleeSaveMethod(Runtime::kRefsAndArgs); + OatFile::OatMethod oat_method = CreateOatMethod(method_code, + callee_save_method->GetFrameSizeInBytes(), + callee_save_method->GetCoreSpillMask(), + callee_save_method->GetFpSpillMask(), + nullptr, + nullptr, + nullptr); + oat_method.LinkMethod(method); + method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge); + } } // Create bridges to transition between different kinds of compiled bridge. 
if (method->GetEntryPointFromPortableCompiledCode() == nullptr) { diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index f48cf6c..a9fbc64 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -328,7 +328,9 @@ jobject Java_MyClassNatives_fooIOO(JNIEnv* env, jobject thisObj, jint x, jobject EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooIOO_calls++; ScopedObjectAccess soa(Thread::Current()); - EXPECT_EQ(3U, Thread::Current()->NumStackReferences()); + size_t null_args = (y == nullptr ? 1 : 0) + (z == nullptr ? 1 : 0); + EXPECT_TRUE(3U == Thread::Current()->NumStackReferences() || + (3U - null_args) == Thread::Current()->NumStackReferences()); switch (x) { case 1: return y; @@ -434,7 +436,9 @@ jobject Java_MyClassNatives_fooSIOO(JNIEnv* env, jclass klass, jint x, jobject y EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSIOO_calls++; ScopedObjectAccess soa(Thread::Current()); - EXPECT_EQ(3U, Thread::Current()->NumStackReferences()); + size_t null_args = (y == nullptr ? 1 : 0) + (z == nullptr ? 1 : 0); + EXPECT_TRUE(3U == Thread::Current()->NumStackReferences() || + (3U - null_args) == Thread::Current()->NumStackReferences()); switch (x) { case 1: return y; @@ -487,7 +491,9 @@ jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSSIOO_calls++; ScopedObjectAccess soa(Thread::Current()); - EXPECT_EQ(3U, Thread::Current()->NumStackReferences()); + size_t null_args = (y == nullptr ? 1 : 0) + (z == nullptr ? 
1 : 0); + EXPECT_TRUE(3U == Thread::Current()->NumStackReferences() || + (3U - null_args) == Thread::Current()->NumStackReferences()); switch (x) { case 1: return y; diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc index 5d5eaf2..ac962af 100644 --- a/compiler/jni/quick/calling_convention.cc +++ b/compiler/jni/quick/calling_convention.cc @@ -106,7 +106,7 @@ size_t JniCallingConvention::ReferenceCount() const { } FrameOffset JniCallingConvention::SavedLocalReferenceCookieOffset() const { - size_t start_of_sirt = SirtLinkOffset().Int32Value() + kPointerSize; + size_t start_of_sirt = SirtNumRefsOffset().Int32Value() + kPointerSize; size_t references_size = kPointerSize * ReferenceCount(); // size excluding header return FrameOffset(start_of_sirt + references_size); } @@ -158,11 +158,11 @@ bool JniCallingConvention::IsCurrentParamAReference() { // position FrameOffset JniCallingConvention::CurrentParamSirtEntryOffset() { CHECK(IsCurrentParamAReference()); - CHECK_GT(SirtLinkOffset(), SirtNumRefsOffset()); + CHECK_LT(SirtLinkOffset(), SirtNumRefsOffset()); // Address of 1st SIRT entry - int result = SirtLinkOffset().Int32Value() + kPointerSize; + int result = SirtNumRefsOffset().Int32Value() + kPointerSize; result += itr_refs_ * kPointerSize; - CHECK_GT(result, SirtLinkOffset().Int32Value()); + CHECK_GT(result, SirtNumRefsOffset().Int32Value()); return FrameOffset(result); } diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S index ebd82b5..45d7356 100644 --- a/runtime/arch/x86/jni_entrypoints_x86.S +++ b/runtime/arch/x86/jni_entrypoints_x86.S @@ -29,8 +29,8 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub UNDO_SETUP_GOT addl LITERAL(8), %esp // restore the stack CFI_ADJUST_CFA_OFFSET(-12) - cmpl LITERAL(0), %eax // check if returned method code is null - je .Lno_native_code_found // if null, jump to return to handle + testl %eax, %eax // check if returned method code is null + jz 
.Lno_native_code_found // if null, jump to return to handle jmp *%eax // otherwise, tail call to intended method .Lno_native_code_found: ret diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index b24bfd5..4bde8b7 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -44,10 +44,10 @@ END_MACRO MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME) addl MACRO_LITERAL(16), %esp // Unwind stack up to return address + CFI_ADJUST_CFA_OFFSET(-16) POP ebp // Restore callee saves (ebx is saved/restored by the upcall) POP esi POP edi - CFI_ADJUST_CFA_OFFSET(-28) END_MACRO /* diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc index 1310402..3f1f86d 100644 --- a/runtime/arch/x86_64/context_x86_64.cc +++ b/runtime/arch/x86_64/context_x86_64.cc @@ -26,8 +26,11 @@ namespace x86_64 { static const uintptr_t gZero = 0; void X86_64Context::Reset() { - for (size_t i = 0; i < kNumberOfCpuRegisters; i++) { - gprs_[i] = NULL; + for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { + gprs_[i] = nullptr; + } + for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) { + fprs_[i] = nullptr; } gprs_[RSP] = &rsp_; // Initialize registers with easy to spot debug values. @@ -38,19 +41,30 @@ void X86_64Context::Reset() { void X86_64Context::FillCalleeSaves(const StackVisitor& fr) { mirror::ArtMethod* method = fr.GetMethod(); uint32_t core_spills = method->GetCoreSpillMask(); + uint32_t fp_core_spills = method->GetFpSpillMask(); size_t spill_count = __builtin_popcount(core_spills); - DCHECK_EQ(method->GetFpSpillMask(), 0u); + size_t fp_spill_count = __builtin_popcount(fp_core_spills); size_t frame_size = method->GetFrameSizeInBytes(); if (spill_count > 0) { // Lowest number spill is farthest away, walk registers and fill into context. - int j = 2; // Offset j to skip return address spill. 
- for (int i = 0; i < kNumberOfCpuRegisters; i++) { + size_t j = 2; // Offset j to skip return address spill. + for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { if (((core_spills >> i) & 1) != 0) { gprs_[i] = fr.CalleeSaveAddress(spill_count - j, frame_size); j++; } } } + if (fp_spill_count > 0) { + // Lowest number spill is farthest away, walk registers and fill into context. + size_t j = 2; // Offset j to skip return address spill. + for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) { + if (((fp_core_spills >> i) & 1) != 0) { + fprs_[i] = fr.CalleeSaveAddress(spill_count + fp_spill_count - j, frame_size); + j++; + } + } + } } void X86_64Context::SmashCallerSaves() { @@ -58,7 +72,12 @@ void X86_64Context::SmashCallerSaves() { gprs_[RAX] = const_cast<uintptr_t*>(&gZero); gprs_[RDX] = const_cast<uintptr_t*>(&gZero); gprs_[RCX] = nullptr; - gprs_[RBX] = nullptr; + gprs_[RSI] = nullptr; + gprs_[RDI] = nullptr; + gprs_[R8] = nullptr; + gprs_[R9] = nullptr; + gprs_[R10] = nullptr; + gprs_[R11] = nullptr; } void X86_64Context::SetGPR(uint32_t reg, uintptr_t value) { @@ -69,7 +88,43 @@ void X86_64Context::SetGPR(uint32_t reg, uintptr_t value) { } void X86_64Context::DoLongJump() { +#if defined(__x86_64__) + // Array of GPR values, filled from the context backward for the long jump pop. We add a slot at + // the top for the stack pointer that doesn't get popped in a pop-all. + volatile uintptr_t gprs[kNumberOfCpuRegisters + 1]; + for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) { + gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != NULL ? *gprs_[i] : X86_64Context::kBadGprBase + i; + } + // We want to load the stack pointer one slot below so that the ret will pop eip. + uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize; + gprs[kNumberOfCpuRegisters] = rsp; + *(reinterpret_cast<uintptr_t*>(rsp)) = rip_; + __asm__ __volatile__( + "movq %0, %%rsp\n\t" // RSP points to gprs. 
+ "popq %%r15\n\t" // Load all registers except RSP and RIP with values in gprs. + "popq %%r14\n\t" + "popq %%r13\n\t" + "popq %%r12\n\t" + "popq %%r11\n\t" + "popq %%r10\n\t" + "popq %%r9\n\t" + "popq %%r8\n\t" + "popq %%rdi\n\t" + "popq %%rsi\n\t" + "popq %%rbp\n\t" + "addq $8, %%rsp\n\t" + "popq %%rbx\n\t" + "popq %%rdx\n\t" + "popq %%rcx\n\t" + "popq %%rax\n\t" + "popq %%rsp\n\t" // Load stack pointer. + "ret\n\t" // From higher in the stack pop rip. + : // output. + : "g"(&gprs[0]) // input. + :); // clobber. +#else UNIMPLEMENTED(FATAL); +#endif } } // namespace x86_64 diff --git a/runtime/arch/x86_64/context_x86_64.h b/runtime/arch/x86_64/context_x86_64.h index 78ef89c..055df61 100644 --- a/runtime/arch/x86_64/context_x86_64.h +++ b/runtime/arch/x86_64/context_x86_64.h @@ -59,9 +59,9 @@ class X86_64Context : public Context { virtual void DoLongJump(); private: - // Pointers to register locations, floating point registers are all caller save. Values are - // initialized to NULL or the special registers below. + // Pointers to register locations. Values are initialized to NULL or the special registers below. uintptr_t* gprs_[kNumberOfCpuRegisters]; + uint64_t* fprs_[kNumberOfFloatRegisters]; // Hold values for rsp and rip if they are not located within a stack frame. RIP is somewhat // special in that it cannot be encoded normally as a register operand to an instruction (except // in 64bit addressing modes). diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S index 35fcccb..10f39b7 100644 --- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S @@ -19,4 +19,50 @@ /* * Jni dlsym lookup stub. */ -UNIMPLEMENTED art_jni_dlsym_lookup_stub +DEFINE_FUNCTION art_jni_dlsym_lookup_stub + // Save callee and GPR args, mixed together to agree with core spills bitmap. + PUSH r9 // Arg. + PUSH r8 // Arg. + PUSH rdi // + PUSH rsi // Arg. + PUSH rdx // Arg. + PUSH rcx // Arg. 
+ // Create space for FPR args, plus padding for alignment + subq LITERAL(72), %rsp + CFI_ADJUST_CFA_OFFSET(72) + // Save FPRs. + movq %xmm0, 0(%rsp) + movq %xmm1, 8(%rsp) + movq %xmm2, 16(%rsp) + movq %xmm3, 24(%rsp) + movq %xmm4, 32(%rsp) + movq %xmm5, 40(%rsp) + movq %xmm6, 48(%rsp) + movq %xmm7, 56(%rsp) + // prepare call + movq %gs:THREAD_SELF_OFFSET, %rsi // RSI := Thread::Current() + // call + call PLT_SYMBOL(artFindNativeMethod) // (Thread*) + // restore arguments + movq 0(%rsp), %xmm0 + movq 8(%rsp), %xmm1 + movq 16(%rsp), %xmm2 + movq 24(%rsp), %xmm3 + movq 32(%rsp), %xmm4 + movq 40(%rsp), %xmm5 + movq 48(%rsp), %xmm6 + movq 56(%rsp), %xmm7 + addq LITERAL(72), %rsp + CFI_ADJUST_CFA_OFFSET(-72) + POP rcx // Arg. + POP rdx // Arg. + POP rsi // Arg. + POP rdi // + POP r8 // Arg. + POP r9 // Arg. + testq %rax, %rax // check if returned method code is null + jz .Lno_native_code_found // if null, jump to return to handle + jmp *%rax // otherwise, tail call to intended method +.Lno_native_code_found: + ret +END_FUNCTION art_jni_dlsym_lookup_stub diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 32e8434..4cd7880 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -23,8 +23,22 @@ * Runtime::CreateCalleeSaveMethod(kSaveAll) */ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME) - int3 - int3 + // R10 := Runtime::Current() + movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10 + movq (%r10), %r10 + // Save callee and GPR args, mixed together to agree with core spills bitmap. + PUSH r15 // Callee save. + PUSH r14 // Callee save. + PUSH r13 // Callee save. + PUSH r12 // Callee save. + PUSH rbp // Callee save. + PUSH rbx // Callee save. + subq LITERAL(8), %rsp // Space for Method* (also aligns the frame). + CFI_ADJUST_CFA_OFFSET(8) + // R10 := ArtMethod* for ref and args callee save frame method. 
+ movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10 + // Store ArtMethod* to bottom of stack. + movq %r10, 0(%rsp) END_MACRO /* @@ -54,13 +68,13 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME) PUSH r14 // Callee save. PUSH r13 // Callee save. PUSH r12 // Callee save. - PUSH r9 // Arg. - PUSH r8 // Arg. - PUSH rsi // Arg. + PUSH r9 // Quick arg 5. + PUSH r8 // Quick arg 4. + PUSH rsi // Quick arg 1. PUSH rbp // Callee save. PUSH rbx // Callee save. - PUSH rdx // Arg. - PUSH rcx // Arg. + PUSH rdx // Quick arg 2. + PUSH rcx // Quick arg 3. // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. subq LITERAL(80), %rsp CFI_ADJUST_CFA_OFFSET(80) @@ -105,13 +119,18 @@ MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME) POP r15 END_MACRO + /* * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending * exception is Thread::Current()->exception_. */ MACRO0(DELIVER_PENDING_EXCEPTION) - int3 - int3 + SETUP_SAVE_ALL_CALLEE_SAVE_FRAME // save callee saves for throw + // (Thread*, SP) setup + movq %gs:THREAD_SELF_OFFSET, %rdi + movq %rsp, %rsi + call PLT_SYMBOL(artDeliverPendingExceptionFromCode) // artDeliverPendingExceptionFromCode(Thread*, SP) + int3 // unreached END_MACRO MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name) @@ -635,11 +654,188 @@ UNIMPLEMENTED art_quick_proxy_invoke_handler UNIMPLEMENTED art_quick_imt_conflict_trampoline UNIMPLEMENTED art_quick_resolution_trampoline - +/* Proposed Generic JNI setup + * + * #-------------------# + * | | + * | caller method... 
| + * #-------------------# <--- SP on entry + * | Return | + * | R15 | callee save + * | R14 | callee save + * | R13 | callee save + * | R12 | callee save + * | R9 | arg5 + * | R8 | arg4 + * | RSI/R6 | arg1 + * | RBP/R5 | callee save + * | RBX/R3 | callee save + * | RDX/R2 | arg2 + * | RCX/R1 | arg3 + * | XMM7 | float arg 8 + * | XMM6 | float arg 7 + * | XMM5 | float arg 6 + * | XMM4 | float arg 5 + * | XMM3 | float arg 4 + * | XMM2 | float arg 3 + * | XMM1 | float arg 2 + * | XMM0 | float arg 1 + * | Padding | + * | RDI/Method* | <- sp + * #-------------------# + * | local ref cookie | // 4B + * | padding | // 4B + * #----------#--------# + * | | | | + * | Temp/ | SIRT | | Scratch frame is 4k + * | Scratch | v | + * | Frame #--------| + * | | + * | #--------| + * | | ^ | + * | | JNI | | + * | | Stack| | + * #----------#--------# <--- SP on native call (needs alignment?) + * | | + * | Stack for Regs | The trampoline assembly will pop these values + * | | into registers for native call + * #---------#---------# + * | | sp* | + * | Tramp. #---------# + * | args | thread | + * | Tramp. #---------# + * | | method | + * #-------------------# <--- SP on artQuickGenericJniTrampoline + */ /* * Called to do a generic JNI down-call */ -UNIMPLEMENTED art_quick_generic_jni_trampoline +DEFINE_FUNCTION art_quick_generic_jni_trampoline + // Save callee and GPR args, mixed together to agree with core spills bitmap. + // Save callee and GPR args, mixed together to agree with core spills bitmap. + PUSH r15 // Callee save. + PUSH r14 // Callee save. + PUSH r13 // Callee save. + PUSH r12 // Callee save. + PUSH r9 // Quick arg 5. + PUSH r8 // Quick arg 4. + PUSH rsi // Quick arg 1. + PUSH rbp // Callee save. + PUSH rbx // Callee save. + PUSH rdx // Quick arg 2. + PUSH rcx // Quick arg 3. + // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*. + subq LITERAL(80), %rsp + CFI_ADJUST_CFA_OFFSET(80) + // Save FPRs. 
+ movq %xmm0, 16(%rsp) + movq %xmm1, 24(%rsp) + movq %xmm2, 32(%rsp) + movq %xmm3, 40(%rsp) + movq %xmm4, 48(%rsp) + movq %xmm5, 56(%rsp) + movq %xmm6, 64(%rsp) + movq %xmm7, 72(%rsp) + // Store native ArtMethod* to bottom of stack. + movq %rdi, 0(%rsp) + movq %rsp, %rbp // save SP at callee-save frame + CFI_DEF_CFA_REGISTER(rbp) + // + // reserve a lot of space + // + // 4 local state ref + // 4 padding + // 4196 4k scratch space, enough for 2x 256 8-byte parameters (TODO: SIRT overhead?) + // 16 SIRT member fields ? + // + 112 14x 8-byte stack-2-register space + // ------ + // 4332 + // 16-byte aligned: 4336 + // Note: 14x8 = 7*16, so the stack stays aligned for the native call... + // Also means: the padding is somewhere in the middle + subq LITERAL(4336), %rsp + // prepare for artQuickGenericJniTrampoline call + // (Thread*, SP) + // rdi rsi <= C calling convention + // gs:... rbp <= where they are + movq %gs:THREAD_SELF_OFFSET, %rdi + movq %rbp, %rsi + call PLT_SYMBOL(artQuickGenericJniTrampoline) + test %rax, %rax // check whether code pointer is NULL, also indicates exception + jz 1f + // pop from the register-passing alloca + // what's the right layout? + popq %rdi + popq %rsi + popq %rdx + popq %rcx + popq %r8 + popq %r9 + // TODO: skip floating point if unused, some flag. + movq 0(%rsp), %xmm0 + movq 8(%rsp), %xmm1 + movq 16(%rsp), %xmm2 + movq 24(%rsp), %xmm3 + movq 32(%rsp), %xmm4 + movq 40(%rsp), %xmm5 + movq 48(%rsp), %xmm6 + movq 56(%rsp), %xmm7 + addq LITERAL(64), %rsp // floating-point done + // native call + call *%rax // Q: is the stack aligned 16B with or without the return addr? + // result sign extension is handled in C code + // prepare for artQuickGenericJniEndTrampoline call + // (Thread*, SP, result, result_f) + // rdi rsi rdx rcx <= C calling convention + // gs:... 
rbp rax xmm0 <= where they are + movq %gs:THREAD_SELF_OFFSET, %rdi + movq %rbp, %rsi + movq %rax, %rdx + movq %xmm0, %rcx + call PLT_SYMBOL(artQuickGenericJniEndTrampoline) + // tear down the alloca already + movq %rbp, %rsp + CFI_DEF_CFA_REGISTER(rsp) + // Exceptions possible. + // TODO: use cmpq, needs direct encoding because of gas bug + movq %gs:THREAD_EXCEPTION_OFFSET, %rbx + test %rbx, %rbx + jnz 2f + // Tear down the callee-save frame + // Load FPRs. + // movq %xmm0, 16(%rsp) // doesn't make sense!!! + movq 24(%rsp), %xmm1 // neither does this!!! + movq 32(%rsp), %xmm2 + movq 40(%rsp), %xmm3 + movq 48(%rsp), %xmm4 + movq 56(%rsp), %xmm5 + movq 64(%rsp), %xmm6 + movq 72(%rsp), %xmm7 + // was 80 bytes + addq LITERAL(80), %rsp + CFI_ADJUST_CFA_OFFSET(-80) + // Save callee and GPR args, mixed together to agree with core spills bitmap. + POP rcx // Arg. + POP rdx // Arg. + POP rbx // Callee save. + POP rbp // Callee save. + POP rsi // Arg. + POP r8 // Arg. + POP r9 // Arg. + POP r12 // Callee save. + POP r13 // Callee save. + POP r14 // Callee save. + POP r15 // Callee save. + // store into fpr, for when it's a fpr return... + movq %rax, %xmm0 + ret +1: + // tear down the _whole_ scratch space, assumes SIRT is empty, cookie not valid etc. + movq %rbp, %rsp + CFI_DEF_CFA_REGISTER(rsp) +2: RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME + DELIVER_PENDING_EXCEPTION +END_FUNCTION art_quick_generic_jni_trampoline /* * Called to bridge from the quick to interpreter ABI. 
On entry the arguments match those diff --git a/runtime/arch/x86_64/registers_x86_64.h b/runtime/arch/x86_64/registers_x86_64.h index c1a9942..8b0dc07 100644 --- a/runtime/arch/x86_64/registers_x86_64.h +++ b/runtime/arch/x86_64/registers_x86_64.h @@ -65,6 +65,7 @@ enum FloatRegister { XMM13 = 13, XMM14 = 14, XMM15 = 15, + kNumberOfFloatRegisters = 16 }; std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs); diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 7e43994..6255c8c 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -1747,7 +1747,7 @@ static void LinkCode(const SirtRef<mirror::ArtMethod>& method, const OatFile::Oa bool enter_interpreter = NeedsInterpreter(method.get(), method->GetEntryPointFromQuickCompiledCode(), method->GetEntryPointFromPortableCompiledCode()); - if (enter_interpreter) { + if (enter_interpreter && !method->IsNative()) { method->SetEntryPointFromInterpreter(interpreter::artInterpreterToInterpreterBridge); } else { method->SetEntryPointFromInterpreter(artInterpreterToCompiledCodeBridge); @@ -1767,9 +1767,14 @@ static void LinkCode(const SirtRef<mirror::ArtMethod>& method, const OatFile::Oa method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionTrampoline(runtime->GetClassLinker())); method->SetEntryPointFromPortableCompiledCode(GetPortableResolutionTrampoline(runtime->GetClassLinker())); } else if (enter_interpreter) { - // Set entry point from compiled code if there's no code or in interpreter only mode. - method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge()); - method->SetEntryPointFromPortableCompiledCode(GetPortableToInterpreterBridge()); + if (!method->IsNative()) { + // Set entry point from compiled code if there's no code or in interpreter only mode. 
+ method->SetEntryPointFromQuickCompiledCode(GetQuickToInterpreterBridge()); + method->SetEntryPointFromPortableCompiledCode(GetPortableToInterpreterBridge()); + } else { + method->SetEntryPointFromQuickCompiledCode(GetQuickGenericJniTrampoline()); + method->SetEntryPointFromPortableCompiledCode(GetPortableToQuickBridge()); + } } else if (method->GetEntryPointFromPortableCompiledCode() != nullptr) { DCHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr); have_portable_code = true; diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index 63e0d42..bf8b8ba 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -57,9 +57,9 @@ class QuickArgumentVisitor { // | R1 | arg1 // | R0 | padding // | Method* | <- sp - static constexpr bool kSoftFloatAbi = true; // This is a soft float ABI. - static constexpr size_t kNumGprArgs = 3; // 3 arguments passed in GPRs. - static constexpr size_t kNumFprArgs = 0; // 0 arguments passed in FPRs. + static constexpr bool kQuickSoftFloatAbi = true; // This is a soft float ABI. + static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs. + static constexpr size_t kNumQuickFprArgs = 0; // 0 arguments passed in FPRs. static constexpr size_t kBytesPerFprSpillLocation = 4; // FPR spill size is 4 bytes. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 8; // Offset of first GPR arg. @@ -83,9 +83,9 @@ class QuickArgumentVisitor { // | A2 | arg2 // | A1 | arg1 // | A0/Method* | <- sp - static constexpr bool kSoftFloatAbi = true; // This is a soft float ABI. - static constexpr size_t kNumGprArgs = 3; // 3 arguments passed in GPRs. - static constexpr size_t kNumFprArgs = 0; // 0 arguments passed in FPRs. 
+ static constexpr bool kQuickSoftFloatAbi = true; // This is a soft float ABI. + static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs. + static constexpr size_t kNumQuickFprArgs = 0; // 0 arguments passed in FPRs. static constexpr size_t kBytesPerFprSpillLocation = 4; // FPR spill size is 4 bytes. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4; // Offset of first GPR arg. @@ -109,9 +109,9 @@ class QuickArgumentVisitor { // | EDX | arg2 // | ECX | arg1 // | EAX/Method* | <- sp - static constexpr bool kSoftFloatAbi = true; // This is a soft float ABI. - static constexpr size_t kNumGprArgs = 3; // 3 arguments passed in GPRs. - static constexpr size_t kNumFprArgs = 0; // 0 arguments passed in FPRs. + static constexpr bool kQuickSoftFloatAbi = true; // This is a soft float ABI. + static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs. + static constexpr size_t kNumQuickFprArgs = 0; // 0 arguments passed in FPRs. static constexpr size_t kBytesPerFprSpillLocation = 8; // FPR spill size is 8 bytes. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 0; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 4; // Offset of first GPR arg. @@ -148,9 +148,9 @@ class QuickArgumentVisitor { // | XMM0 | float arg 1 // | Padding | // | RDI/Method* | <- sp - static constexpr bool kSoftFloatAbi = false; // This is a hard float ABI. - static constexpr size_t kNumGprArgs = 5; // 3 arguments passed in GPRs. - static constexpr size_t kNumFprArgs = 8; // 0 arguments passed in FPRs. + static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI. + static constexpr size_t kNumQuickGprArgs = 5; // 3 arguments passed in GPRs. + static constexpr size_t kNumQuickFprArgs = 8; // 0 arguments passed in FPRs. 
static constexpr size_t kBytesPerFprSpillLocation = 8; // FPR spill size is 8 bytes. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg. static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg. @@ -211,15 +211,15 @@ class QuickArgumentVisitor { } byte* GetParamAddress() const { - if (!kSoftFloatAbi) { + if (!kQuickSoftFloatAbi) { Primitive::Type type = GetParamPrimitiveType(); if (UNLIKELY((type == Primitive::kPrimDouble) || (type == Primitive::kPrimFloat))) { - if ((kNumFprArgs != 0) && (fpr_index_ + 1 < kNumFprArgs + 1)) { + if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) { return fpr_args_ + (fpr_index_ * kBytesPerFprSpillLocation); } } } - if (gpr_index_ < kNumGprArgs) { + if (gpr_index_ < kNumQuickGprArgs) { return gpr_args_ + GprIndexToGprOffset(gpr_index_); } return stack_args_ + (stack_index_ * kBytesStackArgLocation); @@ -257,7 +257,7 @@ class QuickArgumentVisitor { cur_type_ = Primitive::kPrimNot; is_split_long_or_double_ = false; Visit(); - if (kNumGprArgs > 0) { + if (kNumQuickGprArgs > 0) { gpr_index_++; } else { stack_index_++; @@ -274,7 +274,7 @@ class QuickArgumentVisitor { case Primitive::kPrimInt: is_split_long_or_double_ = false; Visit(); - if (gpr_index_ < kNumGprArgs) { + if (gpr_index_ < kNumQuickGprArgs) { gpr_index_++; } else { stack_index_++; @@ -283,14 +283,14 @@ class QuickArgumentVisitor { case Primitive::kPrimFloat: is_split_long_or_double_ = false; Visit(); - if (kSoftFloatAbi) { - if (gpr_index_ < kNumGprArgs) { + if (kQuickSoftFloatAbi) { + if (gpr_index_ < kNumQuickGprArgs) { gpr_index_++; } else { stack_index_++; } } else { - if ((kNumFprArgs != 0) && (fpr_index_ + 1 < kNumFprArgs + 1)) { + if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) { fpr_index_++; } else { stack_index_++; @@ -299,14 +299,14 @@ class QuickArgumentVisitor { break; case Primitive::kPrimDouble: case Primitive::kPrimLong: - 
if (kSoftFloatAbi || (cur_type_ == Primitive::kPrimLong)) { + if (kQuickSoftFloatAbi || (cur_type_ == Primitive::kPrimLong)) { is_split_long_or_double_ = (kBytesPerGprSpillLocation == 4) && - ((gpr_index_ + 1) == kNumGprArgs); + ((gpr_index_ + 1) == kNumQuickGprArgs); Visit(); - if (gpr_index_ < kNumGprArgs) { + if (gpr_index_ < kNumQuickGprArgs) { gpr_index_++; if (kBytesPerGprSpillLocation == 4) { - if (gpr_index_ < kNumGprArgs) { + if (gpr_index_ < kNumQuickGprArgs) { gpr_index_++; } else { stack_index_++; @@ -322,12 +322,12 @@ class QuickArgumentVisitor { } } else { is_split_long_or_double_ = (kBytesPerFprSpillLocation == 4) && - ((fpr_index_ + 1) == kNumFprArgs); + ((fpr_index_ + 1) == kNumQuickFprArgs); Visit(); - if ((kNumFprArgs != 0) && (fpr_index_ + 1 < kNumFprArgs + 1)) { + if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) { fpr_index_++; if (kBytesPerFprSpillLocation == 4) { - if ((kNumFprArgs != 0) && (fpr_index_ + 1 < kNumFprArgs + 1)) { + if ((kNumQuickFprArgs != 0) && (fpr_index_ + 1 < kNumQuickFprArgs + 1)) { fpr_index_++; } else { stack_index_++; @@ -352,14 +352,14 @@ class QuickArgumentVisitor { private: static size_t StackArgumentStartFromShorty(bool is_static, const char* shorty, uint32_t shorty_len) { - if (kSoftFloatAbi) { - CHECK_EQ(kNumFprArgs, 0U); - return (kNumGprArgs * kBytesPerGprSpillLocation) + kBytesPerGprSpillLocation /* ArtMethod* */; + if (kQuickSoftFloatAbi) { + CHECK_EQ(kNumQuickFprArgs, 0U); + return (kNumQuickGprArgs * kBytesPerGprSpillLocation) + kBytesPerGprSpillLocation /* ArtMethod* */; } else { size_t offset = kBytesPerGprSpillLocation; // Skip Method*. 
size_t gprs_seen = 0; size_t fprs_seen = 0; - if (!is_static && (gprs_seen < kNumGprArgs)) { + if (!is_static && (gprs_seen < kNumQuickGprArgs)) { gprs_seen++; offset += kBytesStackArgLocation; } @@ -371,34 +371,34 @@ class QuickArgumentVisitor { case 'S': case 'I': case 'L': - if (gprs_seen < kNumGprArgs) { + if (gprs_seen < kNumQuickGprArgs) { gprs_seen++; offset += kBytesStackArgLocation; } break; case 'J': - if (gprs_seen < kNumGprArgs) { + if (gprs_seen < kNumQuickGprArgs) { gprs_seen++; offset += 2 * kBytesStackArgLocation; if (kBytesPerGprSpillLocation == 4) { - if (gprs_seen < kNumGprArgs) { + if (gprs_seen < kNumQuickGprArgs) { gprs_seen++; } } } break; case 'F': - if ((kNumFprArgs != 0) && (fprs_seen + 1 < kNumFprArgs + 1)) { + if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) { fprs_seen++; offset += kBytesStackArgLocation; } break; case 'D': - if ((kNumFprArgs != 0) && (fprs_seen + 1 < kNumFprArgs + 1)) { + if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) { fprs_seen++; offset += 2 * kBytesStackArgLocation; if (kBytesPerFprSpillLocation == 4) { - if ((kNumFprArgs != 0) && (fprs_seen + 1 < kNumFprArgs + 1)) { + if ((kNumQuickFprArgs != 0) && (fprs_seen + 1 < kNumQuickFprArgs + 1)) { fprs_seen++; } } @@ -428,13 +428,13 @@ class QuickArgumentVisitor { }; // Visits arguments on the stack placing them into the shadow frame. 
-class BuildQuickShadowFrameVisitor : public QuickArgumentVisitor { +class BuildQuickShadowFrameVisitor FINAL : public QuickArgumentVisitor { public: BuildQuickShadowFrameVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty, uint32_t shorty_len, ShadowFrame* sf, size_t first_arg_reg) : QuickArgumentVisitor(sp, is_static, shorty, shorty_len), sf_(sf), cur_reg_(first_arg_reg) {} - virtual void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE { Primitive::Type type = GetParamPrimitiveType(); switch (type) { case Primitive::kPrimLong: // Fall-through. @@ -525,14 +525,14 @@ extern "C" uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Threa // Visits arguments on the stack placing them into the args vector, Object* arguments are converted // to jobjects. -class BuildQuickArgumentVisitor : public QuickArgumentVisitor { +class BuildQuickArgumentVisitor FINAL : public QuickArgumentVisitor { public: BuildQuickArgumentVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty, uint32_t shorty_len, ScopedObjectAccessUnchecked* soa, std::vector<jvalue>* args) : QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa), args_(args) {} - virtual void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE { jvalue val; Primitive::Type type = GetParamPrimitiveType(); switch (type) { @@ -638,13 +638,13 @@ extern "C" uint64_t artQuickProxyInvokeHandler(mirror::ArtMethod* proxy_method, // Read object references held in arguments from quick frames and place in a JNI local references, // so they don't get garbage collected. 
-class RememberForGcArgumentVisitor : public QuickArgumentVisitor { +class RememberForGcArgumentVisitor FINAL : public QuickArgumentVisitor { public: RememberForGcArgumentVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty, uint32_t shorty_len, ScopedObjectAccessUnchecked* soa) : QuickArgumentVisitor(sp, is_static, shorty, shorty_len), soa_(soa) {} - virtual void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE { if (IsParamAReference()) { StackReference<mirror::Object>* stack_ref = reinterpret_cast<StackReference<mirror::Object>*>(GetParamAddress()); @@ -671,14 +671,14 @@ class RememberForGcArgumentVisitor : public QuickArgumentVisitor { // Lazily resolve a method for quick. Called by stub code. extern "C" const void* artQuickResolutionTrampoline(mirror::ArtMethod* called, mirror::Object* receiver, - Thread* thread, mirror::ArtMethod** sp) + Thread* self, mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - FinishCalleeSaveFrameSetup(thread, sp, Runtime::kRefsAndArgs); + FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsAndArgs); // Start new JNI local reference state - JNIEnvExt* env = thread->GetJniEnv(); + JNIEnvExt* env = self->GetJniEnv(); ScopedObjectAccessUnchecked soa(env); ScopedJniEnvLocalRefState env_state(env); - const char* old_cause = thread->StartAssertNoThreadSuspension("Quick method resolution set up"); + const char* old_cause = self->StartAssertNoThreadSuspension("Quick method resolution set up"); // Compute details about the called method (avoid GCs) ClassLinker* linker = Runtime::Current()->GetClassLinker(); @@ -757,7 +757,7 @@ extern "C" const void* artQuickResolutionTrampoline(mirror::ArtMethod* called, dex_file->GetMethodShorty(dex_file->GetMethodId(dex_method_idx), &shorty_len); RememberForGcArgumentVisitor visitor(sp, invoke_type == kStatic, shorty, shorty_len, &soa); visitor.VisitArguments(); - 
thread->EndAssertNoThreadSuspension(old_cause); + self->EndAssertNoThreadSuspension(old_cause); bool virtual_or_interface = invoke_type == kVirtual || invoke_type == kInterface; // Resolve method filling in dex cache. if (called->IsRuntimeMethod()) { @@ -766,7 +766,7 @@ extern "C" const void* artQuickResolutionTrampoline(mirror::ArtMethod* called, receiver = sirt_receiver.get(); } const void* code = NULL; - if (LIKELY(!thread->IsExceptionPending())) { + if (LIKELY(!self->IsExceptionPending())) { // Incompatible class change should have been handled in resolve method. CHECK(!called->CheckIncompatibleClassChange(invoke_type)) << PrettyMethod(called) << " " << invoke_type; @@ -812,7 +812,7 @@ extern "C" const void* artQuickResolutionTrampoline(mirror::ArtMethod* called, DCHECK(called_class->IsErroneous()); } } - CHECK_EQ(code == NULL, thread->IsExceptionPending()); + CHECK_EQ(code == NULL, self->IsExceptionPending()); // Fixup any locally saved objects may have moved during a GC. visitor.FixupReferences(); // Place called method in callee-save frame to be placed as first argument to quick method. @@ -820,13 +820,375 @@ extern "C" const void* artQuickResolutionTrampoline(mirror::ArtMethod* called, return code; } -extern "C" const void* artQuickGenericJniTrampoline(mirror::ArtMethod* called, - mirror::Object* receiver, - Thread* thread, mirror::ArtMethod** sp) +// Visits arguments on the stack placing them into a region lower down the stack for the benefit +// of transitioning into native code. +class BuildGenericJniFrameVisitor FINAL : public QuickArgumentVisitor { +#if defined(__arm__) + // TODO: These are all dummy values! + static constexpr bool kNativeSoftFloatAbi = false; // This is a hard float ABI. + static constexpr size_t kNumNativeGprArgs = 3; // 3 arguments passed in GPRs. + static constexpr size_t kNumNativeFprArgs = 0; // 0 arguments passed in FPRs. 
+ + static constexpr size_t kGprStackOffset = 4336; + static constexpr size_t kFprStackOffset = 4336 - 6*8; + static constexpr size_t kCallStackStackOffset = 4336 - 112; + + static constexpr size_t kRegistersNeededForLong = 2; + static constexpr size_t kRegistersNeededForDouble = 2; +#elif defined(__mips__) + // TODO: These are all dummy values! + static constexpr bool kNativeSoftFloatAbi = true; // This is a hard float ABI. + static constexpr size_t kNumNativeGprArgs = 0; // 6 arguments passed in GPRs. + static constexpr size_t kNumNativeFprArgs = 0; // 8 arguments passed in FPRs. + + // update these + static constexpr size_t kGprStackOffset = 4336; + static constexpr size_t kFprStackOffset = 4336 - 6*8; + static constexpr size_t kCallStackStackOffset = 4336 - 112; + + static constexpr size_t kRegistersNeededForLong = 2; + static constexpr size_t kRegistersNeededForDouble = 2; +#elif defined(__i386__) + // TODO: Check these! + static constexpr bool kNativeSoftFloatAbi = true; // This is a soft float ABI. + static constexpr size_t kNumNativeGprArgs = 0; // 6 arguments passed in GPRs. + static constexpr size_t kNumNativeFprArgs = 0; // 8 arguments passed in FPRs. + + // update these + static constexpr size_t kGprStackOffset = 4336; + static constexpr size_t kFprStackOffset = 4336 - 6*8; + static constexpr size_t kCallStackStackOffset = 4336 - 112; + + static constexpr size_t kRegistersNeededForLong = 2; + static constexpr size_t kRegistersNeededForDouble = 2; +#elif defined(__x86_64__) + static constexpr bool kNativeSoftFloatAbi = false; // This is a hard float ABI. + static constexpr size_t kNumNativeGprArgs = 6; // 6 arguments passed in GPRs. + static constexpr size_t kNumNativeFprArgs = 8; // 8 arguments passed in FPRs. 
+ + static constexpr size_t kGprStackOffset = 4336; + static constexpr size_t kFprStackOffset = 4336 - 6*8; + static constexpr size_t kCallStackStackOffset = 4336 - 112; + + static constexpr size_t kRegistersNeededForLong = 1; + static constexpr size_t kRegistersNeededForDouble = 1; +#else +#error "Unsupported architecture" +#endif + + + public: + BuildGenericJniFrameVisitor(mirror::ArtMethod** sp, bool is_static, const char* shorty, + uint32_t shorty_len, Thread* self) : + QuickArgumentVisitor(sp, is_static, shorty, shorty_len) { + // size of cookie plus padding + uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp); + top_of_sirt_ = sp8 - 8; + cur_sirt_entry_ = reinterpret_cast<StackReference<mirror::Object>*>(top_of_sirt_) - 1; + sirt_number_of_references_ = 0; + gpr_index_ = kNumNativeGprArgs; + fpr_index_ = kNumNativeFprArgs; + + cur_gpr_reg_ = reinterpret_cast<uintptr_t*>(sp8 - kGprStackOffset); + cur_fpr_reg_ = reinterpret_cast<uint32_t*>(sp8 - kFprStackOffset); + cur_stack_arg_ = reinterpret_cast<uintptr_t*>(sp8 - kCallStackStackOffset); + + // jni environment is always first argument + PushPointer(self->GetJniEnv()); + + if (is_static) { + PushArgumentInSirt((*sp)->GetDeclaringClass()); + } + } + + void Visit() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) OVERRIDE { + Primitive::Type type = GetParamPrimitiveType(); + switch (type) { + case Primitive::kPrimLong: { + jlong long_arg; + if (IsSplitLongOrDouble()) { + long_arg = ReadSplitLongParam(); + } else { + long_arg = *reinterpret_cast<jlong*>(GetParamAddress()); + } + PushLongArgument(long_arg); + break; + } + case Primitive::kPrimDouble: { + uint64_t double_arg; + if (IsSplitLongOrDouble()) { + // Read into union so that we don't case to a double. 
+ double_arg = ReadSplitLongParam(); + } else { + double_arg = *reinterpret_cast<uint64_t*>(GetParamAddress()); + } + PushDoubleArgument(double_arg); + break; + } + case Primitive::kPrimNot: { + StackReference<mirror::Object>* stack_ref = + reinterpret_cast<StackReference<mirror::Object>*>(GetParamAddress()); + PushArgumentInSirt(stack_ref->AsMirrorPtr()); + break; + } + case Primitive::kPrimFloat: + PushFloatArgument(*reinterpret_cast<int32_t*>(GetParamAddress())); + break; + case Primitive::kPrimBoolean: // Fall-through. + case Primitive::kPrimByte: // Fall-through. + case Primitive::kPrimChar: // Fall-through. + case Primitive::kPrimShort: // Fall-through. + case Primitive::kPrimInt: // Fall-through. + PushIntArgument(*reinterpret_cast<jint*>(GetParamAddress())); + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "UNREACHABLE"; + break; + } + } + + void FinalizeSirt(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (!IsAligned<8>(StackIndirectReferenceTable::SizeOf(sirt_number_of_references_))) { + sirt_number_of_references_++; + *cur_sirt_entry_ = StackReference<mirror::Object>(); + cur_sirt_entry_--; + } + CHECK(IsAligned<8>(StackIndirectReferenceTable::SizeOf(sirt_number_of_references_))); + StackIndirectReferenceTable* sirt = reinterpret_cast<StackIndirectReferenceTable*>( + top_of_sirt_ - StackIndirectReferenceTable::SizeOf(sirt_number_of_references_)); + + sirt->SetNumberOfReferences(sirt_number_of_references_); + self->PushSirt(sirt); + } + + jobject GetFirstSirtEntry() { + return reinterpret_cast<jobject>(reinterpret_cast<StackReference<mirror::Object>*>(top_of_sirt_) - 1); + } + + private: + void PushArgumentInSirt(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + // Do something to push into the SIRT. 
+ uintptr_t sirt_or_null; + if (obj != nullptr) { + sirt_number_of_references_++; + *cur_sirt_entry_ = StackReference<mirror::Object>::FromMirrorPtr(obj); + sirt_or_null = reinterpret_cast<uintptr_t>(cur_sirt_entry_); + cur_sirt_entry_--; + } else { + sirt_or_null = reinterpret_cast<uintptr_t>(nullptr); + } + // Push the GPR or stack arg. + if (gpr_index_ > 0) { + *cur_gpr_reg_ = sirt_or_null; + cur_gpr_reg_++; + gpr_index_--; + } else { + *cur_stack_arg_ = sirt_or_null; + cur_stack_arg_++; + } + } + + void PushPointer(void* val) { + if (gpr_index_ > 0) { + *cur_gpr_reg_ = reinterpret_cast<uintptr_t>(val); + cur_gpr_reg_++; + gpr_index_--; + } else { + *cur_stack_arg_ = reinterpret_cast<uintptr_t>(val); + cur_stack_arg_++; + } + } + + void PushIntArgument(jint val) { + if (gpr_index_ > 0) { + *cur_gpr_reg_ = val; + cur_gpr_reg_++; + gpr_index_--; + } else { + *cur_stack_arg_ = val; + cur_stack_arg_++; + } + } + + void PushLongArgument(jlong val) { + // This is an ugly hack for the following problem: + // Assume odd number of 32b registers. Then having exactly kRegsNeeded left needs to spill! + if (gpr_index_ >= kRegistersNeededForLong + (kNumNativeGprArgs % kRegistersNeededForLong)) { + if (kRegistersNeededForLong > 1 && ((kNumNativeGprArgs - gpr_index_) & 1) == 1) { + // Pad. + gpr_index_--; + cur_gpr_reg_++; + } + uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_gpr_reg_); + *tmp = val; + cur_gpr_reg_ += kRegistersNeededForLong; + gpr_index_ -= kRegistersNeededForLong; + } else { + uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_stack_arg_); + *tmp = val; + cur_stack_arg_ += kRegistersNeededForLong; + + gpr_index_ = 0; // can't use GPRs anymore + } + } + + void PushFloatArgument(int32_t val) { + if (kNativeSoftFloatAbi) { + PushIntArgument(val); + } else { + if (fpr_index_ > 0) { + *cur_fpr_reg_ = val; + cur_fpr_reg_++; + if (kRegistersNeededForDouble == 1) { + // will pop 64 bits from the stack + // TODO: extend/clear bits??? 
+ cur_fpr_reg_++; + } + fpr_index_--; + } else { + // TODO: Check ABI for floats. + *cur_stack_arg_ = val; + cur_stack_arg_++; + } + } + } + + void PushDoubleArgument(uint64_t val) { + // See PushLongArgument for explanation + if (fpr_index_ >= kRegistersNeededForDouble + (kNumNativeFprArgs % kRegistersNeededForDouble)) { + if (kRegistersNeededForDouble > 1 && ((kNumNativeFprArgs - fpr_index_) & 1) == 1) { + // Pad. + fpr_index_--; + cur_fpr_reg_++; + } + uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_fpr_reg_); + *tmp = val; + // TODO: the whole thing doesn't make sense if we take uint32_t*... + cur_fpr_reg_ += 2; // kRegistersNeededForDouble; + fpr_index_ -= kRegistersNeededForDouble; + } else { + if (!IsAligned<8>(cur_stack_arg_)) { + cur_stack_arg_++; // Pad. + } + uint64_t* tmp = reinterpret_cast<uint64_t*>(cur_stack_arg_); + *tmp = val; + cur_stack_arg_ += kRegistersNeededForDouble; + + fpr_index_ = 0; // can't use FPRs anymore + } + } + + uint32_t sirt_number_of_references_; + StackReference<mirror::Object>* cur_sirt_entry_; + uint32_t gpr_index_; // should be uint, but gives error because on some archs no regs + uintptr_t* cur_gpr_reg_; + uint32_t fpr_index_; // ----- # ----- + uint32_t* cur_fpr_reg_; + uintptr_t* cur_stack_arg_; + uint8_t* top_of_sirt_; + + DISALLOW_COPY_AND_ASSIGN(BuildGenericJniFrameVisitor); +}; + +extern "C" const void* artQuickGenericJniTrampoline(Thread* self, mirror::ArtMethod** sp) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - LOG(FATAL) << "artQuickGenericJniTrampoline not implemented: " - << PrettyMethod(called); - return NULL; + uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp); + mirror::ArtMethod* called = *sp; + DCHECK(called->IsNative()); + + // run the visitor + MethodHelper mh(called); + BuildGenericJniFrameVisitor visitor(sp, called->IsStatic(), mh.GetShorty(), mh.GetShortyLength(), + self); + visitor.VisitArguments(); + visitor.FinalizeSirt(self); + + // fix up managed-stack things in Thread + self->SetTopOfStack(sp, 
0); + + // start JNI, save the cookie + uint32_t cookie; + if (called->IsSynchronized()) { + cookie = JniMethodStartSynchronized(visitor.GetFirstSirtEntry(), self); + // TODO: error checking. + if (self->IsExceptionPending()) { + self->PopSirt(); + return nullptr; + } + } else { + cookie = JniMethodStart(self); + } + *(sp32-1) = cookie; + + // retrieve native code + const void* nativeCode = called->GetNativeMethod(); + if (nativeCode == nullptr) { + // TODO: is this really an error, or do we need to try to find native code? + LOG(FATAL) << "Finding native code not implemented yet."; + } + + return nativeCode; +} + +/* + * Is called after the native JNI code. Responsible for cleanup (SIRT, saved state) and + * unlocking. + */ +extern "C" uint64_t artQuickGenericJniEndTrampoline(Thread* self, mirror::ArtMethod** sp, + jvalue result, uint64_t result_f) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp); + mirror::ArtMethod* called = *sp; + uint32_t cookie = *(sp32-1); + + // TODO: synchronized. + MethodHelper mh(called); + char return_shorty_char = mh.GetShorty()[0]; + + if (return_shorty_char == 'L') { + // the only special ending call + if (called->IsSynchronized()) { + BuildGenericJniFrameVisitor visitor(sp, called->IsStatic(), mh.GetShorty(), + mh.GetShortyLength(), self); + return reinterpret_cast<uint64_t>(JniMethodEndWithReferenceSynchronized(result.l, cookie, + visitor.GetFirstSirtEntry(), + self)); + } else { + return reinterpret_cast<uint64_t>(JniMethodEndWithReference(result.l, cookie, self)); + } + } else { + if (called->IsSynchronized()) { + // run the visitor + BuildGenericJniFrameVisitor visitor(sp, called->IsStatic(), mh.GetShorty(), + mh.GetShortyLength(), self); + JniMethodEndSynchronized(cookie, visitor.GetFirstSirtEntry(), self); + } else { + JniMethodEnd(cookie, self); + } + + switch (return_shorty_char) { + case 'F': // Fall-through. 
+ case 'D': + return result_f; + case 'Z': + return result.z; + case 'B': + return result.b; + case 'C': + return result.c; + case 'S': + return result.s; + case 'I': + return result.i; + case 'J': + return result.j; + case 'V': + return 0; + default: + LOG(FATAL) << "Unexpected return shorty character " << return_shorty_char; + return 0; + } + } } } // namespace art diff --git a/runtime/exception_test.cc b/runtime/exception_test.cc index 3653b37..5e3f504 100644 --- a/runtime/exception_test.cc +++ b/runtime/exception_test.cc @@ -179,7 +179,7 @@ TEST_F(ExceptionTest, StackTraceElement) { fake_stack.push_back(0); // Set up thread to appear as if we called out of method_g_ at pc dex 3 - thread->SetTopOfStack(&fake_stack[0], method_g_->ToNativePc(dex_pc)); // return pc + thread->SetTopOfStack(reinterpret_cast<mirror::ArtMethod**>(&fake_stack[0]), method_g_->ToNativePc(dex_pc)); // return pc } else { // Create/push fake 20-byte shadow frame for method g fake_stack.push_back(0); diff --git a/runtime/mirror/art_method.h b/runtime/mirror/art_method.h index e17dc5f..a18e171 100644 --- a/runtime/mirror/art_method.h +++ b/runtime/mirror/art_method.h @@ -360,7 +360,7 @@ class MANAGED ArtMethod : public Object { } const void* GetNativeMethod() { - return reinterpret_cast<const void*>(GetField32(NativeMethodOffset(), false)); + return GetFieldPtr<const void*>(NativeMethodOffset(), false); } template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> diff --git a/runtime/stack.h b/runtime/stack.h index 6a62922..f840f67 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -532,7 +532,7 @@ class StackVisitor { DCHECK(GetMethod() != nullptr); byte* save_addr = reinterpret_cast<byte*>(cur_quick_frame_) + frame_size - ((num + 1) * kPointerSize); -#if defined(__i386__) +#if defined(__i386__) || defined(__x86_64__) save_addr -= kPointerSize; // account for return address #endif return reinterpret_cast<uintptr_t*>(save_addr); diff --git a/runtime/stack_indirect_reference_table.h 
b/runtime/stack_indirect_reference_table.h index d22650b..c2d6a59 100644 --- a/runtime/stack_indirect_reference_table.h +++ b/runtime/stack_indirect_reference_table.h @@ -33,17 +33,24 @@ class Thread; class StackIndirectReferenceTable { public: explicit StackIndirectReferenceTable(mirror::Object* object) : - number_of_references_(1), link_(NULL) { + link_(NULL), number_of_references_(1) { references_[0].Assign(object); } ~StackIndirectReferenceTable() {} // Number of references contained within this SIRT - size_t NumberOfReferences() const { + uint32_t NumberOfReferences() const { return number_of_references_; } + // Returns the size of a StackIndirectReferenceTable containing num_references sirts. + static size_t SizeOf(uint32_t num_references) { + size_t header_size = OFFSETOF_MEMBER(StackIndirectReferenceTable, references_); + size_t data_size = sizeof(StackReference<mirror::Object>) * num_references; + return header_size + data_size; + } + // Link to previous SIRT or NULL StackIndirectReferenceTable* GetLink() const { return link_; @@ -54,6 +61,12 @@ class StackIndirectReferenceTable { link_ = sirt; } + // Sets the number_of_references_ field for constructing tables out of raw memory. Warning: will + // not resize anything. 
+ void SetNumberOfReferences(uint32_t num_references) { + number_of_references_ = num_references; + } + mirror::Object* GetReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK_LT(i, number_of_references_); return references_[i].AsMirrorPtr(); @@ -74,7 +87,7 @@ class StackIndirectReferenceTable { } // Offset of length within SIRT, used by generated code - static size_t NumberOfReferencesOffset() { + static uint32_t NumberOfReferencesOffset() { return OFFSETOF_MEMBER(StackIndirectReferenceTable, number_of_references_); } @@ -86,8 +99,8 @@ class StackIndirectReferenceTable { private: StackIndirectReferenceTable() {} - size_t number_of_references_; StackIndirectReferenceTable* link_; + uint32_t number_of_references_; // number_of_references_ are available if this is allocated and filled in by jni_compiler. StackReference<mirror::Object> references_[1]; diff --git a/runtime/thread.h b/runtime/thread.h index 6df2b1c..c7ab735 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -320,8 +320,7 @@ class PACKED(4) Thread { ThrowLocation GetCurrentLocationForThrow() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void SetTopOfStack(void* stack, uintptr_t pc) { - mirror::ArtMethod** top_method = reinterpret_cast<mirror::ArtMethod**>(stack); + void SetTopOfStack(mirror::ArtMethod** top_method, uintptr_t pc) { managed_stack_.SetTopQuickFrame(top_method); managed_stack_.SetTopQuickFramePc(pc); } |