author     Serguei Katkov <serguei.i.katkov@intel.com>  2014-07-08 17:21:53 +0700
committer  Chao-ying Fu <chao-ying.fu@intel.com>        2014-07-10 13:31:47 -0700
commit     c380191f3048db2a3796d65db8e5d5a5e7b08c65
tree       56f7f5fc60f8445ead63cd43faf06b9e1dfda6b2 /runtime/arch
parent     cba6b1fc88fd54c35211fd49a7a7501cfcdaa170
x86_64: Enable fp-reg promotion
This patch makes four registers, XMM12-XMM15, available for the
promotion of fp virtual registers.
Change-Id: I3f89ad07fc8ae98b70f550eada09be7b693ffb67
Signed-off-by: Serguei Katkov <serguei.i.katkov@intel.com>
Signed-off-by: Chao-ying Fu <chao-ying.fu@intel.com>
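Each newly preserved register needs one 8-byte spill slot, so every quick
callee-save frame touched below grows by 4 * 8 = 32 bytes. A minimal
standalone sketch of that arithmetic, using hypothetical constant names
(the authoritative values are the FRAME_SIZE_* macros changed in
asm_support_x86_64.h):

// Sketch only: frame-size arithmetic implied by the patch. The kFrame*
// names are illustrative; ART itself uses the FRAME_SIZE_* macros.
#include <cstddef>

constexpr size_t kNewFpSpills = 4;      // XMM12..XMM15
constexpr size_t kSpillSlotBytes = 8;   // movq spills the low 64 bits of an XMM register

constexpr size_t kFrameSaveAll = 64 + kNewFpSpills * kSpillSlotBytes;       // 96
constexpr size_t kFrameRefsOnly = 64 + kNewFpSpills * kSpillSlotBytes;      // 96
constexpr size_t kFrameRefsAndArgs = 176 + kNewFpSpills * kSpillSlotBytes;  // 208

static_assert(kFrameSaveAll == 96, "save-all frame: 64 -> 96 bytes");
static_assert(kFrameRefsOnly == 96, "refs-only frame: 64 -> 96 bytes");
static_assert(kFrameRefsAndArgs == 208, "refs-and-args frame: 176 -> 208 bytes");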
Diffstat (limited to 'runtime/arch')
-rw-r--r--  runtime/arch/x86_64/asm_support_x86_64.h                6
-rw-r--r--  runtime/arch/x86_64/context_x86_64.cc                  49
-rw-r--r--  runtime/arch/x86_64/entrypoints_init_x86_64.cc          4
-rw-r--r--  runtime/arch/x86_64/jni_entrypoints_x86_64.S           16
-rw-r--r--  runtime/arch/x86_64/quick_entrypoints_x86_64.S        172
-rw-r--r--  runtime/arch/x86_64/quick_method_frame_info_x86_64.h    6
-rw-r--r--  runtime/arch/x86_64/registers_x86_64.cc                 9
7 files changed, 204 insertions, 58 deletions
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index bff8501..05d0ef8 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -35,9 +35,9 @@
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
 #define THREAD_ID_OFFSET 12

-#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64
-#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176
+#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 64 + 4*8
+#define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 64 + 4*8
+#define FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE 176 + 4*8

 // Expected size of a heap reference
 #define HEAP_REFERENCE_SIZE 4
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index e1f47ee..7699eaf 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -78,6 +78,18 @@ void X86_64Context::SmashCallerSaves() {
   gprs_[R9] = nullptr;
   gprs_[R10] = nullptr;
   gprs_[R11] = nullptr;
+  fprs_[XMM0] = nullptr;
+  fprs_[XMM1] = nullptr;
+  fprs_[XMM2] = nullptr;
+  fprs_[XMM3] = nullptr;
+  fprs_[XMM4] = nullptr;
+  fprs_[XMM5] = nullptr;
+  fprs_[XMM6] = nullptr;
+  fprs_[XMM7] = nullptr;
+  fprs_[XMM8] = nullptr;
+  fprs_[XMM9] = nullptr;
+  fprs_[XMM10] = nullptr;
+  fprs_[XMM11] = nullptr;
 }

 bool X86_64Context::SetGPR(uint32_t reg, uintptr_t value) {
@@ -102,41 +114,26 @@ bool X86_64Context::SetFPR(uint32_t reg, uintptr_t value) {
   }
 }

+extern "C" void art_quick_do_long_jump(uintptr_t*, uintptr_t*);
+
 void X86_64Context::DoLongJump() {
 #if defined(__x86_64__)
-  // Array of GPR values, filled from the context backward for the long jump pop. We add a slot at
-  // the top for the stack pointer that doesn't get popped in a pop-all.
-  volatile uintptr_t gprs[kNumberOfCpuRegisters + 1];
+  uintptr_t gprs[kNumberOfCpuRegisters + 1];
+  uintptr_t fprs[kNumberOfFloatRegisters];
+
   for (size_t i = 0; i < kNumberOfCpuRegisters; ++i) {
     gprs[kNumberOfCpuRegisters - i - 1] = gprs_[i] != nullptr ? *gprs_[i] :
                                           X86_64Context::kBadGprBase + i;
   }
+  for (size_t i = 0; i < kNumberOfFloatRegisters; ++i) {
+    fprs[i] = fprs_[i] != nullptr ? *fprs_[i] : X86_64Context::kBadFprBase + i;
+  }
+
   // We want to load the stack pointer one slot below so that the ret will pop eip.
   uintptr_t rsp = gprs[kNumberOfCpuRegisters - RSP - 1] - kWordSize;
   gprs[kNumberOfCpuRegisters] = rsp;
   *(reinterpret_cast<uintptr_t*>(rsp)) = rip_;
-  __asm__ __volatile__(
-      "movq %0, %%rsp\n\t"  // RSP points to gprs.
-      "popq %%r15\n\t"      // Load all registers except RSP and RIP with values in gprs.
-      "popq %%r14\n\t"
-      "popq %%r13\n\t"
-      "popq %%r12\n\t"
-      "popq %%r11\n\t"
-      "popq %%r10\n\t"
-      "popq %%r9\n\t"
-      "popq %%r8\n\t"
-      "popq %%rdi\n\t"
-      "popq %%rsi\n\t"
-      "popq %%rbp\n\t"
-      "addq $8, %%rsp\n\t"
-      "popq %%rbx\n\t"
-      "popq %%rdx\n\t"
-      "popq %%rcx\n\t"
-      "popq %%rax\n\t"
-      "popq %%rsp\n\t"      // Load stack pointer.
-      "ret\n\t"             // From higher in the stack pop rip.
-      :  // output.
-      : "g"(&gprs[0])  // input.
-      :);  // clobber.
+
+  art_quick_do_long_jump(gprs, fprs);
 #else
   UNIMPLEMENTED(FATAL);
 #endif
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 609d1c6..204d52c 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -35,7 +35,7 @@ extern "C" void art_portable_resolution_trampoline(mirror::ArtMethod*);
 extern "C" void art_portable_to_interpreter_bridge(mirror::ArtMethod*);

 // Cast entrypoints.
-extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass, +extern "C" uint32_t art_quick_assignable_from_code(const mirror::Class* klass, const mirror::Class* ref_class); extern "C" void art_quick_check_cast(void*, void*); @@ -129,7 +129,7 @@ void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints, ResetQuickAllocEntryPoints(qpoints); // Cast - qpoints->pInstanceofNonTrivial = artIsAssignableFromCode; + qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code; qpoints->pCheckCast = art_quick_check_cast; // DexCache diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S index d668797..f6736df 100644 --- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S @@ -28,8 +28,8 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub PUSH rdx // Arg. PUSH rcx // Arg. // Create space for FPR args, plus padding for alignment - subq LITERAL(72), %rsp - CFI_ADJUST_CFA_OFFSET(72) + subq LITERAL(72 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(72 + 4 * 8) // Save FPRs. movq %xmm0, 0(%rsp) movq %xmm1, 8(%rsp) @@ -39,6 +39,10 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub movq %xmm5, 40(%rsp) movq %xmm6, 48(%rsp) movq %xmm7, 56(%rsp) + movq %xmm12, 64(%rsp) + movq %xmm13, 72(%rsp) + movq %xmm14, 80(%rsp) + movq %xmm15, 88(%rsp) // prepare call movq %gs:THREAD_SELF_OFFSET, %rdi // RDI := Thread::Current() // call @@ -52,8 +56,12 @@ DEFINE_FUNCTION art_jni_dlsym_lookup_stub movq 40(%rsp), %xmm5 movq 48(%rsp), %xmm6 movq 56(%rsp), %xmm7 - addq LITERAL(72), %rsp - CFI_ADJUST_CFA_OFFSET(-72) + movq 64(%rsp), %xmm12 + movq 72(%rsp), %xmm13 + movq 80(%rsp), %xmm14 + movq 88(%rsp), %xmm15 + addq LITERAL(72 + 4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(-72 - 4 * 8) POP rcx // Arg. POP rdx // Arg. POP rsi // Arg. diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 8fa947c..7f7226c 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -16,6 +16,26 @@ #include "asm_support_x86_64.S" +MACRO0(SETUP_FP_CALLEE_SAVE_FRAME) + // Create space for ART FP callee-saved registers + subq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(4 * 8) + movq %xmm12, 0(%rsp) + movq %xmm13, 8(%rsp) + movq %xmm14, 16(%rsp) + movq %xmm15, 24(%rsp) +END_MACRO + +MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME) + // Restore ART FP callee-saved registers + movq 0(%rsp), %xmm12 + movq 8(%rsp), %xmm13 + movq 16(%rsp), %xmm14 + movq 24(%rsp), %xmm15 + addq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(- 4 * 8) +END_MACRO + // For x86, the CFA is esp+4, the address above the pushed return address on the stack. /* @@ -37,6 +57,14 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME) PUSH r12 // Callee save. PUSH rbp // Callee save. PUSH rbx // Callee save. + // Create space for FPR args, plus padding for alignment + subq LITERAL(4 * 8), %rsp + CFI_ADJUST_CFA_OFFSET(4 * 8) + // Save FPRs. + movq %xmm12, 0(%rsp) + movq %xmm13, 8(%rsp) + movq %xmm14, 16(%rsp) + movq %xmm15, 24(%rsp) subq MACRO_LITERAL(8), %rsp // Space for Method* (also aligns the frame). CFI_ADJUST_CFA_OFFSET(8) // R10 := ArtMethod* for save all callee save frame method. @@ -46,7 +74,7 @@ MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME) // Ugly compile-time check, but we only have the preprocessor. // Last +8: implicit return address pushed on stack when caller made call. 
-#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 8 + 8)
+#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
 #error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
@@ -71,8 +99,14 @@ MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
-    subq MACRO_LITERAL(8), %rsp  // Space for Method* (also aligns the frame).
-    CFI_ADJUST_CFA_OFFSET(8)
+    // Create space for FPR args, plus padding for alignment
+    subq LITERAL(8 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(8 + 4*8)
+    // Save FPRs.
+    movq %xmm12, 8(%rsp)
+    movq %xmm13, 16(%rsp)
+    movq %xmm14, 24(%rsp)
+    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
@@ -80,15 +114,19 @@

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 8 + 8)
+#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
 #error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
 END_MACRO

 MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
-    addq MACRO_LITERAL(8), %rsp
-    CFI_ADJUST_CFA_OFFSET(-8)
+    movq 8(%rsp), %xmm12
+    movq 16(%rsp), %xmm13
+    movq 24(%rsp), %xmm14
+    movq 32(%rsp), %xmm15
+    addq LITERAL(8 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI
    POP rbx
    POP rbp
@@ -123,8 +161,8 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq MACRO_LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(80)
+    subq MACRO_LITERAL(80 + 4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Save FPRs.
@@ -136,12 +174,16 @@ MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
+    movq %xmm12, 80(%rsp)
+    movq %xmm13, 88(%rsp)
+    movq %xmm14, 96(%rsp)
+    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
-#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 80 + 8)
+#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8)
 #error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
 #endif
 #endif  // __APPLE__
@@ -157,8 +199,12 @@ MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
-    addq MACRO_LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(-80)
+    movq 80(%rsp), %xmm12
+    movq 88(%rsp), %xmm13
+    movq 96(%rsp), %xmm14
+    movq 104(%rsp), %xmm15
+    addq MACRO_LITERAL(80 + 4 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
@@ -536,6 +582,58 @@ DEFINE_FUNCTION art_quick_invoke_static_stub
 #endif  // __APPLE__
 END_FUNCTION art_quick_invoke_static_stub

+    /*
+     * Long jump stub.
+     * On entry:
+     *   rdi = gprs
+     *   rsi = fprs
+     */
+DEFINE_FUNCTION art_quick_do_long_jump
+#if defined(__APPLE__)
+    int3
+    int3
+#else
+    // Restore FPRs.
+    movq 0(%rsi), %xmm0
+    movq 8(%rsi), %xmm1
+    movq 16(%rsi), %xmm2
+    movq 24(%rsi), %xmm3
+    movq 32(%rsi), %xmm4
+    movq 40(%rsi), %xmm5
+    movq 48(%rsi), %xmm6
+    movq 56(%rsi), %xmm7
+    movq 64(%rsi), %xmm8
+    movq 72(%rsi), %xmm9
+    movq 80(%rsi), %xmm10
+    movq 88(%rsi), %xmm11
+    movq 96(%rsi), %xmm12
+    movq 104(%rsi), %xmm13
+    movq 112(%rsi), %xmm14
+    movq 120(%rsi), %xmm15
+    // Restore GPRs.
+    movq %rdi, %rsp   // RSP points to gprs.
+    // Load all registers except RSP and RIP with values in gprs.
+    popq %r15
+    popq %r14
+    popq %r13
+    popq %r12
+    popq %r11
+    popq %r10
+    popq %r9
+    popq %r8
+    popq %rdi
+    popq %rsi
+    popq %rbp
+    addq LITERAL(8), %rsp   // Skip rsp
+    popq %rbx
+    popq %rdx
+    popq %rcx
+    popq %rax
+    popq %rsp      // Load stack pointer.
+    ret            // From higher in the stack pop rip.
+#endif  // __APPLE__
+END_FUNCTION art_quick_do_long_jump
+
 MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
@@ -820,13 +918,17 @@ END_FUNCTION art_quick_unlock_object
 DEFINE_FUNCTION art_quick_check_cast
    PUSH rdi                          // Save args for exc
    PUSH rsi
+    SETUP_FP_CALLEE_SAVE_FRAME
    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
    testq %rax, %rax
    jz 1f                             // jump forward if not assignable
+    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(16), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-16)
+
    ret
 1:
+    RESTORE_FP_CALLEE_SAVE_FRAME
    POP rsi                           // Pop arguments
    POP rdi
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
@@ -907,6 +1009,7 @@ DEFINE_FUNCTION art_quick_aput_obj
    PUSH rdx
    subq LITERAL(8), %rsp  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
+    SETUP_FP_CALLEE_SAVE_FRAME

    // "Uncompress" = do nothing, as already zero-extended on load.
    movl CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
@@ -918,6 +1021,7 @@
    testq %rax, %rax
    jz .Lthrow_array_store_exception
+    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
@@ -934,6 +1038,7 @@
//  movb %dl, (%rdx, %rdi)
    ret
 .Lthrow_array_store_exception:
+    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
@@ -1012,8 +1117,8 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(80)
+    subq LITERAL(80 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
@@ -1023,14 +1128,18 @@ DEFINE_FUNCTION art_quick_proxy_invoke_handler
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
+    movq %xmm12, 80(%rsp)
+    movq %xmm13, 88(%rsp)
+    movq %xmm14, 96(%rsp)
+    movq %xmm15, 104(%rsp)
    // Store proxy method to bottom of stack.
    movq %rdi, 0(%rsp)
    movq %gs:THREAD_SELF_OFFSET, %rdx  // Pass Thread::Current().
    movq %rsp, %rcx                    // Pass SP.
    call PLT_SYMBOL(artQuickProxyInvokeHandler)  // (proxy method, receiver, Thread*, SP)
    movq %rax, %xmm0                   // Copy return value in case of float returns.
-    addq LITERAL(168), %rsp            // Pop arguments.
-    CFI_ADJUST_CFA_OFFSET(-168)
+    addq LITERAL(168 + 4*8), %rsp      // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-168 - 4*8)
    RETURN_OR_DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_proxy_invoke_handler

@@ -1156,8 +1265,8 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
-    subq LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(80)
+    subq LITERAL(80 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
@@ -1167,6 +1276,10 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
+    movq %xmm12, 80(%rsp)
+    movq %xmm13, 88(%rsp)
+    movq %xmm14, 96(%rsp)
+    movq %xmm15, 104(%rsp)
    movq %rdi, 0(%rsp)  // Store native ArtMethod* to bottom of stack.
    movq %rsp, %rbp     // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)
@@ -1260,9 +1373,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
+    movq 80(%rsp), %xmm12
+    movq 88(%rsp), %xmm13
+    movq 96(%rsp), %xmm14
+    movq 104(%rsp), %xmm15
    // was 80 bytes
-    addq LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(-80)
+    addq LITERAL(80 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
@@ -1292,9 +1409,13 @@ DEFINE_FUNCTION art_quick_generic_jni_trampoline
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
-    // was 80 bytes
-    addq LITERAL(80), %rsp
-    CFI_ADJUST_CFA_OFFSET(-80)
+    movq 80(%rsp), %xmm12
+    movq 88(%rsp), %xmm13
+    movq 96(%rsp), %xmm14
+    movq 104(%rsp), %xmm15
+    // was 80 + 32 bytes
+    addq LITERAL(80 + 4*8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
@@ -1450,3 +1571,10 @@ DEFINE_FUNCTION art_quick_string_compareto
 END_FUNCTION art_quick_string_compareto

 UNIMPLEMENTED art_quick_memcmp16
+
+DEFINE_FUNCTION art_quick_assignable_from_code
+    SETUP_FP_CALLEE_SAVE_FRAME
+    call PLT_SYMBOL(artIsAssignableFromCode)  // (const mirror::Class*, const mirror::Class*)
+    RESTORE_FP_CALLEE_SAVE_FRAME
+    ret
+END_FUNCTION art_quick_assignable_from_code
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 6183909..53aa212 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -34,6 +34,9 @@ static constexpr uint32_t kX86_64CalleeSaveFpArgSpills =
     (1 << art::x86_64::XMM0) | (1 << art::x86_64::XMM1) | (1 << art::x86_64::XMM2) |
     (1 << art::x86_64::XMM3) | (1 << art::x86_64::XMM4) | (1 << art::x86_64::XMM5) |
     (1 << art::x86_64::XMM6) | (1 << art::x86_64::XMM7);
+static constexpr uint32_t kX86_64CalleeSaveFpSpills =
+    (1 << art::x86_64::XMM12) | (1 << art::x86_64::XMM13) |
+    (1 << art::x86_64::XMM14) | (1 << art::x86_64::XMM15);

 constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
   return kX86_64CalleeSaveRefSpills |
@@ -42,7 +45,8 @@ constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
 }

 constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-  return (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
+  return kX86_64CalleeSaveFpSpills |
+      (type == Runtime::kRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0);
 }

 constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
diff --git a/runtime/arch/x86_64/registers_x86_64.cc b/runtime/arch/x86_64/registers_x86_64.cc
index 38f3494..f29c426 100644
--- a/runtime/arch/x86_64/registers_x86_64.cc
+++ b/runtime/arch/x86_64/registers_x86_64.cc
@@ -34,5 +34,14 @@ std::ostream& operator<<(std::ostream& os, const Register& rhs) {
   return os;
 }

+std::ostream& operator<<(std::ostream& os, const FloatRegister& rhs) {
+  if (rhs >= XMM0 && rhs <= XMM15) {
+    os << "xmm" << static_cast<int>(rhs);
+  } else {
+    os << "Register[" << static_cast<int>(rhs) << "]";
+  }
+  return os;
+}
+
 }  // namespace x86_64
 }  // namespace art
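The FloatRegister stream operator added in registers_x86_64.cc can be
exercised as in the sketch below; the snippet is illustrative only (the
include path is an assumption) and is not part of the commit:

// Sketch only: printing FloatRegister values via the new operator<<.
#include <iostream>
#include "arch/x86_64/registers_x86_64.h"  // assumed include path

int main() {
  std::cout << art::x86_64::XMM13 << "\n";  // prints "xmm13"
  std::cout << art::x86_64::XMM0 << "\n";   // prints "xmm0"
  return 0;
}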