Diffstat (limited to 'drivers/kvm/vmx.c')
-rw-r--r--  drivers/kvm/vmx.c  175
1 file changed, 125 insertions(+), 50 deletions(-)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index d0a2c2d..d4701cb 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -116,7 +116,7 @@ static void vmcs_clear(struct vmcs *vmcs)
 static void __vcpu_clear(void *arg)
 {
         struct kvm_vcpu *vcpu = arg;
-        int cpu = smp_processor_id();
+        int cpu = raw_smp_processor_id();
 
         if (vcpu->cpu == cpu)
                 vmcs_clear(vcpu->vmcs);
@@ -152,15 +152,21 @@ static u64 vmcs_read64(unsigned long field)
 #endif
 }
 
+static noinline void vmwrite_error(unsigned long field, unsigned long value)
+{
+        printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
+               field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+        dump_stack();
+}
+
 static void vmcs_writel(unsigned long field, unsigned long value)
 {
         u8 error;
 
         asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
                        : "=q"(error) : "a"(value), "d"(field) : "cc" );
-        if (error)
-                printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
-                       field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
+        if (unlikely(error))
+                vmwrite_error(field, value);
 }
 
 static void vmcs_write16(unsigned long field, u16 value)
@@ -263,6 +269,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
         if (interruptibility & 3)
                 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
                              interruptibility & ~3);
+        vcpu->interrupt_window_open = 1;
 }
 
 static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
@@ -541,7 +548,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
 
 static struct vmcs *alloc_vmcs(void)
 {
-        return alloc_vmcs_cpu(smp_processor_id());
+        return alloc_vmcs_cpu(raw_smp_processor_id());
 }
 
 static void free_vmcs(struct vmcs *vmcs)
@@ -736,6 +743,15 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
+static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
+{
+        vcpu->cr0 &= KVM_GUEST_CR0_MASK;
+        vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK;
+
+        vcpu->cr4 &= KVM_GUEST_CR4_MASK;
+        vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
+}
+
 static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
         if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
@@ -1011,8 +1027,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
         vmcs_writel(GUEST_RIP, 0xfff0);
         vmcs_writel(GUEST_RSP, 0);
 
-        vmcs_writel(GUEST_CR3, 0);
-
         //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
         vmcs_writel(GUEST_DR7, 0x400);
 
@@ -1049,7 +1063,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
                        | CPU_BASED_CR8_LOAD_EXITING    /* 20.6.2 */
                        | CPU_BASED_CR8_STORE_EXITING   /* 20.6.2 */
                        | CPU_BASED_UNCOND_IO_EXITING   /* 20.6.2 */
-                       | CPU_BASED_INVDPG_EXITING
                        | CPU_BASED_MOV_DR_EXITING
                        | CPU_BASED_USE_TSC_OFFSETING   /* 21.3 */
                 );
@@ -1094,14 +1107,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
         rdmsrl(MSR_IA32_SYSENTER_EIP, a);
         vmcs_writel(HOST_IA32_SYSENTER_EIP, a);   /* 22.2.3 */
 
-        ret = -ENOMEM;
-        vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-        if (!vcpu->guest_msrs)
-                goto out;
-        vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
-        if (!vcpu->host_msrs)
-                goto out_free_guest_msrs;
-
         for (i = 0; i < NR_VMX_MSR; ++i) {
                 u32 index = vmx_msr_index[i];
                 u32 data_low, data_high;
@@ -1155,8 +1160,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
 
         return 0;
 
-out_free_guest_msrs:
-        kfree(vcpu->guest_msrs);
 out:
         return ret;
 }
@@ -1224,21 +1227,34 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
                         irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
-static void kvm_try_inject_irq(struct kvm_vcpu *vcpu)
+
+static void do_interrupt_requests(struct kvm_vcpu *vcpu,
+                                       struct kvm_run *kvm_run)
 {
-        if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
-            (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0)
+        u32 cpu_based_vm_exec_control;
+
+        vcpu->interrupt_window_open =
+                ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+                 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
+
+        if (vcpu->interrupt_window_open &&
+            vcpu->irq_summary &&
+            !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
                 /*
-                 * Interrupts enabled, and not blocked by sti or mov ss. Good.
+                 * If interrupts enabled, and not blocked by sti or mov ss. Good.
                  */
                 kvm_do_inject_irq(vcpu);
-        else
+
+        cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+        if (!vcpu->interrupt_window_open &&
+            (vcpu->irq_summary || kvm_run->request_interrupt_window))
                 /*
                  * Interrupts blocked.  Wait for unblock.
                  */
-                vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-                             vmcs_read32(CPU_BASED_VM_EXEC_CONTROL)
-                             | CPU_BASED_VIRTUAL_INTR_PENDING);
+                cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
+        else
+                cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
+        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
 static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
@@ -1277,6 +1293,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         unsigned long cr2, rip;
         u32 vect_info;
         enum emulation_result er;
+        int r;
 
         vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
         intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
@@ -1305,7 +1322,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                 cr2 = vmcs_readl(EXIT_QUALIFICATION);
 
                 spin_lock(&vcpu->kvm->lock);
-                if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) {
+                r = kvm_mmu_page_fault(vcpu, cr2, error_code);
+                if (r < 0) {
+                        spin_unlock(&vcpu->kvm->lock);
+                        return r;
+                }
+                if (!r) {
                         spin_unlock(&vcpu->kvm->lock);
                         return 1;
                 }
@@ -1425,17 +1447,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         return 0;
 }
 
-static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-        u64 address = vmcs_read64(EXIT_QUALIFICATION);
-        int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-        spin_lock(&vcpu->kvm->lock);
-        vcpu->mmu.inval_page(vcpu, address);
-        spin_unlock(&vcpu->kvm->lock);
-        vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length);
-        return 1;
-}
-
 static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
         u64 exit_qualification;
@@ -1575,23 +1586,40 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         return 1;
 }
 
+static void post_kvm_run_save(struct kvm_vcpu *vcpu,
+                              struct kvm_run *kvm_run)
+{
+        kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
+        kvm_run->cr8 = vcpu->cr8;
+        kvm_run->apic_base = vcpu->apic_base;
+        kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
+                                                  vcpu->irq_summary == 0);
+}
+
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
                                    struct kvm_run *kvm_run)
 {
-        /* Turn off interrupt window reporting. */
-        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-                     vmcs_read32(CPU_BASED_VM_EXEC_CONTROL)
-                     & ~CPU_BASED_VIRTUAL_INTR_PENDING);
+        /*
+         * If the user space waits to inject interrupts, exit as soon as
+         * possible
+         */
+        if (kvm_run->request_interrupt_window &&
+            !vcpu->irq_summary) {
+                kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
+                ++kvm_stat.irq_window_exits;
+                return 0;
+        }
         return 1;
 }
 
 static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
         skip_emulated_instruction(vcpu);
-        if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF))
+        if (vcpu->irq_summary)
                 return 1;
 
         kvm_run->exit_reason = KVM_EXIT_HLT;
+        ++kvm_stat.halt_exits;
         return 0;
 }
 
@@ -1605,7 +1633,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
         [EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
         [EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
         [EXIT_REASON_IO_INSTRUCTION]          = handle_io,
-        [EXIT_REASON_INVLPG]                  = handle_invlpg,
         [EXIT_REASON_CR_ACCESS]               = handle_cr,
         [EXIT_REASON_DR_ACCESS]               = handle_dr,
         [EXIT_REASON_CPUID]                   = handle_cpuid,
@@ -1642,11 +1669,27 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
         return 0;
 }
 
+/*
+ * Check if userspace requested an interrupt window, and that the
+ * interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
+                                          struct kvm_run *kvm_run)
+{
+        return (!vcpu->irq_summary &&
+                kvm_run->request_interrupt_window &&
+                vcpu->interrupt_window_open &&
+                (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
+}
+
 static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
         u8 fail;
         u16 fs_sel, gs_sel, ldt_sel;
         int fs_gs_ldt_reload_needed;
+        int r;
 
 again:
         /*
@@ -1673,9 +1716,7 @@ again:
         vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
 #endif
 
-        if (vcpu->irq_summary &&
-            !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
-                kvm_try_inject_irq(vcpu);
+        do_interrupt_requests(vcpu, kvm_run);
 
         if (vcpu->guest_debug.enabled)
                 kvm_guest_debug_pre(vcpu);
@@ -1812,6 +1853,7 @@ again:
         fx_save(vcpu->guest_fx_image);
         fx_restore(vcpu->host_fx_image);
+        vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
 #ifndef CONFIG_X86_64
         asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
 #endif
@@ -1821,6 +1863,7 @@ again:
         if (fail) {
                 kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
                 kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR);
+                r = 0;
         } else {
                 if (fs_gs_ldt_reload_needed) {
                         load_ldt(ldt_sel);
@@ -1840,17 +1883,28 @@ again:
                 }
                 vcpu->launched = 1;
                 kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT;
-                if (kvm_handle_exit(kvm_run, vcpu)) {
+                r = kvm_handle_exit(kvm_run, vcpu);
+                if (r > 0) {
                         /* Give scheduler a change to reschedule. */
                         if (signal_pending(current)) {
                                 ++kvm_stat.signal_exits;
+                                post_kvm_run_save(vcpu, kvm_run);
+                                return -EINTR;
+                        }
+
+                        if (dm_request_for_irq_injection(vcpu, kvm_run)) {
+                                ++kvm_stat.request_irq_exits;
+                                post_kvm_run_save(vcpu, kvm_run);
                                 return -EINTR;
                         }
+
                         kvm_resched(vcpu);
                         goto again;
                 }
         }
-        return 0;
+
+        post_kvm_run_save(vcpu, kvm_run);
+        return r;
 }
 
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
@@ -1906,13 +1960,33 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 {
         struct vmcs *vmcs;
 
+        vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+        if (!vcpu->guest_msrs)
+                return -ENOMEM;
+
+        vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+        if (!vcpu->host_msrs)
+                goto out_free_guest_msrs;
+
         vmcs = alloc_vmcs();
         if (!vmcs)
-                return -ENOMEM;
+                goto out_free_msrs;
+
         vmcs_clear(vmcs);
         vcpu->vmcs = vmcs;
         vcpu->launched = 0;
+
         return 0;
+
+out_free_msrs:
+        kfree(vcpu->host_msrs);
+        vcpu->host_msrs = NULL;
+
+out_free_guest_msrs:
+        kfree(vcpu->guest_msrs);
+        vcpu->guest_msrs = NULL;
+
+        return -ENOMEM;
 }
 
 static struct kvm_arch_ops vmx_arch_ops = {
@@ -1936,6 +2010,7 @@ static struct kvm_arch_ops vmx_arch_ops = {
         .get_segment = vmx_get_segment,
         .set_segment = vmx_set_segment,
         .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+        .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits,
         .set_cr0 = vmx_set_cr0,
         .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch,
         .set_cr3 = vmx_set_cr3,
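Taken together, do_interrupt_requests(), post_kvm_run_save() and dm_request_for_irq_injection() let userspace ask for a VM exit as soon as the guest can accept an interrupt, instead of injecting blindly. The sketch below illustrates the consuming side of that protocol. It is not part of this patch: the ioctl calling convention only approximates the early KVM ABI implied by exit_type above, and vcpu_loop(), have_pending_irq() and next_pending_irq() are hypothetical names.

/*
 * Illustrative userspace run loop (sketch only, not from this change).
 * Assumes an fd-per-vcpu handle and the early struct kvm_run layout;
 * have_pending_irq()/next_pending_irq() are placeholder helpers.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

extern int have_pending_irq(void);   /* hypothetical: device model state */
extern int next_pending_irq(void);   /* hypothetical: highest-priority IRQ */

void vcpu_loop(int vcpu_fd)
{
        struct kvm_run run;
        struct kvm_interrupt irq;

        memset(&run, 0, sizeof(run));
        for (;;) {
                /* Ask the kernel to exit as soon as the guest can take an IRQ. */
                run.request_interrupt_window = have_pending_irq();

                if (ioctl(vcpu_fd, KVM_RUN, &run) < 0)
                        break;  /* e.g. -EINTR from a pending signal */

                /* post_kvm_run_save() filled these fields in before returning. */
                if (run.ready_for_interrupt_injection && have_pending_irq()) {
                        irq.irq = next_pending_irq();
                        ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
                }

                switch (run.exit_reason) {
                case KVM_EXIT_IRQ_WINDOW_OPEN:
                        break;  /* window open; injection handled above */
                case KVM_EXIT_HLT:
                        break;  /* guest halted with no IRQ queued; wait */
                }
        }
}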