KVM: x86: Fix perf timer mode IP reporting
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b4d8da6c62c8561e49446625a80ee5db347dabd5..0e5510ebd3f25325fddb92b3272533d8aef3e632 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -196,6 +196,14 @@ module_param(ple_window_max, int, S_IRUGO);
 
 extern const ulong vmx_return;
 
+struct kvm_vmx {
+       struct kvm kvm;
+
+       unsigned int tss_addr;
+       bool ept_identity_pagetable_done;
+       gpa_t ept_identity_map_addr;
+};
+
 #define NR_AUTOLOAD_MSRS 8
 
 struct vmcs {
@@ -698,6 +706,11 @@ enum segment_cache_field {
        SEG_FIELD_NR = 4
 };
 
+static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+{
+       return container_of(kvm, struct kvm_vmx, kvm);
+}
+
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 {
        return container_of(vcpu, struct vcpu_vmx, vcpu);
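
The new struct kvm_vmx wraps the generic struct kvm so that VMX-private per-VM state (tss_addr and the EPT identity-map fields) no longer has to live in kvm->arch. A minimal stand-alone sketch of the container_of idiom behind to_kvm_vmx(), using placeholder definitions where the real kernel types are not shown in this diff:

    /* Stand-alone illustration; struct kvm here is a stub, not the real one. */
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct kvm { int placeholder; };

    struct kvm_vmx {
            struct kvm kvm;           /* generic part, handed to common KVM code */
            unsigned int tss_addr;    /* VMX-private per-VM state */
    };

    /* Recover the wrapper from the embedded struct kvm pointer. */
    static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
    {
            return container_of(kvm, struct kvm_vmx, kvm);
    }

vmx_vm_alloc() below allocates the wrapper and returns a pointer to the embedded member, so every struct kvm created for a VMX guest is really the first field of a struct kvm_vmx.
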
@@ -2153,6 +2166,7 @@ static unsigned long segment_base(u16 selector)
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int cpu = raw_smp_processor_id();
        int i;
 
        if (vmx->host_state.loaded)
@@ -2165,7 +2179,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
         */
        vmx->host_state.ldt_sel = kvm_read_ldt();
        vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
+
+#ifdef CONFIG_X86_64
+       save_fsgs_for_kvm();
+       vmx->host_state.fs_sel = current->thread.fsindex;
+       vmx->host_state.gs_sel = current->thread.gsindex;
+#else
        savesegment(fs, vmx->host_state.fs_sel);
+       savesegment(gs, vmx->host_state.gs_sel);
+#endif
        if (!(vmx->host_state.fs_sel & 7)) {
                vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
                vmx->host_state.fs_reload_needed = 0;
@@ -2173,7 +2195,6 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
                vmcs_write16(HOST_FS_SELECTOR, 0);
                vmx->host_state.fs_reload_needed = 1;
        }
-       savesegment(gs, vmx->host_state.gs_sel);
        if (!(vmx->host_state.gs_sel & 7))
                vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
        else {
@@ -2187,15 +2208,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-       vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
-       vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
+       vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
+       vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
 #else
        vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
        vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
 #endif
 
 #ifdef CONFIG_X86_64
-       rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+       vmx->msr_host_kernel_gs_base = current->thread.gsbase;
        if (is_long_mode(&vmx->vcpu))
                wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
 #endif
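
The three hunks above stop reading MSR_FS_BASE, MSR_GS_BASE and MSR_KERNEL_GS_BASE on every host-state save and instead use the values cached in current->thread, refreshed via save_fsgs_for_kvm(). That helper is not part of this file; a plausible shape for it, assuming it simply snapshots the current task's FS/GS selectors and bases (which is what the current->thread reads above rely on):

    /*
     * Presumed definition (arch/x86/kernel/process_64.c): refresh the
     * cached FS/GS selectors and bases in current->thread so KVM can
     * read them instead of issuing RDMSRs on every VM entry.
     */
    void save_fsgs_for_kvm(void)
    {
            save_fsgs(current);
    }
    EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
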
@@ -2556,6 +2577,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
        return 0;
 }
 
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Ensure that we clear the HLT state in the VMCS.  We don't need to
+        * explicitly skip the instruction because if the HLT state is set,
+        * then the instruction is already executing and RIP has already been
+        * advanced.
+        */
+       if (kvm_hlt_in_guest(vcpu->kvm) &&
+                       vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+               vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
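
vmx_clear_hlt() keys off kvm_hlt_in_guest(), one of several per-VM "leave this instruction in the guest" predicates used in this patch (alongside kvm_mwait_in_guest() and kvm_pause_in_guest()). Their definitions live outside this file; a sketch of the presumed accessors, noting that only the pause_in_guest field is actually visible in this diff (in vmx_vm_init() below) and the other field names are assumptions:

    /* Presumed per-VM accessors (arch/x86/kvm/x86.h); field names assumed. */
    static inline bool kvm_mwait_in_guest(struct kvm *kvm)
    {
            return kvm->arch.mwait_in_guest;
    }

    static inline bool kvm_hlt_in_guest(struct kvm *kvm)
    {
            return kvm->arch.hlt_in_guest;
    }

    static inline bool kvm_pause_in_guest(struct kvm *kvm)
    {
            return kvm->arch.pause_in_guest;
    }
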
@@ -2586,6 +2620,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
                intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+
+       vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -3746,13 +3782,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
              CPU_BASED_UNCOND_IO_EXITING |
              CPU_BASED_MOV_DR_EXITING |
              CPU_BASED_USE_TSC_OFFSETING |
+             CPU_BASED_MWAIT_EXITING |
+             CPU_BASED_MONITOR_EXITING |
              CPU_BASED_INVLPG_EXITING |
              CPU_BASED_RDPMC_EXITING;
 
-       if (!kvm_mwait_in_guest())
-               min |= CPU_BASED_MWAIT_EXITING |
-                       CPU_BASED_MONITOR_EXITING;
-
        opt = CPU_BASED_TPR_SHADOW |
              CPU_BASED_USE_MSR_BITMAPS |
              CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -4198,6 +4232,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 {
        unsigned long flags;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
 
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
@@ -4213,13 +4248,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
         * Very old userspace does not call KVM_SET_TSS_ADDR before entering
         * vcpu. Warn the user that an update is overdue.
         */
-       if (!vcpu->kvm->arch.tss_addr)
+       if (!kvm_vmx->tss_addr)
                printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
                             "called before entering vcpu\n");
 
        vmx_segment_cache_clear(vmx);
 
-       vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
+       vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
        vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
        vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
@@ -4509,7 +4544,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                    is_guest_mode(vcpu))
                        guest_cr3 = kvm_read_cr3(vcpu);
                else
-                       guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
+                       guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr;
                ept_load_pdptrs(vcpu);
        }
 
@@ -4950,7 +4985,7 @@ static int init_rmode_tss(struct kvm *kvm)
        int idx, r;
 
        idx = srcu_read_lock(&kvm->srcu);
-       fn = kvm->arch.tss_addr >> PAGE_SHIFT;
+       fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
        r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
        if (r < 0)
                goto out;
@@ -4976,22 +5011,23 @@ static int init_rmode_tss(struct kvm *kvm)
 
 static int init_rmode_identity_map(struct kvm *kvm)
 {
+       struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
        int i, idx, r = 0;
        kvm_pfn_t identity_map_pfn;
        u32 tmp;
 
-       /* Protect kvm->arch.ept_identity_pagetable_done. */
+       /* Protect kvm_vmx->ept_identity_pagetable_done. */
        mutex_lock(&kvm->slots_lock);
 
-       if (likely(kvm->arch.ept_identity_pagetable_done))
+       if (likely(kvm_vmx->ept_identity_pagetable_done))
                goto out2;
 
-       if (!kvm->arch.ept_identity_map_addr)
-               kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
-       identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+       if (!kvm_vmx->ept_identity_map_addr)
+               kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+       identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
 
        r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
-                                   kvm->arch.ept_identity_map_addr, PAGE_SIZE);
+                                   kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
        if (r < 0)
                goto out2;
 
@@ -5008,7 +5044,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
                if (r < 0)
                        goto out;
        }
-       kvm->arch.ept_identity_pagetable_done = true;
+       kvm_vmx->ept_identity_pagetable_done = true;
 
 out:
        srcu_read_unlock(&kvm->srcu, idx);
@@ -5544,6 +5580,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
                exec_control |= CPU_BASED_CR3_STORE_EXITING |
                                CPU_BASED_CR3_LOAD_EXITING  |
                                CPU_BASED_INVLPG_EXITING;
+       if (kvm_mwait_in_guest(vmx->vcpu.kvm))
+               exec_control &= ~(CPU_BASED_MWAIT_EXITING |
+                               CPU_BASED_MONITOR_EXITING);
+       if (kvm_hlt_in_guest(vmx->vcpu.kvm))
+               exec_control &= ~CPU_BASED_HLT_EXITING;
        return exec_control;
 }
 
@@ -5577,7 +5618,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
        }
        if (!enable_unrestricted_guest)
                exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
-       if (!ple_gap)
+       if (kvm_pause_in_guest(vmx->vcpu.kvm))
                exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
        if (!kvm_vcpu_apicv_active(vcpu))
                exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -5740,7 +5781,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
                vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
        }
 
-       if (ple_gap) {
+       if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
                vmcs_write32(PLE_GAP, ple_gap);
                vmx->ple_window = ple_window;
                vmx->ple_window_dirty = true;
@@ -5905,6 +5946,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        update_exception_bitmap(vcpu);
 
        vpid_sync_context(vmx->vpid);
+       if (init_event)
+               vmx_clear_hlt(vcpu);
 }
 
 /*
@@ -5975,6 +6018,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
        } else
                intr |= INTR_TYPE_EXT_INTR;
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+
+       vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -6005,6 +6050,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                        INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+
+       vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -6074,10 +6121,16 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
                                    PAGE_SIZE * 3);
        if (ret)
                return ret;
-       kvm->arch.tss_addr = addr;
+       to_kvm_vmx(kvm)->tss_addr = addr;
        return init_rmode_tss(kvm);
 }
 
+static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+{
+       to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
+       return 0;
+}
+
 static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
 {
        switch (vec) {
@@ -7183,7 +7236,7 @@ static __exit void hardware_unsetup(void)
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
-       if (ple_gap)
+       if (!kvm_pause_in_guest(vcpu->kvm))
                grow_ple_window(vcpu);
 
        /*
@@ -9239,9 +9292,9 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 
        /* We need to handle NMIs before interrupts are enabled */
        if (is_nmi(exit_intr_info)) {
-               kvm_before_handle_nmi(&vmx->vcpu);
+               kvm_before_interrupt(&vmx->vcpu);
                asm("int $2");
-               kvm_after_handle_nmi(&vmx->vcpu);
+               kvm_after_interrupt(&vmx->vcpu);
        }
 }
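
The rename from kvm_before_handle_nmi()/kvm_after_handle_nmi() to kvm_before_interrupt()/kvm_after_interrupt() is what the commit subject refers to: the same bookkeeping is now applied to every host interrupt taken while the vCPU was running, so perf's guest callbacks can attribute PMI samples (including timer-mode ones) to the guest IP. A sketch of what such hooks conventionally do, assuming a per-CPU current-vCPU pointer consulted by the perf guest callbacks; the variable name is illustrative:

    /*
     * Illustrative only: remember which vCPU was running when a host
     * interrupt or NMI arrives, so the perf guest callbacks can tell
     * guest samples from host samples and fetch the guest RIP.
     */
    DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);

    static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
    {
            __this_cpu_write(current_vcpu, vcpu);
    }

    static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
    {
            __this_cpu_write(current_vcpu, NULL);
    }
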
 
@@ -9731,6 +9784,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
+static struct kvm *vmx_vm_alloc(void)
+{
+       struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);
+       return &kvm_vmx->kvm;
+}
+
+static void vmx_vm_free(struct kvm *kvm)
+{
+       kfree(to_kvm_vmx(kvm));
+}
+
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
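
vmx_vm_alloc() and vmx_vm_free() back the new vm_alloc/vm_free entries in kvm_x86_ops (wired up near the end of this patch), letting VMX size the VM structure itself. The common x86 code is presumed to reach them through thin wrappers along these lines; the wrapper names follow the usual kvm_arch_* convention and are an assumption here:

    /* Presumed call sites in common x86 KVM code (names are assumptions). */
    static inline struct kvm *kvm_arch_alloc_vm(void)
    {
            return kvm_x86_ops->vm_alloc();
    }

    static inline void kvm_arch_free_vm(struct kvm *kvm)
    {
            kvm_x86_ops->vm_free(kvm);
    }

Callers get back a pointer to the embedded struct kvm; to_kvm_vmx() recovers the wrapper again in vmx_vm_free() before freeing it.
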
@@ -9879,6 +9943,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        return ERR_PTR(err);
 }
 
+static int vmx_vm_init(struct kvm *kvm)
+{
+       if (!ple_gap)
+               kvm->arch.pause_in_guest = true;
+       return 0;
+}
+
 static void __init vmx_check_processor_compat(void *rtn)
 {
        struct vmcs_config vmcs_conf;
@@ -11020,8 +11091,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
         * which means L1 attempted VMEntry to L2 with invalid state.
         * Fail the VMEntry.
         */
-       if (vmx->emulation_required)
+       if (vmx->emulation_required) {
+               *entry_failure_code = ENTRY_FAIL_DEFAULT;
                return 1;
+       }
 
        /* Shadow page tables on either EPT or shadow page tables. */
        if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
@@ -12028,7 +12101,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
 
 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
-       if (ple_gap)
+       if (!kvm_pause_in_guest(vcpu->kvm))
                shrink_ple_window(vcpu);
 }
 
@@ -12346,6 +12419,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 
        vmx->nested.smm.vmxon = vmx->nested.vmxon;
        vmx->nested.vmxon = false;
+       vmx_clear_hlt(vcpu);
        return 0;
 }
 
@@ -12387,6 +12461,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .cpu_has_accelerated_tpr = report_flexpriority,
        .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
 
+       .vm_init = vmx_vm_init,
+       .vm_alloc = vmx_vm_alloc,
+       .vm_free = vmx_vm_free,
+
        .vcpu_create = vmx_create_vcpu,
        .vcpu_free = vmx_free_vcpu,
        .vcpu_reset = vmx_vcpu_reset,
@@ -12454,6 +12532,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 
        .set_tss_addr = vmx_set_tss_addr,
+       .set_identity_map_addr = vmx_set_identity_map_addr,
        .get_tdp_level = get_ept_level,
        .get_mt_mask = vmx_get_mt_mask,