KVM: x86: Fix perf timer mode IP reporting
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b4d8da6c62c8561e49446625a80ee5db347dabd5..0e5510ebd3f25325fddb92b3272533d8aef3e632 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -196,6 +196,14 @@ module_param(ple_window_max, int, S_IRUGO);
 
 extern const ulong vmx_return;
 
+struct kvm_vmx {
+       struct kvm kvm;
+
+       unsigned int tss_addr;
+       bool ept_identity_pagetable_done;
+       gpa_t ept_identity_map_addr;
+};
+
 #define NR_AUTOLOAD_MSRS 8
 
 struct vmcs {
@@ -698,6 +706,11 @@ enum segment_cache_field {
        SEG_FIELD_NR = 4
 };
 
+static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
+{
+       return container_of(kvm, struct kvm_vmx, kvm);
+}
+
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 {
        return container_of(vcpu, struct vcpu_vmx, vcpu);
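
The new struct kvm_vmx wraps the generic struct kvm so that VMX-private per-VM state (tss_addr and the EPT identity-map fields) no longer has to live in kvm->arch. A minimal stand-alone sketch of the container_of idiom behind to_kvm_vmx(), using placeholder definitions where the real kernel types are not shown in this diff:

    /* Stand-alone illustration; struct kvm here is a stub, not the real one. */
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct kvm { int placeholder; };

    struct kvm_vmx {
            struct kvm kvm;           /* generic part, handed to common KVM code */
            unsigned int tss_addr;    /* VMX-private per-VM state */
    };

    /* Recover the wrapper from the embedded struct kvm pointer. */
    static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
    {
            return container_of(kvm, struct kvm_vmx, kvm);
    }

vmx_vm_alloc() below allocates the wrapper and returns a pointer to the embedded member, so every struct kvm created for a VMX guest is really the first field of a struct kvm_vmx.
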
@@ -2153,6 +2166,7 @@ static unsigned long segment_base(u16 selector)
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int cpu = raw_smp_processor_id();
        int i;
 
        if (vmx->host_state.loaded)
@@ -2165,7 +2179,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
         */
        vmx->host_state.ldt_sel = kvm_read_ldt();
        vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
+
+#ifdef CONFIG_X86_64
+       save_fsgs_for_kvm();
+       vmx->host_state.fs_sel = current->thread.fsindex;
+       vmx->host_state.gs_sel = current->thread.gsindex;
+#else
        savesegment(fs, vmx->host_state.fs_sel);
+       savesegment(gs, vmx->host_state.gs_sel);
+#endif
        if (!(vmx->host_state.fs_sel & 7)) {
                vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
                vmx->host_state.fs_reload_needed = 0;
@@ -2173,7 +2195,6 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
                vmcs_write16(HOST_FS_SELECTOR, 0);
                vmx->host_state.fs_reload_needed = 1;
        }
-       savesegment(gs, vmx->host_state.gs_sel);
        if (!(vmx->host_state.gs_sel & 7))
                vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
        else {
@@ -2187,15 +2208,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 #endif
 
 #ifdef CONFIG_X86_64
-       vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
-       vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
+       vmcs_writel(HOST_FS_BASE, current->thread.fsbase);
+       vmcs_writel(HOST_GS_BASE, cpu_kernelmode_gs_base(cpu));
 #else
        vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
        vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
 #endif
 
 #ifdef CONFIG_X86_64
-       rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
+       vmx->msr_host_kernel_gs_base = current->thread.gsbase;
        if (is_long_mode(&vmx->vcpu))
                wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
 #endif
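
The three hunks above stop reading MSR_FS_BASE, MSR_GS_BASE and MSR_KERNEL_GS_BASE on every host-state save and instead use the values cached in current->thread, refreshed via save_fsgs_for_kvm(). That helper is not part of this file; a plausible shape for it, assuming it simply snapshots the current task's FS/GS selectors and bases (which is what the current->thread reads above rely on):

    /*
     * Presumed definition (arch/x86/kernel/process_64.c): refresh the
     * cached FS/GS selectors and bases in current->thread so KVM can
     * read them instead of issuing RDMSRs on every VM entry.
     */
    void save_fsgs_for_kvm(void)
    {
            save_fsgs(current);
    }
    EXPORT_SYMBOL_GPL(save_fsgs_for_kvm);
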
@@ -2556,6 +2577,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
        return 0;
 }
 
+static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
+{
+       /*
+        * Ensure that we clear the HLT state in the VMCS.  We don't need to
+        * explicitly skip the instruction because if the HLT state is set,
+        * then the instruction is already executing and RIP has already been
+        * advanced.
+        */
+       if (kvm_hlt_in_guest(vcpu->kvm) &&
+                       vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
+               vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
+}
+
 static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
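
vmx_clear_hlt() keys off kvm_hlt_in_guest(), one of several per-VM "leave this instruction in the guest" predicates used in this patch (alongside kvm_mwait_in_guest() and kvm_pause_in_guest()). Their definitions live outside this file; a sketch of the presumed accessors, noting that only the pause_in_guest field is actually visible in this diff (in vmx_vm_init() below) and the other field names are assumptions:

    /* Presumed per-VM accessors (arch/x86/kvm/x86.h); field names assumed. */
    static inline bool kvm_mwait_in_guest(struct kvm *kvm)
    {
            return kvm->arch.mwait_in_guest;
    }

    static inline bool kvm_hlt_in_guest(struct kvm *kvm)
    {
            return kvm->arch.hlt_in_guest;
    }

    static inline bool kvm_pause_in_guest(struct kvm *kvm)
    {
            return kvm->arch.pause_in_guest;
    }
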
@@ -2586,6 +2620,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
                intr_info |= INTR_TYPE_HARD_EXCEPTION;
 
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
+
+       vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_rdtscp_supported(void)
@@ -3746,13 +3782,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
              CPU_BASED_UNCOND_IO_EXITING |
              CPU_BASED_MOV_DR_EXITING |
              CPU_BASED_USE_TSC_OFFSETING |
+             CPU_BASED_MWAIT_EXITING |
+             CPU_BASED_MONITOR_EXITING |
              CPU_BASED_INVLPG_EXITING |
              CPU_BASED_RDPMC_EXITING;
 
-       if (!kvm_mwait_in_guest())
-               min |= CPU_BASED_MWAIT_EXITING |
-                       CPU_BASED_MONITOR_EXITING;
-
        opt = CPU_BASED_TPR_SHADOW |
              CPU_BASED_USE_MSR_BITMAPS |
              CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -4198,6 +4232,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 {
        unsigned long flags;
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
 
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
@@ -4213,13 +4248,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
         * Very old userspace does not call KVM_SET_TSS_ADDR before entering
         * vcpu. Warn the user that an update is overdue.
         */
-       if (!vcpu->kvm->arch.tss_addr)
+       if (!kvm_vmx->tss_addr)
                printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be "
                             "called before entering vcpu\n");
 
        vmx_segment_cache_clear(vmx);
 
-       vmcs_writel(GUEST_TR_BASE, vcpu->kvm->arch.tss_addr);
+       vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
        vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
        vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
@@ -4509,7 +4544,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
                    is_guest_mode(vcpu))
                        guest_cr3 = kvm_read_cr3(vcpu);
                else
-                       guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
+                       guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr;
                ept_load_pdptrs(vcpu);
        }
 
@@ -4950,7 +4985,7 @@ static int init_rmode_tss(struct kvm *kvm)
        int idx, r;
 
        idx = srcu_read_lock(&kvm->srcu);
-       fn = kvm->arch.tss_addr >> PAGE_SHIFT;
+       fn = to_kvm_vmx(kvm)->tss_addr >> PAGE_SHIFT;
        r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
        if (r < 0)
                goto out;
@@ -4976,22 +5011,23 @@ static int init_rmode_tss(struct kvm *kvm)
 
 static int init_rmode_identity_map(struct kvm *kvm)
 {
+       struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
        int i, idx, r = 0;
        kvm_pfn_t identity_map_pfn;
        u32 tmp;
 
-       /* Protect kvm->arch.ept_identity_pagetable_done. */
+       /* Protect kvm_vmx->ept_identity_pagetable_done. */
        mutex_lock(&kvm->slots_lock);
 
-       if (likely(kvm->arch.ept_identity_pagetable_done))
+       if (likely(kvm_vmx->ept_identity_pagetable_done))
                goto out2;
 
-       if (!kvm->arch.ept_identity_map_addr)
-               kvm->arch.ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
-       identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT;
+       if (!kvm_vmx->ept_identity_map_addr)
+               kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+       identity_map_pfn = kvm_vmx->ept_identity_map_addr >> PAGE_SHIFT;
 
        r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
-                                   kvm->arch.ept_identity_map_addr, PAGE_SIZE);
+                                   kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
        if (r < 0)
                goto out2;
 
@@ -5008,7 +5044,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
                if (r < 0)
                        goto out;
        }
-       kvm->arch.ept_identity_pagetable_done = true;
+       kvm_vmx->ept_identity_pagetable_done = true;
 
 out:
        srcu_read_unlock(&kvm->srcu, idx);
@@ -5544,6 +5580,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
                exec_control |= CPU_BASED_CR3_STORE_EXITING |
                                CPU_BASED_CR3_LOAD_EXITING  |
                                CPU_BASED_INVLPG_EXITING;
+       if (kvm_mwait_in_guest(vmx->vcpu.kvm))
+               exec_control &= ~(CPU_BASED_MWAIT_EXITING |
+                               CPU_BASED_MONITOR_EXITING);
+       if (kvm_hlt_in_guest(vmx->vcpu.kvm))
+               exec_control &= ~CPU_BASED_HLT_EXITING;
        return exec_control;
 }
 
@@ -5577,7 +5618,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
        }
        if (!enable_unrestricted_guest)
                exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
-       if (!ple_gap)
+       if (kvm_pause_in_guest(vmx->vcpu.kvm))
                exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
        if (!kvm_vcpu_apicv_active(vcpu))
                exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -5740,7 +5781,7 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
                vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc)));
        }
 
-       if (ple_gap) {
+       if (!kvm_pause_in_guest(vmx->vcpu.kvm)) {
                vmcs_write32(PLE_GAP, ple_gap);
                vmx->ple_window = ple_window;
                vmx->ple_window_dirty = true;
@@ -5905,6 +5946,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        update_exception_bitmap(vcpu);
 
        vpid_sync_context(vmx->vpid);
+       if (init_event)
+               vmx_clear_hlt(vcpu);
 }
 
 /*
@@ -5975,6 +6018,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
        } else
                intr |= INTR_TYPE_EXT_INTR;
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
+
+       vmx_clear_hlt(vcpu);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -6005,6 +6050,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 
        vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
                        INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
+
+       vmx_clear_hlt(vcpu);
 }
 
 static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
@@ -6074,10 +6121,16 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
                                    PAGE_SIZE * 3);
        if (ret)
                return ret;
-       kvm->arch.tss_addr = addr;
+       to_kvm_vmx(kvm)->tss_addr = addr;
        return init_rmode_tss(kvm);
 }
 
+static int vmx_set_identity_map_addr(struct kvm *kvm, u64 ident_addr)
+{
+       to_kvm_vmx(kvm)->ept_identity_map_addr = ident_addr;
+       return 0;
+}
+
 static bool rmode_exception(struct kvm_vcpu *vcpu, int vec)
 {
        switch (vec) {
@@ -7183,7 +7236,7 @@ static __exit void hardware_unsetup(void)
  */
 static int handle_pause(struct kvm_vcpu *vcpu)
 {
-       if (ple_gap)
+       if (!kvm_pause_in_guest(vcpu->kvm))
                grow_ple_window(vcpu);
 
        /*
@@ -9239,9 +9292,9 @@ static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 
        /* We need to handle NMIs before interrupts are enabled */
        if (is_nmi(exit_intr_info)) {
-               kvm_before_handle_nmi(&vmx->vcpu);
+               kvm_before_interrupt(&vmx->vcpu);
                asm("int $2");
-               kvm_after_handle_nmi(&vmx->vcpu);
+               kvm_after_interrupt(&vmx->vcpu);
        }
 }
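
The rename from kvm_before_handle_nmi()/kvm_after_handle_nmi() to kvm_before_interrupt()/kvm_after_interrupt() is what the commit subject refers to: the same bookkeeping is now applied to every host interrupt taken while the vCPU was running, so perf's guest callbacks can attribute PMI samples (including timer-mode ones) to the guest IP. A sketch of what such hooks conventionally do, assuming a per-CPU current-vCPU pointer consulted by the perf guest callbacks; the variable name is illustrative:

    /*
     * Illustrative only: remember which vCPU was running when a host
     * interrupt or NMI arrives, so the perf guest callbacks can tell
     * guest samples from host samples and fetch the guest RIP.
     */
    DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu);

    static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)
    {
            __this_cpu_write(current_vcpu, vcpu);
    }

    static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
    {
            __this_cpu_write(current_vcpu, NULL);
    }
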
 
@@ -9731,6 +9784,17 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(vmx_vcpu_run);
 
+static struct kvm *vmx_vm_alloc(void)
+{
+       struct kvm_vmx *kvm_vmx = kzalloc(sizeof(struct kvm_vmx), GFP_KERNEL);
+       return &kvm_vmx->kvm;
+}
+
+static void vmx_vm_free(struct kvm *kvm)
+{
+       kfree(to_kvm_vmx(kvm));
+}
+
 static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
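
vmx_vm_alloc() and vmx_vm_free() back the new vm_alloc/vm_free entries in kvm_x86_ops (wired up near the end of this patch), letting VMX size the VM structure itself. The common x86 code is presumed to reach them through thin wrappers along these lines; the wrapper names follow the usual kvm_arch_* convention and are an assumption here:

    /* Presumed call sites in common x86 KVM code (names are assumptions). */
    static inline struct kvm *kvm_arch_alloc_vm(void)
    {
            return kvm_x86_ops->vm_alloc();
    }

    static inline void kvm_arch_free_vm(struct kvm *kvm)
    {
            kvm_x86_ops->vm_free(kvm);
    }

Callers get back a pointer to the embedded struct kvm; to_kvm_vmx() recovers the wrapper again in vmx_vm_free() before freeing it.
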
@@ -9879,6 +9943,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        return ERR_PTR(err);
 }
 
+static int vmx_vm_init(struct kvm *kvm)
+{
+       if (!ple_gap)
+               kvm->arch.pause_in_guest = true;
+       return 0;
+}
+
 static void __init vmx_check_processor_compat(void *rtn)
 {
        struct vmcs_config vmcs_conf;
@@ -11020,8 +11091,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
         * which means L1 attempted VMEntry to L2 with invalid state.
         * Fail the VMEntry.
         */
-       if (vmx->emulation_required)
+       if (vmx->emulation_required) {
+               *entry_failure_code = ENTRY_FAIL_DEFAULT;
                return 1;
+       }
 
        /* Shadow page tables on either EPT or shadow page tables. */
        if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
@@ -12028,7 +12101,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
 
 static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
-       if (ple_gap)
+       if (!kvm_pause_in_guest(vcpu->kvm))
                shrink_ple_window(vcpu);
 }
 
@@ -12346,6 +12419,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 
        vmx->nested.smm.vmxon = vmx->nested.vmxon;
        vmx->nested.vmxon = false;
+       vmx_clear_hlt(vcpu);
        return 0;
 }
 
@@ -12387,6 +12461,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .cpu_has_accelerated_tpr = report_flexpriority,
        .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
 
+       .vm_init = vmx_vm_init,
+       .vm_alloc = vmx_vm_alloc,
+       .vm_free = vmx_vm_free,
+
        .vcpu_create = vmx_create_vcpu,
        .vcpu_free = vmx_free_vcpu,
        .vcpu_reset = vmx_vcpu_reset,
@@ -12454,6 +12532,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .deliver_posted_interrupt = vmx_deliver_posted_interrupt,
 
        .set_tss_addr = vmx_set_tss_addr,
+       .set_identity_map_addr = vmx_set_identity_map_addr,
        .get_tdp_level = get_ept_level,
        .get_mt_mask = vmx_get_mt_mask,