KVM: x86: Don't attempt VMWare emulation on #GP with non-zero error code

[linux.git] / arch / x86 / kvm / vmx / vmx.c
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c

index c030c96fc81a817f6e11e3b1580aa907b8bc63f7..d1bac3cb2440f662cad424bce9d6e46855b44d41 100644 (file)
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -486,6 +486,35 @@ static int hv_remote_flush_tlb(struct kvm *kvm)
         return hv_remote_flush_tlb_with_range(kvm, NULL);
  }
  
+static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+{
+       struct hv_enlightened_vmcs *evmcs;
+       struct hv_partition_assist_pg **p_hv_pa_pg =
+                       &vcpu->kvm->arch.hyperv.hv_pa_pg;
+       /*
+        * Synthetic VM-Exit is not enabled in current code, so all evmcs
+        * in a single VM share one assist page: allocate it on first use
+        * (vmx_vm_free() releases it) and return -ENOMEM on failure.
+        */
+       if (!*p_hv_pa_pg) {
+               *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+               if (!*p_hv_pa_pg)
+                       return -ENOMEM;
+               pr_debug("KVM: Hyper-V: allocated PA_PG for %llx\n",
+                        (u64)vcpu->kvm);
+       }
+
+       evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
+
+       evmcs->partition_assist_page = __pa(*p_hv_pa_pg);
+       evmcs->hv_vm_id = (u64)vcpu->kvm;
+       evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+
+       pr_debug("KVM: Hyper-V: enabled DIRECT flush for %llx\n",
+                (u64)vcpu->kvm);
+       return 0;
+}
+
  #endif /* IS_ENABLED(CONFIG_HYPERV) */
  
  /*
@@ -1472,8 +1501,11 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
         return 0;
  }
  
-
-static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+/*
+ * Returns an int to be compatible with SVM implementation (which can fail).
+ * Do not use directly, use skip_emulated_instruction() instead.
+ */
+static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
  {
         unsigned long rip;
  
@@ -1483,6 +1515,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
  
         /* skipping an emulated instruction also counts */
         vmx_set_interrupt_shadow(vcpu, 0);
+
+       return EMULATE_DONE;
+}
+
+static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+       (void)__skip_emulated_instruction(vcpu);
  }
  
  static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
@@ -4026,7 +4065,7 @@ static void ept_set_mmio_spte_mask(void)
          * of an EPT paging-structure entry is 110b (write/execute).
          */
         kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
-                                  VMX_EPT_MISCONFIG_WX_VALUE);
+                                  VMX_EPT_MISCONFIG_WX_VALUE, 0);
  }
  
  #define VMX_XSS_EXIT_BITMAP 0
@@ -4152,6 +4191,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  
         vcpu->arch.microcode_version = 0x100000000ULL;
         vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
+       vmx->hv_deadline_tsc = -1;
         kvm_set_cr8(vcpu, 0);
  
         if (!init_event) {
@@ -4499,12 +4539,22 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
  
         if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
                 WARN_ON_ONCE(!enable_vmware_backdoor);
+
+               /*
+                * VMware backdoor emulation on #GP interception only handles
+                * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
+                * error code on #GP.
+                */
+               if (error_code) {
+                       kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+                       return 1;
+               }
                 er = kvm_emulate_instruction(vcpu,
                         EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
                 if (er == EMULATE_USER_EXIT)
                         return 0;
                 else if (er != EMULATE_DONE)
-                       kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+                       kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
                 return 1;
         }
  
@@ -4856,41 +4906,12 @@ static int handle_cpuid(struct kvm_vcpu *vcpu)
  
  static int handle_rdmsr(struct kvm_vcpu *vcpu)
  {
-       u32 ecx = kvm_rcx_read(vcpu);
-       struct msr_data msr_info;
-
-       msr_info.index = ecx;
-       msr_info.host_initiated = false;
-       if (vmx_get_msr(vcpu, &msr_info)) {
-               trace_kvm_msr_read_ex(ecx);
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       }
-
-       trace_kvm_msr_read(ecx, msr_info.data);
-
-       kvm_rax_write(vcpu, msr_info.data & -1u);
-       kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u);
-       return kvm_skip_emulated_instruction(vcpu);
+       return kvm_emulate_rdmsr(vcpu);
  }
  
  static int handle_wrmsr(struct kvm_vcpu *vcpu)
  {
-       struct msr_data msr;
-       u32 ecx = kvm_rcx_read(vcpu);
-       u64 data = kvm_read_edx_eax(vcpu);
-
-       msr.data = data;
-       msr.index = ecx;
-       msr.host_initiated = false;
-       if (kvm_set_msr(vcpu, &msr) != 0) {
-               trace_kvm_msr_write_ex(ecx, data);
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       }
-
-       trace_kvm_msr_write(ecx, data);
-       return kvm_skip_emulated_instruction(vcpu);
+       return kvm_emulate_wrmsr(vcpu);
  }
  
  static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
@@ -5190,7 +5211,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                 err = kvm_emulate_instruction(vcpu, 0);
  
                 if (err == EMULATE_USER_EXIT) {
-                       ++vcpu->stat.mmio_exits;
                         ret = 0;
                         goto out;
                 }
@@ -5227,31 +5247,33 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
  static void grow_ple_window(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int old = vmx->ple_window;
+       unsigned int old = vmx->ple_window;
  
         vmx->ple_window = __grow_ple_window(old, ple_window,
                                             ple_window_grow,
                                             ple_window_max);
  
-       if (vmx->ple_window != old)
+       if (vmx->ple_window != old) {
                 vmx->ple_window_dirty = true;
-
-       trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+               trace_kvm_ple_window_update(vcpu->vcpu_id,
+                                           vmx->ple_window, old);
+       }
  }
  
  static void shrink_ple_window(struct kvm_vcpu *vcpu)
  {
         struct vcpu_vmx *vmx = to_vmx(vcpu);
-       int old = vmx->ple_window;
+       unsigned int old = vmx->ple_window;
  
         vmx->ple_window = __shrink_ple_window(old, ple_window,
                                               ple_window_shrink,
                                               ple_window);
  
-       if (vmx->ple_window != old)
+       if (vmx->ple_window != old) {
                 vmx->ple_window_dirty = true;
-
-       trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+               trace_kvm_ple_window_update(vcpu->vcpu_id,
+                                           vmx->ple_window, old);
+       }
  }
  
  /*
@@ -5887,8 +5909,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
         else {
                 vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
                                 exit_reason);
-               kvm_queue_exception(vcpu, UD_VECTOR);
-               return 1;
+               dump_vmcs();
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror =
+                       KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+               vcpu->run->internal.ndata = 1;
+               vcpu->run->internal.data[0] = exit_reason;
+               return 0;
         }
  }
  
@@ -6522,6 +6549,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
                 current_evmcs->hv_clean_fields |=
                         HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
  
+       if (static_branch_unlikely(&enable_evmcs))
+               current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
+
         /* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
         if (vmx->host_debugctlmsr)
                 update_debugctlmsr(vmx->host_debugctlmsr);
@@ -6589,6 +6619,7 @@ static struct kvm *vmx_vm_alloc(void)
  
  static void vmx_vm_free(struct kvm *kvm)
  {
+       kfree(kvm->arch.hyperv.hv_pa_pg);
         vfree(to_kvm_vmx(kvm));
  }
  
@@ -6615,6 +6646,9 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
         unsigned long *msr_bitmap;
         int cpu;
  
+       BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
+               "struct kvm_vcpu must be at offset 0 for arch usercopy region");
+
         vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
         if (!vmx)
                 return ERR_PTR(-ENOMEM);
@@ -7369,10 +7403,14 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
                  * irqbalance to make the interrupts single-CPU.
                  *
                  * We will support full lowest-priority interrupt later.
+                *
+                * In addition, we can only inject generic interrupts using
+                * the PI mechanism, so refuse to route others through it.
                  */
  
                 kvm_set_msi_irq(kvm, e, &irq);
-               if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+               if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+                   !kvm_irq_is_postable(&irq)) {
                         /*
                          * Make sure the IRTE is in remapped mode if
                          * we don't handle it in posted mode.
@@ -7474,6 +7512,11 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
         return false;
  }
  
+static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
+{
+       return to_vmx(vcpu)->nested.vmxon;
+}
+
  static __init int hardware_setup(void)
  {
         unsigned long host_bndcfgs;
@@ -7705,7 +7748,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
  
         .run = vmx_vcpu_run,
         .handle_exit = vmx_handle_exit,
-       .skip_emulated_instruction = skip_emulated_instruction,
+       .skip_emulated_instruction = __skip_emulated_instruction,
         .set_interrupt_shadow = vmx_set_interrupt_shadow,
         .get_interrupt_shadow = vmx_get_interrupt_shadow,
         .patch_hypercall = vmx_patch_hypercall,
@@ -7799,6 +7842,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
         .nested_enable_evmcs = NULL,
         .nested_get_evmcs_version = NULL,
         .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
+       .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
  };
  
  static void vmx_cleanup_l1d_flush(void)
@@ -7835,6 +7879,7 @@ static void vmx_exit(void)
                         if (!vp_ap)
                                 continue;
  
+                       vp_ap->nested_control.features.directhypercall = 0;
                         vp_ap->current_nested_vmcs = 0;
                         vp_ap->enlighten_vmentry = 0;
                 }
@@ -7874,6 +7919,11 @@ static int __init vmx_init(void)
                         pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
                         static_branch_enable(&enable_evmcs);
                 }
+
+               if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
+                       vmx_x86_ops.enable_direct_tlbflush
+                               = hv_enable_direct_tlbflush;
+
         } else {
                 enlightened_vmcs = false;
         }