KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f76efad248aba0dc02bce77a4cd984343d181d79..5a3bb1a697a226e68874c5a2a49a57c946822be3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 {
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+       unsigned int nr = vcpu->arch.exception.nr;
 
-       if (!(vmcs12->exception_bitmap & (1u << nr)))
+       if (!((vmcs12->exception_bitmap & (1u << nr)) ||
+               (nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
                return 0;
 
+       if (vcpu->arch.exception.nested_apf) {
+               vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
+               nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+                       PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+                       INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+                       vcpu->arch.apf.nested_apf_token);
+               return 1;
+       }
+
        nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                          vmcs_read32(VM_EXIT_INTR_INFO),
                          vmcs_readl(EXIT_QUALIFICATION));
        return 1;
 }
 
-static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code,
-                               bool reinject)
+static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       unsigned nr = vcpu->arch.exception.nr;
+       bool has_error_code = vcpu->arch.exception.has_error_code;
+       bool reinject = vcpu->arch.exception.reinject;
+       u32 error_code = vcpu->arch.exception.error_code;
        u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
        if (!reinject && is_guest_mode(vcpu) &&
-           nested_vmx_check_exception(vcpu, nr))
+           nested_vmx_check_exception(vcpu))
                return;
 
        if (has_error_code) {
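
The rewritten nested_vmx_check_exception() now pulls the vector out of vcpu->arch.exception and reflects the fault to L1 either when L1's exception bitmap intercepts it or when the injected #PF is really an async page fault. A minimal user-space sketch of that routing decision (stand-in struct and names, not the kernel's types):

/*
 * Illustrative model only: decide whether an exception being injected
 * into a nested guest should be reflected to L1 or delivered to L2.
 * An async #PF must always force a nested vmexit, even if L1 does not
 * intercept #PF in its exception bitmap.
 */
#include <stdbool.h>
#include <stdio.h>

#define PF_VECTOR 14

struct fake_exception {
	unsigned int nr;	/* vector being injected        */
	bool nested_apf;	/* injected #PF is an async_pf  */
};

static bool reflect_to_l1(unsigned int exception_bitmap,
			  const struct fake_exception *ex)
{
	return (exception_bitmap & (1u << ex->nr)) ||
	       (ex->nr == PF_VECTOR && ex->nested_apf);
}

int main(void)
{
	struct fake_exception pf  = { .nr = PF_VECTOR, .nested_apf = false };
	struct fake_exception apf = { .nr = PF_VECTOR, .nested_apf = true  };

	/* L1 does not intercept #PF: a plain #PF stays with L2 ... */
	printf("plain #PF -> %s\n", reflect_to_l1(0, &pf)  ? "L1" : "L2");
	/* ... but an async #PF is still forced out to L1.          */
	printf("async #PF -> %s\n", reflect_to_l1(0, &apf) ? "L1" : "L2");
	return 0;
}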
@@ -3764,6 +3777,25 @@ static void free_kvm_area(void)
        }
 }
 
+enum vmcs_field_type {
+       VMCS_FIELD_TYPE_U16 = 0,
+       VMCS_FIELD_TYPE_U64 = 1,
+       VMCS_FIELD_TYPE_U32 = 2,
+       VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+};
+
+static inline int vmcs_field_type(unsigned long field)
+{
+       if (0x1 & field)        /* the *_HIGH fields are all 32 bit */
+               return VMCS_FIELD_TYPE_U32;
+       return (field >> 13) & 0x3 ;
+}
+
+static inline int vmcs_field_readonly(unsigned long field)
+{
+       return (((field >> 10) & 0x3) == 1);
+}
+
 static void init_vmcs_shadow_fields(void)
 {
        int i, j;
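
vmcs_field_type() and vmcs_field_readonly() decode the architectural VMCS field encoding: bit 0 selects the 32-bit high half of a 64-bit field, bits 14:13 give the width, and bits 11:10 give the access type (1 = read-only data). A stand-alone sketch exercising that decode on a few well-known encodings (constants assumed from the architectural layout, e.g. arch/x86/include/asm/vmx.h):

#include <stdio.h>

enum vmcs_field_type {
	VMCS_FIELD_TYPE_U16 = 0,
	VMCS_FIELD_TYPE_U64 = 1,
	VMCS_FIELD_TYPE_U32 = 2,
	VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
};

static int vmcs_field_type(unsigned long field)
{
	if (0x1 & field)	/* the *_HIGH fields are all 32 bit */
		return VMCS_FIELD_TYPE_U32;
	return (field >> 13) & 0x3;	/* width is in bits 14:13 */
}

static int vmcs_field_readonly(unsigned long field)
{
	return ((field >> 10) & 0x3) == 1;	/* access type 1 = read-only data */
}

int main(void)
{
	/* GUEST_ES_SELECTOR = 0x0800: 16-bit guest-state field, read/write */
	printf("0x0800: type %d ro %d\n",
	       vmcs_field_type(0x0800), vmcs_field_readonly(0x0800));
	/* GUEST_PHYSICAL_ADDRESS = 0x2400: 64-bit read-only data field */
	printf("0x2400: type %d ro %d\n",
	       vmcs_field_type(0x2400), vmcs_field_readonly(0x2400));
	/* VM_EXIT_REASON = 0x4402: 32-bit read-only data field */
	printf("0x4402: type %d ro %d\n",
	       vmcs_field_type(0x4402), vmcs_field_readonly(0x4402));
	return 0;
}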
@@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void)
 
        /* shadowed fields guest access without vmexit */
        for (i = 0; i < max_shadow_read_write_fields; i++) {
-               clear_bit(shadow_read_write_fields[i],
-                         vmx_vmwrite_bitmap);
-               clear_bit(shadow_read_write_fields[i],
-                         vmx_vmread_bitmap);
+               unsigned long field = shadow_read_write_fields[i];
+
+               clear_bit(field, vmx_vmwrite_bitmap);
+               clear_bit(field, vmx_vmread_bitmap);
+               if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
+                       clear_bit(field + 1, vmx_vmwrite_bitmap);
+                       clear_bit(field + 1, vmx_vmread_bitmap);
+               }
+       }
+       for (i = 0; i < max_shadow_read_only_fields; i++) {
+               unsigned long field = shadow_read_only_fields[i];
+
+               clear_bit(field, vmx_vmread_bitmap);
+               if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
+                       clear_bit(field + 1, vmx_vmread_bitmap);
        }
-       for (i = 0; i < max_shadow_read_only_fields; i++)
-               clear_bit(shadow_read_only_fields[i],
-                         vmx_vmread_bitmap);
 }
 
 static __init int alloc_kvm_area(void)
@@ -4634,6 +4674,11 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
        return true;
 }
 
+static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+       return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
+
 static int init_rmode_tss(struct kvm *kvm)
 {
        gfn_t fn;
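
page_address_valid() requires a guest-physical address to be page aligned and to fit within the guest's reported MAXPHYADDR. A small sketch of the same check, with maxphyaddr hard-coded purely for illustration (the kernel takes it from cpuid_maxphyaddr()):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL

/* gpa is valid if it is 4K aligned and has no bits above maxphyaddr. */
static bool page_address_valid(uint64_t gpa, unsigned int maxphyaddr)
{
	return (gpa % PAGE_SIZE) == 0 && (gpa >> maxphyaddr) == 0;
}

int main(void)
{
	assert( page_address_valid(0x0000000000001000ULL, 36)); /* aligned, in range */
	assert(!page_address_valid(0x0000000000001004ULL, 36)); /* not page aligned  */
	assert(!page_address_valid(0x0000002000000000ULL, 36)); /* above 2^36        */
	return 0;
}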
@@ -5664,14 +5709,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
        }
 
        if (is_page_fault(intr_info)) {
-               /* EPT won't cause page fault directly */
-               BUG_ON(enable_ept);
                cr2 = vmcs_readl(EXIT_QUALIFICATION);
-               trace_kvm_page_fault(cr2, error_code);
-
-               if (kvm_event_needs_reinjection(vcpu))
-                       kvm_mmu_unprotect_page_virt(vcpu, cr2);
-               return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+               /* EPT won't cause page fault directly */
+               WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
+               return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
+                               true);
        }
 
        ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@ -7214,25 +7256,6 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
        return nested_vmx_run(vcpu, false);
 }
 
-enum vmcs_field_type {
-       VMCS_FIELD_TYPE_U16 = 0,
-       VMCS_FIELD_TYPE_U64 = 1,
-       VMCS_FIELD_TYPE_U32 = 2,
-       VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
-};
-
-static inline int vmcs_field_type(unsigned long field)
-{
-       if (0x1 & field)        /* the *_HIGH fields are all 32 bit */
-               return VMCS_FIELD_TYPE_U32;
-       return (field >> 13) & 0x3 ;
-}
-
-static inline int vmcs_field_readonly(unsigned long field)
-{
-       return (((field >> 10) & 0x3) == 1);
-}
-
 /*
  * Read a vmcs12 field. Since these can have varying lengths and we return
  * one type, we chose the biggest type (u64) and zero-extend the return value
@@ -8418,9 +8441,15 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
                        exit_reason != EXIT_REASON_TASK_SWITCH)) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
-               vcpu->run->internal.ndata = 2;
+               vcpu->run->internal.ndata = 3;
                vcpu->run->internal.data[0] = vectoring_info;
                vcpu->run->internal.data[1] = exit_reason;
+               vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+               if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
+                       vcpu->run->internal.ndata++;
+                       vcpu->run->internal.data[3] =
+                               vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+               }
                return 0;
        }
 
@@ -8611,17 +8640,24 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
-       u32 exit_intr_info;
+       u32 exit_intr_info = 0;
+       u16 basic_exit_reason = (u16)vmx->exit_reason;
 
-       if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
-             || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+       if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+             || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
                return;
 
-       vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-       exit_intr_info = vmx->exit_intr_info;
+       if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+               exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+       vmx->exit_intr_info = exit_intr_info;
+
+       /* if exit due to PF check for async PF */
+       if (is_page_fault(exit_intr_info))
+               vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
        /* Handle machine checks before interrupts are enabled */
-       if (is_machine_check(exit_intr_info))
+       if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
+           is_machine_check(exit_intr_info))
                kvm_machine_check();
 
        /* We need to handle NMIs before interrupts are enabled */
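
vmx_complete_atomic_exit() now keys off the basic exit reason (the low 16 bits of the exit-reason field) and only reads VM_EXIT_INTR_INFO when VM-entry did not fail; a #PF exit additionally latches the host async-PF reason. A tiny sketch of the exit-reason split, using the architectural constants:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VMX_EXIT_REASONS_FAILED_VMENTRY	0x80000000u
#define EXIT_REASON_MCE_DURING_VMENTRY	41

int main(void)
{
	/* Example: a machine check during VM-entry sets the failure bit. */
	uint32_t exit_reason = EXIT_REASON_MCE_DURING_VMENTRY |
			       VMX_EXIT_REASONS_FAILED_VMENTRY;

	uint16_t basic = (uint16_t)exit_reason;	/* basic exit reason */
	bool failed_vmentry = exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY;

	/* On a failed VM-entry, VM_EXIT_INTR_INFO must not be read. */
	printf("basic reason %u, failed vmentry: %s\n",
	       basic, failed_vmentry ? "yes" : "no");
	return 0;
}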
@@ -9589,23 +9625,26 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
                      ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 }
 
+static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
+                                              struct vmcs12 *vmcs12)
+{
+       if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+               return 0;
+
+       if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
+           !page_address_valid(vcpu, vmcs12->io_bitmap_b))
+               return -EINVAL;
+
+       return 0;
+}
+
 static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
                                                struct vmcs12 *vmcs12)
 {
-       int maxphyaddr;
-       u64 addr;
-
        if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
                return 0;
 
-       if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
-               WARN_ON(1);
-               return -EINVAL;
-       }
-       maxphyaddr = cpuid_maxphyaddr(vcpu);
-
-       if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
-          ((addr + PAGE_SIZE) >> maxphyaddr))
+       if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
                return -EINVAL;
 
        return 0;
@@ -10293,6 +10332,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
            vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+       if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
+               return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
        if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
@@ -10429,8 +10471,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
                return 1;
        }
 
-       vmcs12->launch_state = 1;
-
        /*
         * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
         * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -10804,6 +10844,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
        if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+               vmcs12->launch_state = 1;
+
                /* vm_entry_intr_info_field is cleared on exit. Emulate this
                 * instead of reading the real value. */
                vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;