KVM: VMX: Handle single-step #DB for EMULTYPE_SKIP on EPT misconfig

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dfd64124356806325022e22dd8d149db6826803b..c38d247dbffbe4d8b799554bea926a32aedb2b5d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1145,6 +1145,42 @@ static u32 msrs_to_save[] = {
        MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
        MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
        MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
+       MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
+       MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
+       MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
+       MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
+       MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
+       MSR_ARCH_PERFMON_PERFCTR0 + 2, MSR_ARCH_PERFMON_PERFCTR0 + 3,
+       MSR_ARCH_PERFMON_PERFCTR0 + 4, MSR_ARCH_PERFMON_PERFCTR0 + 5,
+       MSR_ARCH_PERFMON_PERFCTR0 + 6, MSR_ARCH_PERFMON_PERFCTR0 + 7,
+       MSR_ARCH_PERFMON_PERFCTR0 + 8, MSR_ARCH_PERFMON_PERFCTR0 + 9,
+       MSR_ARCH_PERFMON_PERFCTR0 + 10, MSR_ARCH_PERFMON_PERFCTR0 + 11,
+       MSR_ARCH_PERFMON_PERFCTR0 + 12, MSR_ARCH_PERFMON_PERFCTR0 + 13,
+       MSR_ARCH_PERFMON_PERFCTR0 + 14, MSR_ARCH_PERFMON_PERFCTR0 + 15,
+       MSR_ARCH_PERFMON_PERFCTR0 + 16, MSR_ARCH_PERFMON_PERFCTR0 + 17,
+       MSR_ARCH_PERFMON_PERFCTR0 + 18, MSR_ARCH_PERFMON_PERFCTR0 + 19,
+       MSR_ARCH_PERFMON_PERFCTR0 + 20, MSR_ARCH_PERFMON_PERFCTR0 + 21,
+       MSR_ARCH_PERFMON_PERFCTR0 + 22, MSR_ARCH_PERFMON_PERFCTR0 + 23,
+       MSR_ARCH_PERFMON_PERFCTR0 + 24, MSR_ARCH_PERFMON_PERFCTR0 + 25,
+       MSR_ARCH_PERFMON_PERFCTR0 + 26, MSR_ARCH_PERFMON_PERFCTR0 + 27,
+       MSR_ARCH_PERFMON_PERFCTR0 + 28, MSR_ARCH_PERFMON_PERFCTR0 + 29,
+       MSR_ARCH_PERFMON_PERFCTR0 + 30, MSR_ARCH_PERFMON_PERFCTR0 + 31,
+       MSR_ARCH_PERFMON_EVENTSEL0, MSR_ARCH_PERFMON_EVENTSEL1,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 2, MSR_ARCH_PERFMON_EVENTSEL0 + 3,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 4, MSR_ARCH_PERFMON_EVENTSEL0 + 5,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 6, MSR_ARCH_PERFMON_EVENTSEL0 + 7,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 8, MSR_ARCH_PERFMON_EVENTSEL0 + 9,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 10, MSR_ARCH_PERFMON_EVENTSEL0 + 11,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 18, MSR_ARCH_PERFMON_EVENTSEL0 + 19,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 20, MSR_ARCH_PERFMON_EVENTSEL0 + 21,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 22, MSR_ARCH_PERFMON_EVENTSEL0 + 23,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 24, MSR_ARCH_PERFMON_EVENTSEL0 + 25,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 26, MSR_ARCH_PERFMON_EVENTSEL0 + 27,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 28, MSR_ARCH_PERFMON_EVENTSEL0 + 29,
+       MSR_ARCH_PERFMON_EVENTSEL0 + 30, MSR_ARCH_PERFMON_EVENTSEL0 + 31,
 };
 
 static unsigned num_msrs_to_save;
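
These additions expose the Intel PMU MSRs through KVM_GET_MSR_INDEX_LIST so
userspace can save and restore them across migration. The entries are
hand-unrolled because msrs_to_save[] is a static initializer and cannot
loop; a sketch of what the generic-counter portion expresses (i, n and
msrs are illustrative, and consecutive counters really are consecutive MSR
indices, with MSR_ARCH_PERFMON_PERFCTR0 at 0xc1 and
MSR_ARCH_PERFMON_EVENTSEL0 at 0x186):

    /* Sketch only: the unrolled entries above, written as a loop. */
    for (i = 0; i < 32; i++) {
            msrs[n++] = MSR_ARCH_PERFMON_PERFCTR0 + i;
            msrs[n++] = MSR_ARCH_PERFMON_EVENTSEL0 + i;
    }
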
@@ -3169,7 +3205,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_EVENTFD:
        case KVM_CAP_HYPERV_TLBFLUSH:
        case KVM_CAP_HYPERV_SEND_IPI:
-       case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
        case KVM_CAP_HYPERV_CPUID:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
@@ -3246,6 +3281,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = kvm_x86_ops->get_nested_state ?
                        kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
                break;
+       case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+               r = kvm_x86_ops->enable_direct_tlbflush != NULL;
+               break;
+       case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+               r = kvm_x86_ops->nested_enable_evmcs != NULL;
+               break;
        default:
                break;
        }
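
Moving KVM_CAP_HYPERV_ENLIGHTENED_VMCS out of the always-on case list above
and keying both capabilities on the presence of the vendor callback means a
host now reports only what its module (VMX here) actually implements. A
minimal userspace probe, assuming a kernel and <linux/kvm.h> new enough to
define the capability:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);
            int r;

            if (kvm < 0)
                    return 1;
            /* KVM_CHECK_EXTENSION: 0 if unsupported, > 0 if supported. */
            r = ioctl(kvm, KVM_CHECK_EXTENSION,
                      KVM_CAP_HYPERV_DIRECT_TLBFLUSH);
            printf("Hyper-V direct TLB flush: %s\n",
                   r > 0 ? "available" : "unavailable");
            return 0;
    }
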
@@ -4019,6 +4060,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                                r = -EFAULT;
                }
                return r;
+       case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+               if (!kvm_x86_ops->enable_direct_tlbflush)
+                       return -ENOTTY;
+
+               return kvm_x86_ops->enable_direct_tlbflush(vcpu);
 
        default:
                return -EINVAL;
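
The capability is enabled per vCPU, so userspace issues KVM_ENABLE_CAP on
the vCPU fd; the kernel returns -ENOTTY (seen as errno in userspace) to
distinguish "the vendor module cannot do this" from the -EINVAL an unknown
capability would get. A hedged sketch, where vcpu_fd is assumed to be an
existing vCPU file descriptor:

    /* Sketch, not from the patch: ask KVM to use Hyper-V's direct
     * hypercall-based TLB flush for this vCPU (KVM-on-Hyper-V setups). */
    static int enable_direct_tlbflush(int vcpu_fd)
    {
            struct kvm_enable_cap cap = {
                    .cap = KVM_CAP_HYPERV_DIRECT_TLBFLUSH,
            };

            return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
    }
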
@@ -5051,6 +5097,11 @@ static void kvm_init_msr_list(void)
        u32 dummy[2];
        unsigned i, j;
 
+       BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
+                        "Please update the fixed PMCs in msrs_to_save[]");
+       BUILD_BUG_ON_MSG(INTEL_PMC_MAX_GENERIC != 32,
+                        "Please update the generic perfctr/eventsel MSRs in msrs_to_save[]");
+
        for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
                if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
                        continue;
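
The two assertions pin the unrolled msrs_to_save[] entries to the PMU
limits: if INTEL_PMC_MAX_GENERIC or INTEL_PMC_MAX_FIXED ever changes, the
build fails with a pointed message instead of KVM silently under-reporting
MSRs to userspace. The underlying idea, shown here with C11's
_Static_assert (the kernel's BUILD_BUG_ON_MSG in include/linux/build_bug.h
is the real mechanism):

    /* Compile-time and zero-cost: the build breaks, not the boot. */
    _Static_assert(INTEL_PMC_MAX_FIXED == 4,
                   "Please update the fixed PMCs in msrs_to_save[]");
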
@@ -5389,7 +5440,6 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
 int handle_ud(struct kvm_vcpu *vcpu)
 {
        int emul_type = EMULTYPE_TRAP_UD;
-       enum emulation_result er;
        char sig[5]; /* ud2; .ascii "kvm" */
        struct x86_exception e;
 
@@ -5398,15 +5448,10 @@ int handle_ud(struct kvm_vcpu *vcpu)
                                sig, sizeof(sig), &e) == 0 &&
            memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
                kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
-               emul_type = 0;
+               emul_type = EMULTYPE_TRAP_UD_FORCED;
        }
 
-       er = kvm_emulate_instruction(vcpu, emul_type);
-       if (er == EMULATE_USER_EXIT)
-               return 0;
-       if (er != EMULATE_DONE)
-               kvm_queue_exception(vcpu, UD_VECTOR);
-       return 1;
+       return kvm_emulate_instruction(vcpu, emul_type);
 }
 EXPORT_SYMBOL_GPL(handle_ud);
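
The five-byte signature checked above is the forced-emulation prefix: when
the kvm.force_emulation_prefix module parameter is set, a guest can execute
ud2 followed by the ASCII bytes "kvm" to make KVM emulate the instruction
that follows, which the selftests use to exercise the emulator. Returning
EMULTYPE_TRAP_UD_FORCED instead of 0 keeps "inject #UD on failure"
semantics for that path. A guest-side sketch in the style of the selftests
helper:

    /* Guest-side sketch: 0f 0b 6b 76 6d == ud2; 'k'; 'v'; 'm'. */
    #define KVM_FEP "ud2; .byte 'k', 'v', 'm';"

    static inline void force_emulated_cpuid(unsigned int leaf)
    {
            asm volatile(KVM_FEP "cpuid"
                         : "+a" (leaf) : : "ebx", "ecx", "edx");
    }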
 
@@ -6228,7 +6273,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
        vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
-int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
+void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 {
        struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
        int ret;
@@ -6240,37 +6285,43 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
        ctxt->_eip = ctxt->eip + inc_eip;
        ret = emulate_int_real(ctxt, irq);
 
-       if (ret != X86EMUL_CONTINUE)
-               return EMULATE_FAIL;
-
-       ctxt->eip = ctxt->_eip;
-       kvm_rip_write(vcpu, ctxt->eip);
-       kvm_set_rflags(vcpu, ctxt->eflags);
-
-       return EMULATE_DONE;
+       if (ret != X86EMUL_CONTINUE) {
+               kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+       } else {
+               ctxt->eip = ctxt->_eip;
+               kvm_rip_write(vcpu, ctxt->eip);
+               kvm_set_rflags(vcpu, ctxt->eflags);
+       }
 }
 EXPORT_SYMBOL_GPL(kvm_inject_realmode_interrupt);
 
 static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
 {
-       int r = EMULATE_DONE;
-
        ++vcpu->stat.insn_emulation_fail;
        trace_kvm_emulate_insn_failed(vcpu);
 
-       if (emulation_type & EMULTYPE_NO_UD_ON_FAIL)
-               return EMULATE_FAIL;
+       if (emulation_type & EMULTYPE_VMWARE_GP) {
+               kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+               return 1;
+       }
 
-       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+       if (emulation_type & EMULTYPE_SKIP) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
-               r = EMULATE_USER_EXIT;
+               return 0;
        }
 
        kvm_queue_exception(vcpu, UD_VECTOR);
 
-       return r;
+       if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
+               return 0;
+       }
+
+       return 1;
 }
 
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
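
This hunk is the pattern for the whole series: the emulator's private
EMULATE_DONE / EMULATE_FAIL / EMULATE_USER_EXIT tristate gives way to KVM's
standard exit-handler convention. A reference sketch of that convention
(both predicates are hypothetical, named only for illustration):

    static int some_exit_handler(struct kvm_vcpu *vcpu)
    {
            if (needs_userspace_attention(vcpu))    /* hypothetical */
                    return 0;       /* KVM_RUN returns; vcpu->run says why */
            if (hit_unrecoverable_error(vcpu))      /* hypothetical */
                    return -EIO;    /* error out of the KVM_RUN ioctl */
            return 1;               /* re-enter the guest */
    }

Note the ordering in the rewritten handle_emulation_failure(): the #UD is
queued before the CPL0 check, so a failure in host kernel mode both queues
the exception and reports KVM_EXIT_INTERNAL_ERROR to userspace.
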
@@ -6425,7 +6476,7 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
        return dr6;
 }
 
-static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
+static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
 {
        struct kvm_run *kvm_run = vcpu->run;
 
@@ -6434,10 +6485,10 @@ static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
                kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
                kvm_run->debug.arch.exception = DB_VECTOR;
                kvm_run->exit_reason = KVM_EXIT_DEBUG;
-               *r = EMULATE_USER_EXIT;
-       } else {
-               kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
+               return 0;
        }
+       kvm_queue_exception_p(vcpu, DB_VECTOR, DR6_BS);
+       return 1;
 }
 
 int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
@@ -6446,7 +6497,7 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
        int r;
 
        r = kvm_x86_ops->skip_emulated_instruction(vcpu);
-       if (unlikely(r != EMULATE_DONE))
+       if (unlikely(!r))
                return 0;
 
        /*
@@ -6458,8 +6509,8 @@ int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
         * that sets the TF flag".
         */
        if (unlikely(rflags & X86_EFLAGS_TF))
-               kvm_vcpu_do_singlestep(vcpu, &r);
-       return r == EMULATE_DONE;
+               r = kvm_vcpu_do_singlestep(vcpu);
+       return r;
 }
 EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
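
The SDM rule quoted in the comment above is why the #DB is synthesized
here: RFLAGS.TF is sampled when an instruction begins, so an instruction
KVM skips rather than executes natively (a CPUID or WRMSR that exited, say)
still owes the guest a single-step trap with DR6.BS before the next
instruction runs. A guest-side sketch that would exercise this path
(X86_EFLAGS_TF is bit 8 of RFLAGS):

    /* Sketch: a POPF that sets TF arms the trap after the *following*
     * instruction, so the CPUID below must raise #DB with DR6.BS even
     * though KVM skips it instead of executing it. */
    static inline void single_step_one_cpuid(void)
    {
            unsigned long flags;

            asm volatile("pushf; pop %0" : "=r" (flags));
            asm volatile("push %0; popf; cpuid"
                         : : "r" (flags | X86_EFLAGS_TF)
                         : "eax", "ebx", "ecx", "edx", "cc", "memory");
    }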
 
@@ -6478,7 +6529,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                        kvm_run->debug.arch.pc = eip;
                        kvm_run->debug.arch.exception = DB_VECTOR;
                        kvm_run->exit_reason = KVM_EXIT_DEBUG;
-                       *r = EMULATE_USER_EXIT;
+                       *r = 0;
                        return true;
                }
        }
@@ -6494,7 +6545,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
                        vcpu->arch.dr6 &= ~DR_TRAP_BITS;
                        vcpu->arch.dr6 |= dr6 | DR6_RTM;
                        kvm_queue_exception(vcpu, DB_VECTOR);
-                       *r = EMULATE_DONE;
+                       *r = 1;
                        return true;
                }
        }
@@ -6578,11 +6629,14 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                trace_kvm_emulate_insn_start(vcpu);
                ++vcpu->stat.insn_emulation;
                if (r != EMULATION_OK)  {
-                       if (emulation_type & EMULTYPE_TRAP_UD)
-                               return EMULATE_FAIL;
+                       if ((emulation_type & EMULTYPE_TRAP_UD) ||
+                           (emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
+                               kvm_queue_exception(vcpu, UD_VECTOR);
+                               return 1;
+                       }
                        if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                                emulation_type))
-                               return EMULATE_DONE;
+                               return 1;
                        if (ctxt->have_exception) {
                                /*
                                 * #UD should result in just EMULATION_FAILED, and trap-like
@@ -6591,28 +6645,32 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                                WARN_ON_ONCE(ctxt->exception.vector == UD_VECTOR ||
                                             exception_type(ctxt->exception.vector) == EXCPT_TRAP);
                                inject_emulated_exception(vcpu);
-                               return EMULATE_DONE;
+                               return 1;
                        }
-                       if (emulation_type & EMULTYPE_SKIP)
-                               return EMULATE_FAIL;
                        return handle_emulation_failure(vcpu, emulation_type);
                }
        }
 
-       if ((emulation_type & EMULTYPE_VMWARE) &&
-           !is_vmware_backdoor_opcode(ctxt))
-               return EMULATE_FAIL;
+       if ((emulation_type & EMULTYPE_VMWARE_GP) &&
+           !is_vmware_backdoor_opcode(ctxt)) {
+               kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+               return 1;
+       }
 
+       /*
+        * Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
+        * for kvm_skip_emulated_instruction().  The caller is responsible for
+        * updating interruptibility state and injecting single-step #DBs.
+        */
        if (emulation_type & EMULTYPE_SKIP) {
                kvm_rip_write(vcpu, ctxt->_eip);
                if (ctxt->eflags & X86_EFLAGS_RF)
                        kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
-               kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
-               return EMULATE_DONE;
+               return 1;
        }
 
        if (retry_instruction(ctxt, cr2, emulation_type))
-               return EMULATE_DONE;
+               return 1;
 
        /* this is needed for vmware backdoor interface to work since it
           changes registers values  during IO operation */
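
The new comment above is load-bearing: with the set_interrupt_shadow() call
dropped from the EMULTYPE_SKIP path, the vendor callback owns every side
effect of skipping. A sketch of the shape this takes in a vendor module,
modeled loosely on the SVM flavor (the names and the next_rip check are
illustrative, not a verbatim excerpt):

    static int vendor_skip_emulated_instruction(struct kvm_vcpu *vcpu)
    {
            if (!hardware_next_rip_valid(vcpu)) {   /* hypothetical */
                    /* The emulator only moves RIP; see the comment above. */
                    if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
                            return 0;
            } else {
                    kvm_rip_write(vcpu, hardware_next_rip(vcpu));
            }
            /* The side effects EMULTYPE_SKIP deliberately leaves to us: */
            vendor_set_interrupt_shadow(vcpu, 0);
            return 1;       /* caller injects the single-step #DB if TF set */
    }
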
@@ -6628,18 +6686,18 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
        r = x86_emulate_insn(ctxt);
 
        if (r == EMULATION_INTERCEPTED)
-               return EMULATE_DONE;
+               return 1;
 
        if (r == EMULATION_FAILED) {
                if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                        emulation_type))
-                       return EMULATE_DONE;
+                       return 1;
 
                return handle_emulation_failure(vcpu, emulation_type);
        }
 
        if (ctxt->have_exception) {
-               r = EMULATE_DONE;
+               r = 1;
                if (inject_emulated_exception(vcpu))
                        return r;
        } else if (vcpu->arch.pio.count) {
@@ -6650,16 +6708,18 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                        writeback = false;
                        vcpu->arch.complete_userspace_io = complete_emulated_pio;
                }
-               r = EMULATE_USER_EXIT;
+               r = 0;
        } else if (vcpu->mmio_needed) {
+               ++vcpu->stat.mmio_exits;
+
                if (!vcpu->mmio_is_write)
                        writeback = false;
-               r = EMULATE_USER_EXIT;
+               r = 0;
                vcpu->arch.complete_userspace_io = complete_emulated_mmio;
        } else if (r == EMULATION_RESTART)
                goto restart;
        else
-               r = EMULATE_DONE;
+               r = 1;
 
        if (writeback) {
                unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -6668,8 +6728,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                if (!ctxt->have_exception ||
                    exception_type(ctxt->exception.vector) == EXCPT_TRAP) {
                        kvm_rip_write(vcpu, ctxt->eip);
-                       if (r == EMULATE_DONE && ctxt->tf)
-                               kvm_vcpu_do_singlestep(vcpu, &r);
+                       if (r && ctxt->tf)
+                               r = kvm_vcpu_do_singlestep(vcpu);
                        __kvm_set_rflags(vcpu, ctxt->eflags);
                }
 
@@ -8263,12 +8323,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
 static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
        int r;
+
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-       if (r != EMULATE_DONE)
-               return 0;
-       return 1;
+       return r;
 }
 
 static int complete_emulated_pio(struct kvm_vcpu *vcpu)
@@ -8636,14 +8695,17 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 
        ret = emulator_task_switch(ctxt, tss_selector, idt_index, reason,
                                   has_error_code, error_code);
-
-       if (ret)
-               return EMULATE_FAIL;
+       if (ret) {
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
+               return 0;
+       }
 
        kvm_rip_write(vcpu, ctxt->eip);
        kvm_set_rflags(vcpu, ctxt->eflags);
        kvm_make_request(KVM_REQ_EVENT, vcpu);
-       return EMULATE_DONE;
+       return 1;
 }
 EXPORT_SYMBOL_GPL(kvm_task_switch);
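
A failed task switch used to return EMULATE_FAIL, which each caller mapped
to userspace differently; filling in vcpu->run here gives userspace one
well-defined KVM_EXIT_INTERNAL_ERROR. A VMM-side sketch of consuming it
(generic run-loop code, not from the patch; run is the mmap'ed struct
kvm_run for the vCPU):

    switch (run->exit_reason) {
    case KVM_EXIT_INTERNAL_ERROR:
            /* KVM_INTERNAL_ERROR_EMULATION: KVM could not emulate. */
            fprintf(stderr, "KVM internal error: suberror %u\n",
                    run->internal.suberror);
            abort();
    }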
 
@@ -9690,8 +9752,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
         * Scan sptes if dirty logging has been stopped, dropping those
         * which can be collapsed into a single large-page spte.  Later
         * page faults will create the large-page sptes.
+        *
+        * There is no need to do this in any of the following cases:
+        * CREATE:      No dirty mappings will already exist.
+        * MOVE/DELETE: The old mappings will already have been cleaned up by
+        *              kvm_arch_flush_shadow_memslot()
         */
-       if ((change != KVM_MR_DELETE) &&
+       if (change == KVM_MR_FLAGS_ONLY &&
                (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
                !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
                kvm_mmu_zap_collapsible_sptes(kvm, new);
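
The old check excluded only KVM_MR_DELETE, so the zap could also run after
moves whose sptes kvm_arch_flush_shadow_memslot() had already dropped; the
new check runs it exactly when the comment says it matters, i.e. when dirty
logging was just switched off on an otherwise unchanged slot. For
reference, the memslot change taxonomy the check keys on, from
include/linux/kvm_host.h:

    enum kvm_mr_change {
            KVM_MR_CREATE,
            KVM_MR_DELETE,
            KVM_MR_MOVE,
            KVM_MR_FLAGS_ONLY,
    };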