return hv_remote_flush_tlb_with_range(kvm, NULL);
}
+static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+{
+ struct hv_enlightened_vmcs *evmcs;
+ struct hv_partition_assist_pg **p_hv_pa_pg =
+ &vcpu->kvm->arch.hyperv.hv_pa_pg;
+ /*
+ * Synthetic VM-Exit is not enabled in current code and so All
+ * evmcs in singe VM shares same assist page.
+ */
+ if (!*p_hv_pa_pg) {
+ *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!*p_hv_pa_pg)
+ return -ENOMEM;
+ pr_debug("KVM: Hyper-V: allocated PA_PG for %llx\n",
+ (u64)&vcpu->kvm);
+ }
+
+ evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs;
+
+ evmcs->partition_assist_page =
+ __pa(*p_hv_pa_pg);
+ evmcs->hv_vm_id = (u64)vcpu->kvm;
+ evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+
+ pr_debug("KVM: Hyper-V: enabled DIRECT flush for %llx\n",
+ (u64)vcpu->kvm);
+ return 0;
+}
+
#endif /* IS_ENABLED(CONFIG_HYPERV) */
/*
return 0;
}
-
-static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
unsigned long rip;
- rip = kvm_rip_read(vcpu);
- rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
- kvm_rip_write(vcpu, rip);
+ /*
+ * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
+ * undefined behavior: Intel's SDM doesn't mandate the VMCS field be
+ * set when EPT misconfig occurs. In practice, real hardware updates
+ * VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
+ * (namely Hyper-V) don't set it due to it being undefined behavior,
+ * i.e. we end up advancing IP with some random value.
+ */
+ if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
+ to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
+ rip = kvm_rip_read(vcpu);
+ rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+ kvm_rip_write(vcpu, rip);
+ } else {
+ if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
+ return 0;
+ }
/* skipping an emulated instruction also counts */
vmx_set_interrupt_shadow(vcpu, 0);
+
+ return 1;
}
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
int inc_eip = 0;
if (kvm_exception_is_soft(nr))
inc_eip = vcpu->arch.event_exit_inst_len;
- if (kvm_inject_realmode_interrupt(vcpu, nr, inc_eip) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
return;
}
* of an EPT paging-structure entry is 110b (write/execute).
*/
kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
- VMX_EPT_MISCONFIG_WX_VALUE);
+ VMX_EPT_MISCONFIG_WX_VALUE, 0);
}
#define VMX_XSS_EXIT_BITMAP 0
vcpu->arch.microcode_version = 0x100000000ULL;
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
+ vmx->hv_deadline_tsc = -1;
kvm_set_cr8(vcpu, 0);
if (!init_event) {
int inc_eip = 0;
if (vcpu->arch.interrupt.soft)
inc_eip = vcpu->arch.event_exit_inst_len;
- if (kvm_inject_realmode_interrupt(vcpu, irq, inc_eip) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_inject_realmode_interrupt(vcpu, irq, inc_eip);
return;
}
intr = irq | INTR_INFO_VALID_MASK;
vmx->loaded_vmcs->nmi_known_unmasked = false;
if (vmx->rmode.vm86_active) {
- if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE)
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+ kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0);
return;
}
* Cause the #SS fault with 0 error code in VM86 mode.
*/
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
- if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+ if (kvm_emulate_instruction(vcpu, 0)) {
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
return kvm_vcpu_halt(vcpu);
u32 intr_info, ex_no, error_code;
unsigned long cr2, rip, dr6;
u32 vect_info;
- enum emulation_result er;
vect_info = vmx->idt_vectoring_info;
intr_info = vmx->exit_intr_info;
if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
WARN_ON_ONCE(!enable_vmware_backdoor);
- er = kvm_emulate_instruction(vcpu,
- EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
- if (er == EMULATE_USER_EXIT)
- return 0;
- else if (er != EMULATE_DONE)
+
+ /*
+ * VMware backdoor emulation on #GP interception only handles
+ * IN{S}, OUT{S}, and RDPMC, none of which generate a non-zero
+ * error code on #GP.
+ */
+ if (error_code) {
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
- return 1;
+ return 1;
+ }
+ return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP);
}
/*
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
if (is_icebp(intr_info))
- skip_emulated_instruction(vcpu);
+ WARN_ON(!skip_emulated_instruction(vcpu));
kvm_queue_exception(vcpu, DB_VECTOR);
return 1;
++vcpu->stat.io_exits;
if (string)
- return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0);
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
static int handle_desc(struct kvm_vcpu *vcpu)
{
WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
- return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0);
}
static int handle_cr(struct kvm_vcpu *vcpu)
static int handle_rdmsr(struct kvm_vcpu *vcpu)
{
- u32 ecx = kvm_rcx_read(vcpu);
- struct msr_data msr_info;
-
- msr_info.index = ecx;
- msr_info.host_initiated = false;
- if (vmx_get_msr(vcpu, &msr_info)) {
- trace_kvm_msr_read_ex(ecx);
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
- trace_kvm_msr_read(ecx, msr_info.data);
-
- kvm_rax_write(vcpu, msr_info.data & -1u);
- kvm_rdx_write(vcpu, (msr_info.data >> 32) & -1u);
- return kvm_skip_emulated_instruction(vcpu);
+ return kvm_emulate_rdmsr(vcpu);
}
static int handle_wrmsr(struct kvm_vcpu *vcpu)
{
- struct msr_data msr;
- u32 ecx = kvm_rcx_read(vcpu);
- u64 data = kvm_read_edx_eax(vcpu);
-
- msr.data = data;
- msr.index = ecx;
- msr.host_initiated = false;
- if (kvm_set_msr(vcpu, &msr) != 0) {
- trace_kvm_msr_write_ex(ecx, data);
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
- trace_kvm_msr_write(ecx, data);
- return kvm_skip_emulated_instruction(vcpu);
+ return kvm_emulate_wrmsr(vcpu);
}
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
static int handle_invd(struct kvm_vcpu *vcpu)
{
- return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0);
}
static int handle_invlpg(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
}
- return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0);
}
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
type != INTR_TYPE_EXT_INTR &&
type != INTR_TYPE_NMI_INTR))
- skip_emulated_instruction(vcpu);
-
- if (kvm_task_switch(vcpu, tss_selector,
- type == INTR_TYPE_SOFT_INTR ? idt_index : -1, reason,
- has_error_code, error_code) == EMULATE_FAIL) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
- return 0;
- }
+ WARN_ON(!skip_emulated_instruction(vcpu));
/*
* TODO: What about debug traps on tss switch?
* Are we supposed to inject them and update dr6?
*/
-
- return 1;
+ return kvm_task_switch(vcpu, tss_selector,
+ type == INTR_TYPE_SOFT_INTR ? idt_index : -1,
+ reason, has_error_code, error_code);
}
static int handle_ept_violation(struct kvm_vcpu *vcpu)
if (!is_guest_mode(vcpu) &&
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
trace_kvm_fast_mmio(gpa);
- /*
- * Doing kvm_skip_emulated_instruction() depends on undefined
- * behavior: Intel's manual doesn't mandate
- * VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
- * occurs and while on real hardware it was observed to be set,
- * other hypervisors (namely Hyper-V) don't set it, we end up
- * advancing IP with some random value. Disable fast mmio when
- * running nested and keep it for real hardware in hope that
- * VM_EXIT_INSTRUCTION_LEN will always be set correctly.
- */
- if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
- return kvm_skip_emulated_instruction(vcpu);
- else
- return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
- EMULATE_DONE;
+ return kvm_skip_emulated_instruction(vcpu);
}
return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- enum emulation_result err = EMULATE_DONE;
- int ret = 1;
bool intr_window_requested;
unsigned count = 130;
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
- err = kvm_emulate_instruction(vcpu, 0);
-
- if (err == EMULATE_USER_EXIT) {
- ++vcpu->stat.mmio_exits;
- ret = 0;
- goto out;
- }
-
- if (err != EMULATE_DONE)
- goto emulation_error;
+ if (!kvm_emulate_instruction(vcpu, 0))
+ return 0;
if (vmx->emulation_required && !vmx->rmode.vm86_active &&
- vcpu->arch.exception.pending)
- goto emulation_error;
+ vcpu->arch.exception.pending) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return 0;
+ }
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
- ret = kvm_vcpu_halt(vcpu);
- goto out;
+ return kvm_vcpu_halt(vcpu);
}
+ /*
+ * Note, return 1 and not 0, vcpu_run() is responsible for
+ * morphing the pending signal into the proper return code.
+ */
if (signal_pending(current))
- goto out;
+ return 1;
+
if (need_resched())
schedule();
}
-out:
- return ret;
-
-emulation_error:
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
- return 0;
+ return 1;
}
static void grow_ple_window(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int old = vmx->ple_window;
+ unsigned int old = vmx->ple_window;
vmx->ple_window = __grow_ple_window(old, ple_window,
ple_window_grow,
ple_window_max);
- if (vmx->ple_window != old)
+ if (vmx->ple_window != old) {
vmx->ple_window_dirty = true;
-
- trace_kvm_ple_window_grow(vcpu->vcpu_id, vmx->ple_window, old);
+ trace_kvm_ple_window_update(vcpu->vcpu_id,
+ vmx->ple_window, old);
+ }
}
static void shrink_ple_window(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- int old = vmx->ple_window;
+ unsigned int old = vmx->ple_window;
vmx->ple_window = __shrink_ple_window(old, ple_window,
ple_window_shrink,
ple_window);
- if (vmx->ple_window != old)
+ if (vmx->ple_window != old) {
vmx->ple_window_dirty = true;
-
- trace_kvm_ple_window_shrink(vcpu->vcpu_id, vmx->ple_window, old);
+ trace_kvm_ple_window_update(vcpu->vcpu_id,
+ vmx->ple_window, old);
+ }
}
/*
else {
vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
exit_reason);
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
+ dump_vmcs();
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+ vcpu->run->internal.ndata = 1;
+ vcpu->run->internal.data[0] = exit_reason;
+ return 0;
}
}
current_evmcs->hv_clean_fields |=
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
+ if (static_branch_unlikely(&enable_evmcs))
+ current_evmcs->hv_vp_id = vcpu->arch.hyperv.vp_index;
+
/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (vmx->host_debugctlmsr)
update_debugctlmsr(vmx->host_debugctlmsr);
static void vmx_vm_free(struct kvm *kvm)
{
+ kfree(kvm->arch.hyperv.hv_pa_pg);
vfree(to_kvm_vmx(kvm));
}
unsigned long *msr_bitmap;
int cpu;
+ BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
+ "struct kvm_vcpu must be at offset 0 for arch usercopy region");
+
vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
if (!vmx)
return ERR_PTR(-ENOMEM);
* irqbalance to make the interrupts single-CPU.
*
* We will support full lowest-priority interrupt later.
+ *
+ * In addition, we can only inject generic interrupts using
+ * the PI mechanism, refuse to route others through it.
*/
kvm_set_msi_irq(kvm, e, &irq);
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+ !kvm_irq_is_postable(&irq)) {
/*
* Make sure the IRTE is in remapped mode if
* we don't handle it in posted mode.
return false;
}
+static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->nested.vmxon;
+}
+
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
.nested_enable_evmcs = NULL,
.nested_get_evmcs_version = NULL,
.need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
+ .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
};
static void vmx_cleanup_l1d_flush(void)
if (!vp_ap)
continue;
+ vp_ap->nested_control.features.directhypercall = 0;
vp_ap->current_nested_vmcs = 0;
vp_ap->enlighten_vmentry = 0;
}
pr_info("KVM: vmx: using Hyper-V Enlightened VMCS\n");
static_branch_enable(&enable_evmcs);
}
+
+ if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
+ vmx_x86_ops.enable_direct_tlbflush
+ = hv_enable_direct_tlbflush;
+
} else {
enlightened_vmcs = false;
}