*
* Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
* Authors: Sanjay Lal <sanjayl@kymasys.com>
- */
+ */
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
- #include "kvm_mips_int.h"
- #include "kvm_mips_comm.h"
+ #include "interrupt.h"
+ #include "commpage.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
#define VECTORSPACING 0x100 /* for EI/VI mode */
#endif
- #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+ #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x)
struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "wait", VCPU_STAT(wait_exits) },
- { "cache", VCPU_STAT(cache_exits) },
- { "signal", VCPU_STAT(signal_exits) },
- { "interrupt", VCPU_STAT(int_exits) },
- { "cop_unsuable", VCPU_STAT(cop_unusable_exits) },
- { "tlbmod", VCPU_STAT(tlbmod_exits) },
- { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits) },
- { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits) },
- { "addrerr_st", VCPU_STAT(addrerr_st_exits) },
- { "addrerr_ld", VCPU_STAT(addrerr_ld_exits) },
- { "syscall", VCPU_STAT(syscall_exits) },
- { "resvd_inst", VCPU_STAT(resvd_inst_exits) },
- { "break_inst", VCPU_STAT(break_inst_exits) },
- { "flush_dcache", VCPU_STAT(flush_dcache_exits) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ { "wait", VCPU_STAT(wait_exits), KVM_STAT_VCPU },
+ { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU },
+ { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU },
+ { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU },
+ { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
+ { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU },
+ { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU },
+ { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU },
+ { "addrerr_st", VCPU_STAT(addrerr_st_exits), KVM_STAT_VCPU },
+ { "addrerr_ld", VCPU_STAT(addrerr_ld_exits), KVM_STAT_VCPU },
+ { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU },
+ { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU },
+ { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU },
+ { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
+ { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU },
{NULL}
};
static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu)
{
int i;
+
for_each_possible_cpu(i) {
vcpu->arch.guest_kernel_asid[i] = 0;
vcpu->arch.guest_user_asid[i] = 0;
}
+
return 0;
}
- /* XXXKYMA: We are simulatoring a processor that has the WII bit set in Config7, so we
- * are "runnable" if interrupts are pending
+ /*
+ * XXXKYMA: We are simulating a processor that has the WII bit set in
+ * Config7, so we are "runnable" if interrupts are pending
*/
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
void kvm_arch_check_processor_compat(void *rtn)
{
- int *r = (int *)rtn;
- *r = 0;
- return;
+ *(int *)rtn = 0;
}
static void kvm_mips_init_tlbs(struct kvm *kvm)
{
unsigned long wired;
- /* Add a wired entry to the TLB, it is used to map the commpage to the Guest kernel */
+ /*
+ * Add a wired entry to the TLB; it is used to map the commpage to
+ * the Guest kernel
+ */
wired = read_c0_wired();
write_c0_wired(wired + 1);
mtc0_tlbw_hazard();
on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1);
}
-
return 0;
}
}
}
- long
- kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
{
return -ENOIOCTLCMD;
}
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
- enum kvm_mr_change change)
+ struct kvm_memory_slot *memslot,
+ struct kvm_userspace_memory_region *mem,
+ enum kvm_mr_change change)
{
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
- enum kvm_mr_change change)
+ struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ enum kvm_mr_change change)
{
unsigned long npages = 0;
- int i, err = 0;
+ int i;
kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n",
__func__, kvm, mem->slot, mem->guest_phys_addr,
if (!kvm->arch.guest_pmap) {
kvm_err("Failed to allocate guest PMAP");
- err = -ENOMEM;
- goto out;
+ return;
}
kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n",
npages, kvm->arch.guest_pmap);
/* Now setup the page table */
- for (i = 0; i < npages; i++) {
+ for (i = 0; i < npages; i++)
kvm->arch.guest_pmap[i] = KVM_INVALID_PAGE;
- }
}
}
- out:
- return;
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
{
- extern char mips32_exception[], mips32_exceptionEnd[];
- extern char mips32_GuestException[], mips32_GuestExceptionEnd[];
int err, size, offset;
void *gebase;
int i;
kvm_debug("kvm @ %p: create cpu %d at %p\n", kvm, id, vcpu);
- /* Allocate space for host mode exception handlers that handle
+ /*
+ * Allocate space for host mode exception handlers that handle
* guest mode exits
*/
- if (cpu_has_veic || cpu_has_vint) {
+ if (cpu_has_veic || cpu_has_vint)
size = 0x200 + VECTORSPACING * 64;
- } else {
+ else
size = 0x4000;
- }
/* Save Linux EBASE */
vcpu->arch.host_ebase = (void *)read_c0_ebase();
local_flush_icache_range((unsigned long)gebase,
(unsigned long)gebase + ALIGN(size, PAGE_SIZE));
- /* Allocate comm page for guest kernel, a TLB will be reserved for mapping GVA @ 0xFFFF8000 to this page */
+ /*
+ * Allocate comm page for guest kernel; a TLB will be reserved for
+ * mapping GVA @ 0xFFFF8000 to this page
+ */
vcpu->arch.kseg0_commpage = kzalloc(PAGE_SIZE << 1, GFP_KERNEL);
if (!vcpu->arch.kseg0_commpage) {
kfree(vcpu->arch.guest_ebase);
kfree(vcpu->arch.kseg0_commpage);
+ kfree(vcpu);
}
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_arch_vcpu_free(vcpu);
}
- int
- kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *dbg)
+ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
{
return -ENOIOCTLCMD;
}
return r;
}
- int
- kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_mips_interrupt *irq)
+ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
+ struct kvm_mips_interrupt *irq)
{
int intr = (int)irq->irq;
struct kvm_vcpu *dvcpu = NULL;
dvcpu->arch.wait = 0;
- if (waitqueue_active(&dvcpu->wq)) {
+ if (waitqueue_active(&dvcpu->wq))
wake_up_interruptible(&dvcpu->wq);
- }
return 0;
}
- int
- kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
- struct kvm_mp_state *mp_state)
+ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
{
return -ENOIOCTLCMD;
}
- int
- kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
- struct kvm_mp_state *mp_state)
+ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
{
return -ENOIOCTLCMD;
}
}
if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
+
return put_user(v, uaddr64);
} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U32) {
u32 __user *uaddr32 = (u32 __user *)(long)reg->addr;
u32 v32 = (u32)v;
+
return put_user(v32, uaddr32);
} else {
return -EINVAL;
return 0;
}
- long
- kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
+ unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
case KVM_SET_ONE_REG:
case KVM_GET_ONE_REG: {
struct kvm_one_reg reg;
+
if (copy_from_user(&reg, argp, sizeof(reg)))
return -EFAULT;
if (ioctl == KVM_SET_ONE_REG)
case KVM_INTERRUPT:
{
struct kvm_mips_interrupt irq;
+
r = -EFAULT;
if (copy_from_user(&irq, argp, sizeof(irq)))
goto out;
return r;
}
- /*
- * Get (and clear) the dirty memory log for a memory slot.
- */
+ /* Get (and clear) the dirty memory log for a memory slot. */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
struct kvm_memory_slot *memslot;
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
- printk("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga,
- ga_end);
+ kvm_info("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga,
+ ga_end);
n = kvm_dirty_bitmap_bytes(memslot);
memset(memslot->dirty_bitmap, 0, n);
int kvm_arch_init(void *opaque)
{
- int ret;
-
if (kvm_mips_callbacks) {
kvm_err("kvm: module already exists\n");
return -EEXIST;
}
- ret = kvm_mips_emulation_init(&kvm_mips_callbacks);
-
- return ret;
+ return kvm_mips_emulation_init(&kvm_mips_callbacks);
}
void kvm_arch_exit(void)
kvm_mips_callbacks = NULL;
}
- int
- kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
return -ENOIOCTLCMD;
}
- int
- kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs)
{
return -ENOIOCTLCMD;
}
if (!vcpu)
return -1;
- printk("VCPU Register Dump:\n");
- printk("\tpc = 0x%08lx\n", vcpu->arch.pc);
- printk("\texceptions: %08lx\n", vcpu->arch.pending_exceptions);
+ kvm_debug("VCPU Register Dump:\n");
+ kvm_debug("\tpc = 0x%08lx\n", vcpu->arch.pc);
+ kvm_debug("\texceptions: %08lx\n", vcpu->arch.pending_exceptions);
for (i = 0; i < 32; i += 4) {
- printk("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i,
+ kvm_debug("\tgpr%02d: %08lx %08lx %08lx %08lx\n", i,
vcpu->arch.gprs[i],
vcpu->arch.gprs[i + 1],
vcpu->arch.gprs[i + 2], vcpu->arch.gprs[i + 3]);
}
- printk("\thi: 0x%08lx\n", vcpu->arch.hi);
- printk("\tlo: 0x%08lx\n", vcpu->arch.lo);
+ kvm_debug("\thi: 0x%08lx\n", vcpu->arch.hi);
+ kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo);
cop0 = vcpu->arch.cop0;
- printk("\tStatus: 0x%08lx, Cause: 0x%08lx\n",
- kvm_read_c0_guest_status(cop0), kvm_read_c0_guest_cause(cop0));
+ kvm_debug("\tStatus: 0x%08lx, Cause: 0x%08lx\n",
+ kvm_read_c0_guest_status(cop0),
+ kvm_read_c0_guest_cause(cop0));
- printk("\tEPC: 0x%08lx\n", kvm_read_c0_guest_epc(cop0));
+ kvm_debug("\tEPC: 0x%08lx\n", kvm_read_c0_guest_epc(cop0));
return 0;
}
kvm_mips_callbacks->queue_timer_int(vcpu);
vcpu->arch.wait = 0;
- if (waitqueue_active(&vcpu->wq)) {
+ if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
- }
}
- /*
- * low level hrtimer wake routine.
- */
+ /* low level hrtimer wake routine */
static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- return;
}
- int
- kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr)
+ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+ struct kvm_translation *tr)
{
return 0;
}
return kvm_mips_callbacks->vcpu_setup(vcpu);
}
- static
- void kvm_mips_set_c0_status(void)
+ static void kvm_mips_set_c0_status(void)
{
uint32_t status = read_c0_status();
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
- /* Set the appropriate status bits based on host CPU features, before we hit the scheduler */
+ /*
+ * Set the appropriate status bits based on host CPU features,
+ * before we hit the scheduler
+ */
kvm_mips_set_c0_status();
local_irq_enable();
kvm_debug("kvm_mips_handle_exit: cause: %#x, PC: %p, kvm_run: %p, kvm_vcpu: %p\n",
cause, opc, run, vcpu);
- /* Do a privilege check, if in UM most of these exit conditions end up
+ /*
+ * Do a privilege check; if in UM, most of these exit conditions end up
* causing an exception to be delivered to the Guest Kernel
*/
er = kvm_mips_check_privilege(cause, opc, run, vcpu);
++vcpu->stat.int_exits;
trace_kvm_exit(vcpu, INT_EXITS);
- if (need_resched()) {
+ if (need_resched())
cond_resched();
- }
ret = RESUME_GUEST;
break;
trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS);
ret = kvm_mips_callbacks->handle_cop_unusable(vcpu);
/* XXXKYMA: Might need to return to user space */
- if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN) {
+ if (run->exit_reason == KVM_EXIT_IRQ_WINDOW_OPEN)
ret = RESUME_HOST;
- }
break;
case T_TLB_MOD:
break;
case T_TLB_ST_MISS:
- kvm_debug
- ("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n",
- cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
- badvaddr);
+ kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n",
+ cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
+ badvaddr);
++vcpu->stat.tlbmiss_st_exits;
trace_kvm_exit(vcpu, TLBMISS_ST_EXITS);
break;
default:
- kvm_err
- ("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n",
- exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
- kvm_read_c0_guest_status(vcpu->arch.cop0));
+ kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n",
+ exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
+ kvm_read_c0_guest_status(vcpu->arch.cop0));
kvm_arch_vcpu_dump_regs(vcpu);
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = RESUME_HOST;
kvm_mips_deliver_interrupts(vcpu, cause);
if (!(ret & RESUME_HOST)) {
- /* Only check for signals if not already exiting to userspace */
+ /* Only check for signals if not already exiting to userspace */
if (signal_pending(current)) {
run->exit_reason = KVM_EXIT_INTR;
ret = (-EINTR << 2) | RESUME_HOST;
if (ret)
return ret;
- /* On MIPS, kernel modules are executed from "mapped space", which requires TLBs.
- * The TLB handling code is statically linked with the rest of the kernel (kvm_tlb.c)
- * to avoid the possibility of double faulting. The issue is that the TLB code
- * references routines that are part of the the KVM module,
- * which are only available once the module is loaded.
+ /*
+ * On MIPS, kernel modules are executed from "mapped space", which
+ * requires TLBs. The TLB handling code is statically linked with
+ * the rest of the kernel (tlb.c) to avoid the possibility of
+ * double faulting. The issue is that the TLB code references
+ * routines that are part of the KVM module, which are only
+ * available once the module is loaded.
*/
kvm_mips_gfn_to_pfn = gfn_to_pfn;
kvm_mips_release_pfn_clean = kvm_release_pfn_clean;
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
+ static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
void kvm_define_shared_msr(unsigned slot, u32 msr)
{
+ BUG_ON(slot >= KVM_NR_SHARED_MSRS);
if (slot >= shared_msrs_global.nr)
shared_msrs_global.nr = slot + 1;
shared_msrs_global.msrs[slot] = msr;
return EXCPT_BENIGN;
}
+ #define EXCPT_FAULT 0
+ #define EXCPT_TRAP 1
+ #define EXCPT_ABORT 2
+ #define EXCPT_INTERRUPT 3
+
+ static int exception_type(int vector)
+ {
+ unsigned int mask;
+
+ if (WARN_ON(vector > 31 || vector == NMI_VECTOR))
+ return EXCPT_INTERRUPT;
+
+ mask = 1 << vector;
+
+ /* #DB is trap, as instruction watchpoints are handled elsewhere */
+ if (mask & ((1 << DB_VECTOR) | (1 << BP_VECTOR) | (1 << OF_VECTOR)))
+ return EXCPT_TRAP;
+
+ if (mask & ((1 << DF_VECTOR) | (1 << MC_VECTOR)))
+ return EXCPT_ABORT;
+
+ /* Reserved exceptions will result in fault */
+ return EXCPT_FAULT;
+ }
+
static void kvm_multiple_exception(struct kvm_vcpu *vcpu,
unsigned nr, bool has_error, u32 error_code,
bool reinject)
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
}
+ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
+ {
+ u64 fixed = DR6_FIXED_1;
+
+ if (!guest_cpuid_has_rtm(vcpu))
+ fixed |= DR6_RTM;
+ return fixed;
+ }
+
static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
switch (dr) {
case 6:
if (val & 0xffffffff00000000ULL)
return -1; /* #GP */
- vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+ vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
kvm_update_dr6(vcpu);
break;
case 5:
unsigned long flags;
s64 usdiff;
bool matched;
+ bool already_matched;
u64 data = msr->data;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
matched = true;
+ already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
} else {
/*
* We split periods of matched TSC writes into generations.
kvm->arch.cur_tsc_write = data;
kvm->arch.cur_tsc_offset = offset;
matched = false;
- pr_debug("kvm: new tsc generation %u, clock %llu\n",
+ pr_debug("kvm: new tsc generation %llu, clock %llu\n",
kvm->arch.cur_tsc_generation, data);
}
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
- if (matched)
- kvm->arch.nr_vcpus_matched_tsc++;
- else
+ if (!matched) {
kvm->arch.nr_vcpus_matched_tsc = 0;
+ } else if (!already_matched) {
+ kvm->arch.nr_vcpus_matched_tsc++;
+ }
kvm_track_tsc_matching(vcpu);
spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
break;
gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
- if (kvm_write_guest(kvm, data,
+ if (kvm_write_guest(kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT,
&tsc_ref, sizeof(tsc_ref)))
return 1;
mark_page_dirty(kvm, gfn);
data &= ~(u64)0x40; /* ignore flush filter disable */
data &= ~(u64)0x100; /* ignore ignne emulation enable */
data &= ~(u64)0x8; /* ignore TLB cache disable */
+ data &= ~(u64)0x40000; /* ignore Mc status write enable */
if (data != 0) {
vcpu_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n",
data);
vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft;
events->interrupt.nr = vcpu->arch.interrupt.nr;
events->interrupt.soft = 0;
- events->interrupt.shadow =
- kvm_x86_ops->get_interrupt_shadow(vcpu,
- KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
+ events->interrupt.shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending != 0;
if (gpa == UNMAPPED_GVA)
return X86EMUL_PROPAGATE_FAULT;
- ret = kvm_read_guest(vcpu->kvm, gpa, data, toread);
+ ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, data,
+ offset, toread);
if (ret < 0) {
r = X86EMUL_IO_NEEDED;
goto out;
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ unsigned offset;
+ int ret;
- return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
- access | PFERR_FETCH_MASK,
- exception);
+ /* Inline kvm_read_guest_virt_helper for speed. */
+ gpa_t gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, addr, access|PFERR_FETCH_MASK,
+ exception);
+ if (unlikely(gpa == UNMAPPED_GVA))
+ return X86EMUL_PROPAGATE_FAULT;
+
+ offset = addr & (PAGE_SIZE-1);
+ if (WARN_ON(offset + bytes > PAGE_SIZE))
+ bytes = (unsigned)PAGE_SIZE - offset;
+ ret = kvm_read_guest_page(vcpu->kvm, gpa >> PAGE_SHIFT, val,
+ offset, bytes);
+ if (unlikely(ret < 0))
+ return X86EMUL_IO_NEEDED;
+
+ return X86EMUL_CONTINUE;
}
int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
if (desc->g)
var.limit = (var.limit << 12) | 0xfff;
var.type = desc->type;
- var.present = desc->p;
var.dpl = desc->dpl;
var.db = desc->d;
var.s = desc->s;
return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
}
+ static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
+ u32 pmc)
+ {
+ return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc);
+ }
+
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc, u64 *pdata)
{
.set_dr = emulator_set_dr,
.set_msr = emulator_set_msr,
.get_msr = emulator_get_msr,
+ .check_pmc = emulator_check_pmc,
.read_pmc = emulator_read_pmc,
.halt = emulator_halt,
.wbinvd = emulator_wbinvd,
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
{
- u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
+ u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu);
/*
* an sti; sti; sequence only disable interrupts for the first
* instruction. So, if the last instruction, be it emulated or
* means that the last instruction is an sti. We should not
* leave the flag on in this case. The same goes for mov ss
*/
- if (!(int_shadow & mask))
+ if (int_shadow & mask)
+ mask = 0;
+ if (unlikely(int_shadow || mask)) {
kvm_x86_ops->set_interrupt_shadow(vcpu, mask);
+ if (!mask)
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
}
static void inject_emulated_exception(struct kvm_vcpu *vcpu)
kvm_queue_exception(vcpu, ctxt->exception.vector);
}
- static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
- {
- memset(&ctxt->opcode_len, 0,
- (void *)&ctxt->_regs - (void *)&ctxt->opcode_len);
-
- ctxt->fetch.start = 0;
- ctxt->fetch.end = 0;
- ctxt->io_read.pos = 0;
- ctxt->io_read.end = 0;
- ctxt->mem_read.pos = 0;
- ctxt->mem_read.end = 0;
- }
-
static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
{
struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
return dr6;
}
- static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
+ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
/*
- * Use the "raw" value to see if TF was passed to the processor.
- * Note that the new value of the flags has not been saved yet.
+ * rflags is the old, "raw" value of the flags. The new value has
+ * not been saved yet.
*
* This is correct even for TF set by the guest, because "the
* processor will not generate this exception after the instruction
* that sets the TF flag".
*/
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-
if (unlikely(rflags & X86_EFLAGS_TF)) {
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
+ DR6_RTM;
kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
kvm_run->debug.arch.exception = DB_VECTOR;
kvm_run->exit_reason = KVM_EXIT_DEBUG;
* cleared by the processor".
*/
vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS;
+ vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
}
}
vcpu->arch.eff_db);
if (dr6 != 0) {
- kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
+ kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
get_segment_base(vcpu, VCPU_SREG_CS);
}
}
- if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) {
+ if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
+ !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
dr6 = kvm_vcpu_check_hw_bp(eip, 0,
vcpu->arch.dr7,
vcpu->arch.db);
if (dr6 != 0) {
vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= dr6;
+ vcpu->arch.dr6 |= dr6 | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
*r = EMULATE_DONE;
return true;
if (emulation_type & EMULTYPE_SKIP) {
kvm_rip_write(vcpu, ctxt->_eip);
+ if (ctxt->eflags & X86_EFLAGS_RF)
+ kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
return EMULATE_DONE;
}
r = EMULATE_DONE;
if (writeback) {
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
- kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, &r);
- kvm_set_rflags(vcpu, ctxt->eflags);
+ kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ __kvm_set_rflags(vcpu, ctxt->eflags);
+
+ /*
+ * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
+ * do nothing, and it will be requested again as soon as
+ * the shadow expires. But we still need to check here,
+ * because POPF has no interrupt shadow.
+ */
+ if (unlikely((ctxt->eflags & ~rflags) & X86_EFLAGS_IF))
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
} else
vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
u64 param, ingpa, outgpa, ret;
uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0;
bool fast, longmode;
- int cs_db, cs_l;
/*
* hypercall generates UD from non zero cpl and real mode
return 0;
}
- kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
- longmode = is_long_mode(vcpu) && cs_l == 1;
+ longmode = is_64_bit_mode(vcpu);
if (!longmode) {
param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
- int r = 1;
+ int op_64_bit, r = 1;
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
trace_kvm_hypercall(nr, a0, a1, a2, a3);
- if (!is_long_mode(vcpu)) {
+ op_64_bit = is_64_bit_mode(vcpu);
+ if (!op_64_bit) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
a1 &= 0xFFFFFFFF;
break;
}
out:
+ if (!op_64_bit)
+ ret = (u32)ret;
kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
++vcpu->stat.hypercalls;
return r;
trace_kvm_inj_exception(vcpu->arch.exception.nr,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code);
+
+ if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
+ __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
+ X86_EFLAGS_RF);
+
kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
vcpu->arch.exception.has_error_code,
vcpu->arch.exception.error_code,
kvm_x86_ops->set_nmi(vcpu);
}
} else if (kvm_cpu_has_injectable_intr(vcpu)) {
+ /*
+ * Because interrupts can be injected asynchronously, we are
+ * calling check_nested_events again here to avoid a race condition.
+ * See https://lkml.org/lkml/2014/7/2/60 for discussion about this
+ * proposal and current concerns. Perhaps we should be setting
+ * KVM_REQ_EVENT only on certain events and not unconditionally?
+ */
+ if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) {
+ r = kvm_x86_ops->check_nested_events(vcpu, req_int_win);
+ if (r != 0)
+ return r;
+ }
if (kvm_x86_ops->interrupt_allowed(vcpu)) {
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
false);
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
vcpu->arch.nmi_injected = false;
+ kvm_clear_interrupt_queue(vcpu);
+ kvm_clear_exception_queue(vcpu);
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
- vcpu->arch.dr6 = DR6_FIXED_1;
+ vcpu->arch.dr6 = DR6_INIT;
kvm_update_dr6(vcpu);
vcpu->arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_get_rflags);
- void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+ static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
{
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
rflags |= X86_EFLAGS_TF;
kvm_x86_ops->set_rflags(vcpu, rflags);
+ }
+
+ void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
+ {
+ __kvm_set_rflags(vcpu, rflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_rflags);