x86/kvm/hyper-v: add reenlightenment MSRs support
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f9c5171dad2b91f5a1f4a10ea62d6ab274bd25cb..36ef3d8aad18dec85f7db33cdfc40238386f5330 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -67,6 +67,8 @@
 #include <asm/pvclock.h>
 #include <asm/div64.h>
 #include <asm/irq_remapping.h>
+#include <asm/mshyperv.h>
+#include <asm/hypervisor.h>
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -100,6 +102,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
 static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
+static void store_regs(struct kvm_vcpu *vcpu);
+static int sync_regs(struct kvm_vcpu *vcpu);
 
 struct kvm_x86_ops *kvm_x86_ops __read_mostly;
 EXPORT_SYMBOL_GPL(kvm_x86_ops);
@@ -177,7 +181,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "request_irq", VCPU_STAT(request_irq_exits) },
        { "irq_exits", VCPU_STAT(irq_exits) },
        { "host_state_reload", VCPU_STAT(host_state_reload) },
-       { "efer_reload", VCPU_STAT(efer_reload) },
        { "fpu_reload", VCPU_STAT(fpu_reload) },
        { "insn_emulation", VCPU_STAT(insn_emulation) },
        { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
@@ -702,7 +705,8 @@ static void kvm_load_guest_xcr0(struct kvm_vcpu *vcpu)
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
                        !vcpu->guest_xcr0_loaded) {
                /* kvm_set_xcr() also depends on this */
-               xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+               if (vcpu->arch.xcr0 != host_xcr0)
+                       xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
                vcpu->guest_xcr0_loaded = 1;
        }
 }
@@ -794,6 +798,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
                return 1;
 
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+               return 1;
+
        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE))
                        return 1;
@@ -1027,7 +1034,11 @@ static u32 emulated_msrs[] = {
        HV_X64_MSR_VP_RUNTIME,
        HV_X64_MSR_SCONTROL,
        HV_X64_MSR_STIMER0_CONFIG,
-       HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
+       HV_X64_MSR_APIC_ASSIST_PAGE,
+       HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL,
+       HV_X64_MSR_TSC_EMULATION_STATUS,
+
+       MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
        MSR_KVM_PV_EOI_EN,
 
        MSR_IA32_TSC_ADJUST,
@@ -1037,12 +1048,71 @@ static u32 emulated_msrs[] = {
        MSR_IA32_MCG_CTL,
        MSR_IA32_MCG_EXT_CTL,
        MSR_IA32_SMBASE,
+       MSR_SMI_COUNT,
        MSR_PLATFORM_INFO,
        MSR_MISC_FEATURES_ENABLES,
 };
 
 static unsigned num_emulated_msrs;
 
+/*
+ * List of MSR numbers used to expose MSR-based features that a
+ * hypervisor can use to validate requested CPU features.
+ */
+static u32 msr_based_features[] = {
+       MSR_IA32_VMX_BASIC,
+       MSR_IA32_VMX_TRUE_PINBASED_CTLS,
+       MSR_IA32_VMX_PINBASED_CTLS,
+       MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
+       MSR_IA32_VMX_PROCBASED_CTLS,
+       MSR_IA32_VMX_TRUE_EXIT_CTLS,
+       MSR_IA32_VMX_EXIT_CTLS,
+       MSR_IA32_VMX_TRUE_ENTRY_CTLS,
+       MSR_IA32_VMX_ENTRY_CTLS,
+       MSR_IA32_VMX_MISC,
+       MSR_IA32_VMX_CR0_FIXED0,
+       MSR_IA32_VMX_CR0_FIXED1,
+       MSR_IA32_VMX_CR4_FIXED0,
+       MSR_IA32_VMX_CR4_FIXED1,
+       MSR_IA32_VMX_VMCS_ENUM,
+       MSR_IA32_VMX_PROCBASED_CTLS2,
+       MSR_IA32_VMX_EPT_VPID_CAP,
+       MSR_IA32_VMX_VMFUNC,
+
+       MSR_F10H_DECFG,
+       MSR_IA32_UCODE_REV,
+};
+
+static unsigned int num_msr_based_features;
+
+static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+       switch (msr->index) {
+       case MSR_IA32_UCODE_REV:
+               rdmsrl(msr->index, msr->data);
+               break;
+       default:
+               if (kvm_x86_ops->get_msr_feature(msr))
+                       return 1;
+       }
+       return 0;
+}
+
+static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
+{
+       struct kvm_msr_entry msr;
+       int r;
+
+       msr.index = index;
+       r = kvm_get_msr_feature(&msr);
+       if (r)
+               return r;
+
+       *data = msr.data;
+
+       return 0;
+}
+
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
        if (efer & efer_reserved_bits)
@@ -1378,6 +1448,11 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
        return tsc;
 }
 
+static inline int gtod_is_based_on_tsc(int mode)
+{
+       return mode == VCLOCK_TSC || mode == VCLOCK_HVCLOCK;
+}
+
 static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64
@@ -1397,7 +1472,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
         * perform request to enable masterclock.
         */
        if (ka->use_master_clock ||
-           (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
+           (gtod_is_based_on_tsc(gtod->clock.vclock_mode) && vcpus_matched))
                kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
        trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1460,6 +1535,19 @@ static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
        vcpu->arch.tsc_offset = offset;
 }
 
+static inline bool kvm_check_tsc_unstable(void)
+{
+#ifdef CONFIG_X86_64
+       /*
+        * TSC is marked unstable when we're running on Hyper-V,
+        * but the 'TSC page' clocksource is still good.
+        */
+       if (pvclock_gtod_data.clock.vclock_mode == VCLOCK_HVCLOCK)
+               return false;
+#endif
+       return check_tsc_unstable();
+}
+
 void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -1505,7 +1593,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
          */
        if (synchronizing &&
            vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
-               if (!check_tsc_unstable()) {
+               if (!kvm_check_tsc_unstable()) {
                        offset = kvm->arch.cur_tsc_offset;
                        pr_debug("kvm: matched tsc offset for %llu\n", data);
                } else {
@@ -1605,18 +1693,43 @@ static u64 read_tsc(void)
        return last;
 }
 
-static inline u64 vgettsc(u64 *cycle_now)
+static inline u64 vgettsc(u64 *tsc_timestamp, int *mode)
 {
        long v;
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+       u64 tsc_pg_val;
+
+       switch (gtod->clock.vclock_mode) {
+       case VCLOCK_HVCLOCK:
+               tsc_pg_val = hv_read_tsc_page_tsc(hv_get_tsc_page(),
+                                                 tsc_timestamp);
+               if (tsc_pg_val != U64_MAX) {
+                       /* TSC page valid */
+                       *mode = VCLOCK_HVCLOCK;
+                       v = (tsc_pg_val - gtod->clock.cycle_last) &
+                               gtod->clock.mask;
+               } else {
+                       /* TSC page invalid */
+                       *mode = VCLOCK_NONE;
+               }
+               break;
+       case VCLOCK_TSC:
+               *mode = VCLOCK_TSC;
+               *tsc_timestamp = read_tsc();
+               v = (*tsc_timestamp - gtod->clock.cycle_last) &
+                       gtod->clock.mask;
+               break;
+       default:
+               *mode = VCLOCK_NONE;
+       }
 
-       *cycle_now = read_tsc();
+       if (*mode == VCLOCK_NONE)
+               *tsc_timestamp = v = 0;
 
-       v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
        return v * gtod->clock.mult;
 }
 
-static int do_monotonic_boot(s64 *t, u64 *cycle_now)
+static int do_monotonic_boot(s64 *t, u64 *tsc_timestamp)
 {
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
        unsigned long seq;
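
Both arms of the new vgettsc() feed the same clocksource conversion: a wrap-safe cycle delta against the timekeeper's cycle_last, scaled by mult, with the caller applying the shift. As a standalone sketch (types assumed from the surrounding file, not code from this patch):

static inline u64 scaled_ns_since_update(u64 tsc, u64 cycle_last,
					 u64 mask, u32 mult)
{
	u64 delta = (tsc - cycle_last) & mask;	/* wrap-safe cycle delta */

	return delta * mult;			/* still scaled by 2^shift */
}

/* callers accumulate: ns = (nsec_base + scaled_ns_since_update(...)) >> shift */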
@@ -1625,9 +1738,8 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
 
        do {
                seq = read_seqcount_begin(&gtod->seq);
-               mode = gtod->clock.vclock_mode;
                ns = gtod->nsec_base;
-               ns += vgettsc(cycle_now);
+               ns += vgettsc(tsc_timestamp, &mode);
                ns >>= gtod->clock.shift;
                ns += gtod->boot_ns;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -1636,7 +1748,7 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now)
        return mode;
 }
 
-static int do_realtime(struct timespec *ts, u64 *cycle_now)
+static int do_realtime(struct timespec *ts, u64 *tsc_timestamp)
 {
        struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
        unsigned long seq;
@@ -1645,10 +1757,9 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
 
        do {
                seq = read_seqcount_begin(&gtod->seq);
-               mode = gtod->clock.vclock_mode;
                ts->tv_sec = gtod->wall_time_sec;
                ns = gtod->nsec_base;
-               ns += vgettsc(cycle_now);
+               ns += vgettsc(tsc_timestamp, &mode);
                ns >>= gtod->clock.shift;
        } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
@@ -1658,25 +1769,26 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
        return mode;
 }
 
-/* returns true if host is using tsc clocksource */
-static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
+/* returns true if host is using a TSC-based clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *tsc_timestamp)
 {
        /* checked again under seqlock below */
-       if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+       if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
                return false;
 
-       return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+       return gtod_is_based_on_tsc(do_monotonic_boot(kernel_ns,
+                                                     tsc_timestamp));
 }
 
-/* returns true if host is using tsc clocksource */
+/* returns true if host is using a TSC-based clocksource */
 static bool kvm_get_walltime_and_clockread(struct timespec *ts,
-                                          u64 *cycle_now)
+                                          u64 *tsc_timestamp)
 {
        /* checked again under seqlock below */
-       if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+       if (!gtod_is_based_on_tsc(pvclock_gtod_data.clock.vclock_mode))
                return false;
 
-       return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+       return gtod_is_based_on_tsc(do_realtime(ts, tsc_timestamp));
 }
 #endif
 
@@ -2119,6 +2231,12 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
        vcpu->arch.pv_time_enabled = false;
 }
 
+static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+{
+       ++vcpu->stat.tlb_flush;
+       kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
+}
+
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
@@ -2128,7 +2246,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
                return;
 
-       vcpu->arch.st.steal.preempted = 0;
+       /*
+        * Doing a TLB flush here, on the guest's behalf, can avoid
+        * expensive IPIs.
+        */
+       if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
+               kvm_vcpu_flush_tlb(vcpu, false);
 
        if (vcpu->arch.st.steal.version & 1)
                vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
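
The xchg() above pairs with the guest side of this series: when a vCPU is preempted, its siblings may set KVM_VCPU_FLUSH_TLB in the steal-time record instead of sending a flush IPI, and the host performs the flush here before the vCPU runs again. A sketch of that guest-side check, modeled on the companion paravirt patch (not code from this hunk):

static bool defer_flush_to_host(struct kvm_steal_time *st)
{
	u8 state = READ_ONCE(st->preempted);

	if (!(state & KVM_VCPU_PREEMPTED))
		return false;		/* vCPU is running: IPI it instead */

	/* mark the flush pending; record_steal_time() will honor it */
	return cmpxchg(&st->preempted, state,
		       state | KVM_VCPU_FLUSH_TLB) == state;
}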
@@ -2163,7 +2286,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
        switch (msr) {
        case MSR_AMD64_NB_CFG:
-       case MSR_IA32_UCODE_REV:
        case MSR_IA32_UCODE_WRITE:
        case MSR_VM_HSAVE_PA:
        case MSR_AMD64_PATCH_LOADER:
@@ -2171,6 +2293,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_AMD64_DC_CFG:
                break;
 
+       case MSR_IA32_UCODE_REV:
+               if (msr_info->host_initiated)
+                       vcpu->arch.microcode_version = data;
+               break;
        case MSR_EFER:
                return set_efer(vcpu, data);
        case MSR_K7_HWCR:
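
With this change only host-initiated writes, i.e. KVM_SET_MSRS issued by the VMM, can seed the microcode revision the guest later reads back. A minimal userspace sketch (MSR index 0x8b is MSR_IA32_UCODE_REV; the revision value is an arbitrary example, error handling elided):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static void seed_ucode_rev(int vcpu_fd, __u64 rev)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} buf = {
		.hdr   = { .nmsrs = 1 },
		.entry = { .index = 0x0000008b,	/* MSR_IA32_UCODE_REV */
			   .data  = rev },
	};

	ioctl(vcpu_fd, KVM_SET_MSRS, &buf);
}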
@@ -2229,6 +2355,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                vcpu->arch.smbase = data;
                break;
+       case MSR_SMI_COUNT:
+               if (!msr_info->host_initiated)
+                       return 1;
+               vcpu->arch.smi_count = data;
+               break;
        case MSR_KVM_WALL_CLOCK_NEW:
        case MSR_KVM_WALL_CLOCK:
                vcpu->kvm->arch.wall_clock = data;
@@ -2326,6 +2457,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
        case HV_X64_MSR_CRASH_CTL:
        case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
+       case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+       case HV_X64_MSR_TSC_EMULATION_CONTROL:
+       case HV_X64_MSR_TSC_EMULATION_STATUS:
                return kvm_hv_set_msr_common(vcpu, msr, data,
                                             msr_info->host_initiated);
        case MSR_IA32_BBL_CR_CTL3:
@@ -2452,6 +2586,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_AMD64_DC_CFG:
                msr_info->data = 0;
                break;
+       case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
        case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
        case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
        case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@ -2461,7 +2596,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                msr_info->data = 0;
                break;
        case MSR_IA32_UCODE_REV:
-               msr_info->data = 0x100000000ULL;
+               msr_info->data = vcpu->arch.microcode_version;
                break;
        case MSR_MTRRcap:
        case 0x200 ... 0x2ff:
@@ -2503,6 +2638,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                msr_info->data = vcpu->arch.smbase;
                break;
+       case MSR_SMI_COUNT:
+               msr_info->data = vcpu->arch.smi_count;
+               break;
        case MSR_IA32_PERF_STATUS:
                /* TSC increment by tick */
                msr_info->data = 1000ULL;
@@ -2552,6 +2690,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
        case HV_X64_MSR_CRASH_CTL:
        case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT:
+       case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
+       case HV_X64_MSR_TSC_EMULATION_CONTROL:
+       case HV_X64_MSR_TSC_EMULATION_STATUS:
                return kvm_hv_get_msr_common(vcpu,
                                             msr_info->index, &msr_info->data);
                break;
@@ -2613,13 +2754,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
                    int (*do_msr)(struct kvm_vcpu *vcpu,
                                  unsigned index, u64 *data))
 {
-       int i, idx;
+       int i;
 
-       idx = srcu_read_lock(&vcpu->kvm->srcu);
        for (i = 0; i < msrs->nmsrs; ++i)
                if (do_msr(vcpu, entries[i].index, &entries[i].data))
                        break;
-       srcu_read_unlock(&vcpu->kvm->srcu, idx);
 
        return i;
 }
@@ -2702,6 +2841,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_SYNIC:
        case KVM_CAP_HYPERV_SYNIC2:
        case KVM_CAP_HYPERV_VP_INDEX:
+       case KVM_CAP_HYPERV_EVENTFD:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -2718,8 +2858,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_SET_BOOT_CPU_ID:
        case KVM_CAP_SPLIT_IRQCHIP:
        case KVM_CAP_IMMEDIATE_EXIT:
+       case KVM_CAP_GET_MSR_FEATURES:
                r = 1;
                break;
+       case KVM_CAP_SYNC_REGS:
+               r = KVM_SYNC_X86_VALID_FIELDS;
+               break;
        case KVM_CAP_ADJUST_CLOCK:
                r = KVM_CLOCK_TSC_STABLE;
                break;
@@ -2832,6 +2976,31 @@ long kvm_arch_dev_ioctl(struct file *filp,
                        goto out;
                r = 0;
                break;
+       case KVM_GET_MSR_FEATURE_INDEX_LIST: {
+               struct kvm_msr_list __user *user_msr_list = argp;
+               struct kvm_msr_list msr_list;
+               unsigned int n;
+
+               r = -EFAULT;
+               if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
+                       goto out;
+               n = msr_list.nmsrs;
+               msr_list.nmsrs = num_msr_based_features;
+               if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
+                       goto out;
+               r = -E2BIG;
+               if (n < msr_list.nmsrs)
+                       goto out;
+               r = -EFAULT;
+               if (copy_to_user(user_msr_list->indices, &msr_based_features,
+                                num_msr_based_features * sizeof(u32)))
+                       goto out;
+               r = 0;
+               break;
+       }
+       case KVM_GET_MSRS:
+               r = msr_io(NULL, argp, do_get_msr_feature, 1);
+               break;
        }
        default:
                r = -EINVAL;
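
A minimal userspace sketch of the resulting system-fd flow, assuming the uapi additions from this series (error handling elided): probe the feature-MSR count, fetch the indices, then read values with KVM_GET_MSRS on the /dev/kvm fd rather than on a vCPU fd.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	struct kvm_msr_list probe = { .nmsrs = 0 };

	/* first call fails with E2BIG but reports the entry count */
	ioctl(kvm, KVM_GET_MSR_FEATURE_INDEX_LIST, &probe);

	struct kvm_msr_list *list =
		malloc(sizeof(*list) + probe.nmsrs * sizeof(__u32));
	list->nmsrs = probe.nmsrs;
	ioctl(kvm, KVM_GET_MSR_FEATURE_INDEX_LIST, list);

	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} req = { .hdr = { .nmsrs = 1 } };
	req.entry.index = list->indices[0];
	if (ioctl(kvm, KVM_GET_MSRS, &req) == 1)
		printf("MSR 0x%x = 0x%llx\n", req.entry.index,
		       (unsigned long long)req.entry.data);
	return 0;
}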
@@ -2870,13 +3039,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
        }
 
-       if (unlikely(vcpu->cpu != cpu) || check_tsc_unstable()) {
+       if (unlikely(vcpu->cpu != cpu) || kvm_check_tsc_unstable()) {
                s64 tsc_delta = !vcpu->arch.last_host_tsc ? 0 :
                                rdtsc() - vcpu->arch.last_host_tsc;
                if (tsc_delta < 0)
                        mark_tsc_unstable("KVM discovered backwards TSC");
 
-               if (check_tsc_unstable()) {
+               if (kvm_check_tsc_unstable()) {
                        u64 offset = kvm_compute_tsc_offset(vcpu,
                                                vcpu->arch.last_guest_tsc);
                        kvm_vcpu_write_tsc_offset(vcpu, offset);
@@ -2905,7 +3074,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
-       vcpu->arch.st.steal.preempted = 1;
+       vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
 
        kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
                        &vcpu->arch.st.steal.preempted,
@@ -2939,12 +3108,18 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        pagefault_enable();
        kvm_x86_ops->vcpu_put(vcpu);
        vcpu->arch.last_host_tsc = rdtsc();
+       /*
+        * If userspace has set any breakpoints or watchpoints, dr6 is restored
+        * on every vmexit, but if not, we might have a stale dr6 from the
+        * guest. do_debug expects dr6 to be cleared after it runs; do the same.
+        */
+       set_debugreg(0, 6);
 }
 
 static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
 {
-       if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+       if (vcpu->arch.apicv_active)
                kvm_x86_ops->sync_pir_to_irr(vcpu);
 
        return kvm_apic_get_state(vcpu, s);
@@ -3473,6 +3648,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                void *buffer;
        } u;
 
+       vcpu_load(vcpu);
+
        u.buffer = NULL;
        switch (ioctl) {
        case KVM_GET_LAPIC: {
@@ -3498,8 +3675,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                if (!lapic_in_kernel(vcpu))
                        goto out;
                u.lapic = memdup_user(argp, sizeof(*u.lapic));
-               if (IS_ERR(u.lapic))
-                       return PTR_ERR(u.lapic);
+               if (IS_ERR(u.lapic)) {
+                       r = PTR_ERR(u.lapic);
+                       goto out_nofree;
+               }
 
                r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
                break;
@@ -3559,12 +3738,18 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = 0;
                break;
        }
-       case KVM_GET_MSRS:
+       case KVM_GET_MSRS: {
+               int idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = msr_io(vcpu, argp, do_get_msr, 1);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
-       case KVM_SET_MSRS:
+       }
+       case KVM_SET_MSRS: {
+               int idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = msr_io(vcpu, argp, do_set_msr, 0);
+               srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
+       }
        case KVM_TPR_ACCESS_REPORTING: {
                struct kvm_tpr_access_ctl tac;
 
@@ -3673,8 +3858,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        }
        case KVM_SET_XSAVE: {
                u.xsave = memdup_user(argp, sizeof(*u.xsave));
-               if (IS_ERR(u.xsave))
-                       return PTR_ERR(u.xsave);
+               if (IS_ERR(u.xsave)) {
+                       r = PTR_ERR(u.xsave);
+                       goto out_nofree;
+               }
 
                r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
                break;
@@ -3696,8 +3883,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        }
        case KVM_SET_XCRS: {
                u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
-               if (IS_ERR(u.xcrs))
-                       return PTR_ERR(u.xcrs);
+               if (IS_ERR(u.xcrs)) {
+                       r = PTR_ERR(u.xcrs);
+                       goto out_nofree;
+               }
 
                r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
                break;
@@ -3741,6 +3930,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        }
 out:
        kfree(u.buffer);
+out_nofree:
+       vcpu_put(vcpu);
        return r;
 }
 
@@ -4297,6 +4488,45 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
                break;
        }
+       case KVM_MEMORY_ENCRYPT_OP: {
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_op)
+                       r = kvm_x86_ops->mem_enc_op(kvm, argp);
+               break;
+       }
+       case KVM_MEMORY_ENCRYPT_REG_REGION: {
+               struct kvm_enc_region region;
+
+               r = -EFAULT;
+               if (copy_from_user(&region, argp, sizeof(region)))
+                       goto out;
+
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_reg_region)
+                       r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
+               break;
+       }
+       case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+               struct kvm_enc_region region;
+
+               r = -EFAULT;
+               if (copy_from_user(&region, argp, sizeof(region)))
+                       goto out;
+
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_unreg_region)
+                       r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
+               break;
+       }
+       case KVM_HYPERV_EVENTFD: {
+               struct kvm_hyperv_eventfd hvevfd;
+
+               r = -EFAULT;
+               if (copy_from_user(&hvevfd, argp, sizeof(hvevfd)))
+                       goto out;
+               r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
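
A userspace sketch of the new Hyper-V eventfd hookup (field names from the uapi added in this series; error handling elided): guest HvSignalEvent hypercalls for the given connection id then complete without a userspace exit.

#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int hook_hv_conn(int vm_fd, __u32 conn_id)
{
	struct kvm_hyperv_eventfd hvevfd = {
		.conn_id = conn_id,	/* low 24 bits carry the id */
		.fd	 = eventfd(0, EFD_CLOEXEC),
		.flags	 = 0,		/* KVM_HYPERV_EVENTFD_DEASSIGN unhooks */
	};

	ioctl(vm_fd, KVM_HYPERV_EVENTFD, &hvevfd);
	return hvevfd.fd;		/* poll this fd for guest signals */
}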
@@ -4351,6 +4581,19 @@ static void kvm_init_msr_list(void)
                j++;
        }
        num_emulated_msrs = j;
+
+       for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+               struct kvm_msr_entry msr;
+
+               msr.index = msr_based_features[i];
+               if (kvm_get_msr_feature(&msr))
+                       continue;
+
+               if (j < i)
+                       msr_based_features[j] = msr_based_features[i];
+               j++;
+       }
+       num_msr_based_features = j;
 }
 
 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -5705,7 +5948,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
                 * handle watchpoints yet, those would be handled in
                 * the emulate_ops.
                 */
-               if (kvm_vcpu_check_breakpoint(vcpu, &r))
+               if (!(emulation_type & EMULTYPE_SKIP) &&
+                   kvm_vcpu_check_breakpoint(vcpu, &r))
                        return r;
 
                ctxt->interruptibility = 0;
@@ -5891,6 +6135,43 @@ static void tsc_khz_changed(void *data)
        __this_cpu_write(cpu_tsc_khz, khz);
 }
 
+#ifdef CONFIG_X86_64
+static void kvm_hyperv_tsc_notifier(void)
+{
+       struct kvm *kvm;
+       struct kvm_vcpu *vcpu;
+       int cpu;
+
+       spin_lock(&kvm_lock);
+       list_for_each_entry(kvm, &vm_list, vm_list)
+               kvm_make_mclock_inprogress_request(kvm);
+
+       hyperv_stop_tsc_emulation();
+
+       /* TSC frequency always matches when on Hyper-V */
+       for_each_present_cpu(cpu)
+               per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+       kvm_max_guest_tsc_khz = tsc_khz;
+
+       list_for_each_entry(kvm, &vm_list, vm_list) {
+               struct kvm_arch *ka = &kvm->arch;
+
+               spin_lock(&ka->pvclock_gtod_sync_lock);
+
+               pvclock_update_vm_gtod_copy(kvm);
+
+               kvm_for_each_vcpu(cpu, vcpu, kvm)
+                       kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+
+               kvm_for_each_vcpu(cpu, vcpu, kvm)
+                       kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
+
+               spin_unlock(&ka->pvclock_gtod_sync_lock);
+       }
+       spin_unlock(&kvm_lock);
+}
+#endif
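
For reference, the control MSR that the new HV_X64_MSR_REENLIGHTENMENT_CONTROL cases route to kvm_hv_{get,set}_msr_common is laid out as follows (a sketch matching the companion Hyper-V patch; see the TLFS): the hypervisor injects 'vector' on 'target_vp' when the reference TSC changes after a migration, and the notifier above then refreshes the masterclock once hyperv_stop_tsc_emulation() returns.

struct hv_reenlightenment_control {
	__u64 vector:8;
	__u64 reserved1:8;
	__u64 enabled:1;
	__u64 reserved2:15;
	__u64 target_vp:32;
};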
+
 static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
                                     void *data)
 {
@@ -6112,9 +6393,9 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
        update_pvclock_gtod(tk);
 
        /* disable master clock if host does not trust, or does not
-        * use, TSC clocksource
+        * use, a TSC-based clocksource.
         */
-       if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+       if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
            atomic_read(&kvm_guest_has_master_clock) != 0)
                queue_work(system_long_wq, &pvclock_gtod_work);
 
@@ -6176,6 +6457,9 @@ int kvm_arch_init(void *opaque)
        kvm_lapic_init();
 #ifdef CONFIG_X86_64
        pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+
+       if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
+               set_hv_tscchange_cb(kvm_hyperv_tsc_notifier);
 #endif
 
        return 0;
@@ -6188,6 +6472,10 @@ int kvm_arch_init(void *opaque)
 
 void kvm_arch_exit(void)
 {
+#ifdef CONFIG_X86_64
+       if (hypervisor_is_type(X86_HYPER_MS_HYPERV))
+               clear_hv_tscchange_cb();
+#endif
        kvm_lapic_exit();
        perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
 
@@ -6450,6 +6738,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
                kvm_x86_ops->queue_exception(vcpu);
        } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
                vcpu->arch.smi_pending = false;
+               ++vcpu->arch.smi_count;
                enter_smm(vcpu);
        } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
                --vcpu->arch.nmi_pending;
@@ -6751,7 +7040,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
        if (irqchip_split(vcpu->kvm))
                kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
        else {
-               if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
+               if (vcpu->arch.apicv_active)
                        kvm_x86_ops->sync_pir_to_irr(vcpu);
                kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
        }
@@ -6760,12 +7049,6 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
        kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
 }
 
-static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
-{
-       ++vcpu->stat.tlb_flush;
-       kvm_x86_ops->tlb_flush(vcpu);
-}
-
 void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
                unsigned long start, unsigned long end)
 {
@@ -6834,7 +7117,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
                        kvm_mmu_sync_roots(vcpu);
                if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
-                       kvm_vcpu_flush_tlb(vcpu);
+                       kvm_vcpu_flush_tlb(vcpu, true);
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
@@ -6983,10 +7266,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
         * This handles the case where a posted interrupt was
         * notified with kvm_vcpu_kick.
         */
-       if (kvm_lapic_enabled(vcpu)) {
-               if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active)
-                       kvm_x86_ops->sync_pir_to_irr(vcpu);
-       }
+       if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
+               kvm_x86_ops->sync_pir_to_irr(vcpu);
 
        if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu)
            || need_resched() || signal_pending(current)) {
@@ -7007,7 +7288,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
-       wait_lapic_expire(vcpu);
+       if (lapic_timer_advance_ns)
+               wait_lapic_expire(vcpu);
        guest_enter_irqoff();
 
        if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -7263,13 +7545,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
        return 0;
 }
 
-
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        int r;
 
+       vcpu_load(vcpu);
        kvm_sigset_activate(vcpu);
-
        kvm_load_guest_fpu(vcpu);
 
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
@@ -7289,6 +7570,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
                goto out;
        }
 
+       if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) {
+               r = -EINVAL;
+               goto out;
+       }
+
+       if (vcpu->run->kvm_dirty_regs) {
+               r = sync_regs(vcpu);
+               if (r != 0)
+                       goto out;
+       }
+
        /* re-sync apic's tpr */
        if (!lapic_in_kernel(vcpu)) {
                if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
@@ -7313,13 +7605,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 out:
        kvm_put_guest_fpu(vcpu);
+       if (vcpu->run->kvm_valid_regs)
+               store_regs(vcpu);
        post_kvm_run_save(vcpu);
        kvm_sigset_deactivate(vcpu);
 
+       vcpu_put(vcpu);
        return r;
 }
 
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+static void __get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
                /*
@@ -7353,11 +7648,17 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 
        regs->rip = kvm_rip_read(vcpu);
        regs->rflags = kvm_get_rflags(vcpu);
+}
 
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+       vcpu_load(vcpu);
+       __get_regs(vcpu, regs);
+       vcpu_put(vcpu);
        return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
        vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
@@ -7387,7 +7688,13 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        vcpu->arch.exception.pending = false;
 
        kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
 
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+       vcpu_load(vcpu);
+       __set_regs(vcpu, regs);
+       vcpu_put(vcpu);
        return 0;
 }
 
@@ -7401,8 +7708,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
 
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-                                 struct kvm_sregs *sregs)
+static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        struct desc_ptr dt;
 
@@ -7436,13 +7742,22 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft)
                set_bit(vcpu->arch.interrupt.nr,
                        (unsigned long *)sregs->interrupt_bitmap);
+}
 
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                 struct kvm_sregs *sregs)
+{
+       vcpu_load(vcpu);
+       __get_sregs(vcpu, sregs);
+       vcpu_put(vcpu);
        return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
+       vcpu_load(vcpu);
+
        kvm_apic_accept_events(vcpu);
        if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
                                        vcpu->arch.pv.pv_unhalted)
@@ -7450,21 +7765,26 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
        else
                mp_state->mp_state = vcpu->arch.mp_state;
 
+       vcpu_put(vcpu);
        return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
+       int ret = -EINVAL;
+
+       vcpu_load(vcpu);
+
        if (!lapic_in_kernel(vcpu) &&
            mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
-               return -EINVAL;
+               goto out;
 
        /* INITs are latched while in SMM */
        if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
            (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
             mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
-               return -EINVAL;
+               goto out;
 
        if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
                vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
@@ -7472,7 +7792,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
        } else
                vcpu->arch.mp_state = mp_state->mp_state;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
-       return 0;
+
+       ret = 0;
+out:
+       vcpu_put(vcpu);
+       return ret;
 }
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
@@ -7519,25 +7843,25 @@ int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
        return 0;
 }
 
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-                                 struct kvm_sregs *sregs)
+static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 {
        struct msr_data apic_base_msr;
        int mmu_reset_needed = 0;
        int pending_vec, max_bits, idx;
        struct desc_ptr dt;
+       int ret = -EINVAL;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
                        (sregs->cr4 & X86_CR4_OSXSAVE))
-               return -EINVAL;
+               goto out;
 
        if (kvm_valid_sregs(vcpu, sregs))
-               return -EINVAL;
+               goto out;
 
        apic_base_msr.data = sregs->apic_base;
        apic_base_msr.host_initiated = true;
        if (kvm_set_apic_base(vcpu, &apic_base_msr))
-               return -EINVAL;
+               goto out;
 
        dt.size = sregs->idt.limit;
        dt.address = sregs->idt.base;
@@ -7603,7 +7927,20 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
        kvm_make_request(KVM_REQ_EVENT, vcpu);
 
-       return 0;
+       ret = 0;
+out:
+       return ret;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                 struct kvm_sregs *sregs)
+{
+       int ret;
+
+       vcpu_load(vcpu);
+       ret = __set_sregs(vcpu, sregs);
+       vcpu_put(vcpu);
+       return ret;
 }
 
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@ -7612,6 +7949,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        unsigned long rflags;
        int i, r;
 
+       vcpu_load(vcpu);
+
        if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
                r = -EBUSY;
                if (vcpu->arch.exception.pending)
@@ -7657,7 +7996,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        r = 0;
 
 out:
-
+       vcpu_put(vcpu);
        return r;
 }
 
@@ -7671,6 +8010,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
        gpa_t gpa;
        int idx;
 
+       vcpu_load(vcpu);
+
        idx = srcu_read_lock(&vcpu->kvm->srcu);
        gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -7679,14 +8020,17 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
        tr->writeable = 1;
        tr->usermode = 0;
 
+       vcpu_put(vcpu);
        return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-       struct fxregs_state *fxsave =
-                       &vcpu->arch.guest_fpu.state.fxsave;
+       struct fxregs_state *fxsave;
+
+       vcpu_load(vcpu);
 
+       fxsave = &vcpu->arch.guest_fpu.state.fxsave;
        memcpy(fpu->fpr, fxsave->st_space, 128);
        fpu->fcw = fxsave->cwd;
        fpu->fsw = fxsave->swd;
@@ -7696,13 +8040,17 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        fpu->last_dp = fxsave->rdp;
        memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
 
+       vcpu_put(vcpu);
        return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
-       struct fxregs_state *fxsave =
-                       &vcpu->arch.guest_fpu.state.fxsave;
+       struct fxregs_state *fxsave;
+
+       vcpu_load(vcpu);
+
+       fxsave = &vcpu->arch.guest_fpu.state.fxsave;
 
        memcpy(fxsave->st_space, fpu->fpr, 128);
        fxsave->cwd = fpu->fcw;
@@ -7713,6 +8061,46 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
        fxsave->rdp = fpu->last_dp;
        memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
 
+       vcpu_put(vcpu);
+       return 0;
+}
+
+static void store_regs(struct kvm_vcpu *vcpu)
+{
+       BUILD_BUG_ON(sizeof(struct kvm_sync_regs) > SYNC_REGS_SIZE_BYTES);
+
+       if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_REGS)
+               __get_regs(vcpu, &vcpu->run->s.regs.regs);
+
+       if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_SREGS)
+               __get_sregs(vcpu, &vcpu->run->s.regs.sregs);
+
+       if (vcpu->run->kvm_valid_regs & KVM_SYNC_X86_EVENTS)
+               kvm_vcpu_ioctl_x86_get_vcpu_events(
+                               vcpu, &vcpu->run->s.regs.events);
+}
+
+static int sync_regs(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->run->kvm_dirty_regs & ~KVM_SYNC_X86_VALID_FIELDS)
+               return -EINVAL;
+
+       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
+               __set_regs(vcpu, &vcpu->run->s.regs.regs);
+               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
+       }
+       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
+               if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+                       return -EINVAL;
+               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
+       }
+       if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
+               if (kvm_vcpu_ioctl_x86_set_vcpu_events(
+                               vcpu, &vcpu->run->s.regs.events))
+                       return -EINVAL;
+               vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
+       }
+
        return 0;
 }
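
A userspace sketch of the resulting fast path (KVM_SYNC_* names from the uapi in this series; error handling elided): mirror GPRs through the shared kvm_run page instead of issuing separate KVM_GET_REGS/KVM_SET_REGS ioctls around every exit.

#include <sys/ioctl.h>
#include <linux/kvm.h>

void run_with_synced_regs(int vcpu_fd, struct kvm_run *run)
{
	/* KVM_CHECK_EXTENSION(KVM_CAP_SYNC_REGS) reports the valid fields */
	run->kvm_valid_regs = KVM_SYNC_X86_REGS;	/* want regs on exit */

	ioctl(vcpu_fd, KVM_RUN, 0);

	run->s.regs.regs.rax = 0;			/* tweak in place */
	run->kvm_dirty_regs = KVM_SYNC_X86_REGS;	/* push on next entry */
	ioctl(vcpu_fd, KVM_RUN, 0);
}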
 
@@ -7769,7 +8157,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 {
        struct kvm_vcpu *vcpu;
 
-       if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
+       if (kvm_check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
                printk_once(KERN_WARNING
                "kvm: SMP vm created on host with unstable TSC; "
                "guest TSC will not be reliable\n");
@@ -7781,16 +8169,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-       int r;
-
        kvm_vcpu_mtrr_init(vcpu);
-       r = vcpu_load(vcpu);
-       if (r)
-               return r;
+       vcpu_load(vcpu);
        kvm_vcpu_reset(vcpu, false);
        kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
-       return r;
+       return 0;
 }
 
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@ -7800,13 +8184,15 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
        kvm_hv_vcpu_postcreate(vcpu);
 
-       if (vcpu_load(vcpu))
+       if (mutex_lock_killable(&vcpu->mutex))
                return;
+       vcpu_load(vcpu);
        msr.data = 0x0;
        msr.index = MSR_IA32_TSC;
        msr.host_initiated = true;
        kvm_write_tsc(vcpu, &msr);
        vcpu_put(vcpu);
+       mutex_unlock(&vcpu->mutex);
 
        if (!kvmclock_periodic_sync)
                return;
@@ -7817,11 +8203,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-       int r;
        vcpu->arch.apf.msr_val = 0;
 
-       r = vcpu_load(vcpu);
-       BUG_ON(r);
+       vcpu_load(vcpu);
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
 
@@ -7830,9 +8214,12 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
+       kvm_lapic_reset(vcpu, init_event);
+
        vcpu->arch.hflags = 0;
 
        vcpu->arch.smi_pending = 0;
+       vcpu->arch.smi_count = 0;
        atomic_set(&vcpu->arch.nmi_queued, 0);
        vcpu->arch.nmi_pending = 0;
        vcpu->arch.nmi_injected = false;
@@ -7926,7 +8313,7 @@ int kvm_arch_hardware_enable(void)
                return ret;
 
        local_tsc = rdtsc();
-       stable = !check_tsc_unstable();
+       stable = !kvm_check_tsc_unstable();
        list_for_each_entry(kvm, &vm_list, vm_list) {
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (!stable && vcpu->cpu == smp_processor_id())
@@ -8172,7 +8559,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        raw_spin_lock_init(&kvm->arch.tsc_write_lock);
        mutex_init(&kvm->arch.apic_map_lock);
-       mutex_init(&kvm->arch.hyperv.hv_lock);
        spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
        kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
@@ -8181,6 +8567,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
        INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
 
+       kvm_hv_init_vm(kvm);
        kvm_page_track_init(kvm);
        kvm_mmu_init_vm(kvm);
 
@@ -8192,9 +8579,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-       int r;
-       r = vcpu_load(vcpu);
-       BUG_ON(r);
+       vcpu_load(vcpu);
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
 }
@@ -8274,10 +8659,8 @@ int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
                        return r;
        }
 
-       if (!size) {
-               r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
-               WARN_ON(r < 0);
-       }
+       if (!size)
+               vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
 
        return 0;
 }
@@ -8315,6 +8698,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
        kvm_mmu_uninit_vm(kvm);
        kvm_page_track_cleanup(kvm);
+       kvm_hv_destroy_vm(kvm);
 }
 
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,