kvm: x86: Dynamically allocate guest_fpu
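
This change turns the vCPU's guest FPU state into a pointer backed by a dedicated slab cache ("x86_fpu") sized to fpu_kernel_xstate_size. The hunks below show the x86.c side: the cache is created in kvm_arch_init() and destroyed in kvm_arch_exit(), KVM now refuses to load on CPUs without FPU/FXSR support, and guest_fpu accesses switch from '.' to '->'. The per-vCPU allocation itself happens in the vendor vcpu-create paths (svm.c/vmx.c), which this blobdiff does not include; the following is only a rough sketch of what such an allocation site could look like, with the function names and error handling invented for illustration:

	/*
	 * Illustrative sketch only -- the real allocation sites live in the
	 * vendor code (svm.c/vmx.c) and are not part of this blobdiff.  The
	 * function names below are made up for the example; a real caller
	 * needs <linux/slab.h> for kmem_cache_zalloc()/kmem_cache_free().
	 */
	static int example_alloc_guest_fpu(struct kvm_vcpu *vcpu)
	{
		/* Allocate the dynamically sized guest FPU state from the new cache. */
		vcpu->arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache, GFP_KERNEL);
		if (!vcpu->arch.guest_fpu) {
			printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
			return -ENOMEM;
		}
		return 0;
	}

	static void example_free_guest_fpu(struct kvm_vcpu *vcpu)
	{
		/* Return the guest FPU state to the cache when the vCPU is destroyed. */
		kmem_cache_free(x86_fpu_cache, vcpu->arch.guest_fpu);
	}

Because the cache is created with SLAB_ACCOUNT (see kvm_arch_init() below), these per-vCPU allocations are charged to the kmem cgroup of the process that creates the vCPU.
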
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5cd5647120f2b439a8d63b52d223499b48e5ff9e..4f786fcc620e2f23358f8d42d79587e32f26544f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -213,6 +213,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
+struct kmem_cache *x86_fpu_cache;
+EXPORT_SYMBOL_GPL(x86_fpu_cache);
+
 static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
 
 static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
@@ -1665,8 +1668,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
 static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
-       kvm_x86_ops->write_tsc_offset(vcpu, offset);
-       vcpu->arch.tsc_offset = offset;
+       vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
 }
 
 static inline bool kvm_check_tsc_unstable(void)
@@ -1794,7 +1796,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
                                           s64 adjustment)
 {
-       kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
+       u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+       kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
 }
 
 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -2997,6 +3000,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_TLBFLUSH:
        case KVM_CAP_HYPERV_SEND_IPI:
        case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+       case KVM_CAP_HYPERV_CPUID:
        case KVM_CAP_PCI_SEGMENT:
        case KVM_CAP_DEBUGREGS:
        case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3008,7 +3012,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_HYPERV_TIME:
        case KVM_CAP_IOAPIC_POLARITY_IGNORED:
        case KVM_CAP_TSC_DEADLINE_TIMER:
-       case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_DISABLE_QUIRKS:
        case KVM_CAP_SET_BOOT_CPU_ID:
        case KVM_CAP_SPLIT_IRQCHIP:
@@ -3630,7 +3633,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 
 static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 {
-       struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
+       struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
        u64 xstate_bv = xsave->header.xfeatures;
        u64 valid;
 
@@ -3672,7 +3675,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 
 static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 {
-       struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave;
+       struct xregs_state *xsave = &vcpu->arch.guest_fpu->state.xsave;
        u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
        u64 valid;
 
@@ -3720,7 +3723,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
                fill_xsave((u8 *) guest_xsave->region, vcpu);
        } else {
                memcpy(guest_xsave->region,
-                       &vcpu->arch.guest_fpu.state.fxsave,
+                       &vcpu->arch.guest_fpu->state.fxsave,
                        sizeof(struct fxregs_state));
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
                        XFEATURE_MASK_FPSSE;
@@ -3750,7 +3753,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
                if (xstate_bv & ~XFEATURE_MASK_FPSSE ||
                        mxcsr & ~mxcsr_feature_mask)
                        return -EINVAL;
-               memcpy(&vcpu->arch.guest_fpu.state.fxsave,
+               memcpy(&vcpu->arch.guest_fpu->state.fxsave,
                        guest_xsave->region, sizeof(struct fxregs_state));
        }
        return 0;
@@ -3828,6 +3831,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                return kvm_hv_activate_synic(vcpu, cap->cap ==
                                             KVM_CAP_HYPERV_SYNIC2);
        case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
+               if (!kvm_x86_ops->nested_enable_evmcs)
+                       return -ENOTTY;
                r = kvm_x86_ops->nested_enable_evmcs(vcpu, &vmcs_version);
                if (!r) {
                        user_ptr = (void __user *)(uintptr_t)cap->args[0];
@@ -4190,6 +4195,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
                break;
        }
+       case KVM_GET_SUPPORTED_HV_CPUID: {
+               struct kvm_cpuid2 __user *cpuid_arg = argp;
+               struct kvm_cpuid2 cpuid;
+
+               r = -EFAULT;
+               if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
+                       goto out;
+
+               r = kvm_vcpu_ioctl_get_hv_cpuid(vcpu, &cpuid,
+                                               cpuid_arg->entries);
+               if (r)
+                       goto out;
+
+               r = -EFAULT;
+               if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
+                       goto out;
+               r = 0;
+               break;
+       }
        default:
                r = -EINVAL;
        }
@@ -4394,7 +4418,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
  */
 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 {
-       bool is_dirty = false;
+       bool flush = false;
        int r;
 
        mutex_lock(&kvm->slots_lock);
@@ -4405,14 +4429,41 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
        if (kvm_x86_ops->flush_log_dirty)
                kvm_x86_ops->flush_log_dirty(kvm);
 
-       r = kvm_get_dirty_log_protect(kvm, log, &is_dirty);
+       r = kvm_get_dirty_log_protect(kvm, log, &flush);
 
        /*
         * All the TLBs can be flushed out of mmu lock, see the comments in
         * kvm_mmu_slot_remove_write_access().
         */
        lockdep_assert_held(&kvm->slots_lock);
-       if (is_dirty)
+       if (flush)
+               kvm_flush_remote_tlbs(kvm);
+
+       mutex_unlock(&kvm->slots_lock);
+       return r;
+}
+
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+       bool flush = false;
+       int r;
+
+       mutex_lock(&kvm->slots_lock);
+
+       /*
+        * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+        */
+       if (kvm_x86_ops->flush_log_dirty)
+               kvm_x86_ops->flush_log_dirty(kvm);
+
+       r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+       /*
+        * All the TLBs can be flushed out of mmu lock, see the comments in
+        * kvm_mmu_slot_remove_write_access().
+        */
+       lockdep_assert_held(&kvm->slots_lock);
+       if (flush)
                kvm_flush_remote_tlbs(kvm);
 
        mutex_unlock(&kvm->slots_lock);
@@ -4431,8 +4482,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
        return 0;
 }
 
-static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
-                                  struct kvm_enable_cap *cap)
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+                           struct kvm_enable_cap *cap)
 {
        int r;
 
@@ -4765,15 +4816,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = 0;
                break;
        }
-       case KVM_ENABLE_CAP: {
-               struct kvm_enable_cap cap;
-
-               r = -EFAULT;
-               if (copy_from_user(&cap, argp, sizeof(cap)))
-                       goto out;
-               r = kvm_vm_ioctl_enable_cap(kvm, &cap);
-               break;
-       }
        case KVM_MEMORY_ENCRYPT_OP: {
                r = -ENOTTY;
                if (kvm_x86_ops->mem_enc_op)
@@ -6813,11 +6855,30 @@ int kvm_arch_init(void *opaque)
                goto out;
        }
 
+       /*
+        * KVM explicitly assumes that the guest has an FPU and
+        * FXSAVE/FXRSTOR. For example, the KVM_GET_FPU explicitly casts the
+        * vCPU's FPU state as a fxregs_state struct.
+        */
+       if (!boot_cpu_has(X86_FEATURE_FPU) || !boot_cpu_has(X86_FEATURE_FXSR)) {
+               printk(KERN_ERR "kvm: inadequate fpu\n");
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+
        r = -ENOMEM;
+       x86_fpu_cache = kmem_cache_create("x86_fpu", fpu_kernel_xstate_size,
+                                         __alignof__(struct fpu), SLAB_ACCOUNT,
+                                         NULL);
+       if (!x86_fpu_cache) {
+               printk(KERN_ERR "kvm: failed to allocate cache for x86 fpu\n");
+               goto out;
+       }
+
        shared_msrs = alloc_percpu(struct kvm_shared_msrs);
        if (!shared_msrs) {
                printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
-               goto out;
+               goto out_free_x86_fpu_cache;
        }
 
        r = kvm_mmu_module_init();
@@ -6850,6 +6911,8 @@ int kvm_arch_init(void *opaque)
 
 out_free_percpu:
        free_percpu(shared_msrs);
+out_free_x86_fpu_cache:
+       kmem_cache_destroy(x86_fpu_cache);
 out:
        return r;
 }
@@ -6873,6 +6936,7 @@ void kvm_arch_exit(void)
        kvm_x86_ops = NULL;
        kvm_mmu_module_exit();
        free_percpu(shared_msrs);
+       kmem_cache_destroy(x86_fpu_cache);
 }
 
 int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
@@ -6918,6 +6982,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
        clock_pairing.nsec = ts.tv_nsec;
        clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
        clock_pairing.flags = 0;
+       memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
 
        ret = 0;
        if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
@@ -7455,7 +7520,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
        else {
                if (vcpu->arch.apicv_active)
                        kvm_x86_ops->sync_pir_to_irr(vcpu);
-               kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+               if (ioapic_in_kernel(vcpu->kvm))
+                       kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
        }
 
        if (is_guest_mode(vcpu))
@@ -7994,9 +8060,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 {
        preempt_disable();
-       copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
+       copy_fpregs_to_fpstate(&current->thread.fpu);
        /* PKRU is separately restored in kvm_x86_ops->run.  */
-       __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+       __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
                                ~XFEATURE_MASK_PKRU);
        preempt_enable();
        trace_kvm_fpu(1);
@@ -8006,8 +8072,8 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
        preempt_disable();
-       copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
-       copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+       copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
+       copy_kernel_to_fpregs(&current->thread.fpu.state);
        preempt_enable();
        ++vcpu->stat.fpu_reload;
        trace_kvm_fpu(0);
@@ -8501,7 +8567,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 
        vcpu_load(vcpu);
 
-       fxsave = &vcpu->arch.guest_fpu.state.fxsave;
+       fxsave = &vcpu->arch.guest_fpu->state.fxsave;
        memcpy(fpu->fpr, fxsave->st_space, 128);
        fpu->fcw = fxsave->cwd;
        fpu->fsw = fxsave->swd;
@@ -8521,7 +8587,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 
        vcpu_load(vcpu);
 
-       fxsave = &vcpu->arch.guest_fpu.state.fxsave;
+       fxsave = &vcpu->arch.guest_fpu->state.fxsave;
 
        memcpy(fxsave->st_space, fpu->fpr, 128);
        fxsave->cwd = fpu->fcw;
@@ -8577,9 +8643,9 @@ static int sync_regs(struct kvm_vcpu *vcpu)
 
 static void fx_init(struct kvm_vcpu *vcpu)
 {
-       fpstate_init(&vcpu->arch.guest_fpu.state);
+       fpstate_init(&vcpu->arch.guest_fpu->state);
        if (boot_cpu_has(X86_FEATURE_XSAVES))
-               vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv =
+               vcpu->arch.guest_fpu->state.xsave.header.xcomp_bv =
                        host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
        /*
@@ -8617,6 +8683,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
+       vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
        kvm_vcpu_mtrr_init(vcpu);
        vcpu_load(vcpu);
        kvm_vcpu_reset(vcpu, false);
@@ -8703,11 +8770,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
                 */
                if (init_event)
                        kvm_put_guest_fpu(vcpu);
-               mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
+               mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
                                        XFEATURE_MASK_BNDREGS);
                if (mpx_state_buffer)
                        memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
-               mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
+               mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
                                        XFEATURE_MASK_BNDCSR);
                if (mpx_state_buffer)
                        memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
@@ -8719,7 +8786,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
                kvm_pmu_reset(vcpu);
                vcpu->arch.smbase = 0x30000;
 
-               vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
                vcpu->arch.msr_misc_features_enables = 0;
 
                vcpu->arch.xcr0 = XFEATURE_MASK_FP;