Merge branch 'sev-v9-p2' of https://github.com/codomania/kvm
author Paolo Bonzini <pbonzini@redhat.com>
Tue, 16 Jan 2018 15:34:48 +0000 (16:34 +0100)
committer Radim Krčmář <rkrcmar@redhat.com>
Tue, 16 Jan 2018 15:35:32 +0000 (16:35 +0100)
This part of the Secure Encrypted Virtualization (SEV) patch series focuses on the
KVM changes required to create and manage SEV guests.

SEV is an extension to the AMD-V architecture which supports running encrypted
virtual machines (VMs) under the control of a hypervisor. Encrypted VMs have their
pages (code and data) secured such that only the guest itself has access to the
unencrypted version. Each encrypted VM is associated with a unique encryption key;
if its data is accessed by a different entity using a different key, the encrypted
guest's data will be incorrectly decrypted, leading to unintelligible data.
This security model ensures that the hypervisor is no longer able to inspect or
alter any guest code or data.

The key management for this feature is handled by a separate processor known as
the AMD Secure Processor (AMD-SP), which is present on AMD SoCs. The SEV Key
Management Specification (see below) provides a set of commands which can be
used by the hypervisor to load virtual machine keys through the AMD-SP driver.

The patch series adds a new ioctl to the KVM driver (KVM_MEMORY_ENCRYPT_OP). The
ioctl will be used by QEMU to issue the SEV guest-specific commands defined in the
Key Management Specification.
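
For illustration only, here is a minimal userspace sketch of issuing the first
SEV command through this ioctl. It assumes the kvm_sev_cmd layout (id, data,
error, sev_fd) added to include/uapi/linux/kvm.h by this series and an AMD-SP
character device at /dev/sev; it is not part of the patch itself.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Hedged sketch: create the SEV context for an existing KVM VM fd. */
    static int sev_vm_init(int vm_fd)
    {
            struct kvm_sev_cmd cmd;
            int sev_fd = open("/dev/sev", O_RDWR);  /* AMD-SP (PSP) driver node */

            if (sev_fd < 0)
                    return -1;

            memset(&cmd, 0, sizeof(cmd));
            cmd.id = KVM_SEV_INIT;   /* first command in the SEV guest life cycle */
            cmd.sev_fd = sev_fd;     /* lets KVM issue firmware commands on our behalf */

            if (ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd) < 0) {
                    fprintf(stderr, "KVM_SEV_INIT failed, fw error %u\n", cmd.error);
                    return -1;
            }
            return 0;
    }

The same kvm_sev_cmd wrapper is reused for the KVM_SEV_LAUNCH_START,
KVM_SEV_LAUNCH_UPDATE_DATA, KVM_SEV_LAUNCH_MEASURE and KVM_SEV_LAUNCH_FINISH
commands handled later in this diff.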

The following links provide additional details:

AMD Memory Encryption white paper:
http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf

AMD64 Architecture Programmer's Manual:
    http://support.amd.com/TechDocs/24593.pdf
    SME is section 7.10
    SEV is section 15.34

SEV Key Management:
http://support.amd.com/TechDocs/55766_SEV-KM API_Specification.pdf

KVM Forum Presentation:
http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf

SEV Guest BIOS support:
  SEV support has been added to the EDKII/OVMF BIOS
  https://github.com/tianocore/edk2

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Documentation/virtual/kvm/api.txt
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/kvm_host.h
arch/x86/kernel/cpu/amd.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/x86.c
include/uapi/linux/kvm.h

index 57d3ee9e4bde2a799715ca75871fd61b27858b0a,c2ced6a44bbb452eaa95021acba191e354ab2d75..e5f1743e0b3eb4955357941766bb002da9ca2ba8
@@@ -2901,19 -2901,14 +2901,19 @@@ userspace buffer and its length
  
  struct kvm_s390_irq_state {
        __u64 buf;
 -      __u32 flags;
 +      __u32 flags;        /* will stay unused for compatibility reasons */
        __u32 len;
 -      __u32 reserved[4];
 +      __u32 reserved[4];  /* will stay unused for compatibility reasons */
  };
  
  Userspace passes in the above struct and for each pending interrupt a
  struct kvm_s390_irq is copied to the provided buffer.
  
 +The structure contains a flags and a reserved field for future extensions. As
 +the kernel never checked for flags == 0 and QEMU never pre-zeroed flags and
 +reserved, these fields can not be used in the future without breaking
 +compatibility.
 +
  If -ENOBUFS is returned the buffer provided was too small and userspace
  may retry with a bigger buffer.
  
@@@ -2937,14 -2932,10 +2937,14 @@@ containing a struct kvm_s390_irq_state
  
  struct kvm_s390_irq_state {
        __u64 buf;
 +      __u32 flags;        /* will stay unused for compatibility reasons */
        __u32 len;
 -      __u32 pad;
 +      __u32 reserved[4];  /* will stay unused for compatibility reasons */
  };
  
 +The restrictions for flags and reserved apply as well.
 +(see KVM_S390_GET_IRQ_STATE)
 +
  The userspace memory referenced by buf contains a struct kvm_s390_irq
  for each interrupt to be injected into the guest.
  If one of the interrupts could not be injected for some reason the
@@@ -3403,6 -3394,56 +3403,56 @@@ invalid, if invalid pages are written t
  or if no page table is present for the addresses (e.g. when using
  hugepages).
  
+ 4.109 KVM_MEMORY_ENCRYPT_OP
+
+ Capability: basic
+ Architectures: x86
+ Type: system
+ Parameters: an opaque platform specific structure (in/out)
+ Returns: 0 on success; -1 on error
+
+ If the platform supports creating encrypted VMs then this ioctl can be used
+ for issuing platform-specific memory encryption commands to manage those
+ encrypted VMs.
+
+ Currently, this ioctl is used for issuing Secure Encrypted Virtualization
+ (SEV) commands on AMD Processors. The SEV commands are defined in
+ Documentation/virtual/kvm/amd-memory-encryption.txt.
+
+ 4.110 KVM_MEMORY_ENCRYPT_REG_REGION
+
+ Capability: basic
+ Architectures: x86
+ Type: system
+ Parameters: struct kvm_enc_region (in)
+ Returns: 0 on success; -1 on error
+
+ This ioctl can be used to register a guest memory region which may
+ contain encrypted data (e.g. guest RAM, SMRAM etc).
+
+ It is used in the SEV-enabled guest. When encryption is enabled, a guest
+ memory region may contain encrypted data. The SEV memory encryption
+ engine uses a tweak such that two identical plaintext pages, each at
+ different locations will have differing ciphertexts. So swapping or
+ moving ciphertext of those pages will not result in plaintext being
+ swapped. So relocating (or migrating) physical backing pages for the SEV
+ guest will require some additional steps.
+
+ Note: The current SEV key management spec does not provide commands to
+ swap or migrate (move) ciphertext pages. Hence, for now we pin the guest
+ memory region registered with the ioctl.
+
+ 4.111 KVM_MEMORY_ENCRYPT_UNREG_REGION
+
+ Capability: basic
+ Architectures: x86
+ Type: system
+ Parameters: struct kvm_enc_region (in)
+ Returns: 0 on success; -1 on error
+
+ This ioctl can be used to unregister the guest memory region registered
+ with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
  5. The kvm_run structure
  ------------------------
  
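As a hedged illustration of the 4.110/4.111 interface documented above, a
userspace helper registering and unregistering encrypted guest RAM could look
like the following; it assumes the kvm_enc_region layout (addr, size) added to
include/uapi/linux/kvm.h by this series:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /* Register a host-virtual range backing guest RAM; KVM pins these pages. */
    static int sev_register_ram(int vm_fd, void *hva, __u64 size)
    {
            struct kvm_enc_region region = {
                    .addr = (uintptr_t)hva,
                    .size = size,
            };

            return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_REG_REGION, &region);
    }

    /* Drop the registration (and the pin) when the memory is released. */
    static int sev_unregister_ram(int vm_fd, void *hva, __u64 size)
    {
            struct kvm_enc_region region = {
                    .addr = (uintptr_t)hva,
                    .size = size,
            };

            return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_UNREG_REGION, &region);
    }
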
index 800104c8a3edfee7f4f52a33b8451a51ee0ed90a,19b955adacff28da2b2261d4338efcf690dd0115..19f35be95f168dc2cec30e07127520e14efe8ac5
  #define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
  #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
  #define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_SEV                       ( 7*32+11) /* AMD Secure Encrypted Virtualization */
  
  #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
  #define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
  /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
  #define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
  #define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
 +#define X86_FEATURE_XSAVEERPTR                (13*32+ 2) /* Always save/restore FP error pointers */
  
  /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
  #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
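
The X86_FEATURE_SEV bit added above is derived from CPUID leaf 0x8000001F, the
same leaf the rest of this diff consults (EBX[11:6] for the physical address
reduction, ECX for the number of encrypted guests, EDX for the minimum SEV
ASID). A small userspace sketch for inspecting that leaf, shown only as an
illustration:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
                    return 1;   /* leaf not implemented on this CPU */

            printf("SME: %u  SEV: %u\n", eax & 1, (eax >> 1) & 1);
            printf("C-bit position: %u, phys addr bits reduced by: %u\n",
                   ebx & 0x3f, (ebx >> 6) & 0x3f);
            printf("max encrypted guests (max SEV ASID): %u, min SEV ASID: %u\n",
                   ecx, edx);
            return 0;
    }
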
index 44de261e9223da71f036122166b9f1708a1a264f,262950f9f2d95e22d53bc06a6cea2e54e387c21e..ea7e40e9c1f0f48ed1f15d0238a2ff6c025c7c09
@@@ -86,7 -86,7 +86,7 @@@
                          | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
                          | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
                          | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
 -                        | X86_CR4_SMAP | X86_CR4_PKE))
 +                        | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
  
  #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
  
@@@ -504,7 -504,6 +504,7 @@@ struct kvm_vcpu_arch 
        int mp_state;
        u64 ia32_misc_enable_msr;
        u64 smbase;
 +      u64 smi_count;
        bool tpr_access_reporting;
        u64 ia32_xss;
  
        struct kvm_mmu_memory_cache mmu_page_cache;
        struct kvm_mmu_memory_cache mmu_page_header_cache;
  
 +      /*
 +       * QEMU userspace and the guest each have their own FPU state.
 +       * In vcpu_run, we switch between the user and guest FPU contexts.
 +       * While running a VCPU, the VCPU thread will have the guest FPU
 +       * context.
 +       *
 +       * Note that while the PKRU state lives inside the fpu registers,
 +       * it is switched out separately at VMENTER and VMEXIT time. The
 +       * "guest_fpu" state here contains the guest FPU context, with the
 +       * host PRKU bits.
 +       */
 +      struct fpu user_fpu;
        struct fpu guest_fpu;
 +
        u64 xcr0;
        u64 guest_supported_xcr0;
        u32 guest_xstate_size;
@@@ -761,6 -747,15 +761,15 @@@ enum kvm_irqchip_mode 
        KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
  };
  
+ struct kvm_sev_info {
+       bool active;            /* SEV enabled guest */
+       unsigned int asid;      /* ASID used for this guest */
+       unsigned int handle;    /* SEV firmware handle */
+       int fd;                 /* SEV device fd */
+       unsigned long pages_locked; /* Number of pages locked */
+       struct list_head regions_list;  /* List of registered regions */
+ };
  struct kvm_arch {
        unsigned int n_used_mmu_pages;
        unsigned int n_requested_mmu_pages;
  
        bool x2apic_format;
        bool x2apic_broadcast_quirk_disabled;
+       struct kvm_sev_info sev_info;
  };
  
  struct kvm_vm_stat {
@@@ -966,7 -963,7 +977,7 @@@ struct kvm_x86_ops 
        unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
  
 -      void (*tlb_flush)(struct kvm_vcpu *vcpu);
 +      void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
  
        void (*run)(struct kvm_vcpu *vcpu);
        int (*handle_exit)(struct kvm_vcpu *vcpu);
        void (*handle_external_intr)(struct kvm_vcpu *vcpu);
        bool (*mpx_supported)(void);
        bool (*xsaves_supported)(void);
 +      bool (*umip_emulated)(void);
  
        int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
  
        int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
        int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
        int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+       int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
+       int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+       int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
  };
  
  struct kvm_arch_async_pf {
@@@ -1176,8 -1176,7 +1191,8 @@@ int x86_emulate_instruction(struct kvm_
  static inline int emulate_instruction(struct kvm_vcpu *vcpu,
                        int emulation_type)
  {
 -      return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
 +      return x86_emulate_instruction(vcpu, 0,
 +                      emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
  }
  
  void kvm_enable_efer_bits(u64);
@@@ -1450,7 -1449,4 +1465,7 @@@ static inline int kvm_cpu_get_apicid(in
  #define put_smstate(type, buf, offset, val)                      \
        *(type *)((buf) + (offset) - 0x7e00) = val
  
 +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 +              unsigned long start, unsigned long end);
 +
  #endif /* _ASM_X86_KVM_HOST_H */
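
The mem_enc_op/mem_enc_reg_region/mem_enc_unreg_region hooks added to
kvm_x86_ops above are dispatched from the generic x86 ioctl path in
arch/x86/kvm/x86.c (that hunk is not shown in this excerpt). A hedged sketch
of the expected dispatch:

    /* Sketch only: forward the opaque ioctl argument to the vendor module. */
    static int kvm_vm_ioctl_mem_enc_op(struct kvm *kvm, void __user *argp)
    {
            int r = -ENOTTY;

            if (kvm_x86_ops->mem_enc_op)
                    r = kvm_x86_ops->mem_enc_op(kvm, argp);

            return r;
    }
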
index bcb75dc97d44075d2eecb3137b91f934072352b0,c1234aa0550ceb1995f6f8b010240dcc951babd9..df8a2418aadfcf7f40c19742de45bfc278ad0dbd
@@@ -556,6 -556,51 +556,51 @@@ static void bsp_init_amd(struct cpuinfo
        }
  }
  
+ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
+ {
+       u64 msr;
+       /*
+        * BIOS support is required for SME and SEV.
+        *   For SME: If BIOS has enabled SME then adjust x86_phys_bits by
+        *            the SME physical address space reduction value.
+        *            If BIOS has not enabled SME then don't advertise the
+        *            SME feature (set in scattered.c).
+        *   For SEV: If BIOS has not enabled SEV then don't advertise the
+        *            SEV feature (set in scattered.c).
+        *
+        *   In all cases, since support for SME and SEV requires long mode,
+        *   don't advertise the feature under CONFIG_X86_32.
+        */
+       if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
+               /* Check if memory encryption is enabled */
+               rdmsrl(MSR_K8_SYSCFG, msr);
+               if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+                       goto clear_all;
+               /*
+                * Always adjust physical address bits. Even though this
+                * will be a value above 32-bits this is still done for
+                * CONFIG_X86_32 so that accurate values are reported.
+                */
+               c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+               if (IS_ENABLED(CONFIG_X86_32))
+                       goto clear_all;
+               rdmsrl(MSR_K7_HWCR, msr);
+               if (!(msr & MSR_K7_HWCR_SMMLOCK))
+                       goto clear_sev;
+               return;
+ clear_all:
+               clear_cpu_cap(c, X86_FEATURE_SME);
+ clear_sev:
+               clear_cpu_cap(c, X86_FEATURE_SEV);
+       }
+ }
  static void early_init_amd(struct cpuinfo_x86 *c)
  {
        u32 dummy;
        if (cpu_has_amd_erratum(c, amd_erratum_400))
                set_cpu_bug(c, X86_BUG_AMD_E400);
  
-       /*
-        * BIOS support is required for SME. If BIOS has enabled SME then
-        * adjust x86_phys_bits by the SME physical address space reduction
-        * value. If BIOS has not enabled SME then don't advertise the
-        * feature (set in scattered.c). Also, since the SME support requires
-        * long mode, don't advertise the feature under CONFIG_X86_32.
-        */
-       if (cpu_has(c, X86_FEATURE_SME)) {
-               u64 msr;
-               /* Check if SME is enabled */
-               rdmsrl(MSR_K8_SYSCFG, msr);
-               if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) {
-                       c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
-                       if (IS_ENABLED(CONFIG_X86_32))
-                               clear_cpu_cap(c, X86_FEATURE_SME);
-               } else {
-                       clear_cpu_cap(c, X86_FEATURE_SME);
-               }
-       }
+       early_detect_mem_encrypt(c);
  }
  
  static void init_amd_k8(struct cpuinfo_x86 *c)
@@@ -804,11 -830,8 +830,11 @@@ static void init_amd(struct cpuinfo_x8
        case 0x17: init_amd_zn(c); break;
        }
  
 -      /* Enable workaround for FXSAVE leak */
 -      if (c->x86 >= 6)
 +      /*
 +       * Enable workaround for FXSAVE leak on CPUs
 +       * without a XSaveErPtr feature
 +       */
 +      if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
                set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
  
        cpu_detect_cache_sizes(c);
diff --combined arch/x86/kvm/cpuid.c
index a0e6c975f3a8bb4a8cc7b8ec21b8b5045add5010,c6473ca825cdd875dbfe19b5197549098ad40e5d..ac0041c2f5afe7faa9cb6cb86937a3e06277a097
@@@ -293,18 -293,13 +293,18 @@@ static int __do_cpuid_ent_emulated(stru
  {
        switch (func) {
        case 0:
 -              entry->eax = 1;         /* only one leaf currently */
 +              entry->eax = 7;
                ++*nent;
                break;
        case 1:
                entry->ecx = F(MOVBE);
                ++*nent;
                break;
 +      case 7:
 +              entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 +              if (index == 0)
 +                      entry->ecx = F(RDPID);
 +              ++*nent;
        default:
                break;
        }
@@@ -332,7 -327,6 +332,7 @@@ static inline int __do_cpuid_ent(struc
        unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
        unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
        unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
 +      unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
  
        /* cpuid 1.edx */
        const u32 kvm_cpuid_1_edx_x86_features =
  
        /* cpuid 7.0.ecx*/
        const u32 kvm_cpuid_7_0_ecx_x86_features =
 -              F(AVX512VBMI) | F(LA57) | F(PKU) |
 -              0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
 +              F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
 +              F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
 +              F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
  
        /* cpuid 7.0.edx*/
        const u32 kvm_cpuid_7_0_edx_x86_features =
                        entry->ebx |= F(TSC_ADJUST);
                        entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
                        cpuid_mask(&entry->ecx, CPUID_7_ECX);
 +                      entry->ecx |= f_umip;
                        /* PKU is not yet implemented for shadow paging. */
                        if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
                                entry->ecx &= ~F(PKU);
                             (1 << KVM_FEATURE_ASYNC_PF) |
                             (1 << KVM_FEATURE_PV_EOI) |
                             (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
 -                           (1 << KVM_FEATURE_PV_UNHALT);
 +                           (1 << KVM_FEATURE_PV_UNHALT) |
 +                           (1 << KVM_FEATURE_PV_TLB_FLUSH);
  
                if (sched_info_on())
                        entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
                entry->edx = 0;
                break;
        case 0x80000000:
-               entry->eax = min(entry->eax, 0x8000001a);
+               entry->eax = min(entry->eax, 0x8000001f);
                break;
        case 0x80000001:
                entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
diff --combined arch/x86/kvm/mmu.c
index 89da688784fa72bb0b669d4db506773ececcacf4,d5e5dbd0e5ad9d2a22a5eeb9455f4108e9375e9c..ff1e9ee259cf7cd0e9151d21b8292bd45545e380
@@@ -381,7 -381,7 +381,7 @@@ void kvm_mmu_set_mask_ptes(u64 user_mas
  }
  EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
  
 -void kvm_mmu_clear_all_pte_masks(void)
 +static void kvm_mmu_clear_all_pte_masks(void)
  {
        shadow_user_mask = 0;
        shadow_accessed_mask = 0;
@@@ -3395,7 -3395,7 +3395,7 @@@ static int mmu_alloc_direct_roots(struc
                spin_lock(&vcpu->kvm->mmu_lock);
                if(make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
 -                      return 1;
 +                      return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, 0, 0,
                                vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
                        spin_lock(&vcpu->kvm->mmu_lock);
                        if (make_mmu_pages_available(vcpu) < 0) {
                                spin_unlock(&vcpu->kvm->mmu_lock);
 -                              return 1;
 +                              return -ENOSPC;
                        }
                        sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
                                        i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@@ -3450,7 -3450,7 +3450,7 @@@ static int mmu_alloc_shadow_roots(struc
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
 -                      return 1;
 +                      return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
                                vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
                spin_lock(&vcpu->kvm->mmu_lock);
                if (make_mmu_pages_available(vcpu) < 0) {
                        spin_unlock(&vcpu->kvm->mmu_lock);
 -                      return 1;
 +                      return -ENOSPC;
                }
                sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
                                      0, ACC_ALL);
@@@ -4950,6 -4950,16 +4950,16 @@@ int kvm_mmu_page_fault(struct kvm_vcpu 
        if (mmio_info_in_cache(vcpu, cr2, direct))
                emulation_type = 0;
  emulate:
+       /*
+        * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
+        * This can happen if a guest gets a page-fault on data access but the HW
+        * table walker is not able to read the instruction page (e.g instruction
+        * page is not present in memory). In those cases we simply restart the
+        * guest.
+        */
+       if (unlikely(insn && !insn_len))
+               return 1;
        er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
  
        switch (er) {
diff --combined arch/x86/kvm/svm.c
index 14cca8c601a912a6155511f775f397a3c84d6ddc,ec5df575299525fe6ce587e42daae714248e3fd7..5d83f0474020c5f669273179c2f191fccb405ee7
  #include <linux/amd-iommu.h>
  #include <linux/hashtable.h>
  #include <linux/frame.h>
+ #include <linux/psp-sev.h>
+ #include <linux/file.h>
+ #include <linux/pagemap.h>
+ #include <linux/swap.h>
  
  #include <asm/apic.h>
  #include <asm/perf_event.h>
@@@ -211,6 -215,9 +215,9 @@@ struct vcpu_svm 
         */
        struct list_head ir_list;
        spinlock_t ir_list_lock;
+       /* which host CPU was used for running this vcpu */
+       unsigned int last_cpu;
  };
  
  /*
@@@ -284,8 -291,12 +291,12 @@@ module_param(vls, int, 0444)
  static int vgif = true;
  module_param(vgif, int, 0444);
  
+ /* enable/disable SEV support */
+ static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+ module_param(sev, int, 0444);
  static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
 -static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
  static void svm_complete_interrupts(struct vcpu_svm *svm);
  
  static int nested_svm_exit_handled(struct vcpu_svm *svm);
@@@ -319,6 -330,38 +330,38 @@@ enum 
  
  #define VMCB_AVIC_APIC_BAR_MASK               0xFFFFFFFFFF000ULL
  
+ static unsigned int max_sev_asid;
+ static unsigned int min_sev_asid;
+ static unsigned long *sev_asid_bitmap;
+ #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+ struct enc_region {
+       struct list_head list;
+       unsigned long npages;
+       struct page **pages;
+       unsigned long uaddr;
+       unsigned long size;
+ };
+ static inline bool svm_sev_enabled(void)
+ {
+       return max_sev_asid;
+ }
+ static inline bool sev_guest(struct kvm *kvm)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       return sev->active;
+ }
+ static inline int sev_get_asid(struct kvm *kvm)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       return sev->asid;
+ }
  static inline void mark_all_dirty(struct vmcb *vmcb)
  {
        vmcb->control.clean = 0;
@@@ -361,7 -404,6 +404,7 @@@ static void recalc_intercepts(struct vc
  {
        struct vmcb_control_area *c, *h;
        struct nested_state *g;
 +      u32 h_intercept_exceptions;
  
        mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
  
        h = &svm->nested.hsave->control;
        g = &svm->nested;
  
 +      /* No need to intercept #UD if L1 doesn't intercept it */
 +      h_intercept_exceptions =
 +              h->intercept_exceptions & ~(1U << UD_VECTOR);
 +
        c->intercept_cr = h->intercept_cr | g->intercept_cr;
        c->intercept_dr = h->intercept_dr | g->intercept_dr;
 -      c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
 +      c->intercept_exceptions =
 +              h_intercept_exceptions | g->intercept_exceptions;
        c->intercept = h->intercept | g->intercept;
  }
  
@@@ -531,9 -568,13 +574,13 @@@ struct svm_cpu_data 
        u64 asid_generation;
        u32 max_asid;
        u32 next_asid;
+       u32 min_asid;
        struct kvm_ldttss_desc *tss_desc;
  
        struct page *save_area;
+       /* index = sev_asid, value = vmcb pointer */
+       struct vmcb **sev_vmcbs;
  };
  
  static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@@ -788,6 -829,7 +835,7 @@@ static int svm_hardware_enable(void
        sd->asid_generation = 1;
        sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
        sd->next_asid = sd->max_asid + 1;
+       sd->min_asid = max_sev_asid + 1;
  
        gdt = get_current_gdt_rw();
        sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
@@@ -846,6 -888,7 +894,7 @@@ static void svm_cpu_uninit(int cpu
                return;
  
        per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+       kfree(sd->sev_vmcbs);
        __free_page(sd->save_area);
        kfree(sd);
  }
@@@ -859,11 -902,18 +908,18 @@@ static int svm_cpu_init(int cpu
        if (!sd)
                return -ENOMEM;
        sd->cpu = cpu;
-       sd->save_area = alloc_page(GFP_KERNEL);
        r = -ENOMEM;
+       sd->save_area = alloc_page(GFP_KERNEL);
        if (!sd->save_area)
                goto err_1;
  
+       if (svm_sev_enabled()) {
+               r = -ENOMEM;
+               sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL);
+               if (!sd->sev_vmcbs)
+                       goto err_1;
+       }
        per_cpu(svm_data, cpu) = sd;
  
        return 0;
@@@ -1051,6 -1101,48 +1107,48 @@@ static int avic_ga_log_notifier(u32 ga_
        return 0;
  }
  
+ static __init int sev_hardware_setup(void)
+ {
+       struct sev_user_data_status *status;
+       int rc;
+       /* Maximum number of encrypted guests supported simultaneously */
+       max_sev_asid = cpuid_ecx(0x8000001F);
+       if (!max_sev_asid)
+               return 1;
+       /* Minimum ASID value that should be used for SEV guest */
+       min_sev_asid = cpuid_edx(0x8000001F);
+       /* Initialize SEV ASID bitmap */
+       sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
+                               sizeof(unsigned long), GFP_KERNEL);
+       if (!sev_asid_bitmap)
+               return 1;
+       status = kmalloc(sizeof(*status), GFP_KERNEL);
+       if (!status)
+               return 1;
+       /*
+        * Check SEV platform status.
+        *
+        * PLATFORM_STATUS can be called in any state, if we failed to query
+        * the PLATFORM status then either PSP firmware does not support SEV
+        * feature or SEV firmware is dead.
+        */
+       rc = sev_platform_status(status, NULL);
+       if (rc)
+               goto err;
+       pr_info("SEV supported\n");
+ err:
+       kfree(status);
+       return rc;
+ }
  static __init int svm_hardware_setup(void)
  {
        int cpu;
                kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
        }
  
+       if (sev) {
+               if (boot_cpu_has(X86_FEATURE_SEV) &&
+                   IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
+                       r = sev_hardware_setup();
+                       if (r)
+                               sev = false;
+               } else {
+                       sev = false;
+               }
+       }
        for_each_possible_cpu(cpu) {
                r = svm_cpu_init(cpu);
                if (r)
@@@ -1147,6 -1250,9 +1256,9 @@@ static __exit void svm_hardware_unsetup
  {
        int cpu;
  
+       if (svm_sev_enabled())
+               kfree(sev_asid_bitmap);
        for_each_possible_cpu(cpu)
                svm_cpu_uninit(cpu);
  
@@@ -1299,7 -1405,7 +1411,7 @@@ static void init_vmcb(struct vcpu_svm *
  
        if (npt_enabled) {
                /* Setup VMCB for Nested Paging */
-               control->nested_ctl = 1;
+               control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
                clr_intercept(svm, INTERCEPT_INVLPG);
                clr_exception_intercept(svm, PF_VECTOR);
                clr_cr_intercept(svm, INTERCEPT_CR3_READ);
                svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
        }
  
+       if (sev_guest(svm->vcpu.kvm)) {
+               svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
+               clr_exception_intercept(svm, UD_VECTOR);
+       }
        mark_all_dirty(svm->vmcb);
  
        enable_gif(svm);
@@@ -1419,6 -1530,179 +1536,179 @@@ static int avic_init_backing_page(struc
        return 0;
  }
  
+ static void __sev_asid_free(int asid)
+ {
+       struct svm_cpu_data *sd;
+       int cpu, pos;
+       pos = asid - 1;
+       clear_bit(pos, sev_asid_bitmap);
+       for_each_possible_cpu(cpu) {
+               sd = per_cpu(svm_data, cpu);
+               sd->sev_vmcbs[pos] = NULL;
+       }
+ }
+ static void sev_asid_free(struct kvm *kvm)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       __sev_asid_free(sev->asid);
+ }
+ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+ {
+       struct sev_data_decommission *decommission;
+       struct sev_data_deactivate *data;
+       if (!handle)
+               return;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return;
+       /* deactivate handle */
+       data->handle = handle;
+       sev_guest_deactivate(data, NULL);
+       wbinvd_on_all_cpus();
+       sev_guest_df_flush(NULL);
+       kfree(data);
+       decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+       if (!decommission)
+               return;
+       /* decommission handle */
+       decommission->handle = handle;
+       sev_guest_decommission(decommission, NULL);
+       kfree(decommission);
+ }
+ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
+                                   unsigned long ulen, unsigned long *n,
+                                   int write)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       unsigned long npages, npinned, size;
+       unsigned long locked, lock_limit;
+       struct page **pages;
+       int first, last;
+       /* Calculate number of pages. */
+       first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
+       last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
+       npages = (last - first + 1);
+       locked = sev->pages_locked + npages;
+       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+       if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+               pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
+               return NULL;
+       }
+       /* Avoid using vmalloc for smaller buffers. */
+       size = npages * sizeof(struct page *);
+       if (size > PAGE_SIZE)
+               pages = vmalloc(size);
+       else
+               pages = kmalloc(size, GFP_KERNEL);
+       if (!pages)
+               return NULL;
+       /* Pin the user virtual address. */
+       npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
+       if (npinned != npages) {
+               pr_err("SEV: Failure locking %lu pages.\n", npages);
+               goto err;
+       }
+       *n = npages;
+       sev->pages_locked = locked;
+       return pages;
+ err:
+       if (npinned > 0)
+               release_pages(pages, npinned);
+       kvfree(pages);
+       return NULL;
+ }
+ static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
+                            unsigned long npages)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       release_pages(pages, npages);
+       kvfree(pages);
+       sev->pages_locked -= npages;
+ }
+ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
+ {
+       uint8_t *page_virtual;
+       unsigned long i;
+       if (npages == 0 || pages == NULL)
+               return;
+       for (i = 0; i < npages; i++) {
+               page_virtual = kmap_atomic(pages[i]);
+               clflush_cache_range(page_virtual, PAGE_SIZE);
+               kunmap_atomic(page_virtual);
+       }
+ }
+ static void __unregister_enc_region_locked(struct kvm *kvm,
+                                          struct enc_region *region)
+ {
+       /*
+        * The guest may change the memory encryption attribute from C=0 -> C=1
+        * or vice versa for this memory range. Lets make sure caches are
+        * flushed to ensure that guest data gets written into memory with
+        * correct C-bit.
+        */
+       sev_clflush_pages(region->pages, region->npages);
+       sev_unpin_memory(kvm, region->pages, region->npages);
+       list_del(&region->list);
+       kfree(region);
+ }
+ static void sev_vm_destroy(struct kvm *kvm)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct list_head *head = &sev->regions_list;
+       struct list_head *pos, *q;
+       if (!sev_guest(kvm))
+               return;
+       mutex_lock(&kvm->lock);
+       /*
+        * if userspace was terminated before unregistering the memory regions
+        * then lets unpin all the registered memory.
+        */
+       if (!list_empty(head)) {
+               list_for_each_safe(pos, q, head) {
+                       __unregister_enc_region_locked(kvm,
+                               list_entry(pos, struct enc_region, list));
+               }
+       }
+       mutex_unlock(&kvm->lock);
+       sev_unbind_asid(kvm, sev->handle);
+       sev_asid_free(kvm);
+ }
  static void avic_vm_destroy(struct kvm *kvm)
  {
        unsigned long flags;
        spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
  }
  
+ static void svm_vm_destroy(struct kvm *kvm)
+ {
+       avic_vm_destroy(kvm);
+       sev_vm_destroy(kvm);
+ }
  static int avic_vm_init(struct kvm *kvm)
  {
        unsigned long flags;
@@@ -2035,7 -2325,7 +2331,7 @@@ static int svm_set_cr4(struct kvm_vcpu 
                return 1;
  
        if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
 -              svm_flush_tlb(vcpu);
 +              svm_flush_tlb(vcpu, true);
  
        vcpu->arch.cr4 = cr4;
        if (!npt_enabled)
@@@ -2094,7 -2384,7 +2390,7 @@@ static void new_asid(struct vcpu_svm *s
  {
        if (sd->next_asid > sd->max_asid) {
                ++sd->asid_generation;
-               sd->next_asid = 1;
+               sd->next_asid = sd->min_asid;
                svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
        }
  
@@@ -2142,22 -2432,24 +2438,24 @@@ static void svm_set_dr7(struct kvm_vcp
  
  static int pf_interception(struct vcpu_svm *svm)
  {
-       u64 fault_address = svm->vmcb->control.exit_info_2;
+       u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
        u64 error_code = svm->vmcb->control.exit_info_1;
  
        return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
-                       svm->vmcb->control.insn_bytes,
+                       static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+                       svm->vmcb->control.insn_bytes : NULL,
                        svm->vmcb->control.insn_len);
  }
  
  static int npf_interception(struct vcpu_svm *svm)
  {
-       u64 fault_address = svm->vmcb->control.exit_info_2;
+       u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
        u64 error_code = svm->vmcb->control.exit_info_1;
  
        trace_kvm_page_fault(fault_address, error_code);
        return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
-                       svm->vmcb->control.insn_bytes,
+                       static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+                       svm->vmcb->control.insn_bytes : NULL,
                        svm->vmcb->control.insn_len);
  }
  
@@@ -2202,10 -2494,7 +2500,10 @@@ static int ud_interception(struct vcpu_
  {
        int er;
  
 +      WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
        er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
 +      if (er == EMULATE_USER_EXIT)
 +              return 0;
        if (er != EMULATE_DONE)
                kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
@@@ -2385,7 -2674,7 +2683,7 @@@ static void nested_svm_set_tdp_cr3(stru
  
        svm->vmcb->control.nested_cr3 = __sme_set(root);
        mark_dirty(svm->vmcb, VMCB_NPT);
 -      svm_flush_tlb(vcpu);
 +      svm_flush_tlb(vcpu, true);
  }
  
  static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@@ -2927,7 -3216,8 +3225,8 @@@ static bool nested_vmcb_checks(struct v
        if (vmcb->control.asid == 0)
                return false;
  
-       if (vmcb->control.nested_ctl && !npt_enabled)
+       if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
+           !npt_enabled)
                return false;
  
        return true;
@@@ -2941,7 -3231,7 +3240,7 @@@ static void enter_svm_guest_mode(struc
        else
                svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
  
-       if (nested_vmcb->control.nested_ctl) {
+       if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
                kvm_mmu_unload(&svm->vcpu);
                svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
                nested_svm_init_mmu_context(&svm->vcpu);
        svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
        svm->nested.intercept            = nested_vmcb->control.intercept;
  
 -      svm_flush_tlb(&svm->vcpu);
 +      svm_flush_tlb(&svm->vcpu, true);
        svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
        if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
                svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@@ -4362,12 -4652,39 +4661,39 @@@ static void reload_tss(struct kvm_vcpu 
        load_TR_desc();
  }
  
+ static void pre_sev_run(struct vcpu_svm *svm, int cpu)
+ {
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       int asid = sev_get_asid(svm->vcpu.kvm);
+       /* Assign the asid allocated with this SEV guest */
+       svm->vmcb->control.asid = asid;
+       /*
+        * Flush guest TLB:
+        *
+        * 1) when different VMCB for the same ASID is to be run on the same host CPU.
+        * 2) or this VMCB was executed on different host CPU in previous VMRUNs.
+        */
+       if (sd->sev_vmcbs[asid] == svm->vmcb &&
+           svm->last_cpu == cpu)
+               return;
+       svm->last_cpu = cpu;
+       sd->sev_vmcbs[asid] = svm->vmcb;
+       svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
+       mark_dirty(svm->vmcb, VMCB_ASID);
+ }
  static void pre_svm_run(struct vcpu_svm *svm)
  {
        int cpu = raw_smp_processor_id();
  
        struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
  
+       if (sev_guest(svm->vcpu.kvm))
+               return pre_sev_run(svm, cpu);
        /* FIXME: handle wraparound of asid_generation */
        if (svm->asid_generation != sd->asid_generation)
                new_asid(svm, sd);
@@@ -4785,7 -5102,7 +5111,7 @@@ static int svm_set_tss_addr(struct kvm 
        return 0;
  }
  
 -static void svm_flush_tlb(struct kvm_vcpu *vcpu)
 +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
  
@@@ -5076,7 -5393,7 +5402,7 @@@ static void svm_set_cr3(struct kvm_vcp
  
        svm->vmcb->save.cr3 = __sme_set(root);
        mark_dirty(svm->vmcb, VMCB_CR);
 -      svm_flush_tlb(vcpu);
 +      svm_flush_tlb(vcpu, true);
  }
  
  static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
        svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
        mark_dirty(svm->vmcb, VMCB_CR);
  
 -      svm_flush_tlb(vcpu);
 +      svm_flush_tlb(vcpu, true);
  }
  
  static int is_disabled(void)
@@@ -5176,6 -5493,12 +5502,12 @@@ static void svm_set_supported_cpuid(u3
                        entry->edx |= SVM_FEATURE_NPT;
  
                break;
+       case 0x8000001F:
+               /* Support memory encryption cpuid if host supports it */
+               if (boot_cpu_has(X86_FEATURE_SEV))
+                       cpuid(0x8000001f, &entry->eax, &entry->ebx,
+                               &entry->ecx, &entry->edx);
        }
  }
  
@@@ -5204,11 -5527,6 +5536,11 @@@ static bool svm_xsaves_supported(void
        return false;
  }
  
 +static bool svm_umip_emulated(void)
 +{
 +      return false;
 +}
 +
  static bool svm_has_wbinvd_exit(void)
  {
        return true;
@@@ -5510,6 -5828,828 +5842,828 @@@ static int enable_smi_window(struct kvm
        return 0;
  }
  
+ static int sev_asid_new(void)
+ {
+       int pos;
+       /*
+        * SEV-enabled guest must use asid from min_sev_asid to max_sev_asid.
+        */
+       pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
+       if (pos >= max_sev_asid)
+               return -EBUSY;
+       set_bit(pos, sev_asid_bitmap);
+       return pos + 1;
+ }
+ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       int asid, ret;
+       ret = -EBUSY;
+       asid = sev_asid_new();
+       if (asid < 0)
+               return ret;
+       ret = sev_platform_init(&argp->error);
+       if (ret)
+               goto e_free;
+       sev->active = true;
+       sev->asid = asid;
+       INIT_LIST_HEAD(&sev->regions_list);
+       return 0;
+ e_free:
+       __sev_asid_free(asid);
+       return ret;
+ }
+ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
+ {
+       struct sev_data_activate *data;
+       int asid = sev_get_asid(kvm);
+       int ret;
+       wbinvd_on_all_cpus();
+       ret = sev_guest_df_flush(error);
+       if (ret)
+               return ret;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+       /* activate ASID on the given handle */
+       data->handle = handle;
+       data->asid   = asid;
+       ret = sev_guest_activate(data, error);
+       kfree(data);
+       return ret;
+ }
+ static int __sev_issue_cmd(int fd, int id, void *data, int *error)
+ {
+       struct fd f;
+       int ret;
+       f = fdget(fd);
+       if (!f.file)
+               return -EBADF;
+       ret = sev_issue_cmd_external_user(f.file, id, data, error);
+       fdput(f);
+       return ret;
+ }
+ static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       return __sev_issue_cmd(sev->fd, id, data, error);
+ }
+ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct sev_data_launch_start *start;
+       struct kvm_sev_launch_start params;
+       void *dh_blob, *session_blob;
+       int *error = &argp->error;
+       int ret;
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+               return -EFAULT;
+       start = kzalloc(sizeof(*start), GFP_KERNEL);
+       if (!start)
+               return -ENOMEM;
+       dh_blob = NULL;
+       if (params.dh_uaddr) {
+               dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
+               if (IS_ERR(dh_blob)) {
+                       ret = PTR_ERR(dh_blob);
+                       goto e_free;
+               }
+               start->dh_cert_address = __sme_set(__pa(dh_blob));
+               start->dh_cert_len = params.dh_len;
+       }
+       session_blob = NULL;
+       if (params.session_uaddr) {
+               session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
+               if (IS_ERR(session_blob)) {
+                       ret = PTR_ERR(session_blob);
+                       goto e_free_dh;
+               }
+               start->session_address = __sme_set(__pa(session_blob));
+               start->session_len = params.session_len;
+       }
+       start->handle = params.handle;
+       start->policy = params.policy;
+       /* create memory encryption context */
+       ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
+       if (ret)
+               goto e_free_session;
+       /* Bind ASID to this guest */
+       ret = sev_bind_asid(kvm, start->handle, error);
+       if (ret)
+               goto e_free_session;
+       /* return handle to userspace */
+       params.handle = start->handle;
+       if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
+               sev_unbind_asid(kvm, start->handle);
+               ret = -EFAULT;
+               goto e_free_session;
+       }
+       sev->handle = start->handle;
+       sev->fd = argp->sev_fd;
+ e_free_session:
+       kfree(session_blob);
+ e_free_dh:
+       kfree(dh_blob);
+ e_free:
+       kfree(start);
+       return ret;
+ }
+ static int get_num_contig_pages(int idx, struct page **inpages,
+                               unsigned long npages)
+ {
+       unsigned long paddr, next_paddr;
+       int i = idx + 1, pages = 1;
+       /* find the number of contiguous pages starting from idx */
+       paddr = __sme_page_pa(inpages[idx]);
+       while (i < npages) {
+               next_paddr = __sme_page_pa(inpages[i++]);
+               if ((paddr + PAGE_SIZE) == next_paddr) {
+                       pages++;
+                       paddr = next_paddr;
+                       continue;
+               }
+               break;
+       }
+       return pages;
+ }
+ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct kvm_sev_launch_update_data params;
+       struct sev_data_launch_update_data *data;
+       struct page **inpages;
+       int i, ret, pages;
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+               return -EFAULT;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+       vaddr = params.uaddr;
+       size = params.len;
+       vaddr_end = vaddr + size;
+       /* Lock the user memory. */
+       inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+       if (!inpages) {
+               ret = -ENOMEM;
+               goto e_free;
+       }
+       /*
+        * The LAUNCH_UPDATE command will perform in-place encryption of the
+        * memory content (i.e it will write the same memory region with C=1).
+        * It's possible that the cache may contain the data with C=0, i.e.,
+        * unencrypted so invalidate it first.
+        */
+       sev_clflush_pages(inpages, npages);
+       for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
+               int offset, len;
+               /*
+                * If the user buffer is not page-aligned, calculate the offset
+                * within the page.
+                */
+               offset = vaddr & (PAGE_SIZE - 1);
+               /* Calculate the number of pages that can be encrypted in one go. */
+               pages = get_num_contig_pages(i, inpages, npages);
+               len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
+               data->handle = sev->handle;
+               data->len = len;
+               data->address = __sme_page_pa(inpages[i]) + offset;
+               ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
+               if (ret)
+                       goto e_unpin;
+               size -= len;
+               next_vaddr = vaddr + len;
+       }
+ e_unpin:
+       /* content of memory is updated, mark pages dirty */
+       for (i = 0; i < npages; i++) {
+               set_page_dirty_lock(inpages[i]);
+               mark_page_accessed(inpages[i]);
+       }
+       /* unlock the user pages */
+       sev_unpin_memory(kvm, inpages, npages);
+ e_free:
+       kfree(data);
+       return ret;
+ }
+ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct sev_data_launch_measure *data;
+       struct kvm_sev_launch_measure params;
+       void *blob = NULL;
+       int ret;
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+               return -EFAULT;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+       /* User wants to query the blob length */
+       if (!params.len)
+               goto cmd;
+       if (params.uaddr) {
+               if (params.len > SEV_FW_BLOB_MAX_SIZE) {
+                       ret = -EINVAL;
+                       goto e_free;
+               }
+               if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
+                       ret = -EFAULT;
+                       goto e_free;
+               }
+               ret = -ENOMEM;
+               blob = kmalloc(params.len, GFP_KERNEL);
+               if (!blob)
+                       goto e_free;
+               data->address = __psp_pa(blob);
+               data->len = params.len;
+       }
+ cmd:
+       data->handle = sev->handle;
+       ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
+       /*
+        * If we query the session length, FW responded with expected data.
+        */
+       if (!params.len)
+               goto done;
+       if (ret)
+               goto e_free_blob;
+       if (blob) {
+               if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+                       ret = -EFAULT;
+       }
+ done:
+       params.len = data->len;
+       if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+               ret = -EFAULT;
+ e_free_blob:
+       kfree(blob);
+ e_free:
+       kfree(data);
+       return ret;
+ }
+ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct sev_data_launch_finish *data;
+       int ret;
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+       data->handle = sev->handle;
+       ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
+       kfree(data);
+       return ret;
+ }
+ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct kvm_sev_guest_status params;
+       struct sev_data_guest_status *data;
+       int ret;
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+       data->handle = sev->handle;
+       ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
+       if (ret)
+               goto e_free;
+       params.policy = data->policy;
+       params.state = data->state;
+       params.handle = data->handle;
+       if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+               ret = -EFAULT;
+ e_free:
+       kfree(data);
+       return ret;
+ }
+ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
+                              unsigned long dst, int size,
+                              int *error, bool enc)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct sev_data_dbg *data;
+       int ret;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+       data->handle = sev->handle;
+       data->dst_addr = dst;
+       data->src_addr = src;
+       data->len = size;
+       ret = sev_issue_cmd(kvm,
+                           enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
+                           data, error);
+       kfree(data);
+       return ret;
+ }
+ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
+                            unsigned long dst_paddr, int sz, int *err)
+ {
+       int offset;
+       /*
+        * Its safe to read more than we are asked, caller should ensure that
+        * destination has enough space.
+        */
+       src_paddr = round_down(src_paddr, 16);
+       offset = src_paddr & 15;
+       sz = round_up(sz + offset, 16);
+       return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
+ }
+ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
+                                 unsigned long __user dst_uaddr,
+                                 unsigned long dst_paddr,
+                                 int size, int *err)
+ {
+       struct page *tpage = NULL;
+       int ret, offset;
+       /* if inputs are not 16-byte then use intermediate buffer */
+       if (!IS_ALIGNED(dst_paddr, 16) ||
+           !IS_ALIGNED(paddr,     16) ||
+           !IS_ALIGNED(size,      16)) {
+               tpage = (void *)alloc_page(GFP_KERNEL);
+               if (!tpage)
+                       return -ENOMEM;
+               dst_paddr = __sme_page_pa(tpage);
+       }
+       ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
+       if (ret)
+               goto e_free;
+       if (tpage) {
+               offset = paddr & 15;
+               if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
+                                page_address(tpage) + offset, size))
+                       ret = -EFAULT;
+       }
+ e_free:
+       if (tpage)
+               __free_page(tpage);
+       return ret;
+ }
+ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+                                 unsigned long __user vaddr,
+                                 unsigned long dst_paddr,
+                                 unsigned long __user dst_vaddr,
+                                 int size, int *error)
+ {
+       struct page *src_tpage = NULL;
+       struct page *dst_tpage = NULL;
+       int ret, len = size;
+       /* If source buffer is not aligned then use an intermediate buffer */
+       if (!IS_ALIGNED(vaddr, 16)) {
+               src_tpage = alloc_page(GFP_KERNEL);
+               if (!src_tpage)
+                       return -ENOMEM;
+               if (copy_from_user(page_address(src_tpage),
+                               (void __user *)(uintptr_t)vaddr, size)) {
+                       __free_page(src_tpage);
+                       return -EFAULT;
+               }
+               paddr = __sme_page_pa(src_tpage);
+       }
+       /*
+        *  If destination buffer or length is not aligned then do read-modify-write:
+        *   - decrypt destination in an intermediate buffer
+        *   - copy the source buffer in an intermediate buffer
+        *   - use the intermediate buffer as source buffer
+        */
+       if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+               int dst_offset;
+               dst_tpage = alloc_page(GFP_KERNEL);
+               if (!dst_tpage) {
+                       ret = -ENOMEM;
+                       goto e_free;
+               }
+               ret = __sev_dbg_decrypt(kvm, dst_paddr,
+                                       __sme_page_pa(dst_tpage), size, error);
+               if (ret)
+                       goto e_free;
+               /*
+                *  If source is kernel buffer then use memcpy() otherwise
+                *  copy_from_user().
+                */
+               dst_offset = dst_paddr & 15;
+               if (src_tpage)
+                       memcpy(page_address(dst_tpage) + dst_offset,
+                              page_address(src_tpage), size);
+               else {
+                       if (copy_from_user(page_address(dst_tpage) + dst_offset,
+                                          (void __user *)(uintptr_t)vaddr, size)) {
+                               ret = -EFAULT;
+                               goto e_free;
+                       }
+               }
+               paddr = __sme_page_pa(dst_tpage);
+               dst_paddr = round_down(dst_paddr, 16);
+               len = round_up(size, 16);
+       }
+       ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
+ e_free:
+       if (src_tpage)
+               __free_page(src_tpage);
+       if (dst_tpage)
+               __free_page(dst_tpage);
+       return ret;
+ }
+ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
+ {
+       unsigned long vaddr, vaddr_end, next_vaddr;
+       unsigned long dst_vaddr, dst_vaddr_end;
+       struct page **src_p, **dst_p;
+       struct kvm_sev_dbg debug;
+       unsigned long n;
+       int ret, size;
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+       if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
+               return -EFAULT;
+       vaddr = debug.src_uaddr;
+       size = debug.len;
+       vaddr_end = vaddr + size;
+       dst_vaddr = debug.dst_uaddr;
+       dst_vaddr_end = dst_vaddr + size;
+       for (; vaddr < vaddr_end; vaddr = next_vaddr) {
+               int len, s_off, d_off;
+               /* lock userspace source and destination page */
+               src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
+               if (!src_p)
+                       return -EFAULT;
+               dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+               if (!dst_p) {
+                       sev_unpin_memory(kvm, src_p, n);
+                       return -EFAULT;
+               }
+               /*
+                * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the
+                * memory content (i.e. they will write the same memory region with C=1).
+                * It's possible that the cache may contain the data with C=0, i.e.
+                * unencrypted, so invalidate it first.
+                */
+               sev_clflush_pages(src_p, 1);
+               sev_clflush_pages(dst_p, 1);
+               /*
+                * Since the user buffers may not be page-aligned, calculate the
+                * offsets within the page.
+                */
+               s_off = vaddr & ~PAGE_MASK;
+               d_off = dst_vaddr & ~PAGE_MASK;
+               len = min_t(size_t, (PAGE_SIZE - s_off), size);
+               if (dec)
+                       ret = __sev_dbg_decrypt_user(kvm,
+                                                    __sme_page_pa(src_p[0]) + s_off,
+                                                    dst_vaddr,
+                                                    __sme_page_pa(dst_p[0]) + d_off,
+                                                    len, &argp->error);
+               else
+                       ret = __sev_dbg_encrypt_user(kvm,
+                                                    __sme_page_pa(src_p[0]) + s_off,
+                                                    vaddr,
+                                                    __sme_page_pa(dst_p[0]) + d_off,
+                                                    dst_vaddr,
+                                                    len, &argp->error);
+               sev_unpin_memory(kvm, src_p, 1);
+               sev_unpin_memory(kvm, dst_p, 1);
+               if (ret)
+                       goto err;
+               next_vaddr = vaddr + len;
+               dst_vaddr = dst_vaddr + len;
+               size -= len;
+       }
+ err:
+       return ret;
+ }
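
For illustration, a minimal userspace sketch (not part of this patch) of how a
VMM such as QEMU might drive the debug path above through KVM_MEMORY_ENCRYPT_OP.
It assumes a uapi header that already carries the definitions added later in
this series; the fd names and error handling are illustrative assumptions.

	/* Hypothetical sketch: ask KVM to decrypt SEV guest memory for debugging.
	 * vm_fd is the KVM VM fd, sev_fd an open /dev/sev fd (both assumed).
	 */
	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int sev_dbg_decrypt(int vm_fd, int sev_fd, void *dst,
				   const void *guest_src, uint32_t len, uint32_t *fw_error)
	{
		struct kvm_sev_dbg dbg;
		struct kvm_sev_cmd cmd;
		int ret;

		memset(&dbg, 0, sizeof(dbg));
		dbg.src_uaddr = (uint64_t)(uintptr_t)guest_src;	/* encrypted guest pages */
		dbg.dst_uaddr = (uint64_t)(uintptr_t)dst;	/* plaintext destination */
		dbg.len = len;

		memset(&cmd, 0, sizeof(cmd));
		cmd.id = KVM_SEV_DBG_DECRYPT;
		cmd.data = (uint64_t)(uintptr_t)&dbg;
		cmd.sev_fd = sev_fd;

		/* KVM copies the struct back, so cmd.error carries the firmware status. */
		ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
		if (fw_error)
			*fw_error = cmd.error;
		return ret;
	}

Passing KVM_SEV_DBG_ENCRYPT instead selects the opposite direction, matching the
dec flag handled by sev_dbg_crypt() above.
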
+ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct sev_data_launch_secret *data;
+       struct kvm_sev_launch_secret params;
+       struct page **pages;
+       void *blob, *hdr;
+       unsigned long n;
+       int ret;
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+               return -EFAULT;
+       pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+       if (!pages)
+               return -ENOMEM;
+       /*
+        * The secret must be copied into a contiguous memory region, so verify
+        * that the userspace memory pages are contiguous before we issue the command.
+        */
+       if (get_num_contig_pages(0, pages, n) != n) {
+               ret = -EINVAL;
+               goto e_unpin_memory;
+       }
+       ret = -ENOMEM;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               goto e_unpin_memory;
+       blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
+       if (IS_ERR(blob)) {
+               ret = PTR_ERR(blob);
+               goto e_free;
+       }
+       data->trans_address = __psp_pa(blob);
+       data->trans_len = params.trans_len;
+       hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
+       if (IS_ERR(hdr)) {
+               ret = PTR_ERR(hdr);
+               goto e_free_blob;
+       }
+       data->hdr_address = __psp_pa(hdr);
+       data->hdr_len = params.hdr_len;
+       data->handle = sev->handle;
+       ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
+       kfree(hdr);
+ e_free_blob:
+       kfree(blob);
+ e_free:
+       kfree(data);
+ e_unpin_memory:
+       sev_unpin_memory(kvm, pages, n);
+       return ret;
+ }
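
A similarly hedged sketch of the userspace side of LAUNCH_SECRET. The packaged
secret (hdr/trans blobs) would come from the guest owner; every name below is
an illustrative assumption rather than code from this series.

	/* Hypothetical sketch: inject a secret into a launched SEV guest.
	 * guest_hva must point at guest memory backed by contiguous pages,
	 * as enforced by sev_launch_secret() above.
	 */
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int sev_inject_secret(int vm_fd, int sev_fd,
				     void *hdr, uint32_t hdr_len,
				     void *trans, uint32_t trans_len,
				     void *guest_hva, uint32_t guest_len,
				     uint32_t *fw_error)
	{
		struct kvm_sev_launch_secret secret = {
			.hdr_uaddr   = (uint64_t)(uintptr_t)hdr,
			.hdr_len     = hdr_len,
			.guest_uaddr = (uint64_t)(uintptr_t)guest_hva,
			.guest_len   = guest_len,
			.trans_uaddr = (uint64_t)(uintptr_t)trans,
			.trans_len   = trans_len,
		};
		struct kvm_sev_cmd cmd = {
			.id     = KVM_SEV_LAUNCH_SECRET,
			.data   = (uint64_t)(uintptr_t)&secret,
			.sev_fd = sev_fd,
		};
		int ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);

		if (fw_error)
			*fw_error = cmd.error;
		return ret;
	}
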
+ static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
+ {
+       struct kvm_sev_cmd sev_cmd;
+       int r;
+       if (!svm_sev_enabled())
+               return -ENOTTY;
+       if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
+               return -EFAULT;
+       mutex_lock(&kvm->lock);
+       switch (sev_cmd.id) {
+       case KVM_SEV_INIT:
+               r = sev_guest_init(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_LAUNCH_START:
+               r = sev_launch_start(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_LAUNCH_UPDATE_DATA:
+               r = sev_launch_update_data(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_LAUNCH_MEASURE:
+               r = sev_launch_measure(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_LAUNCH_FINISH:
+               r = sev_launch_finish(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_GUEST_STATUS:
+               r = sev_guest_status(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_DBG_DECRYPT:
+               r = sev_dbg_crypt(kvm, &sev_cmd, true);
+               break;
+       case KVM_SEV_DBG_ENCRYPT:
+               r = sev_dbg_crypt(kvm, &sev_cmd, false);
+               break;
+       case KVM_SEV_LAUNCH_SECRET:
+               r = sev_launch_secret(kvm, &sev_cmd);
+               break;
+       default:
+               r = -EINVAL;
+               goto out;
+       }
+       if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
+               r = -EFAULT;
+ out:
+       mutex_unlock(&kvm->lock);
+       return r;
+ }
+ static int svm_register_enc_region(struct kvm *kvm,
+                                  struct kvm_enc_region *range)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct enc_region *region;
+       int ret = 0;
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+       region = kzalloc(sizeof(*region), GFP_KERNEL);
+       if (!region)
+               return -ENOMEM;
+       region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
+       if (!region->pages) {
+               ret = -ENOMEM;
+               goto e_free;
+       }
+       /*
+        * The guest may change the memory encryption attribute from C=0 -> C=1
+        * or vice versa for this memory range. Let's make sure the caches are
+        * flushed to ensure that guest data gets written into memory with the
+        * correct C-bit.
+        */
+       sev_clflush_pages(region->pages, region->npages);
+       region->uaddr = range->addr;
+       region->size = range->size;
+       mutex_lock(&kvm->lock);
+       list_add_tail(&region->list, &sev->regions_list);
+       mutex_unlock(&kvm->lock);
+       return ret;
+ e_free:
+       kfree(region);
+       return ret;
+ }
+ static struct enc_region *
+ find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct list_head *head = &sev->regions_list;
+       struct enc_region *i;
+       list_for_each_entry(i, head, list) {
+               if (i->uaddr == range->addr &&
+                   i->size == range->size)
+                       return i;
+       }
+       return NULL;
+ }
+ static int svm_unregister_enc_region(struct kvm *kvm,
+                                    struct kvm_enc_region *range)
+ {
+       struct enc_region *region;
+       int ret;
+       mutex_lock(&kvm->lock);
+       if (!sev_guest(kvm)) {
+               ret = -ENOTTY;
+               goto failed;
+       }
+       region = find_enc_region(kvm, range);
+       if (!region) {
+               ret = -EINVAL;
+               goto failed;
+       }
+       __unregister_enc_region_locked(kvm, region);
+       mutex_unlock(&kvm->lock);
+       return 0;
+ failed:
+       mutex_unlock(&kvm->lock);
+       return ret;
+ }
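
For completeness, a hypothetical userspace sketch of the region ioctls above. A
VMM would typically register each encrypted guest RAM mapping once at setup and
unregister it on teardown; as find_enc_region() shows, the range passed to
unregister must match a registered region exactly.

	/* Hypothetical sketch: (un)register an encrypted guest RAM range.
	 * hva/size describe the userspace mapping that backs guest memory.
	 */
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int sev_reg_ram(int vm_fd, void *hva, uint64_t size, int unreg)
	{
		struct kvm_enc_region range = {
			.addr = (uint64_t)(uintptr_t)hva,
			.size = size,
		};

		return ioctl(vm_fd, unreg ? KVM_MEMORY_ENCRYPT_UNREG_REGION
					  : KVM_MEMORY_ENCRYPT_REG_REGION, &range);
	}
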
  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
        .cpu_has_kvm_support = has_svm,
        .disabled_by_bios = is_disabled,
        .vcpu_reset = svm_vcpu_reset,
  
        .vm_init = avic_vm_init,
-       .vm_destroy = avic_vm_destroy,
+       .vm_destroy = svm_vm_destroy,
  
        .prepare_guest_switch = svm_prepare_guest_switch,
        .vcpu_load = svm_vcpu_load,
        .invpcid_supported = svm_invpcid_supported,
        .mpx_supported = svm_mpx_supported,
        .xsaves_supported = svm_xsaves_supported,
 +      .umip_emulated = svm_umip_emulated,
  
        .set_supported_cpuid = svm_set_supported_cpuid,
  
        .pre_enter_smm = svm_pre_enter_smm,
        .pre_leave_smm = svm_pre_leave_smm,
        .enable_smi_window = enable_smi_window,
+       .mem_enc_op = svm_mem_enc_op,
+       .mem_enc_reg_region = svm_register_enc_region,
+       .mem_enc_unreg_region = svm_unregister_enc_region,
  };
  
  static int __init svm_init(void)
diff --combined arch/x86/kvm/x86.c
index 56d8a1e11e50b04d6d61cbf3ba6cf93c58755c9d,926f55cecf2e3ef2e2fd6544cc318ff54ddcb3c4..6d2e1459adc97963410893bbbca0cb64539879b2
@@@ -107,9 -107,6 +107,9 @@@ EXPORT_SYMBOL_GPL(kvm_x86_ops)
  static bool __read_mostly ignore_msrs = 0;
  module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
  
 +static bool __read_mostly report_ignored_msrs = true;
 +module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
 +
  unsigned int min_timer_period_us = 500;
  module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
  
@@@ -702,8 -699,7 +702,8 @@@ static void kvm_load_guest_xcr0(struct 
        if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
                        !vcpu->guest_xcr0_loaded) {
                /* kvm_set_xcr() also depends on this */
 -              xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
 +              if (vcpu->arch.xcr0 != host_xcr0)
 +                      xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
                vcpu->guest_xcr0_loaded = 1;
        }
  }
@@@ -795,9 -791,6 +795,9 @@@ int kvm_set_cr4(struct kvm_vcpu *vcpu, 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
                return 1;
  
 +      if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
 +              return 1;
 +
        if (is_long_mode(vcpu)) {
                if (!(cr4 & X86_CR4_PAE))
                        return 1;
@@@ -1040,7 -1033,6 +1040,7 @@@ static u32 emulated_msrs[] = 
        MSR_IA32_MCG_CTL,
        MSR_IA32_MCG_EXT_CTL,
        MSR_IA32_SMBASE,
 +      MSR_SMI_COUNT,
        MSR_PLATFORM_INFO,
        MSR_MISC_FEATURES_ENABLES,
  };
@@@ -1803,13 -1795,10 +1803,13 @@@ u64 get_kvmclock_ns(struct kvm *kvm
        /* both __this_cpu_read() and rdtsc() should be on the same cpu */
        get_cpu();
  
 -      kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
 -                         &hv_clock.tsc_shift,
 -                         &hv_clock.tsc_to_system_mul);
 -      ret = __pvclock_read_cycles(&hv_clock, rdtsc());
 +      if (__this_cpu_read(cpu_tsc_khz)) {
 +              kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
 +                                 &hv_clock.tsc_shift,
 +                                 &hv_clock.tsc_to_system_mul);
 +              ret = __pvclock_read_cycles(&hv_clock, rdtsc());
 +      } else
 +              ret = ktime_get_boot_ns() + ka->kvmclock_offset;
  
        put_cpu();
  
@@@ -1841,9 -1830,6 +1841,9 @@@ static void kvm_setup_pvclock_page(stru
         */
        BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
  
 +      if (guest_hv_clock.version & 1)
 +              ++guest_hv_clock.version;  /* first time write, random junk */
 +
        vcpu->hv_clock.version = guest_hv_clock.version + 1;
        kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
                                &vcpu->hv_clock,
@@@ -2123,12 -2109,6 +2123,12 @@@ static void kvmclock_reset(struct kvm_v
        vcpu->arch.pv_time_enabled = false;
  }
  
 +static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
 +{
 +      ++vcpu->stat.tlb_flush;
 +      kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);
 +}
 +
  static void record_steal_time(struct kvm_vcpu *vcpu)
  {
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
                return;
  
 -      vcpu->arch.st.steal.preempted = 0;
 +      /*
 +       * Doing a TLB flush here, on the guest's behalf, can avoid
 +       * expensive IPIs.
 +       */
 +      if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
 +              kvm_vcpu_flush_tlb(vcpu, false);
  
        if (vcpu->arch.st.steal.version & 1)
                vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
@@@ -2244,11 -2219,6 +2244,11 @@@ int kvm_set_msr_common(struct kvm_vcpu 
                        return 1;
                vcpu->arch.smbase = data;
                break;
 +      case MSR_SMI_COUNT:
 +              if (!msr_info->host_initiated)
 +                      return 1;
 +              vcpu->arch.smi_count = data;
 +              break;
        case MSR_KVM_WALL_CLOCK_NEW:
        case MSR_KVM_WALL_CLOCK:
                vcpu->kvm->arch.wall_clock = data;
                /* Drop writes to this legacy MSR -- see rdmsr
                 * counterpart for further detail.
                 */
 -              vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
 +              if (report_ignored_msrs)
 +                      vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
 +                              msr, data);
                break;
        case MSR_AMD64_OSVW_ID_LENGTH:
                if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
                                    msr, data);
                        return 1;
                } else {
 -                      vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
 -                                  msr, data);
 +                      if (report_ignored_msrs)
 +                              vcpu_unimpl(vcpu,
 +                                      "ignored wrmsr: 0x%x data 0x%llx\n",
 +                                      msr, data);
                        break;
                }
        }
@@@ -2523,9 -2489,6 +2523,9 @@@ int kvm_get_msr_common(struct kvm_vcpu 
                        return 1;
                msr_info->data = vcpu->arch.smbase;
                break;
 +      case MSR_SMI_COUNT:
 +              msr_info->data = vcpu->arch.smi_count;
 +              break;
        case MSR_IA32_PERF_STATUS:
                /* TSC increment by tick */
                msr_info->data = 1000ULL;
                                               msr_info->index);
                        return 1;
                } else {
 -                      vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
 +                      if (report_ignored_msrs)
 +                              vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
 +                                      msr_info->index);
                        msr_info->data = 0;
                }
                break;
@@@ -2928,7 -2889,7 +2928,7 @@@ static void kvm_steal_time_set_preempte
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
  
 -      vcpu->arch.st.steal.preempted = 1;
 +      vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
  
        kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
                        &vcpu->arch.st.steal.preempted,
@@@ -2961,13 -2922,8 +2961,13 @@@ void kvm_arch_vcpu_put(struct kvm_vcpu 
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        pagefault_enable();
        kvm_x86_ops->vcpu_put(vcpu);
 -      kvm_put_guest_fpu(vcpu);
        vcpu->arch.last_host_tsc = rdtsc();
 +      /*
 +       * If userspace has set any breakpoints or watchpoints, dr6 is restored
 +       * on every vmexit, but if not, we might have a stale dr6 from the
 +       * guest. do_debug expects dr6 to be cleared after it runs; do the same.
 +       */
 +      set_debugreg(0, 6);
  }
  
  static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@@ -3502,8 -3458,6 +3502,8 @@@ long kvm_arch_vcpu_ioctl(struct file *f
                void *buffer;
        } u;
  
 +      vcpu_load(vcpu);
 +
        u.buffer = NULL;
        switch (ioctl) {
        case KVM_GET_LAPIC: {
                if (!lapic_in_kernel(vcpu))
                        goto out;
                u.lapic = memdup_user(argp, sizeof(*u.lapic));
 -              if (IS_ERR(u.lapic))
 -                      return PTR_ERR(u.lapic);
 +              if (IS_ERR(u.lapic)) {
 +                      r = PTR_ERR(u.lapic);
 +                      goto out_nofree;
 +              }
  
                r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
                break;
        }
        case KVM_SET_XSAVE: {
                u.xsave = memdup_user(argp, sizeof(*u.xsave));
 -              if (IS_ERR(u.xsave))
 -                      return PTR_ERR(u.xsave);
 +              if (IS_ERR(u.xsave)) {
 +                      r = PTR_ERR(u.xsave);
 +                      goto out_nofree;
 +              }
  
                r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
                break;
        }
        case KVM_SET_XCRS: {
                u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
 -              if (IS_ERR(u.xcrs))
 -                      return PTR_ERR(u.xcrs);
 +              if (IS_ERR(u.xcrs)) {
 +                      r = PTR_ERR(u.xcrs);
 +                      goto out_nofree;
 +              }
  
                r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
                break;
        }
  out:
        kfree(u.buffer);
 +out_nofree:
 +      vcpu_put(vcpu);
        return r;
  }
  
@@@ -4335,6 -4281,36 +4335,36 @@@ set_identity_unlock
                r = kvm_vm_ioctl_enable_cap(kvm, &cap);
                break;
        }
+       case KVM_MEMORY_ENCRYPT_OP: {
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_op)
+                       r = kvm_x86_ops->mem_enc_op(kvm, argp);
+               break;
+       }
+       case KVM_MEMORY_ENCRYPT_REG_REGION: {
+               struct kvm_enc_region region;
+               r = -EFAULT;
+               if (copy_from_user(&region, argp, sizeof(region)))
+                       goto out;
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_reg_region)
+                       r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
+               break;
+       }
+       case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+               struct kvm_enc_region region;
+               r = -EFAULT;
+               if (copy_from_user(&region, argp, sizeof(region)))
+                       goto out;
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_unreg_region)
+                       r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
@@@ -5291,6 -5267,17 +5321,6 @@@ static void emulator_halt(struct x86_em
        emul_to_vcpu(ctxt)->arch.halt_request = 1;
  }
  
 -static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
 -{
 -      preempt_disable();
 -      kvm_load_guest_fpu(emul_to_vcpu(ctxt));
 -}
 -
 -static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
 -{
 -      preempt_enable();
 -}
 -
  static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
                              struct x86_instruction_info *info,
                              enum x86_intercept_stage stage)
@@@ -5368,6 -5355,8 +5398,6 @@@ static const struct x86_emulate_ops emu
        .halt                = emulator_halt,
        .wbinvd              = emulator_wbinvd,
        .fix_hypercall       = emulator_fix_hypercall,
 -      .get_fpu             = emulator_get_fpu,
 -      .put_fpu             = emulator_put_fpu,
        .intercept           = emulator_intercept,
        .get_cpuid           = emulator_get_cpuid,
        .set_nmi_mask        = emulator_set_nmi_mask,
@@@ -5471,7 -5460,7 +5501,7 @@@ static int handle_emulation_failure(str
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                vcpu->run->internal.ndata = 0;
 -              r = EMULATE_FAIL;
 +              r = EMULATE_USER_EXIT;
        }
        kvm_queue_exception(vcpu, UD_VECTOR);
  
@@@ -5763,8 -5752,6 +5793,8 @@@ int x86_emulate_instruction(struct kvm_
                        if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                                emulation_type))
                                return EMULATE_DONE;
 +                      if (ctxt->have_exception && inject_emulated_exception(vcpu))
 +                              return EMULATE_DONE;
                        if (emulation_type & EMULTYPE_SKIP)
                                return EMULATE_FAIL;
                        return handle_emulation_failure(vcpu);
@@@ -6488,7 -6475,6 +6518,7 @@@ static int inject_pending_event(struct 
                kvm_x86_ops->queue_exception(vcpu);
        } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
                vcpu->arch.smi_pending = false;
 +              ++vcpu->arch.smi_count;
                enter_smm(vcpu);
        } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
                --vcpu->arch.nmi_pending;
@@@ -6799,18 -6785,10 +6829,18 @@@ static void vcpu_scan_ioapic(struct kvm
        kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
  }
  
 -static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 +              unsigned long start, unsigned long end)
  {
 -      ++vcpu->stat.tlb_flush;
 -      kvm_x86_ops->tlb_flush(vcpu);
 +      unsigned long apic_address;
 +
 +      /*
 +       * The physical address of apic access page is stored in the VMCS.
 +       * Update it when it becomes invalid.
 +       */
 +      apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
 +      if (start <= apic_address && apic_address < end)
 +              kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
  }
  
  void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
@@@ -6867,7 -6845,7 +6897,7 @@@ static int vcpu_enter_guest(struct kvm_
                if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
                        kvm_mmu_sync_roots(vcpu);
                if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
 -                      kvm_vcpu_flush_tlb(vcpu);
 +                      kvm_vcpu_flush_tlb(vcpu, true);
                if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                        vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                        r = 0;
        preempt_disable();
  
        kvm_x86_ops->prepare_guest_switch(vcpu);
 -      kvm_load_guest_fpu(vcpu);
  
        /*
         * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
        }
  
        trace_kvm_entry(vcpu->vcpu_id);
 -      wait_lapic_expire(vcpu);
 +      if (lapic_timer_advance_ns)
 +              wait_lapic_expire(vcpu);
        guest_enter_irqoff();
  
        if (unlikely(vcpu->arch.switch_db_regs)) {
@@@ -7300,11 -7278,14 +7330,11 @@@ static int complete_emulated_mmio(struc
  
  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
  {
 -      struct fpu *fpu = &current->thread.fpu;
        int r;
 -      sigset_t sigsaved;
 -
 -      fpu__initialize(fpu);
  
 -      if (vcpu->sigset_active)
 -              sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
 +      vcpu_load(vcpu);
 +      kvm_sigset_activate(vcpu);
 +      kvm_load_guest_fpu(vcpu);
  
        if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                if (kvm_run->immediate_exit) {
                r = vcpu_run(vcpu);
  
  out:
 +      kvm_put_guest_fpu(vcpu);
        post_kvm_run_save(vcpu);
 -      if (vcpu->sigset_active)
 -              sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 +      kvm_sigset_deactivate(vcpu);
  
 +      vcpu_put(vcpu);
        return r;
  }
  
  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
  {
 +      vcpu_load(vcpu);
 +
        if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
                /*
                 * We are here if userspace calls get_regs() in the middle of
        regs->rip = kvm_rip_read(vcpu);
        regs->rflags = kvm_get_rflags(vcpu);
  
 +      vcpu_put(vcpu);
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
  {
 +      vcpu_load(vcpu);
 +
        vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
        vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
  
  #endif
  
        kvm_rip_write(vcpu, regs->rip);
 -      kvm_set_rflags(vcpu, regs->rflags);
 +      kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
  
        vcpu->arch.exception.pending = false;
  
        kvm_make_request(KVM_REQ_EVENT, vcpu);
  
 +      vcpu_put(vcpu);
        return 0;
  }
  
@@@ -7447,8 -7421,6 +7477,8 @@@ int kvm_arch_vcpu_ioctl_get_sregs(struc
  {
        struct desc_ptr dt;
  
 +      vcpu_load(vcpu);
 +
        kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
        kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
        kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
                set_bit(vcpu->arch.interrupt.nr,
                        (unsigned long *)sregs->interrupt_bitmap);
  
 +      vcpu_put(vcpu);
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
  {
 +      vcpu_load(vcpu);
 +
        kvm_apic_accept_events(vcpu);
        if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
                                        vcpu->arch.pv.pv_unhalted)
        else
                mp_state->mp_state = vcpu->arch.mp_state;
  
 +      vcpu_put(vcpu);
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
  {
 +      int ret = -EINVAL;
 +
 +      vcpu_load(vcpu);
 +
        if (!lapic_in_kernel(vcpu) &&
            mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
 -              return -EINVAL;
 +              goto out;
  
        /* INITs are latched while in SMM */
        if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
            (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
             mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
 -              return -EINVAL;
 +              goto out;
  
        if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
                vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
        } else
                vcpu->arch.mp_state = mp_state->mp_state;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
 -      return 0;
 +
 +      ret = 0;
 +out:
 +      vcpu_put(vcpu);
 +      return ret;
  }
  
  int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
@@@ -7558,18 -7518,15 +7588,18 @@@ int kvm_arch_vcpu_ioctl_set_sregs(struc
        int mmu_reset_needed = 0;
        int pending_vec, max_bits, idx;
        struct desc_ptr dt;
 +      int ret = -EINVAL;
 +
 +      vcpu_load(vcpu);
  
        if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
                        (sregs->cr4 & X86_CR4_OSXSAVE))
 -              return -EINVAL;
 +              goto out;
  
        apic_base_msr.data = sregs->apic_base;
        apic_base_msr.host_initiated = true;
        if (kvm_set_apic_base(vcpu, &apic_base_msr))
 -              return -EINVAL;
 +              goto out;
  
        dt.size = sregs->idt.limit;
        dt.address = sregs->idt.base;
  
        kvm_make_request(KVM_REQ_EVENT, vcpu);
  
 -      return 0;
 +      ret = 0;
 +out:
 +      vcpu_put(vcpu);
 +      return ret;
  }
  
  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
        unsigned long rflags;
        int i, r;
  
 +      vcpu_load(vcpu);
 +
        if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
                r = -EBUSY;
                if (vcpu->arch.exception.pending)
        r = 0;
  
  out:
 -
 +      vcpu_put(vcpu);
        return r;
  }
  
@@@ -7708,8 -7660,6 +7738,8 @@@ int kvm_arch_vcpu_ioctl_translate(struc
        gpa_t gpa;
        int idx;
  
 +      vcpu_load(vcpu);
 +
        idx = srcu_read_lock(&vcpu->kvm->srcu);
        gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        tr->writeable = 1;
        tr->usermode = 0;
  
 +      vcpu_put(vcpu);
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 -      struct fxregs_state *fxsave =
 -                      &vcpu->arch.guest_fpu.state.fxsave;
 +      struct fxregs_state *fxsave;
 +
 +      vcpu_load(vcpu);
  
 +      fxsave = &vcpu->arch.guest_fpu.state.fxsave;
        memcpy(fpu->fpr, fxsave->st_space, 128);
        fpu->fcw = fxsave->cwd;
        fpu->fsw = fxsave->swd;
        fpu->last_dp = fxsave->rdp;
        memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
  
 +      vcpu_put(vcpu);
        return 0;
  }
  
  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
  {
 -      struct fxregs_state *fxsave =
 -                      &vcpu->arch.guest_fpu.state.fxsave;
 +      struct fxregs_state *fxsave;
 +
 +      vcpu_load(vcpu);
 +
 +      fxsave = &vcpu->arch.guest_fpu.state.fxsave;
  
        memcpy(fxsave->st_space, fpu->fpr, 128);
        fxsave->cwd = fpu->fcw;
        fxsave->rdp = fpu->last_dp;
        memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
  
 +      vcpu_put(vcpu);
        return 0;
  }
  
@@@ -7778,25 -7720,32 +7808,25 @@@ static void fx_init(struct kvm_vcpu *vc
        vcpu->arch.cr0 |= X86_CR0_ET;
  }
  
 +/* Swap (qemu) user FPU context for the guest FPU context. */
  void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
  {
 -      if (vcpu->guest_fpu_loaded)
 -              return;
 -
 -      /*
 -       * Restore all possible states in the guest,
 -       * and assume host would use all available bits.
 -       * Guest xcr0 would be loaded later.
 -       */
 -      vcpu->guest_fpu_loaded = 1;
 -      __kernel_fpu_begin();
 +      preempt_disable();
 +      copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
        /* PKRU is separately restored in kvm_x86_ops->run.  */
        __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
                                ~XFEATURE_MASK_PKRU);
 +      preempt_enable();
        trace_kvm_fpu(1);
  }
  
 +/* When vcpu_run ends, restore user space FPU context. */
  void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
  {
 -      if (!vcpu->guest_fpu_loaded)
 -              return;
 -
 -      vcpu->guest_fpu_loaded = 0;
 +      preempt_disable();
        copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
 -      __kernel_fpu_end();
 +      copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
 +      preempt_enable();
        ++vcpu->stat.fpu_reload;
        trace_kvm_fpu(0);
  }
@@@ -7828,12 -7777,16 +7858,12 @@@ struct kvm_vcpu *kvm_arch_vcpu_create(s
  
  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
  {
 -      int r;
 -
        kvm_vcpu_mtrr_init(vcpu);
 -      r = vcpu_load(vcpu);
 -      if (r)
 -              return r;
 +      vcpu_load(vcpu);
        kvm_vcpu_reset(vcpu, false);
        kvm_mmu_setup(vcpu);
        vcpu_put(vcpu);
 -      return r;
 +      return 0;
  }
  
  void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
  
        kvm_hv_vcpu_postcreate(vcpu);
  
 -      if (vcpu_load(vcpu))
 +      if (mutex_lock_killable(&vcpu->mutex))
                return;
 +      vcpu_load(vcpu);
        msr.data = 0x0;
        msr.index = MSR_IA32_TSC;
        msr.host_initiated = true;
        kvm_write_tsc(vcpu, &msr);
        vcpu_put(vcpu);
 +      mutex_unlock(&vcpu->mutex);
  
        if (!kvmclock_periodic_sync)
                return;
  
  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
  {
 -      int r;
        vcpu->arch.apf.msr_val = 0;
  
 -      r = vcpu_load(vcpu);
 -      BUG_ON(r);
 +      vcpu_load(vcpu);
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
  
@@@ -7876,7 -7829,6 +7906,7 @@@ void kvm_vcpu_reset(struct kvm_vcpu *vc
        vcpu->arch.hflags = 0;
  
        vcpu->arch.smi_pending = 0;
 +      vcpu->arch.smi_count = 0;
        atomic_set(&vcpu->arch.nmi_queued, 0);
        vcpu->arch.nmi_pending = 0;
        vcpu->arch.nmi_injected = false;
                 * Avoid having the INIT path from kvm_apic_has_events() run with
                 * the guest FPU loaded, which would not let userspace fix the state.
                 */
 -              kvm_put_guest_fpu(vcpu);
 +              if (init_event)
 +                      kvm_put_guest_fpu(vcpu);
                mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
                                        XFEATURE_MASK_BNDREGS);
                if (mpx_state_buffer)
                                        XFEATURE_MASK_BNDCSR);
                if (mpx_state_buffer)
                        memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
 +              if (init_event)
 +                      kvm_load_guest_fpu(vcpu);
        }
  
        if (!init_event) {
@@@ -8236,7 -8185,9 +8266,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
  
  static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
  {
 -      int r;
 -      r = vcpu_load(vcpu);
 -      BUG_ON(r);
 +      vcpu_load(vcpu);
        kvm_mmu_unload(vcpu);
        vcpu_put(vcpu);
  }
diff --combined include/uapi/linux/kvm.h
index 496e59a2738ba99308f438e1f0509e66e17086cb,571431d3384b40c703438d933043aa037acb1993..62c564dd4aa194e0e78995ce03afa98a89316bc9
@@@ -630,9 -630,9 +630,9 @@@ struct kvm_s390_irq 
  
  struct kvm_s390_irq_state {
        __u64 buf;
 -      __u32 flags;
 +      __u32 flags;        /* will stay unused for compatibility reasons */
        __u32 len;
 -      __u32 reserved[4];
 +      __u32 reserved[4];  /* will stay unused for compatibility reasons */
  };
  
  /* for KVM_SET_GUEST_DEBUG */
@@@ -1358,6 -1358,96 +1358,96 @@@ struct kvm_s390_ucas_mapping 
  /* Available with KVM_CAP_S390_CMMA_MIGRATION */
  #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
  #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+ /* Memory Encryption Commands */
+ #define KVM_MEMORY_ENCRYPT_OP      _IOWR(KVMIO, 0xba, unsigned long)
+ struct kvm_enc_region {
+       __u64 addr;
+       __u64 size;
+ };
+ #define KVM_MEMORY_ENCRYPT_REG_REGION    _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+ #define KVM_MEMORY_ENCRYPT_UNREG_REGION  _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+ /* Secure Encrypted Virtualization command */
+ enum sev_cmd_id {
+       /* Guest initialization commands */
+       KVM_SEV_INIT = 0,
+       KVM_SEV_ES_INIT,
+       /* Guest launch commands */
+       KVM_SEV_LAUNCH_START,
+       KVM_SEV_LAUNCH_UPDATE_DATA,
+       KVM_SEV_LAUNCH_UPDATE_VMSA,
+       KVM_SEV_LAUNCH_SECRET,
+       KVM_SEV_LAUNCH_MEASURE,
+       KVM_SEV_LAUNCH_FINISH,
+       /* Guest migration commands (outgoing) */
+       KVM_SEV_SEND_START,
+       KVM_SEV_SEND_UPDATE_DATA,
+       KVM_SEV_SEND_UPDATE_VMSA,
+       KVM_SEV_SEND_FINISH,
+       /* Guest migration commands (incoming) */
+       KVM_SEV_RECEIVE_START,
+       KVM_SEV_RECEIVE_UPDATE_DATA,
+       KVM_SEV_RECEIVE_UPDATE_VMSA,
+       KVM_SEV_RECEIVE_FINISH,
+       /* Guest status and debug commands */
+       KVM_SEV_GUEST_STATUS,
+       KVM_SEV_DBG_DECRYPT,
+       KVM_SEV_DBG_ENCRYPT,
+       /* Guest certificates commands */
+       KVM_SEV_CERT_EXPORT,
+       KVM_SEV_NR_MAX,
+ };
+ struct kvm_sev_cmd {
+       __u32 id;
+       __u64 data;
+       __u32 error;
+       __u32 sev_fd;
+ };
+ struct kvm_sev_launch_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 dh_uaddr;
+       __u32 dh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+ };
+ struct kvm_sev_launch_update_data {
+       __u64 uaddr;
+       __u32 len;
+ };
+ struct kvm_sev_launch_secret {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+ };
+ struct kvm_sev_launch_measure {
+       __u64 uaddr;
+       __u32 len;
+ };
+ struct kvm_sev_guest_status {
+       __u32 handle;
+       __u32 policy;
+       __u32 state;
+ };
+ struct kvm_sev_dbg {
+       __u64 src_uaddr;
+       __u64 dst_uaddr;
+       __u32 len;
+ };
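
Taken together, the command ids and payload structs above suggest the launch
flow sketched below. The helper is a hedged illustration of how a VMM might
sequence it, with payload setup and error handling elided; the ordering shown
is an assumption about typical use, not something the header itself mandates.

	/* Hypothetical sketch: forward one SEV command id plus payload to KVM. */
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int sev_ioctl(int vm_fd, int sev_fd, uint32_t id, void *data,
			     uint32_t *fw_error)
	{
		struct kvm_sev_cmd cmd = {
			.id     = id,
			.data   = (uint64_t)(uintptr_t)data,
			.sev_fd = sev_fd,
		};
		int ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);

		if (fw_error)
			*fw_error = cmd.error;	/* SEV firmware status code */
		return ret;
	}

	/*
	 * Assumed launch sequence (payloads as defined above):
	 *   KVM_SEV_INIT               - no payload
	 *   KVM_SEV_LAUNCH_START       - struct kvm_sev_launch_start
	 *   KVM_SEV_LAUNCH_UPDATE_DATA - struct kvm_sev_launch_update_data, per region
	 *   KVM_SEV_LAUNCH_MEASURE     - struct kvm_sev_launch_measure
	 *   KVM_SEV_LAUNCH_FINISH      - no payload
	 */
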
  
  #define KVM_DEV_ASSIGN_ENABLE_IOMMU   (1 << 0)
  #define KVM_DEV_ASSIGN_PCI_2_3                (1 << 1)