Merge branch 'sev-v9-p2' of https://github.com/codomania/kvm

author Paolo Bonzini <pbonzini@redhat.com>

Tue, 16 Jan 2018 15:34:48 +0000 (16:34 +0100)

committer Radim Krčmář <rkrcmar@redhat.com>

Tue, 16 Jan 2018 15:35:32 +0000 (16:35 +0100)
author Paolo Bonzini <pbonzini@redhat.com>
Tue, 16 Jan 2018 15:34:48 +0000 (16:34 +0100)
committer Radim Krčmář <rkrcmar@redhat.com>
Tue, 16 Jan 2018 15:35:32 +0000 (16:35 +0100)
diff --combined Documentation/virtual/kvm/api.txt

index 57d3ee9e4bde2a799715ca75871fd61b27858b0a,c2ced6a44bbb452eaa95021acba191e354ab2d75..e5f1743e0b3eb4955357941766bb002da9ca2ba8
--- 1/Documentation/virtual/kvm/api.txt
--- 2/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@@ -2901,19 -2901,14 +2901,19 @@@ userspace buffer and its length
   
   struct kvm_s390_irq_state {
         __u64 buf;
- -      __u32 flags;
+ +      __u32 flags;        /* will stay unused for compatibility reasons */
         __u32 len;
- -      __u32 reserved[4];
+ +      __u32 reserved[4];  /* will stay unused for compatibility reasons */
   };
   
   Userspace passes in the above struct and for each pending interrupt a
   struct kvm_s390_irq is copied to the provided buffer.
   
+ +The structure contains a flags and a reserved field for future extensions. As
+ +the kernel never checked for flags == 0 and QEMU never pre-zeroed flags and
+ +reserved, these fields cannot be used in the future without breaking
+ +compatibility.
+ +
   If -ENOBUFS is returned the buffer provided was too small and userspace
   may retry with a bigger buffer.
   
@@@ -2937,14 -2932,10 +2937,14 @@@ containing a struct kvm_s390_irq_state
   
   struct kvm_s390_irq_state {
         __u64 buf;
+ +      __u32 flags;        /* will stay unused for compatibility reasons */
         __u32 len;
- -      __u32 pad;
+ +      __u32 reserved[4];  /* will stay unused for compatibility reasons */
   };
   
+ +The restrictions for flags and reserved apply as well.
+ +(see KVM_S390_GET_IRQ_STATE)
+ +
   The userspace memory referenced by buf contains a struct kvm_s390_irq
   for each interrupt to be injected into the guest.
   If one of the interrupts could not be injected for some reason the
@@@ -3403,6 -3394,56 +3403,56 @@@ invalid, if invalid pages are written t
   or if no page table is present for the addresses (e.g. when using
   hugepages).
   
+ 4.109 KVM_MEMORY_ENCRYPT_OP
+ 
+ Capability: basic
+ Architectures: x86
+ Type: system
+ Parameters: an opaque platform specific structure (in/out)
+ Returns: 0 on success; -1 on error
+ 
+ If the platform supports creating encrypted VMs then this ioctl can be used
+ for issuing platform-specific memory encryption commands to manage those
+ encrypted VMs.
+ 
+ Currently, this ioctl is used for issuing Secure Encrypted Virtualization
+ (SEV) commands on AMD Processors. The SEV commands are defined in
+ Documentation/virtual/kvm/amd-memory-encryption.txt.
+ 
+ 4.110 KVM_MEMORY_ENCRYPT_REG_REGION
+ 
+ Capability: basic
+ Architectures: x86
+ Type: system
+ Parameters: struct kvm_enc_region (in)
+ Returns: 0 on success; -1 on error
+ 
+ This ioctl can be used to register a guest memory region which may
+ contain encrypted data (e.g. guest RAM, SMRAM etc).
+ 
+ It is used in the SEV-enabled guest. When encryption is enabled, a guest
+ memory region may contain encrypted data. The SEV memory encryption
+ engine uses a tweak such that two identical plaintext pages, each at
+ different locations, will have differing ciphertexts. So swapping or
+ moving ciphertext of those pages will not result in plaintext being
+ swapped. So relocating (or migrating) physical backing pages for the SEV
+ guest will require some additional steps.
+ 
+ Note: The current SEV key management spec does not provide commands to
+ swap or migrate (move) ciphertext pages. Hence, for now we pin the guest
+ memory region registered with the ioctl.
+ 
+ 4.111 KVM_MEMORY_ENCRYPT_UNREG_REGION
+ 
+ Capability: basic
+ Architectures: x86
+ Type: system
+ Parameters: struct kvm_enc_region (in)
+ Returns: 0 on success; -1 on error
+ 
+ This ioctl can be used to unregister the guest memory region registered
+ with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
+ 
   5. The kvm_run structure
   ------------------------
   
diff --combined arch/x86/include/asm/cpufeatures.h

index 800104c8a3edfee7f4f52a33b8451a51ee0ed90a,19b955adacff28da2b2261d4338efcf690dd0115..19f35be95f168dc2cec30e07127520e14efe8ac5
--- 1/arch/x86/include/asm/cpufeatures.h
--- 2/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@@ -201,6 -201,7 +201,7 @@@
   #define X86_FEATURE_HW_PSTATE         ( 7*32+ 8) /* AMD HW-PState */
   #define X86_FEATURE_PROC_FEEDBACK     ( 7*32+ 9) /* AMD ProcFeedbackInterface */
   #define X86_FEATURE_SME                       ( 7*32+10) /* AMD Secure Memory Encryption */
+ #define X86_FEATURE_SEV                       ( 7*32+11) /* AMD Secure Encrypted Virtualization */
   
   #define X86_FEATURE_INTEL_PPIN                ( 7*32+14) /* Intel Processor Inventory Number */
   #define X86_FEATURE_INTEL_PT          ( 7*32+15) /* Intel Processor Trace */
@@@ -266,7 -267,6 +267,7 @@@
   /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
   #define X86_FEATURE_CLZERO            (13*32+ 0) /* CLZERO instruction */
   #define X86_FEATURE_IRPERF            (13*32+ 1) /* Instructions Retired Count */
+ +#define X86_FEATURE_XSAVEERPTR                (13*32+ 2) /* Always save/restore FP error pointers */
   
   /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
   #define X86_FEATURE_DTHERM            (14*32+ 0) /* Digital Thermal Sensor */
diff --combined arch/x86/include/asm/kvm_host.h

index 44de261e9223da71f036122166b9f1708a1a264f,262950f9f2d95e22d53bc06a6cea2e54e387c21e..ea7e40e9c1f0f48ed1f15d0238a2ff6c025c7c09
--- 1/arch/x86/include/asm/kvm_host.h
--- 2/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@@ -86,7 -86,7 +86,7 @@@
                           | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
                           | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
                           | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
- -                        | X86_CR4_SMAP | X86_CR4_PKE))
+ +                        | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))
   
   #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
   
@@@ -504,7 -504,6 +504,7 @@@ struct kvm_vcpu_arch 
         int mp_state;
         u64 ia32_misc_enable_msr;
         u64 smbase;
+ +      u64 smi_count;
         bool tpr_access_reporting;
         u64 ia32_xss;
   
@@@ -537,20 -536,7 +537,20 @@@
         struct kvm_mmu_memory_cache mmu_page_cache;
         struct kvm_mmu_memory_cache mmu_page_header_cache;
   
+ +      /*
+ +       * QEMU userspace and the guest each have their own FPU state.
+ +       * In vcpu_run, we switch between the user and guest FPU contexts.
+ +       * While running a VCPU, the VCPU thread will have the guest FPU
+ +       * context.
+ +       *
+ +       * Note that while the PKRU state lives inside the fpu registers,
+ +       * it is switched out separately at VMENTER and VMEXIT time. The
+ +       * "guest_fpu" state here contains the guest FPU context, with the
+ +       * host PKRU bits.
+ +       */
+ +      struct fpu user_fpu;
         struct fpu guest_fpu;
+ +
         u64 xcr0;
         u64 guest_supported_xcr0;
         u32 guest_xstate_size;
@@@ -761,6 -747,15 +761,15 @@@ enum kvm_irqchip_mode 
         KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
   };
   
+ struct kvm_sev_info {
+       bool active;            /* SEV enabled guest */
+       unsigned int asid;      /* ASID used for this guest */
+       unsigned int handle;    /* SEV firmware handle */
+       int fd;                 /* SEV device fd */
+       unsigned long pages_locked; /* Number of pages locked */
+       struct list_head regions_list;  /* List of registered regions */
+ };
+ 
   struct kvm_arch {
         unsigned int n_used_mmu_pages;
         unsigned int n_requested_mmu_pages;
@@@ -848,6 -843,8 +857,8 @@@
   
         bool x2apic_format;
         bool x2apic_broadcast_quirk_disabled;
+ 
+       struct kvm_sev_info sev_info;
   };
   
   struct kvm_vm_stat {
@@@ -966,7 -963,7 +977,7 @@@ struct kvm_x86_ops 
         unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
         void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
   
- -      void (*tlb_flush)(struct kvm_vcpu *vcpu);
+ +      void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa);
   
         void (*run)(struct kvm_vcpu *vcpu);
         int (*handle_exit)(struct kvm_vcpu *vcpu);
@@@ -1018,7 -1015,6 +1029,7 @@@
         void (*handle_external_intr)(struct kvm_vcpu *vcpu);
         bool (*mpx_supported)(void);
         bool (*xsaves_supported)(void);
+ +      bool (*umip_emulated)(void);
   
         int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
   
@@@ -1081,6 -1077,10 +1092,10 @@@
         int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
         int (*pre_leave_smm)(struct kvm_vcpu *vcpu, u64 smbase);
         int (*enable_smi_window)(struct kvm_vcpu *vcpu);
+ 
+       int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
+       int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+       int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
   };
   
   struct kvm_arch_async_pf {
@@@ -1176,8 -1176,7 +1191,8 @@@ int x86_emulate_instruction(struct kvm_
   static inline int emulate_instruction(struct kvm_vcpu *vcpu,
                         int emulation_type)
   {
- -      return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+ +      return x86_emulate_instruction(vcpu, 0,
+ +                      emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
   }
   
   void kvm_enable_efer_bits(u64);
@@@ -1450,7 -1449,4 +1465,7 @@@ static inline int kvm_cpu_get_apicid(in
   #define put_smstate(type, buf, offset, val)                      \
         *(type *)((buf) + (offset) - 0x7e00) = val
   
+ +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ +              unsigned long start, unsigned long end);
+ +
   #endif /* _ASM_X86_KVM_HOST_H */
diff --combined arch/x86/kernel/cpu/amd.c

index bcb75dc97d44075d2eecb3137b91f934072352b0,c1234aa0550ceb1995f6f8b010240dcc951babd9..df8a2418aadfcf7f40c19742de45bfc278ad0dbd
--- 1/arch/x86/kernel/cpu/amd.c
--- 2/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@@ -556,6 -556,51 +556,51 @@@ static void bsp_init_amd(struct cpuinfo
         }
   }
   
+ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
+ {
+       u64 msr;
+ 
+       /*
+        * BIOS support is required for SME and SEV.
+        *   For SME: If BIOS has enabled SME then adjust x86_phys_bits by
+        *            the SME physical address space reduction value.
+        *            If BIOS has not enabled SME then don't advertise the
+        *            SME feature (set in scattered.c).
+        *   For SEV: If BIOS has not enabled SEV then don't advertise the
+        *            SEV feature (set in scattered.c).
+        *
+        *   In all cases, since support for SME and SEV requires long mode,
+        *   don't advertise the feature under CONFIG_X86_32.
+        */
+       if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
+               /* Check if memory encryption is enabled */
+               rdmsrl(MSR_K8_SYSCFG, msr);
+               if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+                       goto clear_all;
+ 
+               /*
+                * Always adjust physical address bits. Even though this
+                * will be a value above 32-bits this is still done for
+                * CONFIG_X86_32 so that accurate values are reported.
+                */
+               c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
+ 
+               if (IS_ENABLED(CONFIG_X86_32))
+                       goto clear_all;
+ 
+               rdmsrl(MSR_K7_HWCR, msr);
+               if (!(msr & MSR_K7_HWCR_SMMLOCK))
+                       goto clear_sev;
+ 
+               return;
+ 
+ clear_all:
+               clear_cpu_cap(c, X86_FEATURE_SME);
+ clear_sev:
+               clear_cpu_cap(c, X86_FEATURE_SEV);
+       }
+ }
+ 
   static void early_init_amd(struct cpuinfo_x86 *c)
   {
         u32 dummy;
@@@ -627,26 -672,7 +672,7 @@@
         if (cpu_has_amd_erratum(c, amd_erratum_400))
                 set_cpu_bug(c, X86_BUG_AMD_E400);
   
-       /*
-        * BIOS support is required for SME. If BIOS has enabled SME then
-        * adjust x86_phys_bits by the SME physical address space reduction
-        * value. If BIOS has not enabled SME then don't advertise the
-        * feature (set in scattered.c). Also, since the SME support requires
-        * long mode, don't advertise the feature under CONFIG_X86_32.
-        */
-       if (cpu_has(c, X86_FEATURE_SME)) {
-               u64 msr;
- 
-               /* Check if SME is enabled */
-               rdmsrl(MSR_K8_SYSCFG, msr);
-               if (msr & MSR_K8_SYSCFG_MEM_ENCRYPT) {
-                       c->x86_phys_bits -= (cpuid_ebx(0x8000001f) >> 6) & 0x3f;
-                       if (IS_ENABLED(CONFIG_X86_32))
-                               clear_cpu_cap(c, X86_FEATURE_SME);
-               } else {
-                       clear_cpu_cap(c, X86_FEATURE_SME);
-               }
-       }
+       early_detect_mem_encrypt(c);
   }
   
   static void init_amd_k8(struct cpuinfo_x86 *c)
@@@ -804,11 -830,8 +830,11 @@@ static void init_amd(struct cpuinfo_x8
         case 0x17: init_amd_zn(c); break;
         }
   
- -      /* Enable workaround for FXSAVE leak */
- -      if (c->x86 >= 6)
+ +      /*
+ +       * Enable workaround for FXSAVE leak on CPUs
+ +       * without a XSaveErPtr feature
+ +       */
+ +      if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
                 set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
   
         cpu_detect_cache_sizes(c);
diff --combined arch/x86/kvm/cpuid.c

index a0e6c975f3a8bb4a8cc7b8ec21b8b5045add5010,c6473ca825cdd875dbfe19b5197549098ad40e5d..ac0041c2f5afe7faa9cb6cb86937a3e06277a097
--- 1/arch/x86/kvm/cpuid.c
--- 2/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@@ -293,18 -293,13 +293,18 @@@ static int __do_cpuid_ent_emulated(stru
   {
         switch (func) {
         case 0:
- -              entry->eax = 1;         /* only one leaf currently */
+ +              entry->eax = 7;
                 ++*nent;
                 break;
         case 1:
                 entry->ecx = F(MOVBE);
                 ++*nent;
                 break;
+ +      case 7:
+ +              entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+ +              if (index == 0)
+ +                      entry->ecx = F(RDPID);
+ +              ++*nent;
         default:
                 break;
         }
@@@ -332,7 -327,6 +332,7 @@@ static inline int __do_cpuid_ent(struc
         unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
         unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
         unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
+ +      unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
   
         /* cpuid 1.edx */
         const u32 kvm_cpuid_1_edx_x86_features =
@@@ -393,9 -387,8 +393,9 @@@
   
         /* cpuid 7.0.ecx*/
         const u32 kvm_cpuid_7_0_ecx_x86_features =
- -              F(AVX512VBMI) | F(LA57) | F(PKU) |
- -              0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ);
+ +              F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ +              F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
+ +              F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
   
         /* cpuid 7.0.edx*/
         const u32 kvm_cpuid_7_0_edx_x86_features =
@@@ -480,7 -473,6 +480,7 @@@
                         entry->ebx |= F(TSC_ADJUST);
                         entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
                         cpuid_mask(&entry->ecx, CPUID_7_ECX);
+ +                      entry->ecx |= f_umip;
                         /* PKU is not yet implemented for shadow paging. */
                         if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
                                 entry->ecx &= ~F(PKU);
@@@ -602,8 -594,7 +602,8 @@@
                              (1 << KVM_FEATURE_ASYNC_PF) |
                              (1 << KVM_FEATURE_PV_EOI) |
                              (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
- -                           (1 << KVM_FEATURE_PV_UNHALT);
+ +                           (1 << KVM_FEATURE_PV_UNHALT) |
+ +                           (1 << KVM_FEATURE_PV_TLB_FLUSH);
   
                 if (sched_info_on())
                         entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@@@ -613,7 -604,7 +613,7 @@@
                 entry->edx = 0;
                 break;
         case 0x80000000:
-               entry->eax = min(entry->eax, 0x8000001a);
+               entry->eax = min(entry->eax, 0x8000001f);
                 break;
         case 0x80000001:
                 entry->edx &= kvm_cpuid_8000_0001_edx_x86_features;
diff --combined arch/x86/kvm/mmu.c

index 89da688784fa72bb0b669d4db506773ececcacf4,d5e5dbd0e5ad9d2a22a5eeb9455f4108e9375e9c..ff1e9ee259cf7cd0e9151d21b8292bd45545e380
--- 1/arch/x86/kvm/mmu.c
--- 2/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@@ -381,7 -381,7 +381,7 @@@ void kvm_mmu_set_mask_ptes(u64 user_mas
   }
   EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
   
- -void kvm_mmu_clear_all_pte_masks(void)
+ +static void kvm_mmu_clear_all_pte_masks(void)
   {
         shadow_user_mask = 0;
         shadow_accessed_mask = 0;
@@@ -3395,7 -3395,7 +3395,7 @@@ static int mmu_alloc_direct_roots(struc
                 spin_lock(&vcpu->kvm->mmu_lock);
                 if(make_mmu_pages_available(vcpu) < 0) {
                         spin_unlock(&vcpu->kvm->mmu_lock);
- -                      return 1;
+ +                      return -ENOSPC;
                 }
                 sp = kvm_mmu_get_page(vcpu, 0, 0,
                                 vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
@@@ -3410,7 -3410,7 +3410,7 @@@
                         spin_lock(&vcpu->kvm->mmu_lock);
                         if (make_mmu_pages_available(vcpu) < 0) {
                                 spin_unlock(&vcpu->kvm->mmu_lock);
- -                              return 1;
+ +                              return -ENOSPC;
                         }
                         sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
                                         i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
@@@ -3450,7 -3450,7 +3450,7 @@@ static int mmu_alloc_shadow_roots(struc
                 spin_lock(&vcpu->kvm->mmu_lock);
                 if (make_mmu_pages_available(vcpu) < 0) {
                         spin_unlock(&vcpu->kvm->mmu_lock);
- -                      return 1;
+ +                      return -ENOSPC;
                 }
                 sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
                                 vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
@@@ -3487,7 -3487,7 +3487,7 @@@
                 spin_lock(&vcpu->kvm->mmu_lock);
                 if (make_mmu_pages_available(vcpu) < 0) {
                         spin_unlock(&vcpu->kvm->mmu_lock);
- -                      return 1;
+ +                      return -ENOSPC;
                 }
                 sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
                                       0, ACC_ALL);
@@@ -4950,6 -4950,16 +4950,16 @@@ int kvm_mmu_page_fault(struct kvm_vcpu 
         if (mmio_info_in_cache(vcpu, cr2, direct))
                 emulation_type = 0;
   emulate:
+       /*
+        * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
+        * This can happen if a guest gets a page-fault on data access but the HW
+        * table walker is not able to read the instruction page (e.g. instruction
+        * page is not present in memory). In those cases we simply restart the
+        * guest.
+        */
+       if (unlikely(insn && !insn_len))
+               return 1;
+ 
         er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len);
   
         switch (er) {
diff --combined arch/x86/kvm/svm.c

index 14cca8c601a912a6155511f775f397a3c84d6ddc,ec5df575299525fe6ce587e42daae714248e3fd7..5d83f0474020c5f669273179c2f191fccb405ee7
--- 1/arch/x86/kvm/svm.c
--- 2/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@@ -37,6 -37,10 +37,10 @@@
   #include <linux/amd-iommu.h>
   #include <linux/hashtable.h>
   #include <linux/frame.h>
+ #include <linux/psp-sev.h>
+ #include <linux/file.h>
+ #include <linux/pagemap.h>
+ #include <linux/swap.h>
   
   #include <asm/apic.h>
   #include <asm/perf_event.h>
@@@ -211,6 -215,9 +215,9 @@@ struct vcpu_svm 
          */
         struct list_head ir_list;
         spinlock_t ir_list_lock;
+ 
+       /* which host CPU was used for running this vcpu */
+       unsigned int last_cpu;
   };
   
   /*
@@@ -284,8 -291,12 +291,12 @@@ module_param(vls, int, 0444)
   static int vgif = true;
   module_param(vgif, int, 0444);
   
+ /* enable/disable SEV support */
+ static int sev = IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT);
+ module_param(sev, int, 0444);
+ 
   static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
- -static void svm_flush_tlb(struct kvm_vcpu *vcpu);
+ +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa);
   static void svm_complete_interrupts(struct vcpu_svm *svm);
   
   static int nested_svm_exit_handled(struct vcpu_svm *svm);
@@@ -319,6 -330,38 +330,38 @@@ enum 
   
   #define VMCB_AVIC_APIC_BAR_MASK               0xFFFFFFFFFF000ULL
   
+ static unsigned int max_sev_asid;
+ static unsigned int min_sev_asid;
+ static unsigned long *sev_asid_bitmap;
+ #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
+ 
+ struct enc_region {
+       struct list_head list;
+       unsigned long npages;
+       struct page **pages;
+       unsigned long uaddr;
+       unsigned long size;
+ };
+ 
+ static inline bool svm_sev_enabled(void)
+ {
+       return max_sev_asid;
+ }
+ 
+ static inline bool sev_guest(struct kvm *kvm)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ 
+       return sev->active;
+ }
+ 
+ static inline int sev_get_asid(struct kvm *kvm)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ 
+       return sev->asid;
+ }
+ 
   static inline void mark_all_dirty(struct vmcb *vmcb)
   {
         vmcb->control.clean = 0;
@@@ -361,7 -404,6 +404,7 @@@ static void recalc_intercepts(struct vc
   {
         struct vmcb_control_area *c, *h;
         struct nested_state *g;
+ +      u32 h_intercept_exceptions;
   
         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
   
@@@ -372,14 -414,9 +415,14 @@@
         h = &svm->nested.hsave->control;
         g = &svm->nested;
   
+ +      /* No need to intercept #UD if L1 doesn't intercept it */
+ +      h_intercept_exceptions =
+ +              h->intercept_exceptions & ~(1U << UD_VECTOR);
+ +
         c->intercept_cr = h->intercept_cr | g->intercept_cr;
         c->intercept_dr = h->intercept_dr | g->intercept_dr;
- -      c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
+ +      c->intercept_exceptions =
+ +              h_intercept_exceptions | g->intercept_exceptions;
         c->intercept = h->intercept | g->intercept;
   }
   
@@@ -531,9 -568,13 +574,13 @@@ struct svm_cpu_data 
         u64 asid_generation;
         u32 max_asid;
         u32 next_asid;
+       u32 min_asid;
         struct kvm_ldttss_desc *tss_desc;
   
         struct page *save_area;
+ 
+       /* index = sev_asid, value = vmcb pointer */
+       struct vmcb **sev_vmcbs;
   };
   
   static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
@@@ -788,6 -829,7 +835,7 @@@ static int svm_hardware_enable(void
         sd->asid_generation = 1;
         sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
         sd->next_asid = sd->max_asid + 1;
+       sd->min_asid = max_sev_asid + 1;
   
         gdt = get_current_gdt_rw();
         sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
@@@ -846,6 -888,7 +894,7 @@@ static void svm_cpu_uninit(int cpu
                 return;
   
         per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+       kfree(sd->sev_vmcbs);
         __free_page(sd->save_area);
         kfree(sd);
   }
@@@ -859,11 -902,18 +908,18 @@@ static int svm_cpu_init(int cpu
         if (!sd)
                 return -ENOMEM;
         sd->cpu = cpu;
-       sd->save_area = alloc_page(GFP_KERNEL);
         r = -ENOMEM;
+       sd->save_area = alloc_page(GFP_KERNEL);
         if (!sd->save_area)
                 goto err_1;
   
+       if (svm_sev_enabled()) {
+               r = -ENOMEM;
+               sd->sev_vmcbs = kmalloc((max_sev_asid + 1) * sizeof(void *), GFP_KERNEL);
+               if (!sd->sev_vmcbs)
+                       goto err_1;
+       }
+ 
         per_cpu(svm_data, cpu) = sd;
   
         return 0;
@@@ -1051,6 -1101,48 +1107,48 @@@ static int avic_ga_log_notifier(u32 ga_
         return 0;
   }
   
+ static __init int sev_hardware_setup(void)
+ {
+       struct sev_user_data_status *status;
+       int rc;
+ 
+       /* Maximum number of encrypted guests supported simultaneously */
+       max_sev_asid = cpuid_ecx(0x8000001F);
+ 
+       if (!max_sev_asid)
+               return 1;
+ 
+       /* Minimum ASID value that should be used for SEV guest */
+       min_sev_asid = cpuid_edx(0x8000001F);
+ 
+       /* Initialize SEV ASID bitmap */
+       sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
+                               sizeof(unsigned long), GFP_KERNEL);
+       if (!sev_asid_bitmap)
+               return 1;
+ 
+       status = kmalloc(sizeof(*status), GFP_KERNEL);
+       if (!status)
+               return 1;
+ 
+       /*
+        * Check SEV platform status.
+        *
+        * PLATFORM_STATUS can be called in any state; if we failed to query
+        * the PLATFORM status then either PSP firmware does not support SEV
+        * feature or SEV firmware is dead.
+        */
+       rc = sev_platform_status(status, NULL);
+       if (rc)
+               goto err;
+ 
+       pr_info("SEV supported\n");
+ 
+ err:
+       kfree(status);
+       return rc;
+ }
+ 
   static __init int svm_hardware_setup(void)
   {
         int cpu;
@@@ -1086,6 -1178,17 +1184,17 @@@
                 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
         }
   
+       if (sev) {
+               if (boot_cpu_has(X86_FEATURE_SEV) &&
+                   IS_ENABLED(CONFIG_KVM_AMD_SEV)) {
+                       r = sev_hardware_setup();
+                       if (r)
+                               sev = false;
+               } else {
+                       sev = false;
+               }
+       }
+ 
         for_each_possible_cpu(cpu) {
                 r = svm_cpu_init(cpu);
                 if (r)
@@@ -1147,6 -1250,9 +1256,9 @@@ static __exit void svm_hardware_unsetup
   {
         int cpu;
   
+       if (svm_sev_enabled())
+               kfree(sev_asid_bitmap);
+ 
         for_each_possible_cpu(cpu)
                 svm_cpu_uninit(cpu);
   
@@@ -1299,7 -1405,7 +1411,7 @@@ static void init_vmcb(struct vcpu_svm *
   
         if (npt_enabled) {
                 /* Setup VMCB for Nested Paging */
-               control->nested_ctl = 1;
+               control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
                 clr_intercept(svm, INTERCEPT_INVLPG);
                 clr_exception_intercept(svm, PF_VECTOR);
                 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
@@@ -1337,6 -1443,11 +1449,11 @@@
                 svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
         }
   
+       if (sev_guest(svm->vcpu.kvm)) {
+               svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
+               clr_exception_intercept(svm, UD_VECTOR);
+       }
+ 
         mark_all_dirty(svm->vmcb);
   
         enable_gif(svm);
@@@ -1419,6 -1530,179 +1536,179 @@@ static int avic_init_backing_page(struc
         return 0;
   }
   
+ static void __sev_asid_free(int asid)
+ {
+       struct svm_cpu_data *sd;
+       int cpu, pos;
+ 
+       pos = asid - 1;
+       clear_bit(pos, sev_asid_bitmap);
+ 
+       for_each_possible_cpu(cpu) {
+               sd = per_cpu(svm_data, cpu);
+               sd->sev_vmcbs[pos] = NULL;
+       }
+ }
+ 
+ static void sev_asid_free(struct kvm *kvm)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ 
+       __sev_asid_free(sev->asid);
+ }
+ 
+ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+ {
+       struct sev_data_decommission *decommission;
+       struct sev_data_deactivate *data;
+ 
+       if (!handle)
+               return;
+ 
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return;
+ 
+       /* deactivate handle */
+       data->handle = handle;
+       sev_guest_deactivate(data, NULL);
+ 
+       wbinvd_on_all_cpus();
+       sev_guest_df_flush(NULL);
+       kfree(data);
+ 
+       decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+       if (!decommission)
+               return;
+ 
+       /* decommission handle */
+       decommission->handle = handle;
+       sev_guest_decommission(decommission, NULL);
+ 
+       kfree(decommission);
+ }
+ 
+ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
+                                   unsigned long ulen, unsigned long *n,
+                                   int write)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       unsigned long npages, npinned, size;
+       unsigned long locked, lock_limit;
+       struct page **pages;
+       int first, last;
+ 
+       /* Calculate number of pages. */
+       first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
+       last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
+       npages = (last - first + 1);
+ 
+       locked = sev->pages_locked + npages;
+       lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+       if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
+               pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
+               return NULL;
+       }
+ 
+       /* Avoid using vmalloc for smaller buffers. */
+       size = npages * sizeof(struct page *);
+       if (size > PAGE_SIZE)
+               pages = vmalloc(size);
+       else
+               pages = kmalloc(size, GFP_KERNEL);
+ 
+       if (!pages)
+               return NULL;
+ 
+       /* Pin the user virtual address. */
+       npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
+       if (npinned != npages) {
+               pr_err("SEV: Failure locking %lu pages.\n", npages);
+               goto err;
+       }
+ 
+       *n = npages;
+       sev->pages_locked = locked;
+ 
+       return pages;
+ 
+ err:
+       if (npinned > 0)
+               release_pages(pages, npinned);
+ 
+       kvfree(pages);
+       return NULL;
+ }
+ 
+ static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
+                            unsigned long npages)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+ 
+       release_pages(pages, npages);
+       kvfree(pages);
+       sev->pages_locked -= npages;
+ }
+ 
+ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
+ {
+       uint8_t *page_virtual;
+       unsigned long i;
+ 
+       if (npages == 0 || pages == NULL)
+               return;
+ 
+       for (i = 0; i < npages; i++) {
+               page_virtual = kmap_atomic(pages[i]);
+               clflush_cache_range(page_virtual, PAGE_SIZE);
+               kunmap_atomic(page_virtual);
+       }
+ }
+ 
+ static void __unregister_enc_region_locked(struct kvm *kvm,
+                                          struct enc_region *region)
+ {
+       /*
+        * The guest may change the memory encryption attribute from C=0 -> C=1
+        * or vice versa for this memory range. Let's make sure caches are
+        * flushed to ensure that guest data gets written into memory with
+        * correct C-bit.
+        */
+       sev_clflush_pages(region->pages, region->npages);
+ 
+       sev_unpin_memory(kvm, region->pages, region->npages);
+       list_del(&region->list);
+       kfree(region);
+ }
+ 
+ static void sev_vm_destroy(struct kvm *kvm)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct list_head *head = &sev->regions_list;
+       struct list_head *pos, *q;
+ 
+       if (!sev_guest(kvm))
+               return;
+ 
+       mutex_lock(&kvm->lock);
+ 
+       /*
+        * if userspace was terminated before unregistering the memory regions
+        * then let's unpin all the registered memory.
+        */
+       if (!list_empty(head)) {
+               list_for_each_safe(pos, q, head) {
+                       __unregister_enc_region_locked(kvm,
+                               list_entry(pos, struct enc_region, list));
+               }
+       }
+ 
+       mutex_unlock(&kvm->lock);
+ 
+       sev_unbind_asid(kvm, sev->handle);
+       sev_asid_free(kvm);
+ }
+ 
   static void avic_vm_destroy(struct kvm *kvm)
   {
         unsigned long flags;
@@@ -1437,6 -1721,12 +1727,12 @@@
         spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
   }
   
+ /*
+  * VM teardown entry point for SVM: runs the AVIC teardown first, then the
+  * SEV teardown.
+  */
+ static void svm_vm_destroy(struct kvm *kvm)
+ {
+       avic_vm_destroy(kvm);
+       sev_vm_destroy(kvm);
+ }
+ 
   static int avic_vm_init(struct kvm *kvm)
   {
         unsigned long flags;
@@@ -2035,7 -2325,7 +2331,7 @@@ static int svm_set_cr4(struct kvm_vcpu 
                 return 1;
   
         if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
- -              svm_flush_tlb(vcpu);
+ +              svm_flush_tlb(vcpu, true);
   
         vcpu->arch.cr4 = cr4;
         if (!npt_enabled)
@@@ -2094,7 -2384,7 +2390,7 @@@ static void new_asid(struct vcpu_svm *s
   {
         if (sd->next_asid > sd->max_asid) {
                 ++sd->asid_generation;
-               sd->next_asid = 1;
+               sd->next_asid = sd->min_asid;
                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
         }
   
@@@ -2142,22 -2432,24 +2438,24 @@@ static void svm_set_dr7(struct kvm_vcp
   
   static int pf_interception(struct vcpu_svm *svm)
   {
-       u64 fault_address = svm->vmcb->control.exit_info_2;
+       u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
         u64 error_code = svm->vmcb->control.exit_info_1;
   
         return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
-                       svm->vmcb->control.insn_bytes,
+                       static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+                       svm->vmcb->control.insn_bytes : NULL,
                         svm->vmcb->control.insn_len);
   }
   
   static int npf_interception(struct vcpu_svm *svm)
   {
-       u64 fault_address = svm->vmcb->control.exit_info_2;
+       u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
         u64 error_code = svm->vmcb->control.exit_info_1;
   
         trace_kvm_page_fault(fault_address, error_code);
         return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
-                       svm->vmcb->control.insn_bytes,
+                       static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
+                       svm->vmcb->control.insn_bytes : NULL,
                         svm->vmcb->control.insn_len);
   }
   
@@@ -2202,10 -2494,7 +2500,10 @@@ static int ud_interception(struct vcpu_
   {
         int er;
   
+ +      WARN_ON_ONCE(is_guest_mode(&svm->vcpu));
         er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
+ +      if (er == EMULATE_USER_EXIT)
+ +              return 0;
         if (er != EMULATE_DONE)
                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
         return 1;
@@@ -2385,7 -2674,7 +2683,7 @@@ static void nested_svm_set_tdp_cr3(stru
   
         svm->vmcb->control.nested_cr3 = __sme_set(root);
         mark_dirty(svm->vmcb, VMCB_NPT);
- -      svm_flush_tlb(vcpu);
+ +      svm_flush_tlb(vcpu, true);
   }
   
   static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
@@@ -2927,7 -3216,8 +3225,8 @@@ static bool nested_vmcb_checks(struct v
         if (vmcb->control.asid == 0)
                 return false;
   
-       if (vmcb->control.nested_ctl && !npt_enabled)
+       if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
+           !npt_enabled)
                 return false;
   
         return true;
@@@ -2941,7 -3231,7 +3240,7 @@@ static void enter_svm_guest_mode(struc
         else
                 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
   
-       if (nested_vmcb->control.nested_ctl) {
+       if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
                 kvm_mmu_unload(&svm->vcpu);
                 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
                 nested_svm_init_mmu_context(&svm->vcpu);
@@@ -2989,7 -3279,7 +3288,7 @@@
         svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
         svm->nested.intercept            = nested_vmcb->control.intercept;
   
- -      svm_flush_tlb(&svm->vcpu);
+ +      svm_flush_tlb(&svm->vcpu, true);
         svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
         if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
                 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@@ -4362,12 -4652,39 +4661,39 @@@ static void reload_tss(struct kvm_vcpu 
         load_TR_desc();
   }
   
+ /*
+  * Per-VMRUN preparation for an SEV guest: load the guest's ASID into the
+  * VMCB and request an ASID TLB flush when this VMCB is not the one that
+  * last ran with that ASID on @cpu.
+  */
+ static void pre_sev_run(struct vcpu_svm *svm, int cpu)
+ {
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+       int asid = sev_get_asid(svm->vcpu.kvm);
+ 
+       /* An SEV guest always runs with the ASID allocated for it. */
+       svm->vmcb->control.asid = asid;
+ 
+       /*
+        * The guest TLB must be flushed when either:
+        *  - a different VMCB last ran with this ASID on this host CPU, or
+        *  - this VMCB last ran on a different host CPU.
+        */
+       if (sd->sev_vmcbs[asid] != svm->vmcb || svm->last_cpu != cpu) {
+               svm->last_cpu = cpu;
+               sd->sev_vmcbs[asid] = svm->vmcb;
+               svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
+               mark_dirty(svm->vmcb, VMCB_ASID);
+       }
+ }
+ 
   static void pre_svm_run(struct vcpu_svm *svm)
   {
         int cpu = raw_smp_processor_id();
   
         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
   
+       if (sev_guest(svm->vcpu.kvm))
+               return pre_sev_run(svm, cpu);
+ 
         /* FIXME: handle wraparound of asid_generation */
         if (svm->asid_generation != sd->asid_generation)
                 new_asid(svm, sd);
@@@ -4785,7 -5102,7 +5111,7 @@@ static int svm_set_tss_addr(struct kvm 
         return 0;
   }
   
- -static void svm_flush_tlb(struct kvm_vcpu *vcpu)
+ +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
   
@@@ -5076,7 -5393,7 +5402,7 @@@ static void svm_set_cr3(struct kvm_vcp
   
         svm->vmcb->save.cr3 = __sme_set(root);
         mark_dirty(svm->vmcb, VMCB_CR);
- -      svm_flush_tlb(vcpu);
+ +      svm_flush_tlb(vcpu, true);
   }
   
   static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@@ -5090,7 -5407,7 +5416,7 @@@
         svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
         mark_dirty(svm->vmcb, VMCB_CR);
   
- -      svm_flush_tlb(vcpu);
+ +      svm_flush_tlb(vcpu, true);
   }
   
   static int is_disabled(void)
@@@ -5176,6 -5493,12 +5502,12 @@@ static void svm_set_supported_cpuid(u3
                         entry->edx |= SVM_FEATURE_NPT;
   
                 break;
+       case 0x8000001F:
+               /* Support memory encryption cpuid if host supports it */
+               if (boot_cpu_has(X86_FEATURE_SEV))
+                       cpuid(0x8000001f, &entry->eax, &entry->ebx,
+                               &entry->ecx, &entry->edx);
+ 
         }
   }
   
@@@ -5204,11 -5527,6 +5536,11 @@@ static bool svm_xsaves_supported(void
         return false;
   }
   
+ +/* Value reported through the .umip_emulated hook: always false for SVM. */
+ +static bool svm_umip_emulated(void)
+ +{
+ +      return false;
+ +}
+ +
   static bool svm_has_wbinvd_exit(void)
   {
         return true;
@@@ -5510,6 -5828,828 +5842,828 @@@ static int enable_smi_window(struct kvm
         return 0;
   }
   
+ /*
+  * Reserve a free ASID for a new SEV guest.
+  *
+  * Bit N of sev_asid_bitmap stands for ASID N + 1; only ASIDs in the range
+  * min_sev_asid..max_sev_asid are considered.
+  *
+  * Returns the reserved ASID, or -EBUSY when none is free.
+  */
+ static int sev_asid_new(void)
+ {
+       int bit;
+ 
+       bit = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_sev_asid - 1);
+       if (bit < max_sev_asid) {
+               set_bit(bit, sev_asid_bitmap);
+               return bit + 1;
+       }
+ 
+       return -EBUSY;
+ }
+ 
+ /*
+  * KVM_SEV_INIT handler: turn @kvm into an SEV guest.
+  *
+  * Reserves an ASID, initialises the SEV platform and sets up the per-VM
+  * SEV bookkeeping (active flag, ASID, empty region list).
+  *
+  * Returns 0 on success, -EBUSY if the VM is already an SEV guest or no
+  * ASID is available, or the error from sev_platform_init() (whose
+  * status is stored in argp->error).
+  */
+ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       int asid, ret;
+ 
+       ret = -EBUSY;
+ 
+       /*
+        * Refuse a second INIT on the same VM: it would overwrite sev->asid
+        * without releasing the old ASID and re-initialise regions_list,
+        * orphaning any regions that are already registered and pinned.
+        */
+       if (sev->active)
+               return ret;
+ 
+       asid = sev_asid_new();
+       if (asid < 0)
+               return ret;
+ 
+       ret = sev_platform_init(&argp->error);
+       if (ret)
+               goto e_free;
+ 
+       sev->active = true;
+       sev->asid = asid;
+       INIT_LIST_HEAD(&sev->regions_list);
+ 
+       return 0;
+ 
+ e_free:
+       /* give the reserved ASID back on failure */
+       __sev_asid_free(asid);
+       return ret;
+ }
+ 
+ /*
+  * Bind the VM's ASID to the firmware guest context @handle.
+  *
+  * Caches are written back and invalidated on all CPUs and a DF_FLUSH is
+  * issued before the ACTIVATE command. The firmware status is reported
+  * through @error.
+  *
+  * Returns 0 on success, -ENOMEM if the command buffer cannot be
+  * allocated, or the error returned by DF_FLUSH / ACTIVATE.
+  */
+ static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
+ {
+       struct sev_data_activate *activate;
+       int asid = sev_get_asid(kvm);
+       int rc;
+ 
+       wbinvd_on_all_cpus();
+ 
+       rc = sev_guest_df_flush(error);
+       if (rc)
+               return rc;
+ 
+       activate = kzalloc(sizeof(*activate), GFP_KERNEL);
+       if (!activate)
+               return -ENOMEM;
+ 
+       /* ask the firmware to activate @asid for @handle */
+       activate->handle = handle;
+       activate->asid   = asid;
+       rc = sev_guest_activate(activate, error);
+ 
+       kfree(activate);
+       return rc;
+ }
+ 
+ /*
+  * Issue SEV command @id with buffer @data through the file referred to
+  * by descriptor @fd.
+  *
+  * Returns -EBADF when @fd does not resolve to a file, otherwise the
+  * result of sev_issue_cmd_external_user().
+  */
+ static int __sev_issue_cmd(int fd, int id, void *data, int *error)
+ {
+       struct fd f = fdget(fd);
+       int ret = -EBADF;
+ 
+       if (f.file) {
+               ret = sev_issue_cmd_external_user(f.file, id, data, error);
+               fdput(f);
+       }
+ 
+       return ret;
+ }
+ 
+ /* Issue SEV command @id using the descriptor recorded in the VM's sev_info. */
+ static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
+ {
+       return __sev_issue_cmd(kvm->arch.sev_info.fd, id, data, error);
+ }
+ 
+ /*
+  * KVM_SEV_LAUNCH_START handler.
+  *
+  * Copies the launch parameters from userspace, optionally copies the DH
+  * certificate and session blobs into kernel memory, issues LAUNCH_START
+  * through argp->sev_fd, binds the VM's ASID to the returned handle and
+  * writes the handle back to userspace. On success the handle and the
+  * descriptor are recorded in the VM's sev_info.
+  *
+  * Returns 0 on success or a negative errno; the firmware status is stored
+  * in argp->error.
+  */
+ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct sev_data_launch_start *start;
+       struct kvm_sev_launch_start params;
+       void *dh_blob, *session_blob;
+       int *error = &argp->error;
+       int ret;
+ 
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+ 
+       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+               return -EFAULT;
+ 
+       start = kzalloc(sizeof(*start), GFP_KERNEL);
+       if (!start)
+               return -ENOMEM;
+ 
+       /* the DH certificate is optional: only copied when an address is given */
+       dh_blob = NULL;
+       if (params.dh_uaddr) {
+               dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
+               if (IS_ERR(dh_blob)) {
+                       ret = PTR_ERR(dh_blob);
+                       goto e_free;
+               }
+ 
+               start->dh_cert_address = __sme_set(__pa(dh_blob));
+               start->dh_cert_len = params.dh_len;
+       }
+ 
+       /* the session blob is optional as well */
+       session_blob = NULL;
+       if (params.session_uaddr) {
+               session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
+               if (IS_ERR(session_blob)) {
+                       ret = PTR_ERR(session_blob);
+                       goto e_free_dh;
+               }
+ 
+               start->session_address = __sme_set(__pa(session_blob));
+               start->session_len = params.session_len;
+       }
+ 
+       start->handle = params.handle;
+       start->policy = params.policy;
+ 
+       /* create memory encryption context */
+       ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
+       if (ret)
+               goto e_free_session;
+ 
+       /*
+        * Bind ASID to this guest.
+        * NOTE(review): if binding fails, nothing here releases the context
+        * that LAUNCH_START just created - confirm whether firmware-side
+        * cleanup is required on this path.
+        */
+       ret = sev_bind_asid(kvm, start->handle, error);
+       if (ret)
+               goto e_free_session;
+ 
+       /* return handle to userspace */
+       params.handle = start->handle;
+       if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
+               sev_unbind_asid(kvm, start->handle);
+               ret = -EFAULT;
+               goto e_free_session;
+       }
+ 
+       sev->handle = start->handle;
+       sev->fd = argp->sev_fd;
+ 
+       /* success falls through: the kernel-side copies are freed on every path */
+ e_free_session:
+       kfree(session_blob);
+ e_free_dh:
+       kfree(dh_blob);
+ e_free:
+       kfree(start);
+       return ret;
+ }
+ 
+ /*
+  * Count how many entries of @inpages, starting at index @idx, are
+  * physically back-to-back, i.e. each one lies exactly PAGE_SIZE after the
+  * previous one. The page at @idx itself is always counted, so the result
+  * is at least 1. Scanning stops at @npages or at the first gap.
+  */
+ static int get_num_contig_pages(int idx, struct page **inpages,
+                               unsigned long npages)
+ {
+       unsigned long prev = __sme_page_pa(inpages[idx]);
+       int run = 1;
+       int i;
+ 
+       for (i = idx + 1; i < npages; i++) {
+               unsigned long cur = __sme_page_pa(inpages[i]);
+ 
+               if (cur != prev + PAGE_SIZE)
+                       break;
+ 
+               run++;
+               prev = cur;
+       }
+ 
+       return run;
+ }
+ 
+ /*
+  * KVM_SEV_LAUNCH_UPDATE_DATA handler.
+  *
+  * Pins the userspace range described by the command, flushes it from the
+  * caches and asks the firmware to encrypt it in place, one physically
+  * contiguous run of pages per LAUNCH_UPDATE_DATA command. The pages are
+  * marked dirty/accessed and unpinned before returning, on success and on
+  * command failure alike.
+  *
+  * Returns 0 on success or a negative errno; the firmware status is stored
+  * in argp->error.
+  */
+ static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       unsigned long vaddr, vaddr_end, next_vaddr, npages, size;
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct kvm_sev_launch_update_data params;
+       struct sev_data_launch_update_data *data;
+       struct page **inpages;
+       /*
+        * ret starts at 0 so that a request for which the loop below never
+        * runs does not return an indeterminate value.
+        */
+       int i, ret = 0, pages;
+ 
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+ 
+       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+               return -EFAULT;
+ 
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+ 
+       vaddr = params.uaddr;
+       size = params.len;
+       vaddr_end = vaddr + size;
+ 
+       /* Lock the user memory. */
+       inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
+       if (!inpages) {
+               ret = -ENOMEM;
+               goto e_free;
+       }
+ 
+       /*
+        * The LAUNCH_UPDATE command will perform in-place encryption of the
+        * memory content (i.e it will write the same memory region with C=1).
+        * It's possible that the cache may contain the data with C=0, i.e.,
+        * unencrypted so invalidate it first.
+        */
+       sev_clflush_pages(inpages, npages);
+ 
+       for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
+               int offset, len;
+ 
+               /*
+                * If the user buffer is not page-aligned, calculate the offset
+                * within the page.
+                */
+               offset = vaddr & (PAGE_SIZE - 1);
+ 
+               /* Calculate the number of pages that can be encrypted in one go. */
+               pages = get_num_contig_pages(i, inpages, npages);
+ 
+               /* never go past the end of the run or of the request */
+               len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
+ 
+               data->handle = sev->handle;
+               data->len = len;
+               data->address = __sme_page_pa(inpages[i]) + offset;
+               ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
+               if (ret)
+                       goto e_unpin;
+ 
+               size -= len;
+               next_vaddr = vaddr + len;
+       }
+ 
+ e_unpin:
+       /* content of memory is updated, mark pages dirty */
+       for (i = 0; i < npages; i++) {
+               set_page_dirty_lock(inpages[i]);
+               mark_page_accessed(inpages[i]);
+       }
+       /* unlock the user pages */
+       sev_unpin_memory(kvm, inpages, npages);
+ e_free:
+       kfree(data);
+       return ret;
+ }
+ 
+ /*
+  * KVM_SEV_LAUNCH_MEASURE handler.
+  *
+  * With params.len == 0 userspace only queries the required blob length:
+  * the command is issued without a buffer and the length reported by the
+  * firmware is written back. Otherwise a kernel buffer of params.len bytes
+  * (at most SEV_FW_BLOB_MAX_SIZE) receives the measurement, which is then
+  * copied to params.uaddr.
+  *
+  * Returns 0 on success or a negative errno; the firmware status is stored
+  * in argp->error.
+  */
+ static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct sev_data_launch_measure *data;
+       struct kvm_sev_launch_measure params;
+       void *blob = NULL;
+       int ret;
+ 
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+ 
+       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+               return -EFAULT;
+ 
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+ 
+       /* User wants to query the blob length */
+       if (!params.len)
+               goto cmd;
+ 
+       if (params.uaddr) {
+               if (params.len > SEV_FW_BLOB_MAX_SIZE) {
+                       ret = -EINVAL;
+                       goto e_free;
+               }
+ 
+               if (!access_ok(VERIFY_WRITE, params.uaddr, params.len)) {
+                       ret = -EFAULT;
+                       goto e_free;
+               }
+ 
+               /*
+                * Zero the buffer: params.len bytes are copied to userspace
+                * below no matter how many bytes the firmware wrote, so an
+                * uninitialised allocation could leak kernel heap contents.
+                */
+               ret = -ENOMEM;
+               blob = kzalloc(params.len, GFP_KERNEL);
+               if (!blob)
+                       goto e_free;
+ 
+               data->address = __psp_pa(blob);
+               data->len = params.len;
+       }
+ 
+ cmd:
+       data->handle = sev->handle;
+       ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
+ 
+       /*
+        * If we query the session length, FW responded with expected data.
+        */
+       if (!params.len)
+               goto done;
+ 
+       if (ret)
+               goto e_free_blob;
+ 
+       if (blob) {
+               if (copy_to_user((void __user *)(uintptr_t)params.uaddr, blob, params.len))
+                       ret = -EFAULT;
+       }
+ 
+ done:
+       /* report the length the firmware filled in back to userspace */
+       params.len = data->len;
+       if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+               ret = -EFAULT;
+ e_free_blob:
+       kfree(blob);
+ e_free:
+       kfree(data);
+       return ret;
+ }
+ 
+ /*
+  * KVM_SEV_LAUNCH_FINISH handler: issue LAUNCH_FINISH for the VM's guest
+  * handle.
+  *
+  * Returns -ENOTTY for a non-SEV VM, -ENOMEM if the command buffer cannot
+  * be allocated, otherwise the result of the command (firmware status in
+  * argp->error).
+  */
+ static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct sev_data_launch_finish *finish;
+       int rc;
+ 
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+ 
+       finish = kzalloc(sizeof(*finish), GFP_KERNEL);
+       if (!finish)
+               return -ENOMEM;
+ 
+       finish->handle = kvm->arch.sev_info.handle;
+       rc = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, finish, &argp->error);
+ 
+       kfree(finish);
+       return rc;
+ }
+ 
+ /*
+  * KVM_SEV_GUEST_STATUS handler: query the firmware for the guest's
+  * policy, state and handle and copy them to the userspace structure at
+  * argp->data.
+  *
+  * Returns 0 on success or a negative errno; the firmware status is stored
+  * in argp->error.
+  */
+ static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct kvm_sev_guest_status params;
+       struct sev_data_guest_status *status;
+       int rc;
+ 
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+ 
+       status = kzalloc(sizeof(*status), GFP_KERNEL);
+       if (!status)
+               return -ENOMEM;
+ 
+       status->handle = sev->handle;
+       rc = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, status, &argp->error);
+       if (!rc) {
+               /* hand the firmware's answer back to userspace */
+               params.policy = status->policy;
+               params.state = status->state;
+               params.handle = status->handle;
+ 
+               if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
+                       rc = -EFAULT;
+       }
+ 
+       kfree(status);
+       return rc;
+ }
+ 
+ /*
+  * Issue a DBG_ENCRYPT (@enc true) or DBG_DECRYPT (@enc false) command
+  * that processes @size bytes from @src into @dst for the VM's guest
+  * handle.
+  *
+  * Returns -ENOMEM if the command buffer cannot be allocated, otherwise
+  * the result of the command (firmware status in @error).
+  */
+ static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
+                              unsigned long dst, int size,
+                              int *error, bool enc)
+ {
+       int cmd = enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT;
+       struct sev_data_dbg *dbg;
+       int rc;
+ 
+       dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
+       if (!dbg)
+               return -ENOMEM;
+ 
+       dbg->handle = kvm->arch.sev_info.handle;
+       dbg->dst_addr = dst;
+       dbg->src_addr = src;
+       dbg->len = size;
+ 
+       rc = sev_issue_cmd(kvm, cmd, dbg, error);
+ 
+       kfree(dbg);
+       return rc;
+ }
+ 
+ /*
+  * Decrypt @sz bytes of guest memory at @src_paddr into @dst_paddr.
+  *
+  * The source address is rounded down to a 16-byte boundary and the length
+  * is rounded up to a multiple of 16 that still covers the requested
+  * bytes, so the command may write more than @sz bytes to the destination.
+  *
+  * Returns the result of the DBG_DECRYPT command (firmware status in @err).
+  */
+ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
+                            unsigned long dst_paddr, int sz, int *err)
+ {
+       int offset;
+ 
+       /*
+        * Its safe to read more than we are asked, caller should ensure that
+        * destination has enough space.
+        *
+        * The offset must be taken BEFORE rounding the address down; taken
+        * afterwards it is always 0 and the length would not be extended to
+        * cover the bytes skipped at the front.
+        */
+       offset = src_paddr & 15;
+       src_paddr = round_down(src_paddr, 16);
+       sz = round_up(sz + offset, 16);
+ 
+       return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
+ }
+ 
+ /*
+  * Decrypt @size bytes of guest memory at @paddr for userspace.
+  *
+  * When @paddr, @dst_paddr and @size are all 16-byte aligned the data is
+  * decrypted straight into @dst_paddr. Otherwise it is decrypted into a
+  * temporary page and the requested bytes are copied from there to
+  * @dst_uaddr.
+  *
+  * Returns 0 on success, -ENOMEM, -EFAULT or the decrypt command's error.
+  */
+ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
+                                 unsigned long __user dst_uaddr,
+                                 unsigned long dst_paddr,
+                                 int size, int *err)
+ {
+       struct page *bounce = NULL;
+       int rc;
+ 
+       /* any misaligned input forces the detour through a bounce page */
+       if (!IS_ALIGNED(dst_paddr, 16) ||
+           !IS_ALIGNED(paddr, 16) ||
+           !IS_ALIGNED(size, 16)) {
+               bounce = alloc_page(GFP_KERNEL);
+               if (!bounce)
+                       return -ENOMEM;
+ 
+               dst_paddr = __sme_page_pa(bounce);
+       }
+ 
+       rc = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
+       if (!rc && bounce) {
+               /* skip the bytes in front of the requested start address */
+               int offset = paddr & 15;
+ 
+               if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
+                                page_address(bounce) + offset, size))
+                       rc = -EFAULT;
+       }
+ 
+       if (bounce)
+               __free_page(bounce);
+ 
+       return rc;
+ }
+ 
+ /*
+  * Encrypt @size bytes from the user buffer @vaddr (physical address
+  * @paddr) into guest memory at @dst_paddr (user address @dst_vaddr).
+  *
+  * Two independent fix-ups are applied before the DBG_ENCRYPT command:
+  *  - an unaligned source is first copied into a temporary page, which
+  *    then serves as the source;
+  *  - an unaligned destination or length triggers a read-modify-write:
+  *    the destination is decrypted into a second temporary page, the new
+  *    data is placed over it at the destination's offset within its
+  *    16-byte block, and that page is encrypted back to the rounded-down
+  *    destination with the length rounded up to a multiple of 16.
+  *
+  * Returns 0 on success, -ENOMEM, -EFAULT, or the error of the underlying
+  * debug command (firmware status in @error).
+  */
+ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
+                                 unsigned long __user vaddr,
+                                 unsigned long dst_paddr,
+                                 unsigned long __user dst_vaddr,
+                                 int size, int *error)
+ {
+       struct page *src_tpage = NULL;
+       struct page *dst_tpage = NULL;
+       int ret, len = size;
+ 
+       /* If source buffer is not aligned then use an intermediate buffer */
+       if (!IS_ALIGNED(vaddr, 16)) {
+               src_tpage = alloc_page(GFP_KERNEL);
+               if (!src_tpage)
+                       return -ENOMEM;
+ 
+               if (copy_from_user(page_address(src_tpage),
+                               (void __user *)(uintptr_t)vaddr, size)) {
+                       __free_page(src_tpage);
+                       return -EFAULT;
+               }
+ 
+               /* from here on the temporary page is the source */
+               paddr = __sme_page_pa(src_tpage);
+       }
+ 
+       /*
+        *  If destination buffer or length is not aligned then do read-modify-write:
+        *   - decrypt destination in an intermediate buffer
+        *   - copy the source buffer in an intermediate buffer
+        *   - use the intermediate buffer as source buffer
+        */
+       if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+               int dst_offset;
+ 
+               dst_tpage = alloc_page(GFP_KERNEL);
+               if (!dst_tpage) {
+                       ret = -ENOMEM;
+                       goto e_free;
+               }
+ 
+               ret = __sev_dbg_decrypt(kvm, dst_paddr,
+                                       __sme_page_pa(dst_tpage), size, error);
+               if (ret)
+                       goto e_free;
+ 
+               /*
+                *  If source is kernel buffer then use memcpy() otherwise
+                *  copy_from_user().
+                */
+               dst_offset = dst_paddr & 15;
+ 
+               if (src_tpage)
+                       memcpy(page_address(dst_tpage) + dst_offset,
+                              page_address(src_tpage), size);
+               else {
+                       if (copy_from_user(page_address(dst_tpage) + dst_offset,
+                                          (void __user *)(uintptr_t)vaddr, size)) {
+                               ret = -EFAULT;
+                               goto e_free;
+                       }
+               }
+ 
+               /* encrypt whole 16-byte blocks from the merged buffer */
+               paddr = __sme_page_pa(dst_tpage);
+               dst_paddr = round_down(dst_paddr, 16);
+               len = round_up(size, 16);
+       }
+ 
+       ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);
+ 
+ e_free:
+       if (src_tpage)
+               __free_page(src_tpage);
+       if (dst_tpage)
+               __free_page(dst_tpage);
+       return ret;
+ }
+ 
+ /*
+  * KVM_SEV_DBG_DECRYPT / KVM_SEV_DBG_ENCRYPT handler.
+  *
+  * Walks the source range one source page at a time: the source and
+  * destination pages are pinned, flushed from the caches, handed to the
+  * decrypt (@dec true) or encrypt (@dec false) helper and unpinned again.
+  *
+  * Returns 0 on success, -ENOTTY for a non-SEV VM, -EINVAL for an empty or
+  * wrapping source range, -EFAULT if the arguments cannot be copied or a
+  * page cannot be pinned, or the helper's error (firmware status in
+  * argp->error).
+  */
+ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
+ {
+       unsigned long vaddr, vaddr_end, next_vaddr;
+       unsigned long dst_vaddr;
+       struct page **src_p, **dst_p;
+       struct kvm_sev_dbg debug;
+       unsigned long n;
+       /* initialised so that no path can return an indeterminate value */
+       int ret = 0, size;
+ 
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+ 
+       if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
+               return -EFAULT;
+ 
+       /* an empty request or a range that wraps around is invalid */
+       if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
+               return -EINVAL;
+ 
+       vaddr = debug.src_uaddr;
+       size = debug.len;
+       vaddr_end = vaddr + size;
+       dst_vaddr = debug.dst_uaddr;
+ 
+       for (; vaddr < vaddr_end; vaddr = next_vaddr) {
+               int len, s_off, d_off;
+ 
+               /* lock userspace source and destination page */
+               src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
+               if (!src_p)
+                       return -EFAULT;
+ 
+               dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
+               if (!dst_p) {
+                       sev_unpin_memory(kvm, src_p, n);
+                       return -EFAULT;
+               }
+ 
+               /*
+                * The DBG_{DE,EN}CRYPT commands will perform {dec,en}cryption of the
+                * memory content (i.e it will write the same memory region with C=1).
+                * It's possible that the cache may contain the data with C=0, i.e.,
+                * unencrypted so invalidate it first.
+                */
+               sev_clflush_pages(src_p, 1);
+               sev_clflush_pages(dst_p, 1);
+ 
+               /*
+                * Since user buffer may not be page aligned, calculate the
+                * offset within the page.
+                */
+               s_off = vaddr & ~PAGE_MASK;
+               d_off = dst_vaddr & ~PAGE_MASK;
+               len = min_t(size_t, (PAGE_SIZE - s_off), size);
+ 
+               if (dec)
+                       ret = __sev_dbg_decrypt_user(kvm,
+                                                    __sme_page_pa(src_p[0]) + s_off,
+                                                    dst_vaddr,
+                                                    __sme_page_pa(dst_p[0]) + d_off,
+                                                    len, &argp->error);
+               else
+                       ret = __sev_dbg_encrypt_user(kvm,
+                                                    __sme_page_pa(src_p[0]) + s_off,
+                                                    vaddr,
+                                                    __sme_page_pa(dst_p[0]) + d_off,
+                                                    dst_vaddr,
+                                                    len, &argp->error);
+ 
+               sev_unpin_memory(kvm, src_p, 1);
+               sev_unpin_memory(kvm, dst_p, 1);
+ 
+               if (ret)
+                       goto err;
+ 
+               next_vaddr = vaddr + len;
+               dst_vaddr = dst_vaddr + len;
+               size -= len;
+       }
+ err:
+       return ret;
+ }
+ 
+ /*
+  * KVM_SEV_LAUNCH_SECRET handler.
+  *
+  * Pins the guest destination range (which must be physically contiguous),
+  * copies the transport blob and the packet header from userspace and
+  * issues LAUNCH_UPDATE_SECRET with all three buffers described in the
+  * command.
+  *
+  * Returns 0 on success or a negative errno; the firmware status is stored
+  * in argp->error.
+  */
+ static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct sev_data_launch_secret *data;
+       struct kvm_sev_launch_secret params;
+       struct page **pages;
+       void *blob, *hdr;
+       unsigned long n;
+       int ret, offset;
+ 
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+ 
+       if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
+               return -EFAULT;
+ 
+       pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
+       if (!pages)
+               return -ENOMEM;
+ 
+       /*
+        * The secret must be copied into contiguous memory region, lets verify
+        * that userspace memory pages are contiguous before we issue command.
+        */
+       if (get_num_contig_pages(0, pages, n) != n) {
+               ret = -EINVAL;
+               goto e_unpin_memory;
+       }
+ 
+       ret = -ENOMEM;
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               goto e_unpin_memory;
+ 
+       /*
+        * Tell the firmware where the secret goes: the first pinned page
+        * plus the offset of the user address within that page.
+        */
+       offset = params.guest_uaddr & (PAGE_SIZE - 1);
+       data->guest_address = __sme_page_pa(pages[0]) + offset;
+       data->guest_len = params.guest_len;
+ 
+       blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
+       if (IS_ERR(blob)) {
+               ret = PTR_ERR(blob);
+               goto e_free;
+       }
+ 
+       data->trans_address = __psp_pa(blob);
+       data->trans_len = params.trans_len;
+ 
+       hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
+       if (IS_ERR(hdr)) {
+               ret = PTR_ERR(hdr);
+               goto e_free_blob;
+       }
+       /* describe the header copy, not the transport blob a second time */
+       data->hdr_address = __psp_pa(hdr);
+       data->hdr_len = params.hdr_len;
+ 
+       data->handle = sev->handle;
+       ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
+ 
+       kfree(hdr);
+ 
+ e_free_blob:
+       kfree(blob);
+ e_free:
+       kfree(data);
+ e_unpin_memory:
+       sev_unpin_memory(kvm, pages, n);
+       return ret;
+ }
+ 
+ /*
+  * Dispatcher for the memory-encryption operation: copies the SEV command
+  * from userspace, runs the matching handler under kvm->lock and copies
+  * the (possibly updated) command back.
+  *
+  * Returns -ENOTTY when SEV is not enabled, -EFAULT when the command
+  * cannot be copied in or out, -EINVAL for an unknown command id (nothing
+  * is copied back in that case), otherwise the handler's result.
+  */
+ static int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
+ {
+       struct kvm_sev_cmd sev_cmd;
+       int ret;
+ 
+       if (!svm_sev_enabled())
+               return -ENOTTY;
+ 
+       if (copy_from_user(&sev_cmd, argp, sizeof(sev_cmd)))
+               return -EFAULT;
+ 
+       mutex_lock(&kvm->lock);
+ 
+       switch (sev_cmd.id) {
+       case KVM_SEV_INIT:
+               ret = sev_guest_init(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_LAUNCH_START:
+               ret = sev_launch_start(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_LAUNCH_UPDATE_DATA:
+               ret = sev_launch_update_data(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_LAUNCH_MEASURE:
+               ret = sev_launch_measure(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_LAUNCH_FINISH:
+               ret = sev_launch_finish(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_GUEST_STATUS:
+               ret = sev_guest_status(kvm, &sev_cmd);
+               break;
+       case KVM_SEV_DBG_DECRYPT:
+               ret = sev_dbg_crypt(kvm, &sev_cmd, true);
+               break;
+       case KVM_SEV_DBG_ENCRYPT:
+               ret = sev_dbg_crypt(kvm, &sev_cmd, false);
+               break;
+       case KVM_SEV_LAUNCH_SECRET:
+               ret = sev_launch_secret(kvm, &sev_cmd);
+               break;
+       default:
+               /* unknown command: skip the copy-back entirely */
+               ret = -EINVAL;
+               goto unlock;
+       }
+ 
+       /* handlers report the firmware status through sev_cmd.error */
+       if (copy_to_user(argp, &sev_cmd, sizeof(sev_cmd)))
+               ret = -EFAULT;
+ 
+ unlock:
+       mutex_unlock(&kvm->lock);
+       return ret;
+ }
+ 
+ /*
+  * Register (pin) a userspace memory range that the SEV guest may use as
+  * encrypted memory and remember it on the VM's region list.
+  *
+  * Pinning, the cache flush and the list insertion all happen under
+  * kvm->lock: every other pin/unpin caller in this file already holds
+  * that lock, and once the region is on the list and the lock is dropped
+  * another task may unregister and free it.
+  *
+  * Returns 0 on success, -ENOTTY for a non-SEV VM, or -ENOMEM if the
+  * region cannot be allocated or pinned.
+  */
+ static int svm_register_enc_region(struct kvm *kvm,
+                                  struct kvm_enc_region *range)
+ {
+       struct kvm_sev_info *sev = &kvm->arch.sev_info;
+       struct enc_region *region;
+       int ret = 0;
+ 
+       if (!sev_guest(kvm))
+               return -ENOTTY;
+ 
+       region = kzalloc(sizeof(*region), GFP_KERNEL);
+       if (!region)
+               return -ENOMEM;
+ 
+       mutex_lock(&kvm->lock);
+ 
+       region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
+       if (!region->pages) {
+               ret = -ENOMEM;
+               goto e_unlock;
+       }
+ 
+       /*
+        * The guest may change the memory encryption attribute from C=0 -> C=1
+        * or vice versa for this memory range. Lets make sure caches are
+        * flushed to ensure that guest data gets written into memory with
+        * correct C-bit.
+        */
+       sev_clflush_pages(region->pages, region->npages);
+ 
+       region->uaddr = range->addr;
+       region->size = range->size;
+ 
+       list_add_tail(&region->list, &sev->regions_list);
+       mutex_unlock(&kvm->lock);
+ 
+       return ret;
+ 
+ e_unlock:
+       mutex_unlock(&kvm->lock);
+       kfree(region);
+       return ret;
+ }
+ 
+ /*
+  * Look up the registered region whose user address and size both match
+  * @range exactly. Returns the region, or NULL when there is none.
+  */
+ static struct enc_region *
+ find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
+ {
+       struct enc_region *region;
+ 
+       list_for_each_entry(region, &kvm->arch.sev_info.regions_list, list) {
+               if (region->uaddr != range->addr)
+                       continue;
+               if (region->size != range->size)
+                       continue;
+ 
+               return region;
+       }
+ 
+       return NULL;
+ }
+ 
+ 
+ /*
+  * Unregister the region that exactly matches @range: flush, unpin and
+  * free it, all under kvm->lock.
+  *
+  * Returns 0 on success, -ENOTTY for a non-SEV VM, or -EINVAL when no
+  * matching region is registered.
+  */
+ static int svm_unregister_enc_region(struct kvm *kvm,
+                                    struct kvm_enc_region *range)
+ {
+       struct enc_region *region;
+       int ret = 0;
+ 
+       mutex_lock(&kvm->lock);
+ 
+       if (!sev_guest(kvm)) {
+               ret = -ENOTTY;
+       } else {
+               region = find_enc_region(kvm, range);
+               if (region)
+                       __unregister_enc_region_locked(kvm, region);
+               else
+                       ret = -EINVAL;
+       }
+ 
+       mutex_unlock(&kvm->lock);
+       return ret;
+ }
+ 
   static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
         .cpu_has_kvm_support = has_svm,
         .disabled_by_bios = is_disabled,
@@@ -5526,7 -6666,7 +6680,7 @@@
         .vcpu_reset = svm_vcpu_reset,
   
         .vm_init = avic_vm_init,
-       .vm_destroy = avic_vm_destroy,
+       .vm_destroy = svm_vm_destroy,
   
         .prepare_guest_switch = svm_prepare_guest_switch,
         .vcpu_load = svm_vcpu_load,
@@@ -5602,7 -6742,6 +6756,7 @@@
         .invpcid_supported = svm_invpcid_supported,
         .mpx_supported = svm_mpx_supported,
         .xsaves_supported = svm_xsaves_supported,
+ +      .umip_emulated = svm_umip_emulated,
   
         .set_supported_cpuid = svm_set_supported_cpuid,
   
@@@ -5626,6 -6765,10 +6780,10 @@@
         .pre_enter_smm = svm_pre_enter_smm,
         .pre_leave_smm = svm_pre_leave_smm,
         .enable_smi_window = enable_smi_window,
+ 
+       .mem_enc_op = svm_mem_enc_op,
+       .mem_enc_reg_region = svm_register_enc_region,
+       .mem_enc_unreg_region = svm_unregister_enc_region,
   };
   
   static int __init svm_init(void)
diff --combined arch/x86/kvm/x86.c

index 56d8a1e11e50b04d6d61cbf3ba6cf93c58755c9d,926f55cecf2e3ef2e2fd6544cc318ff54ddcb3c4..6d2e1459adc97963410893bbbca0cb64539879b2
--- 1/arch/x86/kvm/x86.c
--- 2/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -107,9 -107,6 +107,9 @@@ EXPORT_SYMBOL_GPL(kvm_x86_ops)
   static bool __read_mostly ignore_msrs = 0;
   module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
   
+ +static bool __read_mostly report_ignored_msrs = true;
+ +module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
+ +
   unsigned int min_timer_period_us = 500;
   module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
   
@@@ -702,8 -699,7 +702,8 @@@ static void kvm_load_guest_xcr0(struct 
         if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE) &&
                         !vcpu->guest_xcr0_loaded) {
                 /* kvm_set_xcr() also depends on this */
- -              xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
+ +              if (vcpu->arch.xcr0 != host_xcr0)
+ +                      xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
                 vcpu->guest_xcr0_loaded = 1;
         }
   }
@@@ -795,9 -791,6 +795,9 @@@ int kvm_set_cr4(struct kvm_vcpu *vcpu, 
         if (!guest_cpuid_has(vcpu, X86_FEATURE_LA57) && (cr4 & X86_CR4_LA57))
                 return 1;
   
+ +      if (!guest_cpuid_has(vcpu, X86_FEATURE_UMIP) && (cr4 & X86_CR4_UMIP))
+ +              return 1;
+ +
         if (is_long_mode(vcpu)) {
                 if (!(cr4 & X86_CR4_PAE))
                         return 1;
@@@ -1040,7 -1033,6 +1040,7 @@@ static u32 emulated_msrs[] = 
         MSR_IA32_MCG_CTL,
         MSR_IA32_MCG_EXT_CTL,
         MSR_IA32_SMBASE,
+ +      MSR_SMI_COUNT,
         MSR_PLATFORM_INFO,
         MSR_MISC_FEATURES_ENABLES,
   };
@@@ -1803,13 -1795,10 +1803,13 @@@ u64 get_kvmclock_ns(struct kvm *kvm
         /* both __this_cpu_read() and rdtsc() should be on the same cpu */
         get_cpu();
   
- -      kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
- -                         &hv_clock.tsc_shift,
- -                         &hv_clock.tsc_to_system_mul);
- -      ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ +      if (__this_cpu_read(cpu_tsc_khz)) {
+ +              kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+ +                                 &hv_clock.tsc_shift,
+ +                                 &hv_clock.tsc_to_system_mul);
+ +              ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+ +      } else
+ +              ret = ktime_get_boot_ns() + ka->kvmclock_offset;
   
         put_cpu();
   
@@@ -1841,9 -1830,6 +1841,9 @@@ static void kvm_setup_pvclock_page(stru
          */
         BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
   
+ +      if (guest_hv_clock.version & 1)
+ +              ++guest_hv_clock.version;  /* first time write, random junk */
+ +
         vcpu->hv_clock.version = guest_hv_clock.version + 1;
         kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
                                 &vcpu->hv_clock,
@@@ -2123,12 -2109,6 +2123,12 @@@ static void kvmclock_reset(struct kvm_v
         vcpu->arch.pv_time_enabled = false;
   }
   
+ +static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
+ +{     /* bump the flush counter, then delegate to the vendor tlb_flush hook */
+ +      ++vcpu->stat.tlb_flush;         /* per-vCPU statistic */
+ +      kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa);   /* invalidate_gpa is forwarded unchanged */
+ +}
+ +
   static void record_steal_time(struct kvm_vcpu *vcpu)
   {
         if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
@@@ -2138,12 -2118,7 +2138,12 @@@
                 &vcpu->arch.st.steal, sizeof(struct kvm_steal_time))))
                 return;
   
- -      vcpu->arch.st.steal.preempted = 0;
+ +      /*
+ +       * Doing a TLB flush here, on the guest's behalf, can avoid
+ +       * expensive IPIs.
+ +       */
+ +      if (xchg(&vcpu->arch.st.steal.preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ +              kvm_vcpu_flush_tlb(vcpu, false);
   
         if (vcpu->arch.st.steal.version & 1)
                 vcpu->arch.st.steal.version += 1;  /* first time write, random junk */
@@@ -2244,11 -2219,6 +2244,11 @@@ int kvm_set_msr_common(struct kvm_vcpu 
                         return 1;
                 vcpu->arch.smbase = data;
                 break;
+ +      case MSR_SMI_COUNT:
+ +              if (!msr_info->host_initiated)
+ +                      return 1;
+ +              vcpu->arch.smi_count = data;
+ +              break;
         case MSR_KVM_WALL_CLOCK_NEW:
         case MSR_KVM_WALL_CLOCK:
                 vcpu->kvm->arch.wall_clock = data;
@@@ -2352,9 -2322,7 +2352,9 @@@
                 /* Drop writes to this legacy MSR -- see rdmsr
                  * counterpart for further detail.
                  */
- -              vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
+ +              if (report_ignored_msrs)
+ +                      vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
+ +                              msr, data);
                 break;
         case MSR_AMD64_OSVW_ID_LENGTH:
                 if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@@ -2391,10 -2359,8 +2391,10 @@@
                                     msr, data);
                         return 1;
                 } else {
- -                      vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
- -                                  msr, data);
+ +                      if (report_ignored_msrs)
+ +                              vcpu_unimpl(vcpu,
+ +                                      "ignored wrmsr: 0x%x data 0x%llx\n",
+ +                                      msr, data);
                         break;
                 }
         }
@@@ -2523,9 -2489,6 +2523,9 @@@ int kvm_get_msr_common(struct kvm_vcpu 
                         return 1;
                 msr_info->data = vcpu->arch.smbase;
                 break;
+ +      case MSR_SMI_COUNT:
+ +              msr_info->data = vcpu->arch.smi_count;
+ +              break;
         case MSR_IA32_PERF_STATUS:
                 /* TSC increment by tick */
                 msr_info->data = 1000ULL;
@@@ -2615,9 -2578,7 +2615,9 @@@
                                                msr_info->index);
                         return 1;
                 } else {
- -                      vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+ +                      if (report_ignored_msrs)
+ +                              vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
+ +                                      msr_info->index);
                         msr_info->data = 0;
                 }
                 break;
@@@ -2928,7 -2889,7 +2928,7 @@@ static void kvm_steal_time_set_preempte
         if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                 return;
   
- -      vcpu->arch.st.steal.preempted = 1;
+ +      vcpu->arch.st.steal.preempted = KVM_VCPU_PREEMPTED;
   
         kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime,
                         &vcpu->arch.st.steal.preempted,
@@@ -2961,13 -2922,8 +2961,13 @@@ void kvm_arch_vcpu_put(struct kvm_vcpu 
         srcu_read_unlock(&vcpu->kvm->srcu, idx);
         pagefault_enable();
         kvm_x86_ops->vcpu_put(vcpu);
- -      kvm_put_guest_fpu(vcpu);
         vcpu->arch.last_host_tsc = rdtsc();
+ +      /*
+ +       * If userspace has set any breakpoints or watchpoints, dr6 is restored
+ +       * on every vmexit, but if not, we might have a stale dr6 from the
+ +       * guest. do_debug expects dr6 to be cleared after it runs, do the same.
+ +       */
+ +      set_debugreg(0, 6);
   }
   
   static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
@@@ -3502,8 -3458,6 +3502,8 @@@ long kvm_arch_vcpu_ioctl(struct file *f
                 void *buffer;
         } u;
   
+ +      vcpu_load(vcpu);
+ +
         u.buffer = NULL;
         switch (ioctl) {
         case KVM_GET_LAPIC: {
@@@ -3529,10 -3483,8 +3529,10 @@@
                 if (!lapic_in_kernel(vcpu))
                         goto out;
                 u.lapic = memdup_user(argp, sizeof(*u.lapic));
- -              if (IS_ERR(u.lapic))
- -                      return PTR_ERR(u.lapic);
+ +              if (IS_ERR(u.lapic)) {
+ +                      r = PTR_ERR(u.lapic);
+ +                      goto out_nofree;
+ +              }
   
                 r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
                 break;
@@@ -3706,10 -3658,8 +3706,10 @@@
         }
         case KVM_SET_XSAVE: {
                 u.xsave = memdup_user(argp, sizeof(*u.xsave));
- -              if (IS_ERR(u.xsave))
- -                      return PTR_ERR(u.xsave);
+ +              if (IS_ERR(u.xsave)) {
+ +                      r = PTR_ERR(u.xsave);
+ +                      goto out_nofree;
+ +              }
   
                 r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
                 break;
@@@ -3731,10 -3681,8 +3731,10 @@@
         }
         case KVM_SET_XCRS: {
                 u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
- -              if (IS_ERR(u.xcrs))
- -                      return PTR_ERR(u.xcrs);
+ +              if (IS_ERR(u.xcrs)) {
+ +                      r = PTR_ERR(u.xcrs);
+ +                      goto out_nofree;
+ +              }
   
                 r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
                 break;
@@@ -3778,8 -3726,6 +3778,8 @@@
         }
   out:
         kfree(u.buffer);
+ +out_nofree:
+ +      vcpu_put(vcpu);
         return r;
   }
   
@@@ -4335,6 -4281,36 +4335,36 @@@ set_identity_unlock
                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
                 break;
         }
+       case KVM_MEMORY_ENCRYPT_OP: {
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_op)
+                       r = kvm_x86_ops->mem_enc_op(kvm, argp);
+               break;
+       }
+       case KVM_MEMORY_ENCRYPT_REG_REGION: {
+               struct kvm_enc_region region;
+ 
+               r = -EFAULT;
+               if (copy_from_user(&region, argp, sizeof(region)))
+                       goto out;
+ 
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_reg_region)
+                       r = kvm_x86_ops->mem_enc_reg_region(kvm, &region);
+               break;
+       }
+       case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+               struct kvm_enc_region region;
+ 
+               r = -EFAULT;
+               if (copy_from_user(&region, argp, sizeof(region)))
+                       goto out;
+ 
+               r = -ENOTTY;
+               if (kvm_x86_ops->mem_enc_unreg_region)
+                       r = kvm_x86_ops->mem_enc_unreg_region(kvm, &region);
+               break;
+       }
         default:
                 r = -ENOTTY;
         }
@@@ -5291,6 -5267,17 +5321,6 @@@ static void emulator_halt(struct x86_em
         emul_to_vcpu(ctxt)->arch.halt_request = 1;
   }
   
- -static void emulator_get_fpu(struct x86_emulate_ctxt *ctxt)
- -{
- -      preempt_disable();
- -      kvm_load_guest_fpu(emul_to_vcpu(ctxt));
- -}
- -
- -static void emulator_put_fpu(struct x86_emulate_ctxt *ctxt)
- -{
- -      preempt_enable();
- -}
- -
   static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
                               struct x86_instruction_info *info,
                               enum x86_intercept_stage stage)
@@@ -5368,6 -5355,8 +5398,6 @@@ static const struct x86_emulate_ops emu
         .halt                = emulator_halt,
         .wbinvd              = emulator_wbinvd,
         .fix_hypercall       = emulator_fix_hypercall,
- -      .get_fpu             = emulator_get_fpu,
- -      .put_fpu             = emulator_put_fpu,
         .intercept           = emulator_intercept,
         .get_cpuid           = emulator_get_cpuid,
         .set_nmi_mask        = emulator_set_nmi_mask,
@@@ -5471,7 -5460,7 +5501,7 @@@ static int handle_emulation_failure(str
                 vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                 vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
                 vcpu->run->internal.ndata = 0;
- -              r = EMULATE_FAIL;
+ +              r = EMULATE_USER_EXIT;
         }
         kvm_queue_exception(vcpu, UD_VECTOR);
   
@@@ -5763,8 -5752,6 +5793,8 @@@ int x86_emulate_instruction(struct kvm_
                         if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
                                                 emulation_type))
                                 return EMULATE_DONE;
+ +                      if (ctxt->have_exception && inject_emulated_exception(vcpu))
+ +                              return EMULATE_DONE;
                         if (emulation_type & EMULTYPE_SKIP)
                                 return EMULATE_FAIL;
                         return handle_emulation_failure(vcpu);
@@@ -6488,7 -6475,6 +6518,7 @@@ static int inject_pending_event(struct 
                 kvm_x86_ops->queue_exception(vcpu);
         } else if (vcpu->arch.smi_pending && !is_smm(vcpu) && kvm_x86_ops->smi_allowed(vcpu)) {
                 vcpu->arch.smi_pending = false;
+ +              ++vcpu->arch.smi_count;
                 enter_smm(vcpu);
         } else if (vcpu->arch.nmi_pending && kvm_x86_ops->nmi_allowed(vcpu)) {
                 --vcpu->arch.nmi_pending;
@@@ -6799,18 -6785,10 +6829,18 @@@ static void vcpu_scan_ioapic(struct kvm
         kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
   }
   
- -static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
+ +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+ +              unsigned long start, unsigned long end)
   {
- -      ++vcpu->stat.tlb_flush;
- -      kvm_x86_ops->tlb_flush(vcpu);
+ +      unsigned long apic_address;
+ +
+ +      /*
+ +       * The physical address of apic access page is stored in the VMCS.
+ +       * Update it when it becomes invalid.
+ +       */
+ +      apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ +      if (start <= apic_address && apic_address < end)
+ +              kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
   }
   
   void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
@@@ -6867,7 -6845,7 +6897,7 @@@ static int vcpu_enter_guest(struct kvm_
                 if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
                         kvm_mmu_sync_roots(vcpu);
                 if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
- -                      kvm_vcpu_flush_tlb(vcpu);
+ +                      kvm_vcpu_flush_tlb(vcpu, true);
                 if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
                         vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
                         r = 0;
@@@ -6987,6 -6965,7 +7017,6 @@@
         preempt_disable();
   
         kvm_x86_ops->prepare_guest_switch(vcpu);
- -      kvm_load_guest_fpu(vcpu);
   
         /*
          * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
@@@ -7040,8 -7019,7 +7070,8 @@@
         }
   
         trace_kvm_entry(vcpu->vcpu_id);
- -      wait_lapic_expire(vcpu);
+ +      if (lapic_timer_advance_ns)
+ +              wait_lapic_expire(vcpu);
         guest_enter_irqoff();
   
         if (unlikely(vcpu->arch.switch_db_regs)) {
@@@ -7300,11 -7278,14 +7330,11 @@@ static int complete_emulated_mmio(struc
   
   int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
   {
- -      struct fpu *fpu = &current->thread.fpu;
         int r;
- -      sigset_t sigsaved;
- -
- -      fpu__initialize(fpu);
   
- -      if (vcpu->sigset_active)
- -              sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+ +      vcpu_load(vcpu);
+ +      kvm_sigset_activate(vcpu);
+ +      kvm_load_guest_fpu(vcpu);
   
         if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
                 if (kvm_run->immediate_exit) {
@@@ -7346,18 -7327,15 +7376,18 @@@
                 r = vcpu_run(vcpu);
   
   out:
+ +      kvm_put_guest_fpu(vcpu);
         post_kvm_run_save(vcpu);
- -      if (vcpu->sigset_active)
- -              sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ +      kvm_sigset_deactivate(vcpu);
   
+ +      vcpu_put(vcpu);
         return r;
   }
   
   int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
   {
+ +      vcpu_load(vcpu);
+ +
         if (vcpu->arch.emulate_regs_need_sync_to_vcpu) {
                 /*
                  * We are here if userspace calls get_regs() in the middle of
@@@ -7391,14 -7369,11 +7421,14 @@@
         regs->rip = kvm_rip_read(vcpu);
         regs->rflags = kvm_get_rflags(vcpu);
   
+ +      vcpu_put(vcpu);
         return 0;
   }
   
   int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
   {
+ +      vcpu_load(vcpu);
+ +
         vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
         vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
   
@@@ -7422,13 -7397,12 +7452,13 @@@
   #endif
   
         kvm_rip_write(vcpu, regs->rip);
- -      kvm_set_rflags(vcpu, regs->rflags);
+ +      kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
   
         vcpu->arch.exception.pending = false;
   
         kvm_make_request(KVM_REQ_EVENT, vcpu);
   
+ +      vcpu_put(vcpu);
         return 0;
   }
   
@@@ -7447,8 -7421,6 +7477,8 @@@ int kvm_arch_vcpu_ioctl_get_sregs(struc
   {
         struct desc_ptr dt;
   
+ +      vcpu_load(vcpu);
+ +
         kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
         kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
         kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
@@@ -7480,15 -7452,12 +7510,15 @@@
                 set_bit(vcpu->arch.interrupt.nr,
                         (unsigned long *)sregs->interrupt_bitmap);
   
+ +      vcpu_put(vcpu);
         return 0;
   }
   
   int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                     struct kvm_mp_state *mp_state)
   {
+ +      vcpu_load(vcpu);
+ +
         kvm_apic_accept_events(vcpu);
         if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
                                         vcpu->arch.pv.pv_unhalted)
@@@ -7496,26 -7465,21 +7526,26 @@@
         else
                 mp_state->mp_state = vcpu->arch.mp_state;
   
+ +      vcpu_put(vcpu);
         return 0;
   }
   
   int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                     struct kvm_mp_state *mp_state)
   {
+ +      int ret = -EINVAL;
+ +
+ +      vcpu_load(vcpu);
+ +
         if (!lapic_in_kernel(vcpu) &&
             mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
- -              return -EINVAL;
+ +              goto out;
   
         /* INITs are latched while in SMM */
         if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
             (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
              mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
- -              return -EINVAL;
+ +              goto out;
   
         if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
                 vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
@@@ -7523,11 -7487,7 +7553,11 @@@
         } else
                 vcpu->arch.mp_state = mp_state->mp_state;
         kvm_make_request(KVM_REQ_EVENT, vcpu);
- -      return 0;
+ +
+ +      ret = 0;
+ +out:
+ +      vcpu_put(vcpu);
+ +      return ret;
   }
   
   int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
@@@ -7558,18 -7518,15 +7588,18 @@@ int kvm_arch_vcpu_ioctl_set_sregs(struc
         int mmu_reset_needed = 0;
         int pending_vec, max_bits, idx;
         struct desc_ptr dt;
+ +      int ret = -EINVAL;
+ +
+ +      vcpu_load(vcpu);
   
         if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
                         (sregs->cr4 & X86_CR4_OSXSAVE))
- -              return -EINVAL;
+ +              goto out;
   
         apic_base_msr.data = sregs->apic_base;
         apic_base_msr.host_initiated = true;
         if (kvm_set_apic_base(vcpu, &apic_base_msr))
- -              return -EINVAL;
+ +              goto out;
   
         dt.size = sregs->idt.limit;
         dt.address = sregs->idt.base;
@@@ -7635,10 -7592,7 +7665,10 @@@
   
         kvm_make_request(KVM_REQ_EVENT, vcpu);
   
- -      return 0;
+ +      ret = 0;
+ +out:
+ +      vcpu_put(vcpu);
+ +      return ret;
   }
   
   int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
@@@ -7647,8 -7601,6 +7677,8 @@@
         unsigned long rflags;
         int i, r;
   
+ +      vcpu_load(vcpu);
+ +
         if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
                 r = -EBUSY;
                 if (vcpu->arch.exception.pending)
@@@ -7694,7 -7646,7 +7724,7 @@@
         r = 0;
   
   out:
- -
+ +      vcpu_put(vcpu);
         return r;
   }
   
@@@ -7708,8 -7660,6 +7738,8 @@@ int kvm_arch_vcpu_ioctl_translate(struc
         gpa_t gpa;
         int idx;
   
+ +      vcpu_load(vcpu);
+ +
         idx = srcu_read_lock(&vcpu->kvm->srcu);
         gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
         srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@@ -7718,17 -7668,14 +7748,17 @@@
         tr->writeable = 1;
         tr->usermode = 0;
   
+ +      vcpu_put(vcpu);
         return 0;
   }
   
   int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
   {
- -      struct fxregs_state *fxsave =
- -                      &vcpu->arch.guest_fpu.state.fxsave;
+ +      struct fxregs_state *fxsave;
+ +
+ +      vcpu_load(vcpu);
   
+ +      fxsave = &vcpu->arch.guest_fpu.state.fxsave;
         memcpy(fpu->fpr, fxsave->st_space, 128);
         fpu->fcw = fxsave->cwd;
         fpu->fsw = fxsave->swd;
@@@ -7738,17 -7685,13 +7768,17 @@@
         fpu->last_dp = fxsave->rdp;
         memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
   
+ +      vcpu_put(vcpu);
         return 0;
   }
   
   int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
   {
- -      struct fxregs_state *fxsave =
- -                      &vcpu->arch.guest_fpu.state.fxsave;
+ +      struct fxregs_state *fxsave;
+ +
+ +      vcpu_load(vcpu);
+ +
+ +      fxsave = &vcpu->arch.guest_fpu.state.fxsave;
   
         memcpy(fxsave->st_space, fpu->fpr, 128);
         fxsave->cwd = fpu->fcw;
@@@ -7759,7 -7702,6 +7789,7 @@@
         fxsave->rdp = fpu->last_dp;
         memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
   
+ +      vcpu_put(vcpu);
         return 0;
   }
   
@@@ -7778,25 -7720,32 +7808,25 @@@ static void fx_init(struct kvm_vcpu *vc
         vcpu->arch.cr0 |= X86_CR0_ET;
   }
   
+ +/* Swap (qemu) user FPU context for the guest FPU context. */
   void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
   {
- -      if (vcpu->guest_fpu_loaded)
- -              return;
- -
- -      /*
- -       * Restore all possible states in the guest,
- -       * and assume host would use all available bits.
- -       * Guest xcr0 would be loaded later.
- -       */
- -      vcpu->guest_fpu_loaded = 1;
- -      __kernel_fpu_begin();
+ +      preempt_disable();
+ +      copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
         /* PKRU is separately restored in kvm_x86_ops->run.  */
         __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
                                 ~XFEATURE_MASK_PKRU);
+ +      preempt_enable();
         trace_kvm_fpu(1);
   }
   
+ +/* When vcpu_run ends, restore user space FPU context. */
   void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
   {
- -      if (!vcpu->guest_fpu_loaded)
- -              return;
- -
- -      vcpu->guest_fpu_loaded = 0;
+ +      preempt_disable();
         copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- -      __kernel_fpu_end();
+ +      copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+ +      preempt_enable();
         ++vcpu->stat.fpu_reload;
         trace_kvm_fpu(0);
   }
@@@ -7828,12 -7777,16 +7858,12 @@@ struct kvm_vcpu *kvm_arch_vcpu_create(s
   
   int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
   {
- -      int r;
- -
         kvm_vcpu_mtrr_init(vcpu);
- -      r = vcpu_load(vcpu);
- -      if (r)
- -              return r;
+ +      vcpu_load(vcpu);
         kvm_vcpu_reset(vcpu, false);
         kvm_mmu_setup(vcpu);
         vcpu_put(vcpu);
- -      return r;
+ +      return 0;
   }
   
   void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
@@@ -7843,15 -7796,13 +7873,15 @@@
   
         kvm_hv_vcpu_postcreate(vcpu);
   
- -      if (vcpu_load(vcpu))
+ +      if (mutex_lock_killable(&vcpu->mutex))
                 return;
+ +      vcpu_load(vcpu);
         msr.data = 0x0;
         msr.index = MSR_IA32_TSC;
         msr.host_initiated = true;
         kvm_write_tsc(vcpu, &msr);
         vcpu_put(vcpu);
+ +      mutex_unlock(&vcpu->mutex);
   
         if (!kvmclock_periodic_sync)
                 return;
@@@ -7862,9 -7813,11 +7892,9 @@@
   
   void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
   {
- -      int r;
         vcpu->arch.apf.msr_val = 0;
   
- -      r = vcpu_load(vcpu);
- -      BUG_ON(r);
+ +      vcpu_load(vcpu);
         kvm_mmu_unload(vcpu);
         vcpu_put(vcpu);
   
@@@ -7876,7 -7829,6 +7906,7 @@@ void kvm_vcpu_reset(struct kvm_vcpu *vc
         vcpu->arch.hflags = 0;
   
         vcpu->arch.smi_pending = 0;
+ +      vcpu->arch.smi_count = 0;
         atomic_set(&vcpu->arch.nmi_queued, 0);
         vcpu->arch.nmi_pending = 0;
         vcpu->arch.nmi_injected = false;
@@@ -7910,8 -7862,7 +7940,8 @@@
                  * To avoid have the INIT path from kvm_apic_has_events() that be
                  * called with loaded FPU and does not let userspace fix the state.
                  */
- -              kvm_put_guest_fpu(vcpu);
+ +              if (init_event)
+ +                      kvm_put_guest_fpu(vcpu);
                 mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu.state.xsave,
                                         XFEATURE_MASK_BNDREGS);
                 if (mpx_state_buffer)
@@@ -7920,8 -7871,6 +7950,8 @@@
                                         XFEATURE_MASK_BNDCSR);
                 if (mpx_state_buffer)
                         memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
+ +              if (init_event)
+ +                      kvm_load_guest_fpu(vcpu);
         }
   
         if (!init_event) {
@@@ -8236,7 -8185,9 +8266,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
   
   static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
   {
- -      int r;
- -      r = vcpu_load(vcpu);
- -      BUG_ON(r);
+ +      vcpu_load(vcpu);
         kvm_mmu_unload(vcpu);
         vcpu_put(vcpu);
   }
diff --combined include/uapi/linux/kvm.h

index 496e59a2738ba99308f438e1f0509e66e17086cb,571431d3384b40c703438d933043aa037acb1993..62c564dd4aa194e0e78995ce03afa98a89316bc9
--- 1/include/uapi/linux/kvm.h
--- 2/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@@ -630,9 -630,9 +630,9 @@@ struct kvm_s390_irq 
   
   struct kvm_s390_irq_state {
         __u64 buf;
- -      __u32 flags;
+ +      __u32 flags;        /* will stay unused for compatibility reasons */
         __u32 len;
- -      __u32 reserved[4];
+ +      __u32 reserved[4];  /* will stay unused for compatibility reasons */
   };
   
   /* for KVM_SET_GUEST_DEBUG */
@@@ -1358,6 -1358,96 +1358,96 @@@ struct kvm_s390_ucas_mapping 
   /* Available with KVM_CAP_S390_CMMA_MIGRATION */
   #define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
   #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+ /* Memory Encryption Commands */
+ #define KVM_MEMORY_ENCRYPT_OP      _IOWR(KVMIO, 0xba, unsigned long)
+ 
+ struct kvm_enc_region {        /* argument of KVM_MEMORY_ENCRYPT_{REG,UNREG}_REGION */
+       __u64 addr;     /* start of the range; the SVM code pins from it and records it as uaddr */
+       __u64 size;     /* length of the range; presumably in bytes -- TODO confirm */
+ };
+ 
+ #define KVM_MEMORY_ENCRYPT_REG_REGION    _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+ #define KVM_MEMORY_ENCRYPT_UNREG_REGION  _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+ 
+ /* Secure Encrypted Virtualization command */
+ enum sev_cmd_id {
+       /* Guest initialization commands */
+       KVM_SEV_INIT = 0,
+       KVM_SEV_ES_INIT,
+       /* Guest launch commands */
+       KVM_SEV_LAUNCH_START,
+       KVM_SEV_LAUNCH_UPDATE_DATA,
+       KVM_SEV_LAUNCH_UPDATE_VMSA,
+       KVM_SEV_LAUNCH_SECRET,
+       KVM_SEV_LAUNCH_MEASURE,
+       KVM_SEV_LAUNCH_FINISH,
+       /* Guest migration commands (outgoing) */
+       KVM_SEV_SEND_START,
+       KVM_SEV_SEND_UPDATE_DATA,
+       KVM_SEV_SEND_UPDATE_VMSA,
+       KVM_SEV_SEND_FINISH,
+       /* Guest migration commands (incoming) */
+       KVM_SEV_RECEIVE_START,
+       KVM_SEV_RECEIVE_UPDATE_DATA,
+       KVM_SEV_RECEIVE_UPDATE_VMSA,
+       KVM_SEV_RECEIVE_FINISH,
+       /* Guest status and debug commands */
+       KVM_SEV_GUEST_STATUS,
+       KVM_SEV_DBG_DECRYPT,
+       KVM_SEV_DBG_ENCRYPT,
+       /* Guest certificates commands */
+       KVM_SEV_CERT_EXPORT,
+ 
+       KVM_SEV_NR_MAX,
+ };
+ 
+ struct kvm_sev_cmd {
+       __u32 id;       /* presumably an enum sev_cmd_id value -- TODO confirm against the handler */
+       __u64 data;     /* NOTE(review): follows a lone __u32; ABIs that 8-byte-align __u64 leave an implicit 4-byte hole here */
+       __u32 error;
+       __u32 sev_fd;
+ };
+ 
+ struct kvm_sev_launch_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 dh_uaddr;
+       __u32 dh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+ };
+ 
+ struct kvm_sev_launch_update_data {
+       __u64 uaddr;
+       __u32 len;
+ };
+ 
+ 
+ struct kvm_sev_launch_secret {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+ };
+ 
+ struct kvm_sev_launch_measure {
+       __u64 uaddr;
+       __u32 len;
+ };
+ 
+ struct kvm_sev_guest_status {
+       __u32 handle;
+       __u32 policy;
+       __u32 state;
+ };
+ 
+ struct kvm_sev_dbg {
+       __u64 src_uaddr;
+       __u64 dst_uaddr;
+       __u32 len;
+ };
   
   #define KVM_DEV_ASSIGN_ENABLE_IOMMU   (1 << 0)
   #define KVM_DEV_ASSIGN_PCI_2_3                (1 << 1)
author	Paolo Bonzini <pbonzini@redhat.com>
	Tue, 16 Jan 2018 15:34:48 +0000 (16:34 +0100)
committer	Radim Krčmář <rkrcmar@redhat.com>
	Tue, 16 Jan 2018 15:35:32 +0000 (16:35 +0100)
		1	2
Documentation/virtual/kvm/api.txt	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/cpufeatures.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/kvm_host.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/cpu/amd.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/cpuid.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/mmu.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/svm.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/uapi/linux/kvm.h	patch \|	diff1 \|	diff2 \|	blob \| history