KVM: x86: VMX: Intercept #GP to support access to VMware backdoor ports
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cab6ea1f8be50b9bd4e20bb18d983f2742bea2d9..b4d8da6c62c8561e49446625a80ee5db347dabd5 100644
@@ -131,13 +131,15 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 #endif
 
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
-#define KVM_VM_CR0_ALWAYS_ON                                           \
-       (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
+#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
+#define KVM_VM_CR0_ALWAYS_ON                           \
+       (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST |      \
+        X86_CR0_WP | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS                                     \
        (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
         | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_TSD)
 
+#define KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR4_VMXE
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
 
@@ -425,6 +427,35 @@ struct __packed vmcs12 {
  */
 #define VMCS12_MAX_FIELD_INDEX 0x17
 
+struct nested_vmx_msrs {
+       /*
+        * We only store the "true" versions of the VMX capability MSRs. We
+        * generate the "non-true" versions by setting the must-be-1 bits
+        * according to the SDM.
+        */
+       u32 procbased_ctls_low;
+       u32 procbased_ctls_high;
+       u32 secondary_ctls_low;
+       u32 secondary_ctls_high;
+       u32 pinbased_ctls_low;
+       u32 pinbased_ctls_high;
+       u32 exit_ctls_low;
+       u32 exit_ctls_high;
+       u32 entry_ctls_low;
+       u32 entry_ctls_high;
+       u32 misc_low;
+       u32 misc_high;
+       u32 ept_caps;
+       u32 vpid_caps;
+       u64 basic;
+       u64 cr0_fixed0;
+       u64 cr0_fixed1;
+       u64 cr4_fixed0;
+       u64 cr4_fixed1;
+       u64 vmcs_enum;
+       u64 vmfunc_controls;
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -476,32 +507,7 @@ struct nested_vmx {
        u16 vpid02;
        u16 last_vpid;
 
-       /*
-        * We only store the "true" versions of the VMX capability MSRs. We
-        * generate the "non-true" versions by setting the must-be-1 bits
-        * according to the SDM.
-        */
-       u32 nested_vmx_procbased_ctls_low;
-       u32 nested_vmx_procbased_ctls_high;
-       u32 nested_vmx_secondary_ctls_low;
-       u32 nested_vmx_secondary_ctls_high;
-       u32 nested_vmx_pinbased_ctls_low;
-       u32 nested_vmx_pinbased_ctls_high;
-       u32 nested_vmx_exit_ctls_low;
-       u32 nested_vmx_exit_ctls_high;
-       u32 nested_vmx_entry_ctls_low;
-       u32 nested_vmx_entry_ctls_high;
-       u32 nested_vmx_misc_low;
-       u32 nested_vmx_misc_high;
-       u32 nested_vmx_ept_caps;
-       u32 nested_vmx_vpid_caps;
-       u64 nested_vmx_basic;
-       u64 nested_vmx_cr0_fixed0;
-       u64 nested_vmx_cr0_fixed1;
-       u64 nested_vmx_cr4_fixed0;
-       u64 nested_vmx_cr4_fixed1;
-       u64 nested_vmx_vmcs_enum;
-       u64 nested_vmx_vmfunc_controls;
+       struct nested_vmx_msrs msrs;
 
        /* SMM related state */
        struct {
@@ -954,6 +960,7 @@ static struct vmcs_config {
        u32 cpu_based_2nd_exec_ctrl;
        u32 vmexit_ctrl;
        u32 vmentry_ctrl;
+       struct nested_vmx_msrs nested;
 } vmcs_config;
 
 static struct vmx_capability {
@@ -1032,6 +1039,11 @@ static inline bool is_invalid_opcode(u32 intr_info)
        return is_exception_n(intr_info, UD_VECTOR);
 }
 
+static inline bool is_gp_fault(u32 intr_info)
+{
+       return is_exception_n(intr_info, GP_VECTOR);
+}
+
 static inline bool is_external_interrupt(u32 intr_info)
 {
        return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1314,7 +1326,7 @@ static inline bool report_flexpriority(void)
 
 static inline unsigned nested_cpu_vmx_misc_cr3_count(struct kvm_vcpu *vcpu)
 {
-       return vmx_misc_cr3_count(to_vmx(vcpu)->nested.nested_vmx_misc_low);
+       return vmx_misc_cr3_count(to_vmx(vcpu)->nested.msrs.misc_low);
 }
 
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
@@ -1335,6 +1347,16 @@ static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
                PIN_BASED_VMX_PREEMPTION_TIMER;
 }
 
+static inline bool nested_cpu_has_nmi_exiting(struct vmcs12 *vmcs12)
+{
+       return vmcs12->pin_based_vm_exec_control & PIN_BASED_NMI_EXITING;
+}
+
+static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
+{
+       return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+}
+
 static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12)
 {
        return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
@@ -1858,6 +1880,14 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 
        eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
             (1u << DB_VECTOR) | (1u << AC_VECTOR);
+       /*
+        * Guest access to VMware backdoor ports could legitimately
+        * trigger #GP because of TSS I/O permission bitmap.
+        * We intercept those #GP and allow access to them anyway
+        * as VMware does.
+        */
+       if (enable_vmware_backdoor)
+               eb |= (1u << GP_VECTOR);
        if ((vcpu->guest_debug &
             (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
            (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
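
For context, the access pattern this intercept serves is the classic VMware backdoor probe. The sketch below uses the conventionally documented interface (magic 0x564D5868 "VMXh" on port 0x5658); these constants are illustrative and not taken from this patch. Executed at CPL 3 with the port blocked by the TSS I/O permission bitmap, the IN raises #GP, which KVM now intercepts and emulates when enable_vmware_backdoor is set.

/*
 * Illustrative guest-side probe of the VMware backdoor (conventional
 * values, not from this patch): IN from port 0x5658 with EAX = "VMXh"
 * and ECX = 10 (GETVERSION). Under a restrictive TSS I/O bitmap this
 * faults with #GP, which the hypervisor is expected to emulate.
 */
static inline unsigned int vmware_backdoor_version(void)
{
	unsigned int eax = 0x564D5868;	/* "VMXh" magic */
	unsigned int ebx = ~0u;
	unsigned int ecx = 10;		/* GETVERSION command */
	unsigned int edx = 0x5658;	/* backdoor I/O port "VX" */

	asm volatile("inl %%dx, %%eax"
		     : "+a"(eax), "+b"(ebx), "+c"(ecx), "+d"(edx));
	return eax;			/* backdoor version on success */
}
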
@@ -2683,8 +2713,13 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
  * bit in the high half is on if the corresponding bit in the control field
  * may be on. See also vmx_control_verify().
  */
-static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
+static void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, bool apicv)
 {
+       if (!nested) {
+               memset(msrs, 0, sizeof(*msrs));
+               return;
+       }
+
        /*
         * Note that as a general rule, the high half of the MSRs (bits in
         * the control fields which may be 1) should be initialized by the
@@ -2702,70 +2737,68 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 
        /* pin-based controls */
        rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
-               vmx->nested.nested_vmx_pinbased_ctls_low,
-               vmx->nested.nested_vmx_pinbased_ctls_high);
-       vmx->nested.nested_vmx_pinbased_ctls_low |=
+               msrs->pinbased_ctls_low,
+               msrs->pinbased_ctls_high);
+       msrs->pinbased_ctls_low |=
                PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
-       vmx->nested.nested_vmx_pinbased_ctls_high &=
+       msrs->pinbased_ctls_high &=
                PIN_BASED_EXT_INTR_MASK |
                PIN_BASED_NMI_EXITING |
-               PIN_BASED_VIRTUAL_NMIS;
-       vmx->nested.nested_vmx_pinbased_ctls_high |=
+               PIN_BASED_VIRTUAL_NMIS |
+               (apicv ? PIN_BASED_POSTED_INTR : 0);
+       msrs->pinbased_ctls_high |=
                PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
                PIN_BASED_VMX_PREEMPTION_TIMER;
-       if (kvm_vcpu_apicv_active(&vmx->vcpu))
-               vmx->nested.nested_vmx_pinbased_ctls_high |=
-                       PIN_BASED_POSTED_INTR;
 
        /* exit controls */
        rdmsr(MSR_IA32_VMX_EXIT_CTLS,
-               vmx->nested.nested_vmx_exit_ctls_low,
-               vmx->nested.nested_vmx_exit_ctls_high);
-       vmx->nested.nested_vmx_exit_ctls_low =
+               msrs->exit_ctls_low,
+               msrs->exit_ctls_high);
+       msrs->exit_ctls_low =
                VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
 
-       vmx->nested.nested_vmx_exit_ctls_high &=
+       msrs->exit_ctls_high &=
 #ifdef CONFIG_X86_64
                VM_EXIT_HOST_ADDR_SPACE_SIZE |
 #endif
                VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT;
-       vmx->nested.nested_vmx_exit_ctls_high |=
+       msrs->exit_ctls_high |=
                VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
                VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
                VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
 
        if (kvm_mpx_supported())
-               vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
+               msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
        /* We support free control of debug control saving. */
-       vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+       msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
 
        /* entry controls */
        rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
-               vmx->nested.nested_vmx_entry_ctls_low,
-               vmx->nested.nested_vmx_entry_ctls_high);
-       vmx->nested.nested_vmx_entry_ctls_low =
+               msrs->entry_ctls_low,
+               msrs->entry_ctls_high);
+       msrs->entry_ctls_low =
                VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
-       vmx->nested.nested_vmx_entry_ctls_high &=
+       msrs->entry_ctls_high &=
 #ifdef CONFIG_X86_64
                VM_ENTRY_IA32E_MODE |
 #endif
                VM_ENTRY_LOAD_IA32_PAT;
-       vmx->nested.nested_vmx_entry_ctls_high |=
+       msrs->entry_ctls_high |=
                (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
        if (kvm_mpx_supported())
-               vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
+               msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
        /* We support free control of debug control loading. */
-       vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+       msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
 
        /* cpu-based controls */
        rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
-               vmx->nested.nested_vmx_procbased_ctls_low,
-               vmx->nested.nested_vmx_procbased_ctls_high);
-       vmx->nested.nested_vmx_procbased_ctls_low =
+               msrs->procbased_ctls_low,
+               msrs->procbased_ctls_high);
+       msrs->procbased_ctls_low =
                CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
-       vmx->nested.nested_vmx_procbased_ctls_high &=
+       msrs->procbased_ctls_high &=
                CPU_BASED_VIRTUAL_INTR_PENDING |
                CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING |
                CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
@@ -2785,12 +2818,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
         * can use it to avoid exits to L1 - even when L0 runs L2
         * without MSR bitmaps.
         */
-       vmx->nested.nested_vmx_procbased_ctls_high |=
+       msrs->procbased_ctls_high |=
                CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
                CPU_BASED_USE_MSR_BITMAPS;
 
        /* We support free control of CR3 access interception. */
-       vmx->nested.nested_vmx_procbased_ctls_low &=
+       msrs->procbased_ctls_low &=
                ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
 
        /*
@@ -2798,10 +2831,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
         * depend on CPUID bits, they are added later by vmx_cpuid_update.
         */
        rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
-               vmx->nested.nested_vmx_secondary_ctls_low,
-               vmx->nested.nested_vmx_secondary_ctls_high);
-       vmx->nested.nested_vmx_secondary_ctls_low = 0;
-       vmx->nested.nested_vmx_secondary_ctls_high &=
+               msrs->secondary_ctls_low,
+               msrs->secondary_ctls_high);
+       msrs->secondary_ctls_low = 0;
+       msrs->secondary_ctls_high &=
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                SECONDARY_EXEC_DESC |
                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
@@ -2811,33 +2844,33 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 
        if (enable_ept) {
                /* nested EPT: emulate EPT also to L1 */
-               vmx->nested.nested_vmx_secondary_ctls_high |=
+               msrs->secondary_ctls_high |=
                        SECONDARY_EXEC_ENABLE_EPT;
-               vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
+               msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
                         VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
                if (cpu_has_vmx_ept_execute_only())
-                       vmx->nested.nested_vmx_ept_caps |=
+                       msrs->ept_caps |=
                                VMX_EPT_EXECUTE_ONLY_BIT;
-               vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
-               vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+               msrs->ept_caps &= vmx_capability.ept;
+               msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
                        VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
                        VMX_EPT_1GB_PAGE_BIT;
                if (enable_ept_ad_bits) {
-                       vmx->nested.nested_vmx_secondary_ctls_high |=
+                       msrs->secondary_ctls_high |=
                                SECONDARY_EXEC_ENABLE_PML;
-                       vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
+                       msrs->ept_caps |= VMX_EPT_AD_BIT;
                }
        }
 
        if (cpu_has_vmx_vmfunc()) {
-               vmx->nested.nested_vmx_secondary_ctls_high |=
+               msrs->secondary_ctls_high |=
                        SECONDARY_EXEC_ENABLE_VMFUNC;
                /*
                 * Advertise EPTP switching unconditionally
                 * since we emulate it
                 */
                if (enable_ept)
-                       vmx->nested.nested_vmx_vmfunc_controls =
+                       msrs->vmfunc_controls =
                                VMX_VMFUNC_EPTP_SWITCHING;
        }
 
@@ -2848,25 +2881,25 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
         * not failing the single-context invvpid, and it is worse.
         */
        if (enable_vpid) {
-               vmx->nested.nested_vmx_secondary_ctls_high |=
+               msrs->secondary_ctls_high |=
                        SECONDARY_EXEC_ENABLE_VPID;
-               vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
+               msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
                        VMX_VPID_EXTENT_SUPPORTED_MASK;
        }
 
        if (enable_unrestricted_guest)
-               vmx->nested.nested_vmx_secondary_ctls_high |=
+               msrs->secondary_ctls_high |=
                        SECONDARY_EXEC_UNRESTRICTED_GUEST;
 
        /* miscellaneous data */
        rdmsr(MSR_IA32_VMX_MISC,
-               vmx->nested.nested_vmx_misc_low,
-               vmx->nested.nested_vmx_misc_high);
-       vmx->nested.nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA;
-       vmx->nested.nested_vmx_misc_low |=
+               msrs->misc_low,
+               msrs->misc_high);
+       msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
+       msrs->misc_low |=
                VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
                VMX_MISC_ACTIVITY_HLT;
-       vmx->nested.nested_vmx_misc_high = 0;
+       msrs->misc_high = 0;
 
        /*
         * This MSR reports some information about VMX support. We
@@ -2874,14 +2907,14 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
         * guest, and the VMCS structure we give it - not about the
         * VMX support of the underlying hardware.
         */
-       vmx->nested.nested_vmx_basic =
+       msrs->basic =
                VMCS12_REVISION |
                VMX_BASIC_TRUE_CTLS |
                ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
                (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
 
        if (cpu_has_vmx_basic_inout())
-               vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
+               msrs->basic |= VMX_BASIC_INOUT;
 
        /*
         * These MSRs specify bits which the guest must keep fixed on
@@ -2890,15 +2923,15 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
         */
 #define VMXON_CR0_ALWAYSON     (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
 #define VMXON_CR4_ALWAYSON     X86_CR4_VMXE
-       vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
-       vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
+       msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
+       msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
 
        /* These MSRs specify bits which the guest must keep fixed off. */
-       rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
-       rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
+       rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
+       rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
 
        /* highest index: VMX_PREEMPTION_TIMER_VALUE */
-       vmx->nested.nested_vmx_vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
+       msrs->vmcs_enum = VMCS12_MAX_FIELD_INDEX << 1;
 }
 
 /*
@@ -2935,7 +2968,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
                BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
                /* reserved */
                BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
-       u64 vmx_basic = vmx->nested.nested_vmx_basic;
+       u64 vmx_basic = vmx->nested.msrs.basic;
 
        if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
                return -EINVAL;
@@ -2954,7 +2987,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
        if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
                return -EINVAL;
 
-       vmx->nested.nested_vmx_basic = data;
+       vmx->nested.msrs.basic = data;
        return 0;
 }
 
@@ -2966,24 +2999,24 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
 
        switch (msr_index) {
        case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
-               lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
-               highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
+               lowp = &vmx->nested.msrs.pinbased_ctls_low;
+               highp = &vmx->nested.msrs.pinbased_ctls_high;
                break;
        case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
-               lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
-               highp = &vmx->nested.nested_vmx_procbased_ctls_high;
+               lowp = &vmx->nested.msrs.procbased_ctls_low;
+               highp = &vmx->nested.msrs.procbased_ctls_high;
                break;
        case MSR_IA32_VMX_TRUE_EXIT_CTLS:
-               lowp = &vmx->nested.nested_vmx_exit_ctls_low;
-               highp = &vmx->nested.nested_vmx_exit_ctls_high;
+               lowp = &vmx->nested.msrs.exit_ctls_low;
+               highp = &vmx->nested.msrs.exit_ctls_high;
                break;
        case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
-               lowp = &vmx->nested.nested_vmx_entry_ctls_low;
-               highp = &vmx->nested.nested_vmx_entry_ctls_high;
+               lowp = &vmx->nested.msrs.entry_ctls_low;
+               highp = &vmx->nested.msrs.entry_ctls_high;
                break;
        case MSR_IA32_VMX_PROCBASED_CTLS2:
-               lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
-               highp = &vmx->nested.nested_vmx_secondary_ctls_high;
+               lowp = &vmx->nested.msrs.secondary_ctls_low;
+               highp = &vmx->nested.msrs.secondary_ctls_high;
                break;
        default:
                BUG();
@@ -3014,13 +3047,13 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
                GENMASK_ULL(13, 9) | BIT_ULL(31);
        u64 vmx_misc;
 
-       vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low,
-                                  vmx->nested.nested_vmx_misc_high);
+       vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+                                  vmx->nested.msrs.misc_high);
 
        if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
                return -EINVAL;
 
-       if ((vmx->nested.nested_vmx_pinbased_ctls_high &
+       if ((vmx->nested.msrs.pinbased_ctls_high &
             PIN_BASED_VMX_PREEMPTION_TIMER) &&
            vmx_misc_preemption_timer_rate(data) !=
            vmx_misc_preemption_timer_rate(vmx_misc))
@@ -3035,8 +3068,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
        if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
                return -EINVAL;
 
-       vmx->nested.nested_vmx_misc_low = data;
-       vmx->nested.nested_vmx_misc_high = data >> 32;
+       vmx->nested.msrs.misc_low = data;
+       vmx->nested.msrs.misc_high = data >> 32;
        return 0;
 }
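
As a reminder of the convention these restore helpers (and nested_vmx_setup_ctls_msrs() above) rely on: each control capability MSR packs the must-be-1 settings in its low half and the may-be-1 settings in its high half. A sketch along the lines of the existing helpers in this file, shown for orientation only and not part of this diff:

/* Pack a low/high pair back into the 64-bit capability MSR image. */
static inline u64 vmx_control_msr(u32 low, u32 high)
{
	return low | ((u64)high << 32);
}

/*
 * A guest-chosen control word is acceptable iff every must-be-1 bit
 * (low) is set and no bit outside the may-be-1 mask (high) is set.
 */
static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
	return ((control & high) | low) == control;
}
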
 
@@ -3044,15 +3077,15 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
 {
        u64 vmx_ept_vpid_cap;
 
-       vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
-                                          vmx->nested.nested_vmx_vpid_caps);
+       vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
+                                          vmx->nested.msrs.vpid_caps);
 
        /* Every bit is either reserved or a feature bit. */
        if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
                return -EINVAL;
 
-       vmx->nested.nested_vmx_ept_caps = data;
-       vmx->nested.nested_vmx_vpid_caps = data >> 32;
+       vmx->nested.msrs.ept_caps = data;
+       vmx->nested.msrs.vpid_caps = data >> 32;
        return 0;
 }
 
@@ -3062,10 +3095,10 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
 
        switch (msr_index) {
        case MSR_IA32_VMX_CR0_FIXED0:
-               msr = &vmx->nested.nested_vmx_cr0_fixed0;
+               msr = &vmx->nested.msrs.cr0_fixed0;
                break;
        case MSR_IA32_VMX_CR4_FIXED0:
-               msr = &vmx->nested.nested_vmx_cr4_fixed0;
+               msr = &vmx->nested.msrs.cr4_fixed0;
                break;
        default:
                BUG();
@@ -3129,7 +3162,7 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
        case MSR_IA32_VMX_EPT_VPID_CAP:
                return vmx_restore_vmx_ept_vpid_cap(vmx, data);
        case MSR_IA32_VMX_VMCS_ENUM:
-               vmx->nested.nested_vmx_vmcs_enum = data;
+               vmx->nested.msrs.vmcs_enum = data;
                return 0;
        default:
                /*
@@ -3140,77 +3173,75 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 }
 
 /* Returns 0 on success, non-0 otherwise. */
-static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+static int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
 {
-       struct vcpu_vmx *vmx = to_vmx(vcpu);
-
        switch (msr_index) {
        case MSR_IA32_VMX_BASIC:
-               *pdata = vmx->nested.nested_vmx_basic;
+               *pdata = msrs->basic;
                break;
        case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
        case MSR_IA32_VMX_PINBASED_CTLS:
                *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_pinbased_ctls_low,
-                       vmx->nested.nested_vmx_pinbased_ctls_high);
+                       msrs->pinbased_ctls_low,
+                       msrs->pinbased_ctls_high);
                if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
                        *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
                break;
        case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
        case MSR_IA32_VMX_PROCBASED_CTLS:
                *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_procbased_ctls_low,
-                       vmx->nested.nested_vmx_procbased_ctls_high);
+                       msrs->procbased_ctls_low,
+                       msrs->procbased_ctls_high);
                if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
                        *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
                break;
        case MSR_IA32_VMX_TRUE_EXIT_CTLS:
        case MSR_IA32_VMX_EXIT_CTLS:
                *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_exit_ctls_low,
-                       vmx->nested.nested_vmx_exit_ctls_high);
+                       msrs->exit_ctls_low,
+                       msrs->exit_ctls_high);
                if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
                        *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
                break;
        case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
        case MSR_IA32_VMX_ENTRY_CTLS:
                *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_entry_ctls_low,
-                       vmx->nested.nested_vmx_entry_ctls_high);
+                       msrs->entry_ctls_low,
+                       msrs->entry_ctls_high);
                if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
                        *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
                break;
        case MSR_IA32_VMX_MISC:
                *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_misc_low,
-                       vmx->nested.nested_vmx_misc_high);
+                       msrs->misc_low,
+                       msrs->misc_high);
                break;
        case MSR_IA32_VMX_CR0_FIXED0:
-               *pdata = vmx->nested.nested_vmx_cr0_fixed0;
+               *pdata = msrs->cr0_fixed0;
                break;
        case MSR_IA32_VMX_CR0_FIXED1:
-               *pdata = vmx->nested.nested_vmx_cr0_fixed1;
+               *pdata = msrs->cr0_fixed1;
                break;
        case MSR_IA32_VMX_CR4_FIXED0:
-               *pdata = vmx->nested.nested_vmx_cr4_fixed0;
+               *pdata = msrs->cr4_fixed0;
                break;
        case MSR_IA32_VMX_CR4_FIXED1:
-               *pdata = vmx->nested.nested_vmx_cr4_fixed1;
+               *pdata = msrs->cr4_fixed1;
                break;
        case MSR_IA32_VMX_VMCS_ENUM:
-               *pdata = vmx->nested.nested_vmx_vmcs_enum;
+               *pdata = msrs->vmcs_enum;
                break;
        case MSR_IA32_VMX_PROCBASED_CTLS2:
                *pdata = vmx_control_msr(
-                       vmx->nested.nested_vmx_secondary_ctls_low,
-                       vmx->nested.nested_vmx_secondary_ctls_high);
+                       msrs->secondary_ctls_low,
+                       msrs->secondary_ctls_high);
                break;
        case MSR_IA32_VMX_EPT_VPID_CAP:
-               *pdata = vmx->nested.nested_vmx_ept_caps |
-                       ((u64)vmx->nested.nested_vmx_vpid_caps << 32);
+               *pdata = msrs->ept_caps |
+                       ((u64)msrs->vpid_caps << 32);
                break;
        case MSR_IA32_VMX_VMFUNC:
-               *pdata = vmx->nested.nested_vmx_vmfunc_controls;
+               *pdata = msrs->vmfunc_controls;
                break;
        default:
                return 1;
@@ -3227,6 +3258,20 @@ static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
        return !(val & ~valid_bits);
 }
 
+static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
+{
+       switch (msr->index) {
+       case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+               if (!nested)
+                       return 1;
+               return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
+       default:
+               return 1;
+       }
+
+       return 0;
+}
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
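
With vmx_get_msr_feature() wired to vmcs_config.nested, the VMX capability MSRs become readable without creating a vCPU. A minimal userspace sketch, assuming the system-scope KVM_GET_MSRS ioctl on the /dev/kvm fd from the same MSR-feature patch series; that ioctl usage is an assumption for illustration and is not shown in this diff:

/* Illustrative only: read IA32_VMX_BASIC as a "feature MSR" from /dev/kvm.
 * Assumes the system-scope KVM_GET_MSRS ioctl from the same patch series. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	struct {
		struct kvm_msrs hdr;
		struct kvm_msr_entry entry;
	} req;
	int kvm = open("/dev/kvm", O_RDWR);

	memset(&req, 0, sizeof(req));
	req.hdr.nmsrs = 1;
	req.entry.index = 0x480;	/* MSR_IA32_VMX_BASIC */

	if (kvm >= 0 && ioctl(kvm, KVM_GET_MSRS, &req) == 1)
		printf("IA32_VMX_BASIC: %#llx\n",
		       (unsigned long long)req.entry.data);
	return 0;
}
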
@@ -3298,7 +3343,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                if (!nested_vmx_allowed(vcpu))
                        return 1;
-               return vmx_get_vmx_msr(vcpu, msr_info->index, &msr_info->data);
+               return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
+                                      &msr_info->data);
        case MSR_IA32_XSS:
                if (!vmx_xsaves_supported())
                        return 1;
@@ -3689,6 +3735,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
        u32 _vmexit_control = 0;
        u32 _vmentry_control = 0;
 
+       memset(vmcs_conf, 0, sizeof(*vmcs_conf));
        min = CPU_BASED_HLT_EXITING |
 #ifdef CONFIG_X86_64
              CPU_BASED_CR8_LOAD_EXITING |
@@ -4280,7 +4327,7 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 
 static void vmx_decache_cr3(struct kvm_vcpu *vcpu)
 {
-       if (enable_ept && is_paging(vcpu))
+       if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu)))
                vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
        __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
 }
@@ -4328,11 +4375,11 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 
 static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
-       u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
-       u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+       u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
+       u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
-       if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
+       if (to_vmx(vcpu)->nested.msrs.secondary_ctls_high &
                SECONDARY_EXEC_UNRESTRICTED_GUEST &&
            nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
                fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
@@ -4342,16 +4389,16 @@ static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 
 static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
-       u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
-       u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+       u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr0_fixed0;
+       u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr0_fixed1;
 
        return fixed_bits_valid(val, fixed0, fixed1);
 }
 
 static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
 {
-       u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0;
-       u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1;
+       u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
+       u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
 
        return fixed_bits_valid(val, fixed0, fixed1);
 }
@@ -4417,7 +4464,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        }
 #endif
 
-       if (enable_ept)
+       if (enable_ept && !enable_unrestricted_guest)
                ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
 
        vmcs_writel(CR0_READ_SHADOW, cr0);
@@ -4458,7 +4505,8 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        if (enable_ept) {
                eptp = construct_eptp(vcpu, cr3);
                vmcs_write64(EPT_POINTER, eptp);
-               if (is_paging(vcpu) || is_guest_mode(vcpu))
+               if (enable_unrestricted_guest || is_paging(vcpu) ||
+                   is_guest_mode(vcpu))
                        guest_cr3 = kvm_read_cr3(vcpu);
                else
                        guest_cr3 = vcpu->kvm->arch.ept_identity_map_addr;
@@ -4476,11 +4524,15 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         * is in force while we are in guest mode.  Do not let guests control
         * this bit, even if host CR4.MCE == 0.
         */
-       unsigned long hw_cr4 =
-               (cr4_read_shadow() & X86_CR4_MCE) |
-               (cr4 & ~X86_CR4_MCE) |
-               (to_vmx(vcpu)->rmode.vm86_active ?
-                KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+       unsigned long hw_cr4;
+
+       hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
+       if (enable_unrestricted_guest)
+               hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
+       else if (to_vmx(vcpu)->rmode.vm86_active)
+               hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
+       else
+               hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
 
        if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) {
                vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
@@ -4506,16 +4558,17 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                return 1;
 
        vcpu->arch.cr4 = cr4;
-       if (enable_ept) {
-               if (!is_paging(vcpu)) {
-                       hw_cr4 &= ~X86_CR4_PAE;
-                       hw_cr4 |= X86_CR4_PSE;
-               } else if (!(cr4 & X86_CR4_PAE)) {
-                       hw_cr4 &= ~X86_CR4_PAE;
+
+       if (!enable_unrestricted_guest) {
+               if (enable_ept) {
+                       if (!is_paging(vcpu)) {
+                               hw_cr4 &= ~X86_CR4_PAE;
+                               hw_cr4 |= X86_CR4_PSE;
+                       } else if (!(cr4 & X86_CR4_PAE)) {
+                               hw_cr4 &= ~X86_CR4_PAE;
+                       }
                }
-       }
 
-       if (!enable_unrestricted_guest && !is_paging(vcpu))
                /*
                 * SMEP/SMAP/PKU is disabled if CPU is in non-paging mode in
                 * hardware.  To emulate this behavior, SMEP/SMAP/PKU needs
@@ -4527,7 +4580,9 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
                 * If enable_unrestricted_guest, the CPU automatically
                 * disables SMEP/SMAP/PKU when the guest sets CR0.PG=0.
                 */
-               hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+               if (!is_paging(vcpu))
+                       hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE);
+       }
 
        vmcs_writel(CR4_READ_SHADOW, cr4);
        vmcs_writel(GUEST_CR4, hw_cr4);
@@ -5554,10 +5609,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
                if (nested) {
                        if (xsaves_enabled)
-                               vmx->nested.nested_vmx_secondary_ctls_high |=
+                               vmx->nested.msrs.secondary_ctls_high |=
                                        SECONDARY_EXEC_XSAVES;
                        else
-                               vmx->nested.nested_vmx_secondary_ctls_high &=
+                               vmx->nested.msrs.secondary_ctls_high &=
                                        ~SECONDARY_EXEC_XSAVES;
                }
        }
@@ -5569,10 +5624,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
                if (nested) {
                        if (rdtscp_enabled)
-                               vmx->nested.nested_vmx_secondary_ctls_high |=
+                               vmx->nested.msrs.secondary_ctls_high |=
                                        SECONDARY_EXEC_RDTSCP;
                        else
-                               vmx->nested.nested_vmx_secondary_ctls_high &=
+                               vmx->nested.msrs.secondary_ctls_high &=
                                        ~SECONDARY_EXEC_RDTSCP;
                }
        }
@@ -5590,10 +5645,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
                if (nested) {
                        if (invpcid_enabled)
-                               vmx->nested.nested_vmx_secondary_ctls_high |=
+                               vmx->nested.msrs.secondary_ctls_high |=
                                        SECONDARY_EXEC_ENABLE_INVPCID;
                        else
-                               vmx->nested.nested_vmx_secondary_ctls_high &=
+                               vmx->nested.msrs.secondary_ctls_high &=
                                        ~SECONDARY_EXEC_ENABLE_INVPCID;
                }
        }
@@ -5605,10 +5660,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
                if (nested) {
                        if (rdrand_enabled)
-                               vmx->nested.nested_vmx_secondary_ctls_high |=
+                               vmx->nested.msrs.secondary_ctls_high |=
                                        SECONDARY_EXEC_RDRAND_EXITING;
                        else
-                               vmx->nested.nested_vmx_secondary_ctls_high &=
+                               vmx->nested.msrs.secondary_ctls_high &=
                                        ~SECONDARY_EXEC_RDRAND_EXITING;
                }
        }
@@ -5620,10 +5675,10 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 
                if (nested) {
                        if (rdseed_enabled)
-                               vmx->nested.nested_vmx_secondary_ctls_high |=
+                               vmx->nested.msrs.secondary_ctls_high |=
                                        SECONDARY_EXEC_RDSEED_EXITING;
                        else
-                               vmx->nested.nested_vmx_secondary_ctls_high &=
+                               vmx->nested.msrs.secondary_ctls_high &=
                                        ~SECONDARY_EXEC_RDSEED_EXITING;
                }
        }
@@ -5767,6 +5822,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
        vmx->rmode.vm86_active = 0;
        vmx->spec_ctrl = 0;
 
+       vcpu->arch.microcode_version = 0x100000000ULL;
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
        kvm_set_cr8(vcpu, 0);
 
@@ -5873,8 +5929,7 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
 
 static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu)
 {
-       return get_vmcs12(vcpu)->pin_based_vm_exec_control &
-               PIN_BASED_NMI_EXITING;
+       return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu));
 }
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
@@ -6012,6 +6067,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 {
        int ret;
 
+       if (enable_unrestricted_guest)
+               return 0;
+
        ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
                                    PAGE_SIZE * 3);
        if (ret)
@@ -6135,6 +6193,17 @@ static int handle_exception(struct kvm_vcpu *vcpu)
        if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
                error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 
+       if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
+               WARN_ON_ONCE(!enable_vmware_backdoor);
+               er = emulate_instruction(vcpu,
+                       EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
+               if (er == EMULATE_USER_EXIT)
+                       return 0;
+               else if (er != EMULATE_DONE)
+                       kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+               return 1;
+       }
+
        /*
         * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
         * MMIO, it is better to report an internal error.
@@ -6220,28 +6289,22 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
 static int handle_io(struct kvm_vcpu *vcpu)
 {
        unsigned long exit_qualification;
-       int size, in, string, ret;
+       int size, in, string;
        unsigned port;
 
        exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
        string = (exit_qualification & 16) != 0;
-       in = (exit_qualification & 8) != 0;
 
        ++vcpu->stat.io_exits;
 
-       if (string || in)
+       if (string)
                return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
+       in = (exit_qualification & 8) != 0;
 
-       ret = kvm_skip_emulated_instruction(vcpu);
-
-       /*
-        * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
-        * KVM_EXIT_DEBUG here.
-        */
-       return kvm_fast_pio_out(vcpu, size, port) && ret;
+       return kvm_fast_pio(vcpu, size, port, in);
 }
 
 static void
@@ -6332,6 +6395,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
                        err = handle_set_cr0(vcpu, val);
                        return kvm_complete_insn_gp(vcpu, err);
                case 3:
+                       WARN_ON_ONCE(enable_unrestricted_guest);
                        err = kvm_set_cr3(vcpu, val);
                        return kvm_complete_insn_gp(vcpu, err);
                case 4:
@@ -6364,6 +6428,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
        case 1: /*mov from cr*/
                switch (cr) {
                case 3:
+                       WARN_ON_ONCE(enable_unrestricted_guest);
                        val = kvm_read_cr3(vcpu);
                        kvm_register_write(vcpu, reg, val);
                        trace_kvm_cr_read(cr, val);
@@ -6818,6 +6883,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
        bool intr_window_requested;
        unsigned count = 130;
 
+       /*
+        * We should never reach the point where we are emulating L2
+        * due to invalid guest state as that means we incorrectly
+        * allowed a nested VMEntry with an invalid vmcs12.
+        */
+       WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
+
        cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
        intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
 
@@ -7082,6 +7154,7 @@ static __init int hardware_setup(void)
                init_vmcs_shadow_fields();
 
        kvm_set_posted_intr_wakeup_handler(wakeup_handler);
+       nested_vmx_setup_ctls_msrs(&vmcs_config.nested, enable_apicv);
 
        kvm_mce_cap_supported |= MCG_LMCE_P;
 
@@ -7942,9 +8015,9 @@ static int handle_invept(struct kvm_vcpu *vcpu)
                u64 eptp, gpa;
        } operand;
 
-       if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+       if (!(vmx->nested.msrs.secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_EPT) ||
-           !(vmx->nested.nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+           !(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 1;
        }
@@ -7955,7 +8028,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 
-       types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
+       types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
        if (type >= 32 || !(types & (1 << type))) {
                nested_vmx_failValid(vcpu,
@@ -8006,9 +8079,9 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
                u64 gla;
        } operand;
 
-       if (!(vmx->nested.nested_vmx_secondary_ctls_high &
+       if (!(vmx->nested.msrs.secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_VPID) ||
-                       !(vmx->nested.nested_vmx_vpid_caps & VMX_VPID_INVVPID_BIT)) {
+                       !(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 1;
        }
@@ -8019,7 +8092,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
        vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
        type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
 
-       types = (vmx->nested.nested_vmx_vpid_caps &
+       types = (vmx->nested.msrs.vpid_caps &
                        VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
 
        if (type >= 32 || !(types & (1 << type))) {
@@ -8113,11 +8186,11 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
        /* Check for memory type validity */
        switch (address & VMX_EPTP_MT_MASK) {
        case VMX_EPTP_MT_UC:
-               if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_UC_BIT))
+               if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT))
                        return false;
                break;
        case VMX_EPTP_MT_WB:
-               if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPTP_WB_BIT))
+               if (!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT))
                        return false;
                break;
        default:
@@ -8134,7 +8207,7 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
 
        /* AD, if set, should be supported */
        if (address & VMX_EPTP_AD_ENABLE_BIT) {
-               if (!(vmx->nested.nested_vmx_ept_caps & VMX_EPT_AD_BIT))
+               if (!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT))
                        return false;
        }
 
@@ -9765,14 +9838,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
                        goto free_vmcs;
        }
 
-       if (enable_ept) {
+       if (enable_ept && !enable_unrestricted_guest) {
                err = init_rmode_identity_map(kvm);
                if (err)
                        goto free_vmcs;
        }
 
        if (nested) {
-               nested_vmx_setup_ctls_msrs(vmx);
+               nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
+                                          kvm_vcpu_apicv_active(&vmx->vcpu));
                vmx->nested.vpid02 = allocate_vpid();
        }
 
@@ -9812,6 +9886,7 @@ static void __init vmx_check_processor_compat(void *rtn)
        *(int *)rtn = 0;
        if (setup_vmcs_config(&vmcs_conf) < 0)
                *(int *)rtn = -EIO;
+       nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, enable_apicv);
        if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
                printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
                                smp_processor_id());
@@ -9899,12 +9974,12 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct kvm_cpuid_entry2 *entry;
 
-       vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff;
-       vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
+       vmx->nested.msrs.cr0_fixed1 = 0xffffffff;
+       vmx->nested.msrs.cr4_fixed1 = X86_CR4_PCE;
 
 #define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do {           \
        if (entry && (entry->_reg & (_cpuid_mask)))                     \
-               vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask);       \
+               vmx->nested.msrs.cr4_fixed1 |= (_cr4_mask);     \
 } while (0)
 
        entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
@@ -10001,7 +10076,7 @@ static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 
        kvm_mmu_unload(vcpu);
        kvm_init_shadow_ept_mmu(vcpu,
-                       to_vmx(vcpu)->nested.nested_vmx_ept_caps &
+                       to_vmx(vcpu)->nested.msrs.ept_caps &
                        VMX_EPT_EXECUTE_ONLY_BIT,
                        nested_ept_ad_enabled(vcpu));
        vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
@@ -10940,6 +11015,14 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                vmx->nested.dirty_vmcs12 = false;
        }
 
+       /*
+        * Guest state is invalid and unrestricted guest is disabled,
+        * which means L1 attempted VMEntry to L2 with invalid state.
+        * Fail the VMEntry.
+        */
+       if (vmx->emulation_required)
+               return 1;
+
        /* Shadow page tables on either EPT or shadow page tables. */
        if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
                                entry_failure_code))
@@ -10953,6 +11036,19 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
        return 0;
 }
 
+static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
+{
+       if (!nested_cpu_has_nmi_exiting(vmcs12) &&
+           nested_cpu_has_virtual_nmis(vmcs12))
+               return -EINVAL;
+
+       if (!nested_cpu_has_virtual_nmis(vmcs12) &&
+           nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING))
+               return -EINVAL;
+
+       return 0;
+}
+
 static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -10980,26 +11076,29 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
        if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
-                               vmx->nested.nested_vmx_procbased_ctls_low,
-                               vmx->nested.nested_vmx_procbased_ctls_high) ||
+                               vmx->nested.msrs.procbased_ctls_low,
+                               vmx->nested.msrs.procbased_ctls_high) ||
            (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
             !vmx_control_verify(vmcs12->secondary_vm_exec_control,
-                                vmx->nested.nested_vmx_secondary_ctls_low,
-                                vmx->nested.nested_vmx_secondary_ctls_high)) ||
+                                vmx->nested.msrs.secondary_ctls_low,
+                                vmx->nested.msrs.secondary_ctls_high)) ||
            !vmx_control_verify(vmcs12->pin_based_vm_exec_control,
-                               vmx->nested.nested_vmx_pinbased_ctls_low,
-                               vmx->nested.nested_vmx_pinbased_ctls_high) ||
+                               vmx->nested.msrs.pinbased_ctls_low,
+                               vmx->nested.msrs.pinbased_ctls_high) ||
            !vmx_control_verify(vmcs12->vm_exit_controls,
-                               vmx->nested.nested_vmx_exit_ctls_low,
-                               vmx->nested.nested_vmx_exit_ctls_high) ||
+                               vmx->nested.msrs.exit_ctls_low,
+                               vmx->nested.msrs.exit_ctls_high) ||
            !vmx_control_verify(vmcs12->vm_entry_controls,
-                               vmx->nested.nested_vmx_entry_ctls_low,
-                               vmx->nested.nested_vmx_entry_ctls_high))
+                               vmx->nested.msrs.entry_ctls_low,
+                               vmx->nested.msrs.entry_ctls_high))
+               return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+       if (nested_vmx_check_nmi_controls(vmcs12))
                return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
        if (nested_cpu_has_vmfunc(vmcs12)) {
                if (vmcs12->vm_function_control &
-                   ~vmx->nested.nested_vmx_vmfunc_controls)
+                   ~vmx->nested.msrs.vmfunc_controls)
                        return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
                if (nested_cpu_has_eptp_switching(vmcs12)) {
@@ -12297,6 +12396,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
        .vcpu_put = vmx_vcpu_put,
 
        .update_bp_intercept = update_exception_bitmap,
+       .get_msr_feature = vmx_get_msr_feature,
        .get_msr = vmx_get_msr,
        .set_msr = vmx_set_msr,
        .get_segment_base = vmx_get_segment_base,