Merge branch 'x86/core' into perf/core, to resolve conflicts and to pick up completed...

author Ingo Molnar <mingo@kernel.org>

Mon, 25 Nov 2019 08:09:27 +0000 (09:09 +0100)

committer Ingo Molnar <mingo@kernel.org>

Mon, 25 Nov 2019 08:09:27 +0000 (09:09 +0100)
author Ingo Molnar <mingo@kernel.org>
Mon, 25 Nov 2019 08:09:27 +0000 (09:09 +0100)
committer Ingo Molnar <mingo@kernel.org>
Mon, 25 Nov 2019 08:09:27 +0000 (09:09 +0100)
diff --combined arch/x86/kvm/x86.c

index 5d530521f11ddb5cd98bb3816f661cd51475ccba,777574f547c0148d2f8866a9a1980b023f8d7e95..783aa8d141bfa4da0fcc045282da0e5769307dbe
--- 1/arch/x86/kvm/x86.c
--- 2/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -68,6 -68,7 +68,7 @@@
   #include <asm/mshyperv.h>
   #include <asm/hypervisor.h>
   #include <asm/intel_pt.h>
+ #include <asm/emulate_prefix.h>
   #include <clocksource/hyperv_timer.h>
   
   #define CREATE_TRACE_POINTS
@@@ -213,7 -214,6 +214,7 @@@ struct kvm_stats_debugfs_item debugfs_e
         { "mmu_unsync", VM_STAT(mmu_unsync) },
         { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
         { "largepages", VM_STAT(lpages, .mode = 0444) },
+ +      { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
         { "max_mmu_page_hash_collisions",
                 VM_STAT(max_mmu_page_hash_collisions) },
         { NULL }
@@@ -361,7 -361,8 +362,7 @@@ EXPORT_SYMBOL_GPL(kvm_set_apic_base)
   asmlinkage __visible void kvm_spurious_fault(void)
   {
         /* Fault while not rebooting.  We want the trace. */
- -      if (!kvm_rebooting)
- -              BUG();
+ +      BUG_ON(!kvm_rebooting);
   }
   EXPORT_SYMBOL_GPL(kvm_spurious_fault);
   
@@@ -1133,15 -1134,13 +1134,15 @@@ EXPORT_SYMBOL_GPL(kvm_rdpmc)
    * List of msr numbers which we expose to userspace through KVM_GET_MSRS
    * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
    *
- - * This list is modified at module load time to reflect the
+ + * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features)
+ + * extract the supported MSRs from the related const lists.
+ + * msrs_to_save is selected from the msrs_to_save_all to reflect the
    * capabilities of the host cpu. This capabilities test skips MSRs that are
- - * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+ + * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
    * may depend on host virtualization features rather than host cpu features.
    */
   
- -static u32 msrs_to_save[] = {
+ +static const u32 msrs_to_save_all[] = {
         MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
         MSR_STAR,
   #ifdef CONFIG_X86_64
@@@ -1182,10 -1181,9 +1183,10 @@@
         MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
   };
   
+ +static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
   static unsigned num_msrs_to_save;
   
- -static u32 emulated_msrs[] = {
+ +static const u32 emulated_msrs_all[] = {
         MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
         MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
         HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
@@@ -1224,7 -1222,7 +1225,7 @@@
          * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
          * We always support the "true" VMX control MSRs, even if the host
          * processor does not, so I am putting these registers here rather
- -       * than in msrs_to_save.
+ +       * than in msrs_to_save_all.
          */
         MSR_IA32_VMX_BASIC,
         MSR_IA32_VMX_TRUE_PINBASED_CTLS,
@@@ -1243,14 -1241,13 +1244,14 @@@
         MSR_KVM_POLL_CONTROL,
   };
   
+ +static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
   static unsigned num_emulated_msrs;
   
   /*
    * List of msr numbers which are used to expose MSR-based features that
    * can be used by a hypervisor to validate requested CPU features.
    */
- -static u32 msr_based_features[] = {
+ +static const u32 msr_based_features_all[] = {
         MSR_IA32_VMX_BASIC,
         MSR_IA32_VMX_TRUE_PINBASED_CTLS,
         MSR_IA32_VMX_PINBASED_CTLS,
@@@ -1275,7 -1272,6 +1276,7 @@@
         MSR_IA32_ARCH_CAPABILITIES,
   };
   
+ +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
   static unsigned int num_msr_based_features;
   
   static u64 kvm_get_arch_capabilities(void)
@@@ -1285,14 -1281,6 +1286,14 @@@
         if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
                 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
   
+ +      /*
+ +       * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
+ +       * the nested hypervisor runs with NX huge pages.  If it is not,
+ +       * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
+ +       * L1 guests, so it need not worry about its own (L2) guests.
+ +       */
+ +      data |= ARCH_CAP_PSCHANGE_MC_NO;
+ +
         /*
          * If we're doing cache flushes (either "always" or "cond")
          * we will do one whenever the guest does a vmlaunch/vmresume.
@@@ -1312,25 -1300,6 +1313,25 @@@
         if (!boot_cpu_has_bug(X86_BUG_MDS))
                 data |= ARCH_CAP_MDS_NO;
   
+ +      /*
+ +       * On TAA affected systems, export MDS_NO=0 when:
+ +       *      - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
+ +       *      - Updated microcode is present. This is detected by
+ +       *        the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
+ +       *        that VERW clears CPU buffers.
+ +       *
+ +       * When MDS_NO=0 is exported, guests deploy clear CPU buffer
+ +       * mitigation and don't complain:
+ +       *
+ +       *      "Vulnerable: Clear CPU buffers attempted, no microcode"
+ +       *
+ +       * If TSX is disabled on the system, guests are also mitigated against
+ +       * TAA and clear CPU buffer mitigation is not required for guests.
+ +       */
+ +      if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
+ +          (data & ARCH_CAP_TSX_CTRL_MSR))
+ +              data &= ~ARCH_CAP_MDS_NO;
+ +
         return data;
   }
   
@@@ -2569,7 -2538,6 +2570,7 @@@ static int kvm_pv_enable_async_pf(struc
   static void kvmclock_reset(struct kvm_vcpu *vcpu)
   {
         vcpu->arch.pv_time_enabled = false;
+ +      vcpu->arch.time = 0;
   }
   
   static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
@@@ -2735,6 -2703,8 +2736,6 @@@ int kvm_set_msr_common(struct kvm_vcpu 
         case MSR_KVM_SYSTEM_TIME: {
                 struct kvm_arch *ka = &vcpu->kvm->arch;
   
- -              kvmclock_reset(vcpu);
- -
                 if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
                         bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
   
@@@ -2748,13 -2718,14 +2749,13 @@@
                 kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
   
                 /* we verify if the enable bit is set... */
+ +              vcpu->arch.pv_time_enabled = false;
                 if (!(data & 1))
                         break;
   
- -              if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ +              if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
                      &vcpu->arch.pv_time, data & ~1ULL,
                      sizeof(struct pvclock_vcpu_time_info)))
- -                      vcpu->arch.pv_time_enabled = false;
- -              else
                         vcpu->arch.pv_time_enabled = true;
   
                 break;
@@@ -5123,26 -5094,22 +5124,26 @@@ static void kvm_init_msr_list(void
   {
         struct x86_pmu_capability x86_pmu;
         u32 dummy[2];
- -      unsigned i, j;
+ +      unsigned i;
   
         BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
- -                       "Please update the fixed PMCs in msrs_to_save[]");
+ +                       "Please update the fixed PMCs in msrs_to_saved_all[]");
   
         perf_get_x86_pmu_capability(&x86_pmu);
   
- -      for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
- -              if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
+ +      num_msrs_to_save = 0;
+ +      num_emulated_msrs = 0;
+ +      num_msr_based_features = 0;
+ +
+ +      for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
+ +              if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
                         continue;
   
                 /*
                  * Even MSRs that are valid in the host may not be exposed
                  * to the guests in some cases.
                  */
- -              switch (msrs_to_save[i]) {
+ +              switch (msrs_to_save_all[i]) {
                 case MSR_IA32_BNDCFGS:
                         if (!kvm_mpx_supported())
                                 continue;
@@@ -5170,17 -5137,17 +5171,17 @@@
                         break;
                 case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
                         if (!kvm_x86_ops->pt_supported() ||
- -                              msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
+ +                              msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
                                 intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
                                 continue;
                         break;
                 case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
- -                      if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+ +                      if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
                             min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
                                 continue;
                         break;
                 case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
- -                      if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+ +                      if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
                             min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
                                 continue;
                 }
@@@ -5188,25 -5155,34 +5189,25 @@@
                         break;
                 }
   
- -              if (j < i)
- -                      msrs_to_save[j] = msrs_to_save[i];
- -              j++;
+ +              msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
         }
- -      num_msrs_to_save = j;
   
- -      for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
- -              if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+ +      for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
+ +              if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
                         continue;
   
- -              if (j < i)
- -                      emulated_msrs[j] = emulated_msrs[i];
- -              j++;
+ +              emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
         }
- -      num_emulated_msrs = j;
   
- -      for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+ +      for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
                 struct kvm_msr_entry msr;
   
- -              msr.index = msr_based_features[i];
+ +              msr.index = msr_based_features_all[i];
                 if (kvm_get_msr_feature(&msr))
                         continue;
   
- -              if (j < i)
- -                      msr_based_features[j] = msr_based_features[i];
- -              j++;
+ +              msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
         }
- -      num_msr_based_features = j;
   }
   
   static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@@ -5471,6 -5447,7 +5472,7 @@@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_
   
   int handle_ud(struct kvm_vcpu *vcpu)
   {
+       static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
         int emul_type = EMULTYPE_TRAP_UD;
         char sig[5]; /* ud2; .ascii "kvm" */
         struct x86_exception e;
@@@ -5478,7 -5455,7 +5480,7 @@@
         if (force_emulation_prefix &&
             kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
                                 sig, sizeof(sig), &e) == 0 &&
-           memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+           memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
                 kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
                 emul_type = EMULTYPE_TRAP_UD_FORCED;
         }
@@@ -7966,12 -7943,8 +7968,12 @@@ static int vcpu_enter_guest(struct kvm_
         bool req_immediate_exit = false;
   
         if (kvm_request_pending(vcpu)) {
- -              if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
- -                      kvm_x86_ops->get_vmcs12_pages(vcpu);
+ +              if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
+ +                      if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
+ +                              r = 0;
+ +                              goto out;
+ +                      }
+ +              }
                 if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
                         kvm_mmu_unload(vcpu);
                 if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@@ -9456,7 -9429,6 +9458,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
         INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
         INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
+ +      INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
         INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
         atomic_set(&kvm->arch.noncoherent_dma_count, 0);
   
@@@ -9485,11 -9457,6 +9487,11 @@@
         return kvm_x86_ops->vm_init(kvm);
   }
   
+ +int kvm_arch_post_init_vm(struct kvm *kvm)
+ +{
+ +      return kvm_mmu_post_init_vm(kvm);
+ +}
+ +
   static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
   {
         vcpu_load(vcpu);
@@@ -9591,11 -9558,6 +9593,11 @@@ int x86_set_memory_region(struct kvm *k
   }
   EXPORT_SYMBOL_GPL(x86_set_memory_region);
   
+ +void kvm_arch_pre_destroy_vm(struct kvm *kvm)
+ +{
+ +      kvm_mmu_pre_destroy_vm(kvm);
+ +}
+ +
   void kvm_arch_destroy_vm(struct kvm *kvm)
   {
         if (current->mm == kvm->mm) {
diff --combined tools/perf/check-headers.sh

index 48290a0c917cf714a89f27835e59b80ce88b746d,499235a411628db34a6b24854c409c86ab355a34..a1dc1672435201513b3e3ad3a7bbbab59b4a32ec
--- 1/tools/perf/check-headers.sh
--- 2/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@@ -28,8 -28,7 +28,9 @@@ arch/x86/include/asm/disabled-features.
   arch/x86/include/asm/required-features.h
   arch/x86/include/asm/cpufeatures.h
   arch/x86/include/asm/inat_types.h
+ arch/x86/include/asm/emulate_prefix.h
+ +arch/x86/include/asm/irq_vectors.h
+ +arch/x86/include/asm/msr-index.h
   arch/x86/include/uapi/asm/prctl.h
   arch/x86/lib/x86-opcode-map.txt
   arch/x86/tools/gen-insn-attr-x86.awk
@@@ -118,7 -117,7 +119,7 @@@ check lib/ctype.c                '-I "^EXPORT_S
   check arch/x86/include/asm/inat.h     '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
   check arch/x86/include/asm/insn.h     '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
   check arch/x86/lib/inat.c           '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
- check arch/x86/lib/insn.c           '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"'
+ check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
   
   # diff non-symmetric files
   check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
author	Ingo Molnar <mingo@kernel.org>
	Mon, 25 Nov 2019 08:09:27 +0000 (09:09 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Mon, 25 Nov 2019 08:09:27 +0000 (09:09 +0100)
		1	2
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/perf/check-headers.sh	patch \|	diff1 \|	diff2 \|	blob \| history