From: Ingo Molnar <mingo@kernel.org>
Date: Mon, 25 Nov 2019 08:09:27 +0000 (+0100)
Subject: Merge branch 'x86/core' into perf/core, to resolve conflicts and to pick up completed... 
X-Git-Tag: v5.5-rc1~152^2
X-Git-Url: https://asedeno.scripts.mit.edu/gitweb/?a=commitdiff_plain;h=ceb9e77324fa661b1001a0ae66f061b5fcb4e4e6;hp=-c;p=linux.git

Merge branch 'x86/core' into perf/core, to resolve conflicts and to pick up completed topic tree

Conflicts:
	tools/perf/check-headers.sh

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---

ceb9e77324fa661b1001a0ae66f061b5fcb4e4e6
diff --combined arch/x86/kvm/x86.c
index 5d530521f11d,777574f547c0..783aa8d141bf
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -68,6 -68,7 +68,7 @@@
  #include <asm/mshyperv.h>
  #include <asm/hypervisor.h>
  #include <asm/intel_pt.h>
+ #include <asm/emulate_prefix.h>
  #include <clocksource/hyperv_timer.h>
  
  #define CREATE_TRACE_POINTS
@@@ -213,7 -214,6 +214,7 @@@ struct kvm_stats_debugfs_item debugfs_e
  	{ "mmu_unsync", VM_STAT(mmu_unsync) },
  	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
  	{ "largepages", VM_STAT(lpages, .mode = 0444) },
 +	{ "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
  	{ "max_mmu_page_hash_collisions",
  		VM_STAT(max_mmu_page_hash_collisions) },
  	{ NULL }
@@@ -361,7 -361,8 +362,7 @@@ EXPORT_SYMBOL_GPL(kvm_set_apic_base)
  asmlinkage __visible void kvm_spurious_fault(void)
  {
  	/* Fault while not rebooting.  We want the trace. */
 -	if (!kvm_rebooting)
 -		BUG();
 +	BUG_ON(!kvm_rebooting);
  }
  EXPORT_SYMBOL_GPL(kvm_spurious_fault);
  
@@@ -1133,15 -1134,13 +1134,15 @@@ EXPORT_SYMBOL_GPL(kvm_rdpmc)
   * List of msr numbers which we expose to userspace through KVM_GET_MSRS
   * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
   *
 - * This list is modified at module load time to reflect the
 + * The three MSR lists (msrs_to_save, emulated_msrs, msr_based_features)
 + * extract the supported MSRs from the related const lists.
 + * msrs_to_save is selected from the msrs_to_save_all to reflect the
   * capabilities of the host cpu. This capabilities test skips MSRs that are
 - * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
 + * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
   * may depend on host virtualization features rather than host cpu features.
   */
  
 -static u32 msrs_to_save[] = {
 +static const u32 msrs_to_save_all[] = {
  	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
  	MSR_STAR,
  #ifdef CONFIG_X86_64
@@@ -1182,10 -1181,9 +1183,10 @@@
  	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
  };
  
 +static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
  static unsigned num_msrs_to_save;
  
 -static u32 emulated_msrs[] = {
 +static const u32 emulated_msrs_all[] = {
  	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
  	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
  	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
@@@ -1224,7 -1222,7 +1225,7 @@@
  	 * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
  	 * We always support the "true" VMX control MSRs, even if the host
  	 * processor does not, so I am putting these registers here rather
 -	 * than in msrs_to_save.
 +	 * than in msrs_to_save_all.
  	 */
  	MSR_IA32_VMX_BASIC,
  	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
@@@ -1243,14 -1241,13 +1244,14 @@@
  	MSR_KVM_POLL_CONTROL,
  };
  
 +static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
  static unsigned num_emulated_msrs;
  
  /*
   * List of msr numbers which are used to expose MSR-based features that
   * can be used by a hypervisor to validate requested CPU features.
   */
 -static u32 msr_based_features[] = {
 +static const u32 msr_based_features_all[] = {
  	MSR_IA32_VMX_BASIC,
  	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
  	MSR_IA32_VMX_PINBASED_CTLS,
@@@ -1275,7 -1272,6 +1276,7 @@@
  	MSR_IA32_ARCH_CAPABILITIES,
  };
  
 +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
  static unsigned int num_msr_based_features;
  
  static u64 kvm_get_arch_capabilities(void)
@@@ -1285,14 -1281,6 +1286,14 @@@
  	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
  		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
  
 +	/*
 +	 * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
 +	 * the nested hypervisor runs with NX huge pages.  If it is not,
 +	 * L1 is anyway vulnerable to ITLB_MULTIHIT exploits from other
 +	 * L1 guests, so it need not worry about its own (L2) guests.
 +	 */
 +	data |= ARCH_CAP_PSCHANGE_MC_NO;
 +
  	/*
  	 * If we're doing cache flushes (either "always" or "cond")
  	 * we will do one whenever the guest does a vmlaunch/vmresume.
@@@ -1312,25 -1300,6 +1313,25 @@@
  	if (!boot_cpu_has_bug(X86_BUG_MDS))
  		data |= ARCH_CAP_MDS_NO;
  
 +	/*
 +	 * On TAA affected systems, export MDS_NO=0 when:
 +	 *	- TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
 +	 *	- Updated microcode is present. This is detected by
 +	 *	  the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
 +	 *	  that VERW clears CPU buffers.
 +	 *
 +	 * When MDS_NO=0 is exported, guests deploy clear CPU buffer
 +	 * mitigation and don't complain:
 +	 *
 +	 *	"Vulnerable: Clear CPU buffers attempted, no microcode"
 +	 *
 +	 * If TSX is disabled on the system, guests are also mitigated against
 +	 * TAA and clear CPU buffer mitigation is not required for guests.
 +	 */
 +	if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
 +	    (data & ARCH_CAP_TSX_CTRL_MSR))
 +		data &= ~ARCH_CAP_MDS_NO;
 +
  	return data;
  }
  
@@@ -2569,7 -2538,6 +2570,7 @@@ static int kvm_pv_enable_async_pf(struc
  static void kvmclock_reset(struct kvm_vcpu *vcpu)
  {
  	vcpu->arch.pv_time_enabled = false;
 +	vcpu->arch.time = 0;
  }
  
  static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
@@@ -2735,6 -2703,8 +2736,6 @@@ int kvm_set_msr_common(struct kvm_vcpu 
  	case MSR_KVM_SYSTEM_TIME: {
  		struct kvm_arch *ka = &vcpu->kvm->arch;
  
 -		kvmclock_reset(vcpu);
 -
  		if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
  			bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
  
@@@ -2748,13 -2718,14 +2749,13 @@@
  		kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
  
  		/* we verify if the enable bit is set... */
 +		vcpu->arch.pv_time_enabled = false;
  		if (!(data & 1))
  			break;
  
 -		if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
 +		if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
  		     &vcpu->arch.pv_time, data & ~1ULL,
  		     sizeof(struct pvclock_vcpu_time_info)))
 -			vcpu->arch.pv_time_enabled = false;
 -		else
  			vcpu->arch.pv_time_enabled = true;
  
  		break;
@@@ -5123,26 -5094,22 +5124,26 @@@ static void kvm_init_msr_list(void
  {
  	struct x86_pmu_capability x86_pmu;
  	u32 dummy[2];
 -	unsigned i, j;
 +	unsigned i;
  
  	BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
 -			 "Please update the fixed PMCs in msrs_to_save[]");
 +			 "Please update the fixed PMCs in msrs_to_saved_all[]");
  
  	perf_get_x86_pmu_capability(&x86_pmu);
  
 -	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
 -		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
 +	num_msrs_to_save = 0;
 +	num_emulated_msrs = 0;
 +	num_msr_based_features = 0;
 +
 +	for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
 +		if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
  			continue;
  
  		/*
  		 * Even MSRs that are valid in the host may not be exposed
  		 * to the guests in some cases.
  		 */
 -		switch (msrs_to_save[i]) {
 +		switch (msrs_to_save_all[i]) {
  		case MSR_IA32_BNDCFGS:
  			if (!kvm_mpx_supported())
  				continue;
@@@ -5170,17 -5137,17 +5171,17 @@@
  			break;
  		case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
  			if (!kvm_x86_ops->pt_supported() ||
 -				msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
 +				msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
  				intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
  				continue;
  			break;
  		case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
 -			if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
 +			if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
  			    min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
  				continue;
  			break;
  		case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
 -			if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
 +			if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
  			    min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
  				continue;
  		}
@@@ -5188,25 -5155,34 +5189,25 @@@
  			break;
  		}
  
 -		if (j < i)
 -			msrs_to_save[j] = msrs_to_save[i];
 -		j++;
 +		msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
  	}
 -	num_msrs_to_save = j;
  
 -	for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
 -		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
 +	for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
 +		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
  			continue;
  
 -		if (j < i)
 -			emulated_msrs[j] = emulated_msrs[i];
 -		j++;
 +		emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
  	}
 -	num_emulated_msrs = j;
  
 -	for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
 +	for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
  		struct kvm_msr_entry msr;
  
 -		msr.index = msr_based_features[i];
 +		msr.index = msr_based_features_all[i];
  		if (kvm_get_msr_feature(&msr))
  			continue;
  
 -		if (j < i)
 -			msr_based_features[j] = msr_based_features[i];
 -		j++;
 +		msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
  	}
 -	num_msr_based_features = j;
  }
  
  static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@@ -5471,6 -5447,7 +5472,7 @@@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_
  
  int handle_ud(struct kvm_vcpu *vcpu)
  {
+ 	static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
  	int emul_type = EMULTYPE_TRAP_UD;
  	char sig[5]; /* ud2; .ascii "kvm" */
  	struct x86_exception e;
@@@ -5478,7 -5455,7 +5480,7 @@@
  	if (force_emulation_prefix &&
  	    kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
  				sig, sizeof(sig), &e) == 0 &&
- 	    memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+ 	    memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
  		kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
  		emul_type = EMULTYPE_TRAP_UD_FORCED;
  	}
@@@ -7966,12 -7943,8 +7968,12 @@@ static int vcpu_enter_guest(struct kvm_
  	bool req_immediate_exit = false;
  
  	if (kvm_request_pending(vcpu)) {
 -		if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
 -			kvm_x86_ops->get_vmcs12_pages(vcpu);
 +		if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
 +			if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
 +				r = 0;
 +				goto out;
 +			}
 +		}
  		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
  			kvm_mmu_unload(vcpu);
  		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@@ -9456,7 -9429,6 +9458,7 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
  	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
  	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
  	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 +	INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
  	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
  	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
  
@@@ -9485,11 -9457,6 +9487,11 @@@
  	return kvm_x86_ops->vm_init(kvm);
  }
  
 +int kvm_arch_post_init_vm(struct kvm *kvm)
 +{
 +	return kvm_mmu_post_init_vm(kvm);
 +}
 +
  static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
  {
  	vcpu_load(vcpu);
@@@ -9591,11 -9558,6 +9593,11 @@@ int x86_set_memory_region(struct kvm *k
  }
  EXPORT_SYMBOL_GPL(x86_set_memory_region);
  
 +void kvm_arch_pre_destroy_vm(struct kvm *kvm)
 +{
 +	kvm_mmu_pre_destroy_vm(kvm);
 +}
 +
  void kvm_arch_destroy_vm(struct kvm *kvm)
  {
  	if (current->mm == kvm->mm) {
diff --combined tools/perf/check-headers.sh
index 48290a0c917c,499235a41162..a1dc16724352
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@@ -28,8 -28,7 +28,9 @@@ arch/x86/include/asm/disabled-features.
  arch/x86/include/asm/required-features.h
  arch/x86/include/asm/cpufeatures.h
  arch/x86/include/asm/inat_types.h
+ arch/x86/include/asm/emulate_prefix.h
 +arch/x86/include/asm/irq_vectors.h
 +arch/x86/include/asm/msr-index.h
  arch/x86/include/uapi/asm/prctl.h
  arch/x86/lib/x86-opcode-map.txt
  arch/x86/tools/gen-insn-attr-x86.awk
@@@ -118,7 -117,7 +119,7 @@@ check lib/ctype.c		      '-I "^EXPORT_S
  check arch/x86/include/asm/inat.h     '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
  check arch/x86/include/asm/insn.h     '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
  check arch/x86/lib/inat.c	      '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
- check arch/x86/lib/insn.c	      '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]"'
+ check arch/x86/lib/insn.c             '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
  
  # diff non-symmetric files
  check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl