Merge tag 'kvm-arm-for-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm...
author	Radim Krčmář <rkrcmar@redhat.com>
Fri, 22 Jul 2016 18:27:26 +0000 (20:27 +0200)
committer	Radim Krčmář <rkrcmar@redhat.com>
Fri, 22 Jul 2016 18:27:26 +0000 (20:27 +0200)
KVM/ARM changes for Linux 4.8

- GICv3 ITS emulation
- Simpler idmap management that fixes potential TLB conflicts
- Honor the kernel protection in HYP mode
- Removal of the old vgic implementation

63 files changed:
Documentation/virtual/kvm/api.txt
arch/arm/kvm/arm.c
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/uasm.h
arch/mips/include/uapi/asm/inst.h
arch/mips/kernel/asm-offsets.c
arch/mips/kernel/branch.c
arch/mips/kvm/Kconfig
arch/mips/kvm/Makefile
arch/mips/kvm/dyntrans.c
arch/mips/kvm/emulate.c
arch/mips/kvm/entry.c [new file with mode: 0644]
arch/mips/kvm/fpu.S
arch/mips/kvm/interrupt.h
arch/mips/kvm/locore.S [deleted file]
arch/mips/kvm/mips.c
arch/mips/kvm/trace.h
arch/mips/kvm/trap_emul.c
arch/mips/math-emu/cp1emu.c
arch/mips/mm/uasm-micromips.c
arch/mips/mm/uasm-mips.c
arch/mips/mm/uasm.c
arch/powerpc/include/asm/hmi.h [new file with mode: 0644]
arch/powerpc/include/asm/paca.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/hmi.c [new file with mode: 0644]
arch/powerpc/kernel/idle_power7.S
arch/powerpc/kernel/traps.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_ras.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_pr.c
arch/powerpc/kvm/booke.c
arch/powerpc/kvm/emulate.c
arch/powerpc/kvm/mpic.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/s390/include/asm/kvm_host.h
arch/s390/kvm/guestdbg.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/s390/kvm/vsie.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/irq_comm.c
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/paging_tmpl.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/linux/context_tracking.h
include/linux/kvm_host.h
include/trace/events/kvm.h
include/uapi/linux/kvm.h
mm/gup.c
virt/kvm/irqchip.c
virt/kvm/kvm_main.c

index 07049eadb1243258f50fca9f924ccf17c1789066..5237e1b2fd66cf038e480d1710ae0bcd80a2edae 100644 (file)
@@ -1482,6 +1482,11 @@ struct kvm_irq_routing_msi {
        __u32 pad;
 };
 
+On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
+feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
+address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
+address_hi must be zero.
+
 struct kvm_irq_routing_s390_adapter {
        __u64 ind_addr;
        __u64 summary_addr;
@@ -1583,6 +1588,17 @@ struct kvm_lapic_state {
 Reads the Local APIC registers and copies them into the input argument.  The
 data format and layout are the same as documented in the architecture manual.
 
+If KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API is
+enabled, then the format of APIC_ID register depends on the APIC mode
+(reported by MSR_IA32_APICBASE) of its VCPU.  x2APIC stores APIC ID in
+the APIC_ID register (bytes 32-35).  xAPIC only allows an 8-bit APIC ID
+which is stored in bits 31-24 of the APIC register, or equivalently in
+byte 35 of struct kvm_lapic_state's regs field.  KVM_GET_LAPIC must then
+be called after MSR_IA32_APICBASE has been set with KVM_SET_MSR.
+
+If KVM_X2APIC_API_USE_32BIT_IDS feature is disabled, struct kvm_lapic_state
+always uses xAPIC format.
+
 
 4.58 KVM_SET_LAPIC
 
@@ -1600,6 +1616,10 @@ struct kvm_lapic_state {
 Copies the input argument into the Local APIC registers.  The data format
 and layout are the same as documented in the architecture manual.
 
+The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's
+regs field) depends on the state of the KVM_CAP_X2APIC_API capability.
+See the note in KVM_GET_LAPIC.
+
 
 4.59 KVM_IOEVENTFD
 
@@ -2188,6 +2208,10 @@ The per-VM KVM_CAP_MSI_DEVID capability advertises the need to provide
 the device ID. If this capability is not set, userland cannot rely on
 the kernel to allow the KVM_MSI_VALID_DEVID flag being set.
 
+On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is
+enabled.  If it is enabled, address_hi bits 31-8 provide bits 31-8 of the
+destination id.  Bits 7-0 of address_hi must be zero.
+
 
 4.71 KVM_CREATE_PIT2
 
@@ -3819,6 +3843,42 @@ Allows use of runtime-instrumentation introduced with zEC12 processor.
 Will return -EINVAL if the machine does not support runtime-instrumentation.
 Will return -EBUSY if a VCPU has already been created.
 
+7.7 KVM_CAP_X2APIC_API
+
+Architectures: x86
+Parameters: args[0] - features that should be enabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid features
+
+Valid feature flags in args[0] are
+
+#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
+
+Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
+KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
+allowing the use of 32-bit APIC IDs.  See KVM_CAP_X2APIC_API in their
+respective sections.
+
+KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK must be enabled for x2APIC to work
+in logical mode or with more than 255 VCPUs.  Otherwise, KVM treats 0xff
+as a broadcast even in x2APIC mode in order to support physical x2APIC
+without interrupt remapping.  This is undesirable in logical mode,
+where 0xff represents CPUs 0-7 in cluster 0.
+
+7.8 KVM_CAP_S390_USER_INSTR0
+
+Architectures: s390
+Parameters: none
+
+With this capability enabled, all illegal instructions 0x0000 (2 bytes) will
+be intercepted and forwarded to user space. User space can use this
+mechanism e.g. to realize 2-byte software breakpoints. The kernel will
+not inject an operation exception for these instructions; user space has
+to take care of that.
+
+This capability can be enabled dynamically even if VCPUs were already
+created and are running.
+
 8. Other capabilities.
 ----------------------
 
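As a usage illustration of the KVM_CAP_X2APIC_API additions above, here is a
minimal userspace sketch (assumptions: vm_fd is an open KVM VM descriptor and
dest_id is caller supplied; error handling omitted):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	static int enable_x2apic_api(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_X2APIC_API,
			.args = { KVM_X2APIC_API_USE_32BIT_IDS |
				  KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK },
		};

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}

	/*
	 * Encode a 32-bit destination id per the rules above: address_hi
	 * bits 31-8 carry id bits 31-8 (bits 7-0 of address_hi must be
	 * zero); id bits 7-0 sit in the usual xAPIC slot of address_lo.
	 */
	static void msi_set_dest(struct kvm_irq_routing_msi *msi, __u32 id)
	{
		msi->address_lo = 0xfee00000 | ((id & 0xff) << 12);
		msi->address_hi = id & 0xffffff00;
	}
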
index fb4661cf896ecdb5558b5c29fd3d2fe732356178..abdb2ea19bf0b80aa067f56b5be82b5ccca2becb 100644 (file)
@@ -616,7 +616,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                 * Enter the guest
                 */
                trace_kvm_entry(*vcpu_pc(vcpu));
-               __kvm_guest_enter();
+               guest_enter_irqoff();
                vcpu->mode = IN_GUEST_MODE;
 
                ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
@@ -642,14 +642,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                local_irq_enable();
 
                /*
-                * We do local_irq_enable() before calling kvm_guest_exit() so
+                * We do local_irq_enable() before calling guest_exit() so
                 * that if a timer interrupt hits while running the guest we
                 * account that tick as being spent in the guest.  We enable
-                * preemption after calling kvm_guest_exit() so that if we get
+                * preemption after calling guest_exit() so that if we get
                 * preempted we make sure ticks after that is not counted as
                 * guest time.
                 */
-               kvm_guest_exit();
+               guest_exit();
                trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
 
                /*
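For reference, the ordering that the comment above describes, condensed into a
sketch of the run loop (names taken from the surrounding code; not a drop-in
replacement):

	local_irq_disable();
	guest_enter_irqoff();	/* vtime accounting: guest starts, IRQs off */

	ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);

	local_irq_enable();	/* a pending timer tick fires here and is   */
	guest_exit();		/* charged to the guest, not the host       */
	preempt_enable();	/* after guest_exit(), so later ticks are   */
				/* counted as host time                     */
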
index b0773c6d622fa58fc05212f56c506c17b489ceaa..b54bcadd8aecab6c294591361cae8c64818247c6 100644 (file)
@@ -400,7 +400,7 @@ static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
        unsigned long temp;
        do {
                __asm__ __volatile__(
-               "       .set    mips3                           \n"
+               "       .set    "MIPS_ISA_ARCH_LEVEL"           \n"
                "       " __LL "%0, %1                          \n"
                "       or      %0, %2                          \n"
                "       " __SC  "%0, %1                         \n"
@@ -416,7 +416,7 @@ static inline void _kvm_atomic_clear_c0_guest_reg(unsigned long *reg,
        unsigned long temp;
        do {
                __asm__ __volatile__(
-               "       .set    mips3                           \n"
+               "       .set    "MIPS_ISA_ARCH_LEVEL"           \n"
                "       " __LL "%0, %1                          \n"
                "       and     %0, %2                          \n"
                "       " __SC  "%0, %1                         \n"
@@ -433,7 +433,7 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
        unsigned long temp;
        do {
                __asm__ __volatile__(
-               "       .set    mips3                           \n"
+               "       .set    "MIPS_ISA_ARCH_LEVEL"           \n"
                "       " __LL "%0, %1                          \n"
                "       and     %0, %2                          \n"
                "       or      %0, %3                          \n"
@@ -533,8 +533,13 @@ int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
 /* Debug: dump vcpu state */
 int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
 
-/* Trampoline ASM routine to start running in "Guest" context */
-extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+/* Building of entry/exception code */
+int kvm_mips_entry_setup(void);
+void *kvm_mips_build_vcpu_run(void *addr);
+void *kvm_mips_build_exception(void *addr, void *handler);
+void *kvm_mips_build_exit(void *addr);
 
 /* FPU/MSA context management */
 void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu);
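The _kvm_atomic_*_c0_guest_reg helpers changed above wrap an LL/SC retry loop;
in C-like pseudocode (load_linked() and store_conditional() are hypothetical
stand-ins for the __LL/__SC inline asm):

	do {
		temp = load_linked(reg);	/* __LL: linked load        */
		temp |= val;			/* set variant; the clear/  */
						/* change variants mask off */
						/* bits instead             */
	} while (!store_conditional(reg, temp));
						/* __SC fails if another    */
						/* write intervened; retry  */
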
index b6ecfeee4dbe8aca256803ee2f980575825133e8..f7929f65f7ca27bf4926deb8e51d73d724c935bb 100644 (file)
@@ -104,8 +104,13 @@ Ip_u1s2(_bltz);
 Ip_u1s2(_bltzl);
 Ip_u1u2s3(_bne);
 Ip_u2s3u1(_cache);
+Ip_u1u2(_cfc1);
+Ip_u2u1(_cfcmsa);
+Ip_u1u2(_ctc1);
+Ip_u2u1(_ctcmsa);
 Ip_u2u1s3(_daddiu);
 Ip_u3u1u2(_daddu);
+Ip_u1(_di);
 Ip_u2u1msbu3(_dins);
 Ip_u2u1msbu3(_dinsm);
 Ip_u1u2(_divu);
@@ -141,6 +146,8 @@ Ip_u1(_mfhi);
 Ip_u1(_mflo);
 Ip_u1u2u3(_mtc0);
 Ip_u1u2u3(_mthc0);
+Ip_u1(_mthi);
+Ip_u1(_mtlo);
 Ip_u3u1u2(_mul);
 Ip_u3u1u2(_or);
 Ip_u2u1u3(_ori);
index a1ebf973725c7987fcafaf4ab9fa7a6b3c65eca9..77429d1622b343aed9f6a0d1c81f5ed4eacadffd 100644 (file)
 enum major_op {
        spec_op, bcond_op, j_op, jal_op,
        beq_op, bne_op, blez_op, bgtz_op,
-       addi_op, cbcond0_op = addi_op, addiu_op, slti_op, sltiu_op,
+       addi_op, pop10_op = addi_op, addiu_op, slti_op, sltiu_op,
        andi_op, ori_op, xori_op, lui_op,
        cop0_op, cop1_op, cop2_op, cop1x_op,
        beql_op, bnel_op, blezl_op, bgtzl_op,
-       daddi_op, cbcond1_op = daddi_op, daddiu_op, ldl_op, ldr_op,
+       daddi_op, pop30_op = daddi_op, daddiu_op, ldl_op, ldr_op,
        spec2_op, jalx_op, mdmx_op, msa_op = mdmx_op, spec3_op,
        lb_op, lh_op, lwl_op, lw_op,
        lbu_op, lhu_op, lwr_op, lwu_op,
        sb_op, sh_op, swl_op, sw_op,
        sdl_op, sdr_op, swr_op, cache_op,
        ll_op, lwc1_op, lwc2_op, bc6_op = lwc2_op, pref_op,
-       lld_op, ldc1_op, ldc2_op, beqzcjic_op = ldc2_op, ld_op,
+       lld_op, ldc1_op, ldc2_op, pop66_op = ldc2_op, ld_op,
        sc_op, swc1_op, swc2_op, balc6_op = swc2_op, major_3b_op,
-       scd_op, sdc1_op, sdc2_op, bnezcjialc_op = sdc2_op, sd_op
+       scd_op, sdc1_op, sdc2_op, pop76_op = sdc2_op, sd_op
 };
 
 /*
@@ -92,6 +92,50 @@ enum spec3_op {
        rdhwr_op  = 0x3b
 };
 
+/*
+ * Bits 10-6 minor opcode for r6 spec mult/div encodings
+ */
+enum mult_op {
+       mult_mult_op = 0x0,
+       mult_mul_op = 0x2,
+       mult_muh_op = 0x3,
+};
+enum multu_op {
+       multu_multu_op = 0x0,
+       multu_mulu_op = 0x2,
+       multu_muhu_op = 0x3,
+};
+enum div_op {
+       div_div_op = 0x0,
+       div_div6_op = 0x2,
+       div_mod_op = 0x3,
+};
+enum divu_op {
+       divu_divu_op = 0x0,
+       divu_divu6_op = 0x2,
+       divu_modu_op = 0x3,
+};
+enum dmult_op {
+       dmult_dmult_op = 0x0,
+       dmult_dmul_op = 0x2,
+       dmult_dmuh_op = 0x3,
+};
+enum dmultu_op {
+       dmultu_dmultu_op = 0x0,
+       dmultu_dmulu_op = 0x2,
+       dmultu_dmuhu_op = 0x3,
+};
+enum ddiv_op {
+       ddiv_ddiv_op = 0x0,
+       ddiv_ddiv6_op = 0x2,
+       ddiv_dmod_op = 0x3,
+};
+enum ddivu_op {
+       ddivu_ddivu_op = 0x0,
+       ddivu_ddivu6_op = 0x2,
+       ddivu_dmodu_op = 0x3,
+};
+
 /*
  * rt field of bcond opcodes.
  */
@@ -237,6 +281,21 @@ enum bshfl_func {
        seh_op  = 0x18,
 };
 
+/*
+ * MSA minor opcodes.
+ */
+enum msa_func {
+       msa_elm_op = 0x19,
+};
+
+/*
+ * MSA ELM opcodes.
+ */
+enum msa_elm {
+       msa_ctc_op = 0x3e,
+       msa_cfc_op = 0x7e,
+};
+
 /*
  * func field for MSA MI10 format.
  */
@@ -264,7 +323,7 @@ enum mm_major_op {
        mm_pool32b_op, mm_pool16b_op, mm_lhu16_op, mm_andi16_op,
        mm_addiu32_op, mm_lhu32_op, mm_sh32_op, mm_lh32_op,
        mm_pool32i_op, mm_pool16c_op, mm_lwsp16_op, mm_pool16d_op,
-       mm_ori32_op, mm_pool32f_op, mm_reserved1_op, mm_reserved2_op,
+       mm_ori32_op, mm_pool32f_op, mm_pool32s_op, mm_reserved2_op,
        mm_pool32c_op, mm_lwgp16_op, mm_lw16_op, mm_pool16e_op,
        mm_xori32_op, mm_jals32_op, mm_addiupc_op, mm_reserved3_op,
        mm_reserved4_op, mm_pool16f_op, mm_sb16_op, mm_beqz16_op,
@@ -360,7 +419,10 @@ enum mm_32axf_minor_op {
        mm_mflo32_op = 0x075,
        mm_jalrhb_op = 0x07c,
        mm_tlbwi_op = 0x08d,
+       mm_mthi32_op = 0x0b5,
        mm_tlbwr_op = 0x0cd,
+       mm_mtlo32_op = 0x0f5,
+       mm_di_op = 0x11d,
        mm_jalrs_op = 0x13c,
        mm_jalrshb_op = 0x17c,
        mm_sync_op = 0x1ad,
@@ -478,6 +540,13 @@ enum mm_32f_73_minor_op {
        mm_fcvts1_op = 0xed,
 };
 
+/*
+ * (microMIPS) POOL32S minor opcodes.
+ */
+enum mm_32s_minor_op {
+       mm_32s_elm_op = 0x16,
+};
+
 /*
  * (microMIPS) POOL16C minor opcodes.
  */
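The mult/div enums added above live in instruction bits 10-6 of the R6 SPEC
encodings; a hedged sketch of how a decoder can select among them
(r6_minor_op() is a hypothetical helper, not part of this patch):

	/* hypothetical helper: the R6 variant field is bits 10..6 */
	static inline unsigned int r6_minor_op(union mips_instruction inst)
	{
		return (inst.word >> 6) & 0x1f;
	}

	/*
	 * e.g. for a SPEC-major instruction with func == mult_op:
	 *   mult_mult_op (0x0) -> classic MULT (removed in R6)
	 *   mult_mul_op  (0x2) -> R6 MUL (low half of product)
	 *   mult_muh_op  (0x3) -> R6 MUH (high half of product)
	 */
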
index a1263d188a5a8c732b4154b4cce8aeca82b8d4a2..fae2f9447792a02e667dc765c1a0a44ab2789b6b 100644 (file)
@@ -339,67 +339,9 @@ void output_pm_defines(void)
 }
 #endif
 
-void output_cpuinfo_defines(void)
-{
-       COMMENT(" MIPS cpuinfo offsets. ");
-       DEFINE(CPUINFO_SIZE, sizeof(struct cpuinfo_mips));
-#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
-       OFFSET(CPUINFO_ASID_MASK, cpuinfo_mips, asid_mask);
-#endif
-}
-
 void output_kvm_defines(void)
 {
        COMMENT(" KVM/MIPS Specfic offsets. ");
-       DEFINE(VCPU_ARCH_SIZE, sizeof(struct kvm_vcpu_arch));
-       OFFSET(VCPU_RUN, kvm_vcpu, run);
-       OFFSET(VCPU_HOST_ARCH, kvm_vcpu, arch);
-
-       OFFSET(VCPU_GUEST_EBASE, kvm_vcpu_arch, guest_ebase);
-
-       OFFSET(VCPU_HOST_STACK, kvm_vcpu_arch, host_stack);
-       OFFSET(VCPU_HOST_GP, kvm_vcpu_arch, host_gp);
-
-       OFFSET(VCPU_HOST_CP0_BADVADDR, kvm_vcpu_arch, host_cp0_badvaddr);
-       OFFSET(VCPU_HOST_CP0_CAUSE, kvm_vcpu_arch, host_cp0_cause);
-       OFFSET(VCPU_HOST_EPC, kvm_vcpu_arch, host_cp0_epc);
-
-       OFFSET(VCPU_R0, kvm_vcpu_arch, gprs[0]);
-       OFFSET(VCPU_R1, kvm_vcpu_arch, gprs[1]);
-       OFFSET(VCPU_R2, kvm_vcpu_arch, gprs[2]);
-       OFFSET(VCPU_R3, kvm_vcpu_arch, gprs[3]);
-       OFFSET(VCPU_R4, kvm_vcpu_arch, gprs[4]);
-       OFFSET(VCPU_R5, kvm_vcpu_arch, gprs[5]);
-       OFFSET(VCPU_R6, kvm_vcpu_arch, gprs[6]);
-       OFFSET(VCPU_R7, kvm_vcpu_arch, gprs[7]);
-       OFFSET(VCPU_R8, kvm_vcpu_arch, gprs[8]);
-       OFFSET(VCPU_R9, kvm_vcpu_arch, gprs[9]);
-       OFFSET(VCPU_R10, kvm_vcpu_arch, gprs[10]);
-       OFFSET(VCPU_R11, kvm_vcpu_arch, gprs[11]);
-       OFFSET(VCPU_R12, kvm_vcpu_arch, gprs[12]);
-       OFFSET(VCPU_R13, kvm_vcpu_arch, gprs[13]);
-       OFFSET(VCPU_R14, kvm_vcpu_arch, gprs[14]);
-       OFFSET(VCPU_R15, kvm_vcpu_arch, gprs[15]);
-       OFFSET(VCPU_R16, kvm_vcpu_arch, gprs[16]);
-       OFFSET(VCPU_R17, kvm_vcpu_arch, gprs[17]);
-       OFFSET(VCPU_R18, kvm_vcpu_arch, gprs[18]);
-       OFFSET(VCPU_R19, kvm_vcpu_arch, gprs[19]);
-       OFFSET(VCPU_R20, kvm_vcpu_arch, gprs[20]);
-       OFFSET(VCPU_R21, kvm_vcpu_arch, gprs[21]);
-       OFFSET(VCPU_R22, kvm_vcpu_arch, gprs[22]);
-       OFFSET(VCPU_R23, kvm_vcpu_arch, gprs[23]);
-       OFFSET(VCPU_R24, kvm_vcpu_arch, gprs[24]);
-       OFFSET(VCPU_R25, kvm_vcpu_arch, gprs[25]);
-       OFFSET(VCPU_R26, kvm_vcpu_arch, gprs[26]);
-       OFFSET(VCPU_R27, kvm_vcpu_arch, gprs[27]);
-       OFFSET(VCPU_R28, kvm_vcpu_arch, gprs[28]);
-       OFFSET(VCPU_R29, kvm_vcpu_arch, gprs[29]);
-       OFFSET(VCPU_R30, kvm_vcpu_arch, gprs[30]);
-       OFFSET(VCPU_R31, kvm_vcpu_arch, gprs[31]);
-       OFFSET(VCPU_LO, kvm_vcpu_arch, lo);
-       OFFSET(VCPU_HI, kvm_vcpu_arch, hi);
-       OFFSET(VCPU_PC, kvm_vcpu_arch, pc);
-       BLANK();
 
        OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
        OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
@@ -437,14 +379,6 @@ void output_kvm_defines(void)
        OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31);
        OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr);
        BLANK();
-
-       OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0);
-       OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid);
-       OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid);
-
-       OFFSET(COP0_TLB_HI, mips_coproc, reg[MIPS_CP0_TLB_HI][0]);
-       OFFSET(COP0_STATUS, mips_coproc, reg[MIPS_CP0_STATUS][0]);
-       BLANK();
 }
 
 #ifdef CONFIG_MIPS_CPS
index 6dc3f1fdaccc3640bf1199789936a4823786d1b9..46c227fc98f5af75bbdd389cbe8eb75b33815470 100644 (file)
@@ -790,7 +790,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
                epc += 4 + (insn.i_format.simmediate << 2);
                regs->cp0_epc = epc;
                break;
-       case beqzcjic_op:
+       case pop66_op:
                if (!cpu_has_mips_r6) {
                        ret = -SIGILL;
                        break;
@@ -798,7 +798,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
                /* Compact branch: BEQZC || JIC */
                regs->cp0_epc += 8;
                break;
-       case bnezcjialc_op:
+       case pop76_op:
                if (!cpu_has_mips_r6) {
                        ret = -SIGILL;
                        break;
@@ -809,8 +809,8 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
                regs->cp0_epc += 8;
                break;
 #endif
-       case cbcond0_op:
-       case cbcond1_op:
+       case pop10_op:
+       case pop30_op:
                /* Only valid for MIPS R6 */
                if (!cpu_has_mips_r6) {
                        ret = -SIGILL;
index 2ae12825529f8fe37e6e6b13c79eee39d4c4753c..7c56d6b124d1620248da030c94be9d3322696d48 100644 (file)
@@ -17,6 +17,7 @@ if VIRTUALIZATION
 config KVM
        tristate "Kernel-based Virtual Machine (KVM) support"
        depends on HAVE_KVM
+       select EXPORT_UASM
        select PREEMPT_NOTIFIERS
        select ANON_INODES
        select KVM_MMIO
index 0aabe40fcac9b7783cceac0f4b1c9ffe8e83a1a5..847429de780d3b948fe9cb296e40223188af715e 100644 (file)
@@ -7,7 +7,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
 
 common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
 
-kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \
+kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
            interrupt.o stats.o commpage.o \
            dyntrans.o trap_emul.o fpu.o
 kvm-objs += mmu.o
index 8a1833b9eb384dde8544a32ecd12265bb131468c..91ebd2b6034f678c00649ab0621b2bc31b8ed262 100644 (file)
@@ -72,7 +72,10 @@ int kvm_mips_trans_cache_va(union mips_instruction inst, u32 *opc,
        synci_inst.i_format.opcode = bcond_op;
        synci_inst.i_format.rs = inst.i_format.rs;
        synci_inst.i_format.rt = synci_op;
-       synci_inst.i_format.simmediate = inst.i_format.simmediate;
+       if (cpu_has_mips_r6)
+               synci_inst.i_format.simmediate = inst.spec3_format.simmediate;
+       else
+               synci_inst.i_format.simmediate = inst.i_format.simmediate;
 
        return kvm_mips_trans_replace(vcpu, opc, synci_inst);
 }
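The reason for the conditional above, as a short note (field widths from the
MIPS R6 encodings):

	/*
	 * On R6 the guest CACHE instruction is encoded under the SPEC3
	 * major opcode with a 9-bit signed offset in
	 * spec3_format.simmediate; pre-R6 CACHE carries a 16-bit
	 * i_format immediate. Reading the wrong field would pick up
	 * unrelated bits, hence the cpu_has_mips_r6 selection above.
	 */
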
index 5f0354c80c8eb9017a55f3e8f267e89d071f763b..be18dfe9ecaa210b23f6e7c1dd1f1368efd675dc 100644 (file)
@@ -161,9 +161,12 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
                nextpc = epc;
                break;
 
-       case blez_op:           /* not really i_format */
-       case blezl_op:
-               /* rt field assumed to be zero */
+       case blez_op:   /* POP06 */
+#ifndef CONFIG_CPU_MIPSR6
+       case blezl_op:  /* removed in R6 */
+#endif
+               if (insn.i_format.rt != 0)
+                       goto compact_branch;
                if ((long)arch->gprs[insn.i_format.rs] <= 0)
                        epc = epc + 4 + (insn.i_format.simmediate << 2);
                else
@@ -171,9 +174,12 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
                nextpc = epc;
                break;
 
-       case bgtz_op:
-       case bgtzl_op:
-               /* rt field assumed to be zero */
+       case bgtz_op:   /* POP07 */
+#ifndef CONFIG_CPU_MIPSR6
+       case bgtzl_op:  /* removed in R6 */
+#endif
+               if (insn.i_format.rt != 0)
+                       goto compact_branch;
                if ((long)arch->gprs[insn.i_format.rs] > 0)
                        epc = epc + 4 + (insn.i_format.simmediate << 2);
                else
@@ -185,6 +191,40 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu,
        case cop1_op:
                kvm_err("%s: unsupported cop1_op\n", __func__);
                break;
+
+#ifdef CONFIG_CPU_MIPSR6
+       /* R6 added the following compact branches with forbidden slots */
+       case blezl_op:  /* POP26 */
+       case bgtzl_op:  /* POP27 */
+               /* only rt == 0 isn't compact branch */
+               if (insn.i_format.rt != 0)
+                       goto compact_branch;
+               break;
+       case pop10_op:
+       case pop30_op:
+               /* only rs == rt == 0 is reserved, rest are compact branches */
+               if (insn.i_format.rs != 0 || insn.i_format.rt != 0)
+                       goto compact_branch;
+               break;
+       case pop66_op:
+       case pop76_op:
+               /* only rs == 0 isn't compact branch */
+               if (insn.i_format.rs != 0)
+                       goto compact_branch;
+               break;
+compact_branch:
+               /*
+                * If we've hit an exception on the forbidden slot, then
+                * the branch must not have been taken.
+                */
+               epc += 8;
+               nextpc = epc;
+               break;
+#else
+compact_branch:
+               /* Compact branches not supported before R6 */
+               break;
+#endif
        }
 
        return nextpc;
@@ -1561,7 +1601,10 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
 
        base = inst.i_format.rs;
        op_inst = inst.i_format.rt;
-       offset = inst.i_format.simmediate;
+       if (cpu_has_mips_r6)
+               offset = inst.spec3_format.simmediate;
+       else
+               offset = inst.i_format.simmediate;
        cache = op_inst & CacheOp_Cache;
        op = op_inst & CacheOp_Op;
 
@@ -1724,11 +1767,27 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
                er = kvm_mips_emulate_load(inst, cause, run, vcpu);
                break;
 
+#ifndef CONFIG_CPU_MIPSR6
        case cache_op:
                ++vcpu->stat.cache_exits;
                trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
                er = kvm_mips_emulate_cache(inst, opc, cause, run, vcpu);
                break;
+#else
+       case spec3_op:
+               switch (inst.spec3_format.func) {
+               case cache6_op:
+                       ++vcpu->stat.cache_exits;
+                       trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
+                       er = kvm_mips_emulate_cache(inst, opc, cause, run,
+                                                   vcpu);
+                       break;
+               default:
+                       goto unknown;
+               };
+               break;
+unknown:
+#endif
 
        default:
                kvm_err("Instruction emulation not supported (%p/%#x)\n", opc,
@@ -2298,7 +2357,9 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc,
        }
 
        if (inst.r_format.opcode == spec3_op &&
-           inst.r_format.func == rdhwr_op) {
+           inst.r_format.func == rdhwr_op &&
+           inst.r_format.rs == 0 &&
+           (inst.r_format.re >> 3) == 0) {
                int usermode = !KVM_GUEST_KERNEL_MODE(vcpu);
                int rd = inst.r_format.rd;
                int rt = inst.r_format.rt;
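A worked example of the forbidden-slot rule implemented above (layout is
illustrative): when the trapping instruction sits in the forbidden slot of an
R6 compact branch, the branch cannot have been taken, so the resume point is
past the slot:

	/*
	 *   epc + 0: BLEZC rt, target  <- compact branch (rt != 0)
	 *   epc + 4: <forbidden slot>  <- the exception was raised here,
	 *                                 so the branch was not taken
	 *   epc + 8: next instruction  <- resume here, i.e. epc += 8
	 */
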
diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c
new file mode 100644 (file)
index 0000000..75ba7c2
--- /dev/null
@@ -0,0 +1,676 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Generation of main entry point for the guest, exception handling.
+ *
+ * Copyright (C) 2012  MIPS Technologies, Inc.
+ * Authors: Sanjay Lal <sanjayl@kymasys.com>
+ *
+ * Copyright (C) 2016 Imagination Technologies Ltd.
+ */
+
+#include <linux/kvm_host.h>
+#include <asm/msa.h>
+#include <asm/setup.h>
+#include <asm/uasm.h>
+
+/* Register names */
+#define ZERO           0
+#define AT             1
+#define V0             2
+#define V1             3
+#define A0             4
+#define A1             5
+
+#if _MIPS_SIM == _MIPS_SIM_ABI32
+#define T0             8
+#define T1             9
+#define T2             10
+#define T3             11
+#endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
+
+#if _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32
+#define T0             12
+#define T1             13
+#define T2             14
+#define T3             15
+#endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */
+
+#define S0             16
+#define S1             17
+#define T9             25
+#define K0             26
+#define K1             27
+#define GP             28
+#define SP             29
+#define RA             31
+
+/* Some CP0 registers */
+#define C0_HWRENA      7, 0
+#define C0_BADVADDR    8, 0
+#define C0_ENTRYHI     10, 0
+#define C0_STATUS      12, 0
+#define C0_CAUSE       13, 0
+#define C0_EPC         14, 0
+#define C0_EBASE       15, 1
+#define C0_CONFIG5     16, 5
+#define C0_DDATA_LO    28, 3
+#define C0_ERROREPC    30, 0
+
+#define CALLFRAME_SIZ   32
+
+static unsigned int scratch_vcpu[2] = { C0_DDATA_LO };
+static unsigned int scratch_tmp[2] = { C0_ERROREPC };
+
+enum label_id {
+       label_fpu_1 = 1,
+       label_msa_1,
+       label_return_to_host,
+       label_kernel_asid,
+       label_exit_common,
+};
+
+UASM_L_LA(_fpu_1)
+UASM_L_LA(_msa_1)
+UASM_L_LA(_return_to_host)
+UASM_L_LA(_kernel_asid)
+UASM_L_LA(_exit_common)
+
+static void *kvm_mips_build_enter_guest(void *addr);
+static void *kvm_mips_build_ret_from_exit(void *addr);
+static void *kvm_mips_build_ret_to_guest(void *addr);
+static void *kvm_mips_build_ret_to_host(void *addr);
+
+/**
+ * kvm_mips_entry_setup() - Perform global setup for entry code.
+ *
+ * Perform global setup for entry code, such as choosing a scratch register.
+ *
+ * Returns:    0 on success.
+ *             -errno on failure.
+ */
+int kvm_mips_entry_setup(void)
+{
+       /*
+        * We prefer to use KScratchN registers if they are available over the
+        * defaults above, which may not work on all cores.
+        */
+       unsigned int kscratch_mask = cpu_data[0].kscratch_mask & 0xfc;
+
+       /* Pick a scratch register for storing VCPU */
+       if (kscratch_mask) {
+               scratch_vcpu[0] = 31;
+               scratch_vcpu[1] = ffs(kscratch_mask) - 1;
+               kscratch_mask &= ~BIT(scratch_vcpu[1]);
+       }
+
+       /* Pick a scratch register to use as a temp for saving state */
+       if (kscratch_mask) {
+               scratch_tmp[0] = 31;
+               scratch_tmp[1] = ffs(kscratch_mask) - 1;
+               kscratch_mask &= ~BIT(scratch_tmp[1]);
+       }
+
+       return 0;
+}
+
+static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp,
+                                       unsigned int frame)
+{
+       /* Save the VCPU scratch register value in cp0_epc of the stack frame */
+       uasm_i_mfc0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]);
+       UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame);
+
+       /* Save the temp scratch register value in cp0_cause of stack frame */
+       if (scratch_tmp[0] == 31) {
+               uasm_i_mfc0(p, tmp, scratch_tmp[0], scratch_tmp[1]);
+               UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame);
+       }
+}
+
+static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp,
+                                          unsigned int frame)
+{
+       /*
+        * Restore host scratch register values saved by
+        * kvm_mips_build_save_scratch().
+        */
+       UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame);
+       uasm_i_mtc0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]);
+
+       if (scratch_tmp[0] == 31) {
+               UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame);
+               uasm_i_mtc0(p, tmp, scratch_tmp[0], scratch_tmp[1]);
+       }
+}
+
+/**
+ * kvm_mips_build_vcpu_run() - Assemble function to start running a guest VCPU.
+ * @addr:      Address to start writing code.
+ *
+ * Assemble the start of the vcpu_run function to run a guest VCPU. The function
+ * conforms to the following prototype:
+ *
+ * int vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+ *
+ * The exit from the guest and return to the caller is handled by the code
+ * generated by kvm_mips_build_ret_to_host().
+ *
+ * Returns:    Next address after end of written function.
+ */
+void *kvm_mips_build_vcpu_run(void *addr)
+{
+       u32 *p = addr;
+       unsigned int i;
+
+       /*
+        * A0: run
+        * A1: vcpu
+        */
+
+       /* k0/k1 not being used in host kernel context */
+       uasm_i_addiu(&p, K1, SP, -(int)sizeof(struct pt_regs));
+       for (i = 16; i < 32; ++i) {
+               if (i == 24)
+                       i = 28;
+               UASM_i_SW(&p, i, offsetof(struct pt_regs, regs[i]), K1);
+       }
+
+       /* Save host status */
+       uasm_i_mfc0(&p, V0, C0_STATUS);
+       UASM_i_SW(&p, V0, offsetof(struct pt_regs, cp0_status), K1);
+
+       /* Save scratch registers, will be used to store pointer to vcpu etc */
+       kvm_mips_build_save_scratch(&p, V1, K1);
+
+       /* VCPU scratch register has pointer to vcpu */
+       uasm_i_mtc0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]);
+
+       /* Offset into vcpu->arch */
+       uasm_i_addiu(&p, K1, A1, offsetof(struct kvm_vcpu, arch));
+
+       /*
+        * Save the host stack to VCPU, used for exception processing
+        * when we exit from the Guest
+        */
+       UASM_i_SW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1);
+
+       /* Save the kernel gp as well */
+       UASM_i_SW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1);
+
+       /*
+        * Setup status register for running the guest in UM, interrupts
+        * are disabled
+        */
+       UASM_i_LA(&p, K0, ST0_EXL | KSU_USER | ST0_BEV);
+       uasm_i_mtc0(&p, K0, C0_STATUS);
+       uasm_i_ehb(&p);
+
+       /* load up the new EBASE */
+       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1);
+       uasm_i_mtc0(&p, K0, C0_EBASE);
+
+       /*
+        * Now that the new EBASE has been loaded, unset BEV, set
+        * interrupt mask as it was but make sure that timer interrupts
+        * are enabled
+        */
+       uasm_i_addiu(&p, K0, ZERO, ST0_EXL | KSU_USER | ST0_IE);
+       uasm_i_andi(&p, V0, V0, ST0_IM);
+       uasm_i_or(&p, K0, K0, V0);
+       uasm_i_mtc0(&p, K0, C0_STATUS);
+       uasm_i_ehb(&p);
+
+       p = kvm_mips_build_enter_guest(p);
+
+       return p;
+}
+
+/**
+ * kvm_mips_build_enter_guest() - Assemble code to resume guest execution.
+ * @addr:      Address to start writing code.
+ *
+ * Assemble the code to resume guest execution. This code is common between the
+ * initial entry into the guest from the host, and returning from the exit
+ * handler back to the guest.
+ *
+ * Returns:    Next address after end of written function.
+ */
+static void *kvm_mips_build_enter_guest(void *addr)
+{
+       u32 *p = addr;
+       unsigned int i;
+       struct uasm_label labels[2];
+       struct uasm_reloc relocs[2];
+       struct uasm_label *l = labels;
+       struct uasm_reloc *r = relocs;
+
+       memset(labels, 0, sizeof(labels));
+       memset(relocs, 0, sizeof(relocs));
+
+       /* Set Guest EPC */
+       UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1);
+       uasm_i_mtc0(&p, T0, C0_EPC);
+
+       /* Set the ASID for the Guest Kernel */
+       UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1);
+       UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]),
+                 T0);
+       uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL);
+       uasm_i_xori(&p, T0, T0, KSU_USER);
+       uasm_il_bnez(&p, &r, T0, label_kernel_asid);
+        uasm_i_addiu(&p, T1, K1,
+                     offsetof(struct kvm_vcpu_arch, guest_kernel_asid));
+       /* else user */
+       uasm_i_addiu(&p, T1, K1,
+                    offsetof(struct kvm_vcpu_arch, guest_user_asid));
+       uasm_l_kernel_asid(&l, p);
+
+       /* t1: contains the base of the ASID array, need to get the cpu id  */
+       /* smp_processor_id */
+       UASM_i_LW(&p, T2, offsetof(struct thread_info, cpu), GP);
+       /* x4 */
+       uasm_i_sll(&p, T2, T2, 2);
+       UASM_i_ADDU(&p, T3, T1, T2);
+       UASM_i_LW(&p, K0, 0, T3);
+#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
+       /* x sizeof(struct cpuinfo_mips)/4 */
+       uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/4);
+       uasm_i_mul(&p, T2, T2, T3);
+
+       UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask);
+       UASM_i_ADDU(&p, AT, AT, T2);
+       UASM_i_LW(&p, T2, uasm_rel_lo((long)&cpu_data[0].asid_mask), AT);
+       uasm_i_and(&p, K0, K0, T2);
+#else
+       uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID);
+#endif
+       uasm_i_mtc0(&p, K0, C0_ENTRYHI);
+       uasm_i_ehb(&p);
+
+       /* Disable RDHWR access */
+       uasm_i_mtc0(&p, ZERO, C0_HWRENA);
+
+       /* load the guest context from VCPU and return */
+       for (i = 1; i < 32; ++i) {
+               /* Guest k0/k1 loaded later */
+               if (i == K0 || i == K1)
+                       continue;
+               UASM_i_LW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1);
+       }
+
+#ifndef CONFIG_CPU_MIPSR6
+       /* Restore hi/lo */
+       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, hi), K1);
+       uasm_i_mthi(&p, K0);
+
+       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, lo), K1);
+       uasm_i_mtlo(&p, K0);
+#endif
+
+       /* Restore the guest's k0/k1 registers */
+       UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1);
+       UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1);
+
+       /* Jump to guest */
+       uasm_i_eret(&p);
+
+       uasm_resolve_relocs(relocs, labels);
+
+       return p;
+}
+
+/**
+ * kvm_mips_build_exception() - Assemble first level guest exception handler.
+ * @addr:      Address to start writing code.
+ * @handler:   Address of common handler (within range of @addr).
+ *
+ * Assemble exception vector code for guest execution. The generated vector will
+ * branch to the common exception handler generated by kvm_mips_build_exit().
+ *
+ * Returns:    Next address after end of written function.
+ */
+void *kvm_mips_build_exception(void *addr, void *handler)
+{
+       u32 *p = addr;
+       struct uasm_label labels[2];
+       struct uasm_reloc relocs[2];
+       struct uasm_label *l = labels;
+       struct uasm_reloc *r = relocs;
+
+       memset(labels, 0, sizeof(labels));
+       memset(relocs, 0, sizeof(relocs));
+
+       /* Save guest k1 into scratch register */
+       uasm_i_mtc0(&p, K1, scratch_tmp[0], scratch_tmp[1]);
+
+       /* Get the VCPU pointer from the VCPU scratch register */
+       uasm_i_mfc0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]);
+       uasm_i_addiu(&p, K1, K1, offsetof(struct kvm_vcpu, arch));
+
+       /* Save guest k0 into VCPU structure */
+       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, gprs[K0]), K1);
+
+       /* Branch to the common handler */
+       uasm_il_b(&p, &r, label_exit_common);
+        uasm_i_nop(&p);
+
+       uasm_l_exit_common(&l, handler);
+       uasm_resolve_relocs(relocs, labels);
+
+       return p;
+}
+
+/**
+ * kvm_mips_build_exit() - Assemble common guest exit handler.
+ * @addr:      Address to start writing code.
+ *
+ * Assemble the generic guest exit handling code. This is called by the
+ * exception vectors (generated by kvm_mips_build_exception()), and calls
+ * kvm_mips_handle_exit(), then either resumes the guest or returns to the host
+ * depending on the return value.
+ *
+ * Returns:    Next address after end of written function.
+ */
+void *kvm_mips_build_exit(void *addr)
+{
+       u32 *p = addr;
+       unsigned int i;
+       struct uasm_label labels[3];
+       struct uasm_reloc relocs[3];
+       struct uasm_label *l = labels;
+       struct uasm_reloc *r = relocs;
+
+       memset(labels, 0, sizeof(labels));
+       memset(relocs, 0, sizeof(relocs));
+
+       /*
+        * Generic Guest exception handler. We end up here when the guest
+        * does something that causes a trap to kernel mode.
+        *
+        * Both k0/k1 registers will have already been saved (k0 into the vcpu
+        * structure, and k1 into the scratch_tmp register).
+        *
+        * The k1 register will already contain the kvm_vcpu_arch pointer.
+        */
+
+       /* Start saving Guest context to VCPU */
+       for (i = 0; i < 32; ++i) {
+               /* Guest k0/k1 saved later */
+               if (i == K0 || i == K1)
+                       continue;
+               UASM_i_SW(&p, i, offsetof(struct kvm_vcpu_arch, gprs[i]), K1);
+       }
+
+#ifndef CONFIG_CPU_MIPSR6
+       /* We need to save hi/lo and restore them on the way out */
+       uasm_i_mfhi(&p, T0);
+       UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, hi), K1);
+
+       uasm_i_mflo(&p, T0);
+       UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, lo), K1);
+#endif
+
+       /* Finally save guest k1 to VCPU */
+       uasm_i_ehb(&p);
+       uasm_i_mfc0(&p, T0, scratch_tmp[0], scratch_tmp[1]);
+       UASM_i_SW(&p, T0, offsetof(struct kvm_vcpu_arch, gprs[K1]), K1);
+
+       /* Now that context has been saved, we can use other registers */
+
+       /* Restore vcpu */
+       uasm_i_mfc0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]);
+       uasm_i_move(&p, S1, A1);
+
+       /* Restore run (vcpu->run) */
+       UASM_i_LW(&p, A0, offsetof(struct kvm_vcpu, run), A1);
+       /* Save pointer to run in s0, will be saved by the compiler */
+       uasm_i_move(&p, S0, A0);
+
+       /*
+        * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process
+        * the exception
+        */
+       uasm_i_mfc0(&p, K0, C0_EPC);
+       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, pc), K1);
+
+       uasm_i_mfc0(&p, K0, C0_BADVADDR);
+       UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_badvaddr),
+                 K1);
+
+       uasm_i_mfc0(&p, K0, C0_CAUSE);
+       uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1);
+
+       /* Now restore the host state just enough to run the handlers */
+
+       /* Switch EBASE to the one used by Linux */
+       /* load up the host EBASE */
+       uasm_i_mfc0(&p, V0, C0_STATUS);
+
+       uasm_i_lui(&p, AT, ST0_BEV >> 16);
+       uasm_i_or(&p, K0, V0, AT);
+
+       uasm_i_mtc0(&p, K0, C0_STATUS);
+       uasm_i_ehb(&p);
+
+       UASM_i_LA_mostly(&p, K0, (long)&ebase);
+       UASM_i_LW(&p, K0, uasm_rel_lo((long)&ebase), K0);
+       uasm_i_mtc0(&p, K0, C0_EBASE);
+
+       if (raw_cpu_has_fpu) {
+               /*
+                * If FPU is enabled, save FCR31 and clear it so that later
+                * ctc1's don't trigger FPE for pending exceptions.
+                */
+               uasm_i_lui(&p, AT, ST0_CU1 >> 16);
+               uasm_i_and(&p, V1, V0, AT);
+               uasm_il_beqz(&p, &r, V1, label_fpu_1);
+                uasm_i_nop(&p);
+               uasm_i_cfc1(&p, T0, 31);
+               uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.fcr31),
+                         K1);
+               uasm_i_ctc1(&p, ZERO, 31);
+               uasm_l_fpu_1(&l, p);
+       }
+
+       if (cpu_has_msa) {
+               /*
+                * If MSA is enabled, save MSACSR and clear it so that later
+                * instructions don't trigger MSAFPE for pending exceptions.
+                */
+               uasm_i_mfc0(&p, T0, C0_CONFIG5);
+               uasm_i_ext(&p, T0, T0, 27, 1); /* MIPS_CONF5_MSAEN */
+               uasm_il_beqz(&p, &r, T0, label_msa_1);
+                uasm_i_nop(&p);
+               uasm_i_cfcmsa(&p, T0, MSA_CSR);
+               uasm_i_sw(&p, T0, offsetof(struct kvm_vcpu_arch, fpu.msacsr),
+                         K1);
+               uasm_i_ctcmsa(&p, MSA_CSR, ZERO);
+               uasm_l_msa_1(&l, p);
+       }
+
+       /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
+       uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE));
+       uasm_i_and(&p, V0, V0, AT);
+       uasm_i_lui(&p, AT, ST0_CU0 >> 16);
+       uasm_i_or(&p, V0, V0, AT);
+       uasm_i_mtc0(&p, V0, C0_STATUS);
+       uasm_i_ehb(&p);
+
+       /* Load up host GP */
+       UASM_i_LW(&p, GP, offsetof(struct kvm_vcpu_arch, host_gp), K1);
+
+       /* Need a stack before we can jump to "C" */
+       UASM_i_LW(&p, SP, offsetof(struct kvm_vcpu_arch, host_stack), K1);
+
+       /* Saved host state */
+       uasm_i_addiu(&p, SP, SP, -(int)sizeof(struct pt_regs));
+
+       /*
+        * XXXKYMA do we need to load the host ASID, maybe not because the
+        * kernel entries are marked GLOBAL, need to verify
+        */
+
+       /* Restore host scratch registers, as we'll have clobbered them */
+       kvm_mips_build_restore_scratch(&p, K0, SP);
+
+       /* Restore RDHWR access */
+       UASM_i_LA_mostly(&p, K0, (long)&hwrena);
+       uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0);
+       uasm_i_mtc0(&p, K0, C0_HWRENA);
+
+       /* Jump to handler */
+       /*
+        * XXXKYMA: not sure if this is safe, how large is the stack??
+        * Now jump to the kvm_mips_handle_exit() to see if we can deal
+        * with this in the kernel
+        */
+       UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit);
+       uasm_i_jalr(&p, RA, T9);
+        uasm_i_addiu(&p, SP, SP, -CALLFRAME_SIZ);
+
+       uasm_resolve_relocs(relocs, labels);
+
+       p = kvm_mips_build_ret_from_exit(p);
+
+       return p;
+}
+
+/**
+ * kvm_mips_build_ret_from_exit() - Assemble guest exit return handler.
+ * @addr:      Address to start writing code.
+ *
+ * Assemble the code to handle the return from kvm_mips_handle_exit(), either
+ * resuming the guest or returning to the host depending on the return value.
+ *
+ * Returns:    Next address after end of written function.
+ */
+static void *kvm_mips_build_ret_from_exit(void *addr)
+{
+       u32 *p = addr;
+       struct uasm_label labels[2];
+       struct uasm_reloc relocs[2];
+       struct uasm_label *l = labels;
+       struct uasm_reloc *r = relocs;
+
+       memset(labels, 0, sizeof(labels));
+       memset(relocs, 0, sizeof(relocs));
+
+       /* Return from handler; make sure interrupts are disabled */
+       uasm_i_di(&p, ZERO);
+       uasm_i_ehb(&p);
+
+       /*
+        * XXXKYMA: k0/k1 could have been blown away if we processed
+        * an exception while we were handling the exception from the
+        * guest, reload k1
+        */
+
+       uasm_i_move(&p, K1, S1);
+       uasm_i_addiu(&p, K1, K1, offsetof(struct kvm_vcpu, arch));
+
+       /*
+        * Check return value, should tell us if we are returning to the
+        * host (handle I/O etc.) or resuming the guest
+        */
+       uasm_i_andi(&p, T0, V0, RESUME_HOST);
+       uasm_il_bnez(&p, &r, T0, label_return_to_host);
+        uasm_i_nop(&p);
+
+       p = kvm_mips_build_ret_to_guest(p);
+
+       uasm_l_return_to_host(&l, p);
+       p = kvm_mips_build_ret_to_host(p);
+
+       uasm_resolve_relocs(relocs, labels);
+
+       return p;
+}
+
+/**
+ * kvm_mips_build_ret_to_guest() - Assemble code to return to the guest.
+ * @addr:      Address to start writing code.
+ *
+ * Assemble the code to handle return from the guest exit handler
+ * (kvm_mips_handle_exit()) back to the guest.
+ *
+ * Returns:    Next address after end of written function.
+ */
+static void *kvm_mips_build_ret_to_guest(void *addr)
+{
+       u32 *p = addr;
+
+       /* Put the saved pointer to vcpu (s1) back into the scratch register */
+       uasm_i_mtc0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
+
+       /* Load up the Guest EBASE to minimize the window where BEV is set */
+       UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, guest_ebase), K1);
+
+       /* Switch EBASE back to the one used by KVM */
+       uasm_i_mfc0(&p, V1, C0_STATUS);
+       uasm_i_lui(&p, AT, ST0_BEV >> 16);
+       uasm_i_or(&p, K0, V1, AT);
+       uasm_i_mtc0(&p, K0, C0_STATUS);
+       uasm_i_ehb(&p);
+       uasm_i_mtc0(&p, T0, C0_EBASE);
+
+       /* Setup status register for running guest in UM */
+       uasm_i_ori(&p, V1, V1, ST0_EXL | KSU_USER | ST0_IE);
+       UASM_i_LA(&p, AT, ~(ST0_CU0 | ST0_MX));
+       uasm_i_and(&p, V1, V1, AT);
+       uasm_i_mtc0(&p, V1, C0_STATUS);
+       uasm_i_ehb(&p);
+
+       p = kvm_mips_build_enter_guest(p);
+
+       return p;
+}
+
+/**
+ * kvm_mips_build_ret_to_host() - Assemble code to return to the host.
+ * @addr:      Address to start writing code.
+ *
+ * Assemble the code to handle return from the guest exit handler
+ * (kvm_mips_handle_exit()) back to the host, i.e. to the caller of the vcpu_run
+ * function generated by kvm_mips_build_vcpu_run().
+ *
+ * Returns:    Next address after end of written function.
+ */
+static void *kvm_mips_build_ret_to_host(void *addr)
+{
+       u32 *p = addr;
+       unsigned int i;
+
+       /* EBASE is already pointing to Linux */
+       UASM_i_LW(&p, K1, offsetof(struct kvm_vcpu_arch, host_stack), K1);
+       uasm_i_addiu(&p, K1, K1, -(int)sizeof(struct pt_regs));
+
+       /*
+        * r2/v0 is the return code, shift it down by 2 (arithmetic)
+        * to recover the err code
+        */
+       uasm_i_sra(&p, K0, V0, 2);
+       uasm_i_move(&p, V0, K0);
+
+       /* Load context saved on the host stack */
+       for (i = 16; i < 31; ++i) {
+               if (i == 24)
+                       i = 28;
+               UASM_i_LW(&p, i, offsetof(struct pt_regs, regs[i]), K1);
+       }
+
+       /* Restore RDHWR access */
+       UASM_i_LA_mostly(&p, K0, (long)&hwrena);
+       uasm_i_lw(&p, K0, uasm_rel_lo((long)&hwrena), K0);
+       uasm_i_mtc0(&p, K0, C0_HWRENA);
+
+       /* Restore RA, which is the address we will return to */
+       UASM_i_LW(&p, RA, offsetof(struct pt_regs, regs[RA]), K1);
+       uasm_i_jr(&p, RA);
+        uasm_i_nop(&p);
+
+       return p;
+}
+
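A hedged sketch of how the new builders are meant to be strung together at
VCPU setup time (buffer layout and names like gebase and run_addr are
assumptions for illustration, not the exact KVM/MIPS layout):

	/* once, at init: pick scratch (KScratch) registers */
	kvm_mips_entry_setup();

	/* per VCPU: emit vector stubs that branch to one common handler */
	void *handler = gebase + 0x2000;		   /* assumed slot */
	kvm_mips_build_exception(gebase + 0x180, handler); /* general vector */
	kvm_mips_build_exit(handler);	/* saves guest state and calls
					 * kvm_mips_handle_exit() */

	/* emit the run entry point, then enter the guest through it */
	int (*run)(struct kvm_run *, struct kvm_vcpu *) = run_addr;
	kvm_mips_build_vcpu_run(run_addr);
	ret = run(vcpu->run, vcpu);
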
index 531fbf5131c02b1771b1d52f4006e1082deb15e1..16f17c6390dd000248abcfde1c272dbd0ae679df 100644 (file)
 #include <asm/mipsregs.h>
 #include <asm/regdef.h>
 
+/* preprocessor replaces the fp in ".set fp=64" with $30 otherwise */
+#undef fp
+
        .set    noreorder
        .set    noat
 
 LEAF(__kvm_save_fpu)
        .set    push
-       .set    mips64r2
        SET_HARDFLOAT
+       .set    fp=64
        mfc0    t0, CP0_STATUS
        sll     t0, t0, 5                       # is Status.FR set?
        bgez    t0, 1f                          # no: skip odd doubles
@@ -63,8 +66,8 @@ LEAF(__kvm_save_fpu)
 
 LEAF(__kvm_restore_fpu)
        .set    push
-       .set    mips64r2
        SET_HARDFLOAT
+       .set    fp=64
        mfc0    t0, CP0_STATUS
        sll     t0, t0, 5                       # is Status.FR set?
        bgez    t0, 1f                          # no: skip odd doubles
index d661c100b2198e63ebf8aa5de139684ad57e9580..fb118a2c8379f8b97fceb68730f98f6bb6d6ba61 100644 (file)
 #define MIPS_EXC_MAX                12
 /* XXXSL More to follow */
 
-extern char __kvm_mips_vcpu_run_end[];
-extern char mips32_exception[], mips32_exceptionEnd[];
-extern char mips32_GuestException[], mips32_GuestExceptionEnd[];
-
 #define C_TI        (_ULCAST_(1) << 30)
 
 #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0)
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
deleted file mode 100644 (file)
index 698286c..0000000
+++ /dev/null
@@ -1,602 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Main entry point for the guest, exception handling.
- *
- * Copyright (C) 2012  MIPS Technologies, Inc.  All rights reserved.
- * Authors: Sanjay Lal <sanjayl@kymasys.com>
- */
-
-#include <asm/asm.h>
-#include <asm/asmmacro.h>
-#include <asm/regdef.h>
-#include <asm/mipsregs.h>
-#include <asm/stackframe.h>
-#include <asm/asm-offsets.h>
-
-#define _C_LABEL(x)     x
-#define MIPSX(name)     mips32_ ## name
-#define CALLFRAME_SIZ   32
-
-/*
- * VECTOR
- *  exception vector entrypoint
- */
-#define VECTOR(x, regmask)      \
-    .ent    _C_LABEL(x),0;      \
-    EXPORT(x);
-
-#define VECTOR_END(x)      \
-    EXPORT(x);
-
-/* Overload, Danger Will Robinson!! */
-#define PT_HOST_USERLOCAL   PT_EPC
-
-#define CP0_DDATA_LO        $28,3
-
-/* Resume Flags */
-#define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
-
-#define RESUME_GUEST            0
-#define RESUME_HOST             RESUME_FLAG_HOST
-
-/*
- * __kvm_mips_vcpu_run: entry point to the guest
- * a0: run
- * a1: vcpu
- */
-       .set    noreorder
-
-FEXPORT(__kvm_mips_vcpu_run)
-       /* k0/k1 not being used in host kernel context */
-       INT_ADDIU k1, sp, -PT_SIZE
-       LONG_S  $16, PT_R16(k1)
-       LONG_S  $17, PT_R17(k1)
-       LONG_S  $18, PT_R18(k1)
-       LONG_S  $19, PT_R19(k1)
-       LONG_S  $20, PT_R20(k1)
-       LONG_S  $21, PT_R21(k1)
-       LONG_S  $22, PT_R22(k1)
-       LONG_S  $23, PT_R23(k1)
-
-       LONG_S  $28, PT_R28(k1)
-       LONG_S  $29, PT_R29(k1)
-       LONG_S  $30, PT_R30(k1)
-       LONG_S  $31, PT_R31(k1)
-
-       /* Save hi/lo */
-       mflo    v0
-       LONG_S  v0, PT_LO(k1)
-       mfhi    v1
-       LONG_S  v1, PT_HI(k1)
-
-       /* Save host status */
-       mfc0    v0, CP0_STATUS
-       LONG_S  v0, PT_STATUS(k1)
-
-       /* Save DDATA_LO, will be used to store pointer to vcpu */
-       mfc0    v1, CP0_DDATA_LO
-       LONG_S  v1, PT_HOST_USERLOCAL(k1)
-
-       /* DDATA_LO has pointer to vcpu */
-       mtc0    a1, CP0_DDATA_LO
-
-       /* Offset into vcpu->arch */
-       INT_ADDIU k1, a1, VCPU_HOST_ARCH
-
-       /*
-        * Save the host stack to VCPU, used for exception processing
-        * when we exit from the Guest
-        */
-       LONG_S  sp, VCPU_HOST_STACK(k1)
-
-       /* Save the kernel gp as well */
-       LONG_S  gp, VCPU_HOST_GP(k1)
-
-       /*
-        * Setup status register for running the guest in UM, interrupts
-        * are disabled
-        */
-       li      k0, (ST0_EXL | KSU_USER | ST0_BEV)
-       mtc0    k0, CP0_STATUS
-       ehb
-
-       /* load up the new EBASE */
-       LONG_L  k0, VCPU_GUEST_EBASE(k1)
-       mtc0    k0, CP0_EBASE
-
-       /*
-        * Now that the new EBASE has been loaded, unset BEV, set
-        * interrupt mask as it was but make sure that timer interrupts
-        * are enabled
-        */
-       li      k0, (ST0_EXL | KSU_USER | ST0_IE)
-       andi    v0, v0, ST0_IM
-       or      k0, k0, v0
-       mtc0    k0, CP0_STATUS
-       ehb
-
-       /* Set Guest EPC */
-       LONG_L  t0, VCPU_PC(k1)
-       mtc0    t0, CP0_EPC
-
-FEXPORT(__kvm_mips_load_asid)
-       /* Set the ASID for the Guest Kernel */
-       PTR_L   t0, VCPU_COP0(k1)
-       LONG_L  t0, COP0_STATUS(t0)
-       andi    t0, KSU_USER | ST0_ERL | ST0_EXL
-       xori    t0, KSU_USER
-       bnez    t0, 1f          /* If kernel */
-        INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
-       INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
-1:
-       /* t1: contains the base of the ASID array, need to get the cpu id */
-       LONG_L  t2, TI_CPU($28)             /* smp_processor_id */
-       INT_SLL t2, t2, 2                   /* x4 */
-       REG_ADDU t3, t1, t2
-       LONG_L  k0, (t3)
-#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
-       li      t3, CPUINFO_SIZE/4
-       mul     t2, t2, t3              /* x sizeof(struct cpuinfo_mips)/4 */
-       LONG_L  t2, (cpu_data + CPUINFO_ASID_MASK)(t2)
-       and     k0, k0, t2
-#else
-       andi    k0, k0, MIPS_ENTRYHI_ASID
-#endif
-       mtc0    k0, CP0_ENTRYHI
-       ehb
-
-       /* Disable RDHWR access */
-       mtc0    zero, CP0_HWRENA
-
-       .set    noat
-       /* Now load up the Guest Context from VCPU */
-       LONG_L  $1, VCPU_R1(k1)
-       LONG_L  $2, VCPU_R2(k1)
-       LONG_L  $3, VCPU_R3(k1)
-
-       LONG_L  $4, VCPU_R4(k1)
-       LONG_L  $5, VCPU_R5(k1)
-       LONG_L  $6, VCPU_R6(k1)
-       LONG_L  $7, VCPU_R7(k1)
-
-       LONG_L  $8, VCPU_R8(k1)
-       LONG_L  $9, VCPU_R9(k1)
-       LONG_L  $10, VCPU_R10(k1)
-       LONG_L  $11, VCPU_R11(k1)
-       LONG_L  $12, VCPU_R12(k1)
-       LONG_L  $13, VCPU_R13(k1)
-       LONG_L  $14, VCPU_R14(k1)
-       LONG_L  $15, VCPU_R15(k1)
-       LONG_L  $16, VCPU_R16(k1)
-       LONG_L  $17, VCPU_R17(k1)
-       LONG_L  $18, VCPU_R18(k1)
-       LONG_L  $19, VCPU_R19(k1)
-       LONG_L  $20, VCPU_R20(k1)
-       LONG_L  $21, VCPU_R21(k1)
-       LONG_L  $22, VCPU_R22(k1)
-       LONG_L  $23, VCPU_R23(k1)
-       LONG_L  $24, VCPU_R24(k1)
-       LONG_L  $25, VCPU_R25(k1)
-
-       /* k0/k1 loaded up later */
-
-       LONG_L  $28, VCPU_R28(k1)
-       LONG_L  $29, VCPU_R29(k1)
-       LONG_L  $30, VCPU_R30(k1)
-       LONG_L  $31, VCPU_R31(k1)
-
-       /* Restore hi/lo */
-       LONG_L  k0, VCPU_LO(k1)
-       mtlo    k0
-
-       LONG_L  k0, VCPU_HI(k1)
-       mthi    k0
-
-FEXPORT(__kvm_mips_load_k0k1)
-       /* Restore the guest's k0/k1 registers */
-       LONG_L  k0, VCPU_R26(k1)
-       LONG_L  k1, VCPU_R27(k1)
-
-       /* Jump to guest */
-       eret
-EXPORT(__kvm_mips_vcpu_run_end)
-
-VECTOR(MIPSX(exception), unknown)
-/* Find out what mode we came from and jump to the proper handler. */
-       mtc0    k0, CP0_ERROREPC        #01: Save guest k0
-       ehb                             #02:
-
-       mfc0    k0, CP0_EBASE           #02: Get EBASE
-       INT_SRL k0, k0, 10              #03: Get rid of CPUNum
-       INT_SLL k0, k0, 10              #04
-       LONG_S  k1, 0x3000(k0)          #05: Save k1 @ offset 0x3000
-       INT_ADDIU k0, k0, 0x2000        #06: Exception handler is
-                                       #    installed @ offset 0x2000
-       j       k0                      #07: jump to the function
-        nop                            #08: branch delay slot
-VECTOR_END(MIPSX(exceptionEnd))
-.end MIPSX(exception)
-
-/*
- * Generic Guest exception handler. We end up here when the guest
- * does something that causes a trap to kernel mode.
- */
-NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
-       /* Get the VCPU pointer from DDTATA_LO */
-       mfc0    k1, CP0_DDATA_LO
-       INT_ADDIU k1, k1, VCPU_HOST_ARCH
-
-       /* Start saving Guest context to VCPU */
-       LONG_S  $0, VCPU_R0(k1)
-       LONG_S  $1, VCPU_R1(k1)
-       LONG_S  $2, VCPU_R2(k1)
-       LONG_S  $3, VCPU_R3(k1)
-       LONG_S  $4, VCPU_R4(k1)
-       LONG_S  $5, VCPU_R5(k1)
-       LONG_S  $6, VCPU_R6(k1)
-       LONG_S  $7, VCPU_R7(k1)
-       LONG_S  $8, VCPU_R8(k1)
-       LONG_S  $9, VCPU_R9(k1)
-       LONG_S  $10, VCPU_R10(k1)
-       LONG_S  $11, VCPU_R11(k1)
-       LONG_S  $12, VCPU_R12(k1)
-       LONG_S  $13, VCPU_R13(k1)
-       LONG_S  $14, VCPU_R14(k1)
-       LONG_S  $15, VCPU_R15(k1)
-       LONG_S  $16, VCPU_R16(k1)
-       LONG_S  $17, VCPU_R17(k1)
-       LONG_S  $18, VCPU_R18(k1)
-       LONG_S  $19, VCPU_R19(k1)
-       LONG_S  $20, VCPU_R20(k1)
-       LONG_S  $21, VCPU_R21(k1)
-       LONG_S  $22, VCPU_R22(k1)
-       LONG_S  $23, VCPU_R23(k1)
-       LONG_S  $24, VCPU_R24(k1)
-       LONG_S  $25, VCPU_R25(k1)
-
-       /* Guest k0/k1 saved later */
-
-       LONG_S  $28, VCPU_R28(k1)
-       LONG_S  $29, VCPU_R29(k1)
-       LONG_S  $30, VCPU_R30(k1)
-       LONG_S  $31, VCPU_R31(k1)
-
-       .set at
-
-       /* We need to save hi/lo and restore them on the way out */
-       mfhi    t0
-       LONG_S  t0, VCPU_HI(k1)
-
-       mflo    t0
-       LONG_S  t0, VCPU_LO(k1)
-
-       /* Finally save guest k0/k1 to VCPU */
-       mfc0    t0, CP0_ERROREPC
-       LONG_S  t0, VCPU_R26(k1)
-
-       /* Get GUEST k1 and save it in VCPU */
-       PTR_LI  t1, ~0x2ff
-       mfc0    t0, CP0_EBASE
-       and     t0, t0, t1
-       LONG_L  t0, 0x3000(t0)
-       LONG_S  t0, VCPU_R27(k1)
-
-       /* Now that context has been saved, we can use other registers */
-
-       /* Restore vcpu */
-       mfc0    a1, CP0_DDATA_LO
-       move    s1, a1
-
-       /* Restore run (vcpu->run) */
-       LONG_L  a0, VCPU_RUN(a1)
-       /* Save pointer to run in s0, will be saved by the compiler */
-       move    s0, a0
-
-       /*
-        * Save Host level EPC, BadVaddr and Cause to VCPU, useful to
-        * process the exception
-        */
-       mfc0    k0,CP0_EPC
-       LONG_S  k0, VCPU_PC(k1)
-
-       mfc0    k0, CP0_BADVADDR
-       LONG_S  k0, VCPU_HOST_CP0_BADVADDR(k1)
-
-       mfc0    k0, CP0_CAUSE
-       sw      k0, VCPU_HOST_CP0_CAUSE(k1)
-
-       /* Now restore the host state just enough to run the handlers */
-
-       /* Switch EBASE to the one used by Linux */
-       /* load up the host EBASE */
-       mfc0    v0, CP0_STATUS
-
-       or      k0, v0, ST0_BEV
-
-       mtc0    k0, CP0_STATUS
-       ehb
-
-       LONG_L  k0, ebase
-       mtc0    k0,CP0_EBASE
-
-       /*
-        * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't
-        * trigger FPE for pending exceptions.
-        */
-       and     v1, v0, ST0_CU1
-       beqz    v1, 1f
-        nop
-       .set    push
-       SET_HARDFLOAT
-       cfc1    t0, fcr31
-       sw      t0, VCPU_FCR31(k1)
-       ctc1    zero,fcr31
-       .set    pop
-1:
-
-#ifdef CONFIG_CPU_HAS_MSA
-       /*
-        * If MSA is enabled, save MSACSR and clear it so that later
-        * instructions don't trigger MSAFPE for pending exceptions.
-        */
-       mfc0    t0, CP0_CONFIG3
-       ext     t0, t0, 28, 1 /* MIPS_CONF3_MSAP */
-       beqz    t0, 1f
-        nop
-       mfc0    t0, CP0_CONFIG5
-       ext     t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */
-       beqz    t0, 1f
-        nop
-       _cfcmsa t0, MSA_CSR
-       sw      t0, VCPU_MSA_CSR(k1)
-       _ctcmsa MSA_CSR, zero
-1:
-#endif
-
-       /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
-       and     v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
-       or      v0, v0, ST0_CU0
-       mtc0    v0, CP0_STATUS
-       ehb
-
-       /* Load up host GP */
-       LONG_L  gp, VCPU_HOST_GP(k1)
-
-       /* Need a stack before we can jump to "C" */
-       LONG_L  sp, VCPU_HOST_STACK(k1)
-
-       /* Saved host state */
-       INT_ADDIU sp, sp, -PT_SIZE
-
-       /*
-        * XXXKYMA do we need to load the host ASID, maybe not because the
-        * kernel entries are marked GLOBAL, need to verify
-        */
-
-       /* Restore host DDATA_LO */
-       LONG_L  k0, PT_HOST_USERLOCAL(sp)
-       mtc0    k0, CP0_DDATA_LO
-
-       /* Restore RDHWR access */
-       INT_L   k0, hwrena
-       mtc0    k0, CP0_HWRENA
-
-       /* Jump to handler */
-FEXPORT(__kvm_mips_jump_to_handler)
-       /*
-        * XXXKYMA: not sure if this is safe, how large is the stack??
-        * Now jump to the kvm_mips_handle_exit() to see if we can deal
-        * with this in the kernel
-        */
-       PTR_LA  t9, kvm_mips_handle_exit
-       jalr.hb t9
-        INT_ADDIU sp, sp, -CALLFRAME_SIZ           /* BD Slot */
-
-       /* Return from handler; make sure interrupts are disabled */
-       di
-       ehb
-
-       /*
-        * XXXKYMA: k0/k1 could have been blown away if we processed
-        * an exception while we were handling the exception from the
-        * guest, reload k1
-        */
-
-       move    k1, s1
-       INT_ADDIU k1, k1, VCPU_HOST_ARCH
-
-       /*
-        * Check the return value; it tells us whether we are returning to the
-        * host (to handle I/O etc.) or resuming the guest
-        */
-       andi    t0, v0, RESUME_HOST
-       bnez    t0, __kvm_mips_return_to_host
-        nop
-
-__kvm_mips_return_to_guest:
-       /* Put the saved pointer to vcpu (s1) back into the DDATA_LO Register */
-       mtc0    s1, CP0_DDATA_LO
-
-       /* Load up the Guest EBASE to minimize the window where BEV is set */
-       LONG_L  t0, VCPU_GUEST_EBASE(k1)
-
-       /* Switch EBASE back to the one used by KVM */
-       mfc0    v1, CP0_STATUS
-       or      k0, v1, ST0_BEV
-       mtc0    k0, CP0_STATUS
-       ehb
-       mtc0    t0, CP0_EBASE
-
-       /* Setup status register for running guest in UM */
-       or      v1, v1, (ST0_EXL | KSU_USER | ST0_IE)
-       and     v1, v1, ~(ST0_CU0 | ST0_MX)
-       mtc0    v1, CP0_STATUS
-       ehb
-
-       /* Set Guest EPC */
-       LONG_L  t0, VCPU_PC(k1)
-       mtc0    t0, CP0_EPC
-
-       /* Set the ASID for the Guest Kernel */
-       PTR_L   t0, VCPU_COP0(k1)
-       LONG_L  t0, COP0_STATUS(t0)
-       andi    t0, KSU_USER | ST0_ERL | ST0_EXL
-       xori    t0, KSU_USER
-       bnez    t0, 1f          /* If kernel */
-        INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
-       INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
-1:
-       /* t1: contains the base of the ASID array, need to get the cpu id  */
-       LONG_L  t2, TI_CPU($28)         /* smp_processor_id */
-       INT_SLL t2, t2, 2               /* x4 */
-       REG_ADDU t3, t1, t2
-       LONG_L  k0, (t3)
-#ifdef CONFIG_MIPS_ASID_BITS_VARIABLE
-       li      t3, CPUINFO_SIZE/4
-       mul     t2, t2, t3              /* x sizeof(struct cpuinfo_mips)/4 */
-       LONG_L  t2, (cpu_data + CPUINFO_ASID_MASK)(t2)
-       and     k0, k0, t2
-#else
-       andi    k0, k0, MIPS_ENTRYHI_ASID
-#endif
-       mtc0    k0, CP0_ENTRYHI
-       ehb
-
-       /* Disable RDHWR access */
-       mtc0    zero, CP0_HWRENA
-
-       .set    noat
-       /* load the guest context from VCPU and return */
-       LONG_L  $0, VCPU_R0(k1)
-       LONG_L  $1, VCPU_R1(k1)
-       LONG_L  $2, VCPU_R2(k1)
-       LONG_L  $3, VCPU_R3(k1)
-       LONG_L  $4, VCPU_R4(k1)
-       LONG_L  $5, VCPU_R5(k1)
-       LONG_L  $6, VCPU_R6(k1)
-       LONG_L  $7, VCPU_R7(k1)
-       LONG_L  $8, VCPU_R8(k1)
-       LONG_L  $9, VCPU_R9(k1)
-       LONG_L  $10, VCPU_R10(k1)
-       LONG_L  $11, VCPU_R11(k1)
-       LONG_L  $12, VCPU_R12(k1)
-       LONG_L  $13, VCPU_R13(k1)
-       LONG_L  $14, VCPU_R14(k1)
-       LONG_L  $15, VCPU_R15(k1)
-       LONG_L  $16, VCPU_R16(k1)
-       LONG_L  $17, VCPU_R17(k1)
-       LONG_L  $18, VCPU_R18(k1)
-       LONG_L  $19, VCPU_R19(k1)
-       LONG_L  $20, VCPU_R20(k1)
-       LONG_L  $21, VCPU_R21(k1)
-       LONG_L  $22, VCPU_R22(k1)
-       LONG_L  $23, VCPU_R23(k1)
-       LONG_L  $24, VCPU_R24(k1)
-       LONG_L  $25, VCPU_R25(k1)
-
-       /* k0/k1 loaded later */
-       LONG_L  $28, VCPU_R28(k1)
-       LONG_L  $29, VCPU_R29(k1)
-       LONG_L  $30, VCPU_R30(k1)
-       LONG_L  $31, VCPU_R31(k1)
-
-FEXPORT(__kvm_mips_skip_guest_restore)
-       LONG_L  k0, VCPU_HI(k1)
-       mthi    k0
-
-       LONG_L  k0, VCPU_LO(k1)
-       mtlo    k0
-
-       LONG_L  k0, VCPU_R26(k1)
-       LONG_L  k1, VCPU_R27(k1)
-
-       eret
-       .set    at
-
-__kvm_mips_return_to_host:
-       /* EBASE is already pointing to Linux */
-       LONG_L  k1, VCPU_HOST_STACK(k1)
-       INT_ADDIU k1,k1, -PT_SIZE
-
-       /* Restore host DDATA_LO */
-       LONG_L  k0, PT_HOST_USERLOCAL(k1)
-       mtc0    k0, CP0_DDATA_LO
-
-       /*
-        * r2/v0 is the return code, shift it down by 2 (arithmetic)
-        * to recover the err code
-        */
-       INT_SRA k0, v0, 2
-       move    $2, k0
-
-       /* Load context saved on the host stack */
-       LONG_L  $16, PT_R16(k1)
-       LONG_L  $17, PT_R17(k1)
-       LONG_L  $18, PT_R18(k1)
-       LONG_L  $19, PT_R19(k1)
-       LONG_L  $20, PT_R20(k1)
-       LONG_L  $21, PT_R21(k1)
-       LONG_L  $22, PT_R22(k1)
-       LONG_L  $23, PT_R23(k1)
-
-       LONG_L  $28, PT_R28(k1)
-       LONG_L  $29, PT_R29(k1)
-       LONG_L  $30, PT_R30(k1)
-
-       LONG_L  k0, PT_HI(k1)
-       mthi    k0
-
-       LONG_L  k0, PT_LO(k1)
-       mtlo    k0
-
-       /* Restore RDHWR access */
-       INT_L   k0, hwrena
-       mtc0    k0, CP0_HWRENA
-
-       /* Restore RA, which is the address we will return to */
-       LONG_L  ra, PT_R31(k1)
-       j       ra
-        nop
-
-VECTOR_END(MIPSX(GuestExceptionEnd))
-.end MIPSX(GuestException)
-
-MIPSX(exceptions):
-       ####
-       ##### The exception handlers.
-       #####
-       .word _C_LABEL(MIPSX(GuestException))   #  0
-       .word _C_LABEL(MIPSX(GuestException))   #  1
-       .word _C_LABEL(MIPSX(GuestException))   #  2
-       .word _C_LABEL(MIPSX(GuestException))   #  3
-       .word _C_LABEL(MIPSX(GuestException))   #  4
-       .word _C_LABEL(MIPSX(GuestException))   #  5
-       .word _C_LABEL(MIPSX(GuestException))   #  6
-       .word _C_LABEL(MIPSX(GuestException))   #  7
-       .word _C_LABEL(MIPSX(GuestException))   #  8
-       .word _C_LABEL(MIPSX(GuestException))   #  9
-       .word _C_LABEL(MIPSX(GuestException))   # 10
-       .word _C_LABEL(MIPSX(GuestException))   # 11
-       .word _C_LABEL(MIPSX(GuestException))   # 12
-       .word _C_LABEL(MIPSX(GuestException))   # 13
-       .word _C_LABEL(MIPSX(GuestException))   # 14
-       .word _C_LABEL(MIPSX(GuestException))   # 15
-       .word _C_LABEL(MIPSX(GuestException))   # 16
-       .word _C_LABEL(MIPSX(GuestException))   # 17
-       .word _C_LABEL(MIPSX(GuestException))   # 18
-       .word _C_LABEL(MIPSX(GuestException))   # 19
-       .word _C_LABEL(MIPSX(GuestException))   # 20
-       .word _C_LABEL(MIPSX(GuestException))   # 21
-       .word _C_LABEL(MIPSX(GuestException))   # 22
-       .word _C_LABEL(MIPSX(GuestException))   # 23
-       .word _C_LABEL(MIPSX(GuestException))   # 24
-       .word _C_LABEL(MIPSX(GuestException))   # 25
-       .word _C_LABEL(MIPSX(GuestException))   # 26
-       .word _C_LABEL(MIPSX(GuestException))   # 27
-       .word _C_LABEL(MIPSX(GuestException))   # 28
-       .word _C_LABEL(MIPSX(GuestException))   # 29
-       .word _C_LABEL(MIPSX(GuestException))   # 30
-       .word _C_LABEL(MIPSX(GuestException))   # 31
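The deleted vector code above depended on a fixed layout carved out of the guest exception base (gebase) area; the replacement in mips.c below emits the same pieces with uasm instead of copying a prebuilt assembly blob. A rough sketch of that layout, with offsets taken from the assembly above and the mips.c hunk below:

	/*
	 * gebase + 0x0000  TLB refill vector (EXL = 0)
	 * gebase + 0x0180  general exception vector
	 * gebase + 0x0200  vectored interrupt entries (8 x VECTORSPACING)
	 * gebase + 0x2000  common guest exit handler
	 * gebase + 0x3000  scratch slot used to spill guest k1 on entry
	 */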
index 5a2b9034a05ce1fa7f4772e9c58a097c315bcca2..414b00074e296a92d0f5377be5936032eac0ef25 100644 (file)
@@ -245,10 +245,27 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
        }
 }
 
+static inline void dump_handler(const char *symbol, void *start, void *end)
+{
+       u32 *p;
+
+       pr_debug("LEAF(%s)\n", symbol);
+
+       pr_debug("\t.set push\n");
+       pr_debug("\t.set noreorder\n");
+
+       for (p = start; p < (u32 *)end; ++p)
+               pr_debug("\t.word\t0x%08x\t\t# %p\n", *p, p);
+
+       pr_debug("\t.set\tpop\n");
+
+       pr_debug("\tEND(%s)\n", symbol);
+}
+
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 {
-       int err, size, offset;
-       void *gebase;
+       int err, size;
+       void *gebase, *p, *handler;
        int i;
 
        struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
@@ -286,41 +303,38 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
        /* Save new ebase */
        vcpu->arch.guest_ebase = gebase;
 
-       /* Copy L1 Guest Exception handler to correct offset */
+       /* Build guest exception vectors dynamically in unmapped memory */
+       handler = gebase + 0x2000;
 
        /* TLB Refill, EXL = 0 */
-       memcpy(gebase, mips32_exception,
-              mips32_exceptionEnd - mips32_exception);
+       kvm_mips_build_exception(gebase, handler);
 
        /* General Exception Entry point */
-       memcpy(gebase + 0x180, mips32_exception,
-              mips32_exceptionEnd - mips32_exception);
+       kvm_mips_build_exception(gebase + 0x180, handler);
 
        /* For vectored interrupts poke the exception code @ all offsets 0-7 */
        for (i = 0; i < 8; i++) {
                kvm_debug("L1 Vectored handler @ %p\n",
                          gebase + 0x200 + (i * VECTORSPACING));
-               memcpy(gebase + 0x200 + (i * VECTORSPACING), mips32_exception,
-                      mips32_exceptionEnd - mips32_exception);
+               kvm_mips_build_exception(gebase + 0x200 + i * VECTORSPACING,
+                                        handler);
        }
 
-       /* General handler, relocate to unmapped space for sanity's sake */
-       offset = 0x2000;
-       kvm_debug("Installing KVM Exception handlers @ %p, %#x bytes\n",
-                 gebase + offset,
-                 mips32_GuestExceptionEnd - mips32_GuestException);
+       /* General exit handler */
+       p = handler;
+       p = kvm_mips_build_exit(p);
 
-       memcpy(gebase + offset, mips32_GuestException,
-              mips32_GuestExceptionEnd - mips32_GuestException);
+       /* Guest entry routine */
+       vcpu->arch.vcpu_run = p;
+       p = kvm_mips_build_vcpu_run(p);
 
-#ifdef MODULE
-       offset += mips32_GuestExceptionEnd - mips32_GuestException;
-       memcpy(gebase + offset, (char *)__kvm_mips_vcpu_run,
-              __kvm_mips_vcpu_run_end - (char *)__kvm_mips_vcpu_run);
-       vcpu->arch.vcpu_run = gebase + offset;
-#else
-       vcpu->arch.vcpu_run = __kvm_mips_vcpu_run;
-#endif
+       /* Dump the generated code */
+       pr_debug("#include <asm/asm.h>\n");
+       pr_debug("#include <asm/regdef.h>\n");
+       pr_debug("\n");
+       dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p);
+       dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200);
+       dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run);
 
        /* Invalidate the icache for these ranges */
        local_flush_icache_range((unsigned long)gebase,
@@ -406,7 +420,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
        kvm_mips_deliver_interrupts(vcpu,
                                    kvm_read_c0_guest_cause(vcpu->arch.cop0));
 
-       __kvm_guest_enter();
+       guest_enter_irqoff();
 
        /* Disable hardware page table walking while in guest */
        htw_stop();
@@ -418,7 +432,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
        /* Re-enable HTW before enabling interrupts */
        htw_start();
 
-       __kvm_guest_exit();
+       guest_exit_irqoff();
        local_irq_enable();
 
        if (vcpu->sigset_active)
@@ -507,8 +521,10 @@ static u64 kvm_mips_get_one_regs[] = {
        KVM_REG_MIPS_R30,
        KVM_REG_MIPS_R31,
 
+#ifndef CONFIG_CPU_MIPSR6
        KVM_REG_MIPS_HI,
        KVM_REG_MIPS_LO,
+#endif
        KVM_REG_MIPS_PC,
 
        KVM_REG_MIPS_CP0_INDEX,
@@ -652,12 +668,14 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31:
                v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0];
                break;
+#ifndef CONFIG_CPU_MIPSR6
        case KVM_REG_MIPS_HI:
                v = (long)vcpu->arch.hi;
                break;
        case KVM_REG_MIPS_LO:
                v = (long)vcpu->arch.lo;
                break;
+#endif
        case KVM_REG_MIPS_PC:
                v = (long)vcpu->arch.pc;
                break;
@@ -873,12 +891,14 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_R1 ... KVM_REG_MIPS_R31:
                vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0] = v;
                break;
+#ifndef CONFIG_CPU_MIPSR6
        case KVM_REG_MIPS_HI:
                vcpu->arch.hi = v;
                break;
        case KVM_REG_MIPS_LO:
                vcpu->arch.lo = v;
                break;
+#endif
        case KVM_REG_MIPS_PC:
                vcpu->arch.pc = v;
                break;
@@ -1763,6 +1783,10 @@ static int __init kvm_mips_init(void)
 {
        int ret;
 
+       ret = kvm_mips_entry_setup();
+       if (ret)
+               return ret;
+
        ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
 
        if (ret)
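Taken together, the mips.c changes replace the memcpy of hand-written assembly with per-vcpu code generation. A condensed sketch of the new creation flow, using only the function names from the hunks above (error handling and the debug dumps omitted):

	void *gebase, *handler, *p;

	handler = gebase + 0x2000;		/* exit code lives in unmapped space */

	kvm_mips_build_exception(gebase, handler);		/* TLB refill */
	kvm_mips_build_exception(gebase + 0x180, handler);	/* general exception */

	p = kvm_mips_build_exit(handler);	/* common exit handler */
	vcpu->arch.vcpu_run = p;		/* guest entry starts right after it */
	p = kvm_mips_build_vcpu_run(p);

	local_flush_icache_range((unsigned long)gebase, (unsigned long)p);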
index a38bdab685745edbaaa400b2c93eff66d6b7f310..c858cf168078401931d762542928857414d8b200 100644 (file)
 /*
  * Tracepoints for VM enters
  */
-TRACE_EVENT(kvm_enter,
-           TP_PROTO(struct kvm_vcpu *vcpu),
-           TP_ARGS(vcpu),
-           TP_STRUCT__entry(
-                       __field(unsigned long, pc)
-           ),
-
-           TP_fast_assign(
-                       __entry->pc = vcpu->arch.pc;
-           ),
-
-           TP_printk("PC: 0x%08lx",
-                     __entry->pc)
-);
-
-TRACE_EVENT(kvm_reenter,
-           TP_PROTO(struct kvm_vcpu *vcpu),
-           TP_ARGS(vcpu),
-           TP_STRUCT__entry(
-                       __field(unsigned long, pc)
-           ),
-
-           TP_fast_assign(
-                       __entry->pc = vcpu->arch.pc;
-           ),
-
-           TP_printk("PC: 0x%08lx",
-                     __entry->pc)
+DECLARE_EVENT_CLASS(kvm_transition,
+       TP_PROTO(struct kvm_vcpu *vcpu),
+       TP_ARGS(vcpu),
+       TP_STRUCT__entry(
+               __field(unsigned long, pc)
+       ),
+
+       TP_fast_assign(
+               __entry->pc = vcpu->arch.pc;
+       ),
+
+       TP_printk("PC: 0x%08lx",
+                 __entry->pc)
 );
 
-TRACE_EVENT(kvm_out,
-           TP_PROTO(struct kvm_vcpu *vcpu),
-           TP_ARGS(vcpu),
-           TP_STRUCT__entry(
-                       __field(unsigned long, pc)
-           ),
+DEFINE_EVENT(kvm_transition, kvm_enter,
+            TP_PROTO(struct kvm_vcpu *vcpu),
+            TP_ARGS(vcpu));
 
-           TP_fast_assign(
-                       __entry->pc = vcpu->arch.pc;
-           ),
+DEFINE_EVENT(kvm_transition, kvm_reenter,
+            TP_PROTO(struct kvm_vcpu *vcpu),
+            TP_ARGS(vcpu));
 
-           TP_printk("PC: 0x%08lx",
-                     __entry->pc)
-);
+DEFINE_EVENT(kvm_transition, kvm_out,
+            TP_PROTO(struct kvm_vcpu *vcpu),
+            TP_ARGS(vcpu));
 
 /* The first 32 exit reasons correspond to Cause.ExcCode */
 #define KVM_TRACE_EXIT_INT              0
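A side benefit of the DECLARE_EVENT_CLASS() conversion above: any further guest-transition tracepoint now costs three lines instead of a full TRACE_EVENT() body. A hypothetical example (kvm_resume is not a real event, shown only to illustrate the pattern):

	DEFINE_EVENT(kvm_transition, kvm_resume,
		     TP_PROTO(struct kvm_vcpu *vcpu),
		     TP_ARGS(vcpu));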
index 00e8dc3d36cb1c99013d31905db0d2ad18b82864..091553942bcbca51941fa768b67bdcc3f5dfecdc 100644 (file)
@@ -431,9 +431,15 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
 
        /*
         * Arch specific stuff, set up config registers properly so that the
-        * guest will come up as expected, for now we simulate a MIPS 24kc
+        * guest will come up as expected
         */
+#ifndef CONFIG_CPU_MIPSR6
+       /* r2-r5, simulate a MIPS 24kc */
        kvm_write_c0_guest_prid(cop0, 0x00019300);
+#else
+       /* r6+, simulate a generic QEMU machine */
+       kvm_write_c0_guest_prid(cop0, 0x00010000);
+#endif
        /*
         * Have config1, Cacheable, noncoherent, write-back, write allocate.
         * Endianness, arch revision & virtually tagged icache should match
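For reference, the two PRid values above decode as follows under the standard MIPS PRid layout (company ID in bits 23:16, implementation in bits 15:8); the macro spelling is a hedged reading of asm/cpu.h, not part of the patch:

	/* 0x00019300 == PRID_COMP_MIPS | PRID_IMP_24K : pre-r6, simulate a 24Kc     */
	/* 0x00010000 == PRID_COMP_MIPS | 0x0000       : r6+, generic QEMU machine   */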
index d96e912b9d44fa7a077500e9ba884deb8caf3cbd..6dc07fba187fa8958e711ddf55f82ec5b32b077c 100644 (file)
@@ -627,8 +627,8 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                dec_insn.pc_inc +
                                dec_insn.next_pc_inc;
                return 1;
-       case cbcond0_op:
-       case cbcond1_op:
+       case pop10_op:
+       case pop30_op:
                if (!cpu_has_mips_r6)
                        break;
                if (insn.i_format.rt && !insn.i_format.rs)
@@ -683,14 +683,14 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                        dec_insn.next_pc_inc;
 
                return 1;
-       case beqzcjic_op:
+       case pop66_op:
                if (!cpu_has_mips_r6)
                        break;
                *contpc = regs->cp0_epc + dec_insn.pc_inc +
                        dec_insn.next_pc_inc;
 
                return 1;
-       case bnezcjialc_op:
+       case pop76_op:
                if (!cpu_has_mips_r6)
                        break;
                if (!insn.i_format.rs)
index d78178daea4bc2c1e069d2069db976b1ce693740..277cf52d80e1895c142970fb43c6a5c49044955a 100644 (file)
@@ -53,8 +53,13 @@ static struct insn insn_table_MM[] = {
        { insn_bltzl, 0, 0 },
        { insn_bne, M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM },
        { insn_cache, M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM },
+       { insn_cfc1, M(mm_pool32f_op, 0, 0, 0, mm_cfc1_op, mm_32f_73_op), RT | RS },
+       { insn_cfcmsa, M(mm_pool32s_op, 0, msa_cfc_op, 0, 0, mm_32s_elm_op), RD | RE },
+       { insn_ctc1, M(mm_pool32f_op, 0, 0, 0, mm_ctc1_op, mm_32f_73_op), RT | RS },
+       { insn_ctcmsa, M(mm_pool32s_op, 0, msa_ctc_op, 0, 0, mm_32s_elm_op), RD | RE },
        { insn_daddu, 0, 0 },
        { insn_daddiu, 0, 0 },
+       { insn_di, M(mm_pool32a_op, 0, 0, 0, mm_di_op, mm_pool32axf_op), RS },
        { insn_divu, M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS },
        { insn_dmfc0, 0, 0 },
        { insn_dmtc0, 0, 0 },
@@ -84,6 +89,8 @@ static struct insn insn_table_MM[] = {
        { insn_mfhi, M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS },
        { insn_mflo, M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS },
        { insn_mtc0, M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD },
+       { insn_mthi, M(mm_pool32a_op, 0, 0, 0, mm_mthi32_op, mm_pool32axf_op), RS },
+       { insn_mtlo, M(mm_pool32a_op, 0, 0, 0, mm_mtlo32_op, mm_pool32axf_op), RS },
        { insn_mul, M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD },
        { insn_or, M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD },
        { insn_ori, M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM },
@@ -166,13 +173,15 @@ static void build_insn(u32 **buf, enum opcode opc, ...)
        op = ip->match;
        va_start(ap, opc);
        if (ip->fields & RS) {
-               if (opc == insn_mfc0 || opc == insn_mtc0)
+               if (opc == insn_mfc0 || opc == insn_mtc0 ||
+                   opc == insn_cfc1 || opc == insn_ctc1)
                        op |= build_rt(va_arg(ap, u32));
                else
                        op |= build_rs(va_arg(ap, u32));
        }
        if (ip->fields & RT) {
-               if (opc == insn_mfc0 || opc == insn_mtc0)
+               if (opc == insn_mfc0 || opc == insn_mtc0 ||
+                   opc == insn_cfc1 || opc == insn_ctc1)
                        op |= build_rs(va_arg(ap, u32));
                else
                        op |= build_rt(va_arg(ap, u32));
index 9c2220a45189a6e954a597c05bb880f700b93801..cec524167822399aa3f87b562e9d2a294bea4eaa 100644 (file)
@@ -67,9 +67,14 @@ static struct insn insn_table[] = {
 #else
        { insn_cache,  M6(cache_op, 0, 0, 0, cache6_op),  RS | RT | SIMM9 },
 #endif
+       { insn_cfc1, M(cop1_op, cfc_op, 0, 0, 0, 0), RT | RD },
+       { insn_cfcmsa, M(msa_op, 0, msa_cfc_op, 0, 0, msa_elm_op), RD | RE },
+       { insn_ctc1, M(cop1_op, ctc_op, 0, 0, 0, 0), RT | RD },
+       { insn_ctcmsa, M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE },
        { insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
        { insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
        { insn_dinsm, M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE },
+       { insn_di, M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT },
        { insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE },
        { insn_divu, M(spec_op, 0, 0, 0, 0, divu_op), RS | RT },
        { insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
@@ -114,7 +119,13 @@ static struct insn insn_table[] = {
        { insn_mflo,  M(spec_op, 0, 0, 0, 0, mflo_op), RD },
        { insn_mtc0,  M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET},
        { insn_mthc0,  M(cop0_op, mthc0_op, 0, 0, 0, 0),  RT | RD | SET},
+       { insn_mthi,  M(spec_op, 0, 0, 0, 0, mthi_op), RS },
+       { insn_mtlo,  M(spec_op, 0, 0, 0, 0, mtlo_op), RS },
+#ifndef CONFIG_CPU_MIPSR6
        { insn_mul, M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
+#else
+       { insn_mul, M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD},
+#endif
        { insn_ori,  M(ori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM },
        { insn_or,  M(spec_op, 0, 0, 0, 0, or_op),  RS | RT | RD },
 #ifndef CONFIG_CPU_MIPSR6
index ad718debc35a74d62c2d0b532420b2711a5c4416..3e0282d301d61bd4e35aee94ee5cf2f2cebd7a5e 100644 (file)
@@ -49,18 +49,19 @@ enum opcode {
        insn_invalid,
        insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1,
        insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
-       insn_bne, insn_cache, insn_daddiu, insn_daddu, insn_dins, insn_dinsm,
-       insn_divu, insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll,
+       insn_bne, insn_cache, insn_cfc1, insn_cfcmsa, insn_ctc1, insn_ctcmsa,
+       insn_daddiu, insn_daddu, insn_di, insn_dins, insn_dinsm, insn_divu,
+       insn_dmfc0, insn_dmtc0, insn_drotr, insn_drotr32, insn_dsll,
        insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret,
        insn_ext, insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb,
        insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw,
        insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, insn_mflo, insn_mtc0,
-       insn_mthc0, insn_mul, insn_or, insn_ori, insn_pref, insn_rfe,
-       insn_rotr, insn_sc, insn_scd, insn_sd, insn_sll, insn_sllv, insn_slt,
-       insn_sltiu, insn_sltu, insn_sra, insn_srl, insn_srlv, insn_subu,
-       insn_sw, insn_sync, insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi,
-       insn_tlbwr, insn_wait, insn_wsbh, insn_xor, insn_xori, insn_yield,
-       insn_lddir, insn_ldpte,
+       insn_mthc0, insn_mthi, insn_mtlo, insn_mul, insn_or, insn_ori,
+       insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd, insn_sd, insn_sll,
+       insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, insn_srl,
+       insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, insn_tlbp,
+       insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, insn_xor,
+       insn_xori, insn_yield, insn_lddir, insn_ldpte,
 };
 
 struct insn {
@@ -268,10 +269,15 @@ I_u1s2(_bltz)
 I_u1s2(_bltzl)
 I_u1u2s3(_bne)
 I_u2s3u1(_cache)
+I_u1u2(_cfc1)
+I_u2u1(_cfcmsa)
+I_u1u2(_ctc1)
+I_u2u1(_ctcmsa)
 I_u1u2u3(_dmfc0)
 I_u1u2u3(_dmtc0)
 I_u2u1s3(_daddiu)
 I_u3u1u2(_daddu)
+I_u1(_di);
 I_u1u2(_divu)
 I_u2u1u3(_dsll)
 I_u2u1u3(_dsll32)
@@ -301,6 +307,8 @@ I_u1(_mfhi)
 I_u1(_mflo)
 I_u1u2u3(_mtc0)
 I_u1u2u3(_mthc0)
+I_u1(_mthi)
+I_u1(_mtlo)
 I_u3u1u2(_mul)
 I_u2u1u3(_ori)
 I_u3u1u2(_or)
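The new uasm opcodes and builder macros above exist so the dynamic guest entry code can emit FPU/MSA control moves, hi/lo restores, and di. A minimal, hypothetical sketch of the resulting API; the GPR numbers and the static buffer are assumptions for illustration only:

	#include <asm/uasm.h>

	static u32 buf[16];

	static void emit_example(void)
	{
		u32 *p = buf;

		uasm_i_mthi(&p, 8);	/* mthi $8: restore HI from t0 */
		uasm_i_mtlo(&p, 9);	/* mtlo $9: restore LO from t1 */
		uasm_i_di(&p, 0);	/* di zero: disable interrupts */
	}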
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
new file mode 100644 (file)
index 0000000..88b4901
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Hypervisor Maintenance Interrupt header file.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_HMI_H__
+#define __ASM_PPC64_HMI_H__
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#define        CORE_TB_RESYNC_REQ_BIT          63
+#define MAX_SUBCORE_PER_CORE           4
+
+/*
+ * The sibling_subcore_state structure is used to co-ordinate all threads
+ * during an HMI to avoid TB corruption. This structure is allocated once
+ * per core and shared by all threads on that core.
+ */
+struct sibling_subcore_state {
+       unsigned long   flags;
+       u8              in_guest[MAX_SUBCORE_PER_CORE];
+};
+
+extern void wait_for_subcore_guest_exit(void);
+extern void wait_for_tb_resync(void);
+#else
+static inline void wait_for_subcore_guest_exit(void) { }
+static inline void wait_for_tb_resync(void) { }
+#endif
+#endif /* __ASM_PPC64_HMI_H__ */
index 546540b910959570cc34d8dfa040ee155885ae00..4b17bd058e01f0cc1ca228cc438426f0777e1960 100644 (file)
@@ -25,6 +25,7 @@
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 #include <asm/kvm_book3s_asm.h>
 #endif
+#include <asm/hmi.h>
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -181,6 +182,11 @@ struct paca_struct {
         */
        u16 in_mce;
        u8 hmi_event_available;          /* HMI event is available */
+       /*
+        * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+        * more details
+        */
+       struct sibling_subcore_state *sibling_subcore_state;
 #endif
 
        /* Stuff for accurate time accounting */
index 2da380fcc34c699c34851696cae2899a18d9c8bd..6972a23433d349e4de099051e23971494417f04a 100644 (file)
@@ -41,7 +41,7 @@ obj-$(CONFIG_VDSO32)          += vdso32/
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64)    += mce.o mce_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)    += mce.o mce_power.o hmi.o
 obj64-$(CONFIG_RELOCATABLE)    += reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)    += exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)            += vdso64/
index 4c9440629128ccd94dbc35d5eb0b6e060d4d970f..0eba47e074b94ce7d8c70049fc9a098eb654234a 100644 (file)
@@ -680,6 +680,8 @@ _GLOBAL(__replay_interrupt)
 BEGIN_FTR_SECTION
        cmpwi   r3,0xe80
        beq     h_doorbell_common
+       cmpwi   r3,0xe60
+       beq     hmi_exception_common
 FTR_SECTION_ELSE
        cmpwi   r3,0xa00
        beq     doorbell_super_common
@@ -1172,7 +1174,7 @@ fwnmi_data_area:
 
        .globl hmi_exception_early
 hmi_exception_early:
-       EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
+       EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, 0xe62)
        mr      r10,r1                  /* Save r1                      */
        ld      r1,PACAEMERGSP(r13)     /* Use emergency stack          */
        subi    r1,r1,INT_FRAME_SIZE    /* alloc stack frame            */
diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kernel/hmi.c
new file mode 100644 (file)
index 0000000..e3f738e
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Hypervisor Maintenance Interrupt (HMI) handling.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/paca.h>
+#include <asm/hmi.h>
+
+void wait_for_subcore_guest_exit(void)
+{
+       int i;
+
+       /*
+        * A NULL pointer here indicates that the KVM module hasn't
+        * been loaded yet and hence no guests are running.
+        * If KVM is not in use there is no need to co-ordinate among
+        * threads: all of them will always be in the host, and nothing
+        * other than the opal hmi handler is going to modify the TB.
+        * Hence, just return from here.
+        */
+       if (!local_paca->sibling_subcore_state)
+               return;
+
+       for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
+               while (local_paca->sibling_subcore_state->in_guest[i])
+                       cpu_relax();
+}
+
+void wait_for_tb_resync(void)
+{
+       if (!local_paca->sibling_subcore_state)
+               return;
+
+       while (test_bit(CORE_TB_RESYNC_REQ_BIT,
+                               &local_paca->sibling_subcore_state->flags))
+               cpu_relax();
+}
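Both wait loops above deliberately spin with cpu_relax() rather than sleep: they run from the HMI exception path (see hmi_exception_realmode() below), where blocking is not an option. The underlying idiom, as a generic sketch with a hypothetical flags word:

	static void wait_for_bit_clear(const volatile unsigned long *flags, int bit)
	{
		while (test_bit(bit, flags))
			cpu_relax();	/* low-power spin; also a compiler barrier */
	}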
index 470ceebd2d237c565eea24467b071440f177d149..bb5112908fb29a0068030950da34424170fb5a4d 100644 (file)
@@ -270,8 +270,9 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);            \
        ld      r2,PACATOC(r13);                                        \
        ld      r1,PACAR1(r13);                                         \
        std     r3,ORIG_GPR3(r1);       /* Save original r3 */          \
-       li      r0,OPAL_HANDLE_HMI;     /* Pass opal token argument*/   \
-       bl      opal_call_realmode;                                     \
+       li      r3,0;                   /* NULL argument */             \
+       bl      hmi_exception_realmode;                                 \
+       nop;                                                            \
        ld      r3,ORIG_GPR3(r1);       /* Restore original r3 */       \
 20:    nop;
 
index 9229ba63c37086a86af0a4b17dcde17012eff732..9ec95daccad92a2f16df44eaea7c47504808c69b 100644 (file)
@@ -60,6 +60,7 @@
 #include <asm/switch_to.h>
 #include <asm/tm.h>
 #include <asm/debug.h>
+#include <asm/hmi.h>
 #include <sysdev/fsl_pci.h>
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
@@ -307,9 +308,13 @@ long hmi_exception_realmode(struct pt_regs *regs)
 {
        __this_cpu_inc(irq_stat.hmi_exceptions);
 
+       wait_for_subcore_guest_exit();
+
        if (ppc_md.hmi_exception_early)
                ppc_md.hmi_exception_early(regs);
 
+       wait_for_tb_resync();
+
        return 0;
 }
 
index e20beae5ca7a462d9f1cfb7211b9a84592383ddf..2fd5580c8f6ebaf3981015c7688cbfb4fea880f2 100644 (file)
@@ -52,6 +52,7 @@
 #include <asm/switch_to.h>
 #include <asm/smp.h>
 #include <asm/dbell.h>
+#include <asm/hmi.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
@@ -2522,7 +2523,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
                list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
                        spin_unlock(&pvc->lock);
 
-       kvm_guest_enter();
+       guest_enter();
 
        srcu_idx = srcu_read_lock(&vc->kvm->srcu);
 
@@ -2570,7 +2571,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 
        /* make sure updates to secondary vcpu structs are visible now */
        smp_mb();
-       kvm_guest_exit();
+       guest_exit();
 
        for (sub = 0; sub < core_info.n_subcores; ++sub)
                list_for_each_entry_safe(pvc, vcnext, &core_info.vcs[sub],
@@ -3401,6 +3402,38 @@ static struct kvmppc_ops kvm_ops_hv = {
        .hcall_implemented = kvmppc_hcall_impl_hv,
 };
 
+static int kvm_init_subcore_bitmap(void)
+{
+       int i, j;
+       int nr_cores = cpu_nr_cores();
+       struct sibling_subcore_state *sibling_subcore_state;
+
+       for (i = 0; i < nr_cores; i++) {
+               int first_cpu = i * threads_per_core;
+               int node = cpu_to_node(first_cpu);
+
+               /* Ignore if it is already allocated. */
+               if (paca[first_cpu].sibling_subcore_state)
+                       continue;
+
+               sibling_subcore_state =
+                       kmalloc_node(sizeof(struct sibling_subcore_state),
+                                                       GFP_KERNEL, node);
+               if (!sibling_subcore_state)
+                       return -ENOMEM;
+
+               memset(sibling_subcore_state, 0,
+                               sizeof(struct sibling_subcore_state));
+
+               for (j = 0; j < threads_per_core; j++) {
+                       int cpu = first_cpu + j;
+
+                       paca[cpu].sibling_subcore_state = sibling_subcore_state;
+               }
+       }
+       return 0;
+}
+
 static int kvmppc_book3s_init_hv(void)
 {
        int r;
@@ -3411,6 +3444,10 @@ static int kvmppc_book3s_init_hv(void)
        if (r < 0)
                return -ENODEV;
 
+       r = kvm_init_subcore_bitmap();
+       if (r)
+               return r;
+
        kvm_ops_hv.owner = THIS_MODULE;
        kvmppc_hv_ops = &kvm_ops_hv;
 
index 93b5f5c9b4455ef6e769472ca35a2c8e91ba96be..0fa70a9618d7ad603d482f7570419bd81b6aa46b 100644 (file)
@@ -13,6 +13,9 @@
 #include <linux/kernel.h>
 #include <asm/opal.h>
 #include <asm/mce.h>
+#include <asm/machdep.h>
+#include <asm/cputhreads.h>
+#include <asm/hmi.h>
 
 /* SRR1 bits for machine check on POWER7 */
 #define SRR1_MC_LDSTERR                (1ul << (63-42))
@@ -140,3 +143,176 @@ long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
        return kvmppc_realmode_mc_power7(vcpu);
 }
+
+/* Check if dynamic split is in force and return subcore size accordingly. */
+static inline int kvmppc_cur_subcore_size(void)
+{
+       if (local_paca->kvm_hstate.kvm_split_mode)
+               return local_paca->kvm_hstate.kvm_split_mode->subcore_size;
+
+       return threads_per_subcore;
+}
+
+void kvmppc_subcore_enter_guest(void)
+{
+       int thread_id, subcore_id;
+
+       thread_id = cpu_thread_in_core(local_paca->paca_index);
+       subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+       local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
+}
+
+void kvmppc_subcore_exit_guest(void)
+{
+       int thread_id, subcore_id;
+
+       thread_id = cpu_thread_in_core(local_paca->paca_index);
+       subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+       local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
+}
+
+static bool kvmppc_tb_resync_required(void)
+{
+       if (test_and_set_bit(CORE_TB_RESYNC_REQ_BIT,
+                               &local_paca->sibling_subcore_state->flags))
+               return false;
+
+       return true;
+}
+
+static void kvmppc_tb_resync_done(void)
+{
+       clear_bit(CORE_TB_RESYNC_REQ_BIT,
+                       &local_paca->sibling_subcore_state->flags);
+}
+
+/*
+ * kvmppc_realmode_hmi_handler() is called only by the primary thread
+ * during the guest exit path.
+ *
+ * There are multiple reasons why an HMI could occur; one of them is a
+ * Timebase (TB) error. If this HMI is due to a TB error, the TB will
+ * have been put into a stopped state. The opal hmi handler will fix it
+ * and restore the TB with the host timebase value. For HMIs caused by
+ * non-TB errors, the opal hmi handler will not touch/restore the TB
+ * register and hence there won't be any change in the TB value.
+ *
+ * Since we are not sure about the cause of this HMI, we can't be sure
+ * whether the TB register holds the guest or the host timebase value.
+ * Hence the idea is to resync the TB on every HMI, so that we know the
+ * exact state of the TB value. The TB resync call will restore the TB
+ * to the host timebase.
+ *
+ * Things to consider:
+ * - On a TB error, the HMI is reported on all the threads of the core
+ *   that encountered the error, irrespective of split-core mode.
+ * - The very first thread on the core that gets a chance to fix the TB
+ *   error resyncs the TB with the local chipTOD value.
+ * - The TB resync is a core-level action, i.e. it will sync all the TBs
+ *   in that core independent of split-core mode. This means that if we
+ *   trigger a TB sync from a thread in one subcore, it affects the TB
+ *   values of sibling subcores on the same core.
+ *
+ * All threads need to co-ordinate before invoking the opal hmi handler.
+ * All threads will use sibling_subcore_state->in_guest[] (shared by all
+ * threads in the core) in the paca, which holds information about
+ * whether sibling subcores are in guest mode or host mode. The
+ * in_guest[] array is of size MAX_SUBCORE_PER_CORE=4, indexed by
+ * subcore id to set/unset subcore status. Only the primary thread of
+ * each subcore is responsible for setting/unsetting its designated
+ * array element while entering/exiting the guest.
+ *
+ * After the opal hmi handler has been invoked, one thread (of the
+ * entire core) will need to resync the TB. Bit 63 of the subcore state
+ * flags (sibling_subcore_state->flags) is used to co-ordinate between
+ * the primary threads to decide who takes up the responsibility.
+ *
+ * This is what we do:
+ * - The primary thread from each subcore tries to set the resync
+ *   required bit[63] of paca->sibling_subcore_state->flags.
+ * - The first primary thread that is able to set the flag takes
+ *   responsibility for the TB resync (let us call it the thread leader).
+ * - All other threads which are in host will call
+ *   wait_for_subcore_guest_exit() and wait for in_guest[0-3] from
+ *   paca->sibling_subcore_state to get cleared.
+ * - Each primary thread will clear its own subcore status from the
+ *   in_guest[] array.
+ * - Once all primary threads have cleared in_guest[0-3], all of them
+ *   will invoke the opal hmi handler.
+ * - Now all threads except the thread leader will wait for the TB
+ *   resync to complete by invoking wait_for_tb_resync().
+ * - The thread leader will do a TB resync by calling
+ *   opal_resync_timebase() and will then clear the resync required bit.
+ * - All other threads will now come out of the resync wait loop and
+ *   proceed with individual execution.
+ * - On return from this function, the primary thread will signal all
+ *   secondary threads to proceed.
+ * - All secondary threads will eventually call the opal hmi handler on
+ *   their exit path.
+ */
+
+long kvmppc_realmode_hmi_handler(void)
+{
+       int ptid = local_paca->kvm_hstate.ptid;
+       bool resync_req;
+
+       /* This is only called on primary thread. */
+       BUG_ON(ptid != 0);
+       __this_cpu_inc(irq_stat.hmi_exceptions);
+
+       /*
+        * By now the primary thread has already completed the guest->host
+        * partition switch but hasn't signaled the secondaries yet.
+        * All the secondary threads on this subcore are waiting
+        * for the primary thread to signal them to go ahead.
+        *
+        * Threads from a subcore that isn't in the guest will
+        * wait until all other subcores on this core exit the guest.
+        *
+        * Now set the resync required bit. The first thread to set
+        * this bit gets true back from kvmppc_tb_resync_required();
+        * for all other subcores it returns false.
+        *
+        * If resync_req == true, then this thread is responsible for
+        * initiating the TB resync after the hmi handler has completed.
+        * All other threads on this core will wait until this thread
+        * clears the resync required bit flag.
+        */
+       resync_req = kvmppc_tb_resync_required();
+
+       /* Reset the subcore status to indicate it has exited guest */
+       kvmppc_subcore_exit_guest();
+
+       /*
+        * Wait for other subcores on this core to exit the guest.
+        * All the primary threads, and the threads from subcores that
+        * are not in the guest, will wait here until all subcores are
+        * out of guest context.
+        */
+       wait_for_subcore_guest_exit();
+
+       /*
+        * At this point we are sure that the primary threads from each
+        * subcore on this core have completed the guest->host partition
+        * switch. Now it is safe to call the HMI handler.
+        */
+       if (ppc_md.hmi_exception_early)
+               ppc_md.hmi_exception_early(NULL);
+
+       /*
+        * Check if this thread is responsible for resyncing the TB.
+        * All other threads will wait until this thread completes the
+        * TB resync.
+        */
+       if (resync_req) {
+               opal_resync_timebase();
+               /* Reset TB resync req bit */
+               kvmppc_tb_resync_done();
+       } else {
+               wait_for_tb_resync();
+       }
+       return 0;
+}
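Stripped of the KVM specifics, the coordination above is an atomic leader election on one bit followed by a barrier-style wait. A generic sketch of the idiom (not part of the patch; the flags word is whatever the callers share):

	static void tb_resync_rendezvous(unsigned long *flags)
	{
		if (!test_and_set_bit(CORE_TB_RESYNC_REQ_BIT, flags)) {
			/* leader: do the exclusive work, then release everyone */
			opal_resync_timebase();
			clear_bit(CORE_TB_RESYNC_REQ_BIT, flags);
		} else {
			/* everyone else: wait for the leader to finish */
			while (test_bit(CORE_TB_RESYNC_REQ_BIT, flags))
				cpu_relax();
		}
	}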
index e571ad277398fd6625499c05ae03cde47a46d20a..0d246fca157a96a613b636deb02f8381b71ab090 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/kvm_book3s_asm.h>
 #include <asm/book3s/64/mmu-hash.h>
 #include <asm/tm.h>
+#include <asm/opal.h>
 
 #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
 
@@ -373,6 +374,18 @@ kvm_secondary_got_guest:
        lwsync
        std     r0, HSTATE_KVM_VCORE(r13)
 
+       /*
+        * All secondaries exiting the guest will fall through this path.
+        * Before proceeding, check for a pending HMI and, if there is
+        * one, invoke the opal hmi handler. By now we are sure that the
+        * primary thread on this core/subcore has already done the
+        * partition switch/TB resync, so we are good to call the handler.
+        */
+       cmpwi   r12, BOOK3S_INTERRUPT_HMI
+       bne     kvm_no_guest
+
+       li      r3,0                    /* NULL argument */
+       bl      hmi_exception_realmode
 /*
  * At this point we have finished executing in the guest.
  * We need to wait for hwthread_req to become zero, since
@@ -427,6 +440,22 @@ kvm_no_guest:
  * whole-core mode, so we need to nap.
  */
 kvm_unsplit_nap:
+       /*
+        * When secondaries are napping in kvm_unsplit_nap() with
+        * hwthread_req = 1, HMI goes ignored even though subcores are
+        * already exited the guest. Hence HMI keeps waking up secondaries
+        * from nap in a loop and secondaries always go back to nap since
+        * no vcore is assigned to them. This makes impossible for primary
+        * thread to get hold of secondary threads resulting into a soft
+        * lockup in KVM path.
+        *
+        * Let us check if HMI is pending and handle it before we go to nap.
+        */
+       cmpwi   r12, BOOK3S_INTERRUPT_HMI
+       bne     55f
+       li      r3, 0                   /* NULL argument */
+       bl      hmi_exception_realmode
+55:
        /*
         * Ensure that secondary doesn't nap when it has
         * its vcore pointer set.
@@ -601,6 +630,11 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_DPDES, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
+       /* Mark the subcore state as inside guest */
+       bl      kvmppc_subcore_enter_guest
+       nop
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       ld      r4, HSTATE_KVM_VCPU(r13)
        li      r0,1
        stb     r0,VCORE_IN_GUEST(r5)   /* signal secondaries to continue */
 
@@ -1683,6 +1717,23 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_DPDES, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
+       /* If HMI, call kvmppc_realmode_hmi_handler() */
+       cmpwi   r12, BOOK3S_INTERRUPT_HMI
+       bne     27f
+       bl      kvmppc_realmode_hmi_handler
+       nop
+       li      r12, BOOK3S_INTERRUPT_HMI
+       /*
+        * At this point kvmppc_realmode_hmi_handler() would have resynced
+        * the TB, so there is no need to subtract the guest timebase
+        * offset from the timebase. Skip it.
+        *
+        * Also, do not call kvmppc_subcore_exit_guest() because it has
+        * been invoked as part of kvmppc_realmode_hmi_handler().
+        */
+       b       30f
+
+27:
        /* Subtract timebase offset from timebase */
        ld      r8,VCORE_TB_OFFSET(r5)
        cmpdi   r8,0
@@ -1698,8 +1749,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        addis   r8,r8,0x100             /* if so, increment upper 40 bits */
        mtspr   SPRN_TBU40,r8
 
+17:    bl      kvmppc_subcore_exit_guest
+       nop
+30:    ld      r5,HSTATE_KVM_VCORE(r13)
+       ld      r4,VCORE_KVM(r5)        /* pointer to struct kvm */
+
        /* Reset PCR */
-17:    ld      r0, VCORE_PCR(r5)
+       ld      r0, VCORE_PCR(r5)
        cmpdi   r0, 0
        beq     18f
        li      r0, 0
@@ -2461,6 +2517,8 @@ BEGIN_FTR_SECTION
        cmpwi   r6, 3                   /* hypervisor doorbell? */
        beq     3f
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+       cmpwi   r6, 0xa                 /* Hypervisor maintenance ? */
+       beq     4f
        li      r3, 1                   /* anything else, return 1 */
 0:     blr
 
@@ -2482,6 +2540,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        li      r3, -1
        blr
 
+       /* Woken up due to Hypervisor maintenance interrupt */
+4:     li      r12, BOOK3S_INTERRUPT_HMI
+       li      r3, 1
+       blr
+
 /*
  * Determine what sort of external interrupt is pending (if any).
  * Returns:
index 8e4f64f0b7741d60e90f204c8265d1729bfa1788..b6cd730c33ef5a1e0290046ca8d8793ec32d01ba 100644 (file)
@@ -914,7 +914,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
        /* We get here with MSR.EE=1 */
 
        trace_kvm_exit(exit_nr, vcpu);
-       kvm_guest_exit();
+       guest_exit();
 
        switch (exit_nr) {
        case BOOK3S_INTERRUPT_INST_STORAGE:
@@ -1049,7 +1049,17 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
                int emul;
 
 program_interrupt:
-               flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+               /*
+                * shadow_srr1 only contains valid flags if we came here via
+                * a program exception. The other exceptions (emulation assist,
+                * FP unavailable, etc.) do not provide flags in SRR1, so use
+                * an illegal-instruction exception when injecting a program
+                * interrupt into the guest.
+                */
+               if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
+                       flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+               else
+                       flags = SRR1_PROGILL;
 
                emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
                if (emul != EMULATE_DONE) {
@@ -1531,7 +1541,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        kvmppc_clear_debug(vcpu);
 
-       /* No need for kvm_guest_exit. It's done in handle_exit.
+       /* No need for guest_exit. It's done in handle_exit.
           We also get here with interrupts enabled. */
 
        /* Make sure we save the guest FPU/Altivec/VSX state */
index 4afae695899ad99373c033e53978603879bf078c..02b4672f7347ecd2ccfc0cd15f5238b8d1681f17 100644 (file)
@@ -776,7 +776,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
-       /* No need for kvm_guest_exit. It's done in handle_exit.
+       /* No need for guest_exit. It's done in handle_exit.
           We also get here with interrupts enabled. */
 
        /* Switch back to user space debug context */
@@ -1012,7 +1012,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        }
 
        trace_kvm_exit(exit_nr, vcpu);
-       __kvm_guest_exit();
+       guest_exit_irqoff();
 
        local_irq_enable();
 
index 5cc2e7af3a7b96322b9f768a7aadb867ea6b7560..b379146de55bf13d76aa5882dd342148db687fb8 100644 (file)
@@ -302,7 +302,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        advance = 0;
                        printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
                               "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
-                       kvmppc_core_queue_program(vcpu, 0);
                }
        }
 
index 6249cdc834d14977ffe5344a4b15857de01abe79..ed38f811411847d659e1810c9e5a94c583369df6 100644 (file)
@@ -1823,7 +1823,8 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
        return 0;
 }
 
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+                         struct kvm_kernel_irq_routing_entry *e,
                          const struct kvm_irq_routing_entry *ue)
 {
        int r = -EINVAL;
index 02416fea765301303c0130d3900a1f929319582f..1ac036e45ed4f8255e0fb1f05a3bfe6ce165af04 100644 (file)
@@ -119,7 +119,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
                        continue;
                }
 
-               __kvm_guest_enter();
+               guest_enter_irqoff();
                return 1;
        }
 
index e45b88a5d7e0f7ed20f68516050a7e8a8c37fa1d..df6ad949e35f6dc8b416453bc548dcf6592ad301 100644 (file)
@@ -64,7 +64,6 @@ END_FTR_SECTION(0, 1);                                                \
        OPAL_BRANCH(opal_tracepoint_entry) \
        mfcr    r12;                    \
        stw     r12,8(r1);              \
-       std     r1,PACAR1(r13);         \
        li      r11,0;                  \
        mfmsr   r12;                    \
        ori     r11,r11,MSR_EE;         \
@@ -127,7 +126,6 @@ opal_tracepoint_entry:
        mfcr    r12
        std     r11,16(r1)
        stw     r12,8(r1)
-       std     r1,PACAR1(r13)
        li      r11,0
        mfmsr   r12
        ori     r11,r11,MSR_EE
index 946fc86202fdac371a6e32028c16b11967feb668..183b01727de4e10b9477e987423f038496c2fc8d 100644 (file)
@@ -43,6 +43,7 @@
 /* s390-specific vcpu->requests bit members */
 #define KVM_REQ_ENABLE_IBS         8
 #define KVM_REQ_DISABLE_IBS        9
+#define KVM_REQ_ICPT_OPEREXC       10
 
 #define SIGP_CTRL_C            0x80
 #define SIGP_CTRL_SCN_MASK     0x3f
@@ -666,6 +667,7 @@ struct kvm_arch{
        int user_cpu_state_ctrl;
        int user_sigp;
        int user_stsi;
+       int user_instr0;
        struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
        wait_queue_head_t ipte_wq;
        int ipte_lock_count;
index 1e0849e209650dbe8e5de156a6afa1d3bdf45bf3..31a05330d11c77fc0c34cd30ea54bc2075a2c3d9 100644 (file)
@@ -439,6 +439,23 @@ static int debug_exit_required(struct kvm_vcpu *vcpu)
 #define guest_per_enabled(vcpu) \
                             (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
 
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu)
+{
+       const u8 ilen = kvm_s390_get_ilen(vcpu);
+       struct kvm_s390_pgm_info pgm_info = {
+               .code = PGM_PER,
+               .per_code = PER_EVENT_IFETCH >> 24,
+               .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen),
+       };
+
+       /*
+        * The PSW points to the next instruction, so the intercepted
+        * instruction generated a PER i-fetch event. The PER address
+        * therefore points at the previous PSW address (which could be
+        * an EXECUTE-type instruction).
+        */
+       return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+}
+
 static void filter_guest_per_event(struct kvm_vcpu *vcpu)
 {
        u32 perc = vcpu->arch.sie_block->perc << 24;
index 9359f65c8634ba3f6924b42ac863618e59be6e9f..7a2f1551bc39545d79232d758caa74e6eef15a30 100644 (file)
@@ -359,11 +359,16 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
            test_kvm_facility(vcpu->kvm, 74))
                return handle_sthyi(vcpu);
 
+       if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
+               return -EOPNOTSUPP;
+
        return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
 }
 
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
 {
+       int rc, per_rc = 0;
+
        if (kvm_is_ucontrol(vcpu->kvm))
                return -EOPNOTSUPP;
 
@@ -372,7 +377,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
        case 0x18:
                return handle_noop(vcpu);
        case 0x04:
-               return handle_instruction(vcpu);
+               rc = handle_instruction(vcpu);
+               break;
        case 0x08:
                return handle_prog(vcpu);
        case 0x14:
@@ -384,10 +390,18 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
        case 0x28:
                return handle_stop(vcpu);
        case 0x2c:
-               return handle_operexc(vcpu);
+               rc = handle_operexc(vcpu);
+               break;
        case 0x38:
-               return handle_partial_execution(vcpu);
+               rc = handle_partial_execution(vcpu);
+               break;
        default:
                return -EOPNOTSUPP;
        }
+
+       /* process PER, also if the instruction is handled in user space */
+       if (vcpu->arch.sie_block->icptstatus & 0x02 &&
+           (!rc || rc == -EOPNOTSUPP))
+               per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
+       return per_rc ? per_rc : rc;
 }
index ca19627779db09756beeb18afe59ee60d8f64ac8..24524c0f3ef88b2987c7bc1d54d924e811428286 100644 (file)
@@ -2246,7 +2246,8 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
        return ret;
 }
 
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+                         struct kvm_kernel_irq_routing_entry *e,
                          const struct kvm_irq_routing_entry *ue)
 {
        int ret;
index 03eeeb0ded2470a3d9ea0f6df82f6308266a3c83..63ac7c1641a7b16fbd412d1b4dd3336512f067d4 100644 (file)
@@ -364,6 +364,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_USER_STSI:
        case KVM_CAP_S390_SKEYS:
        case KVM_CAP_S390_IRQ_STATE:
+       case KVM_CAP_S390_USER_INSTR0:
                r = 1;
                break;
        case KVM_CAP_S390_MEM_OP:
@@ -456,6 +457,16 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        return r;
 }
 
+static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
+{
+       unsigned int i;
+       struct kvm_vcpu *vcpu;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
+       }
+}
+
 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 {
        int r;
@@ -507,6 +518,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
                kvm->arch.user_stsi = 1;
                r = 0;
                break;
+       case KVM_CAP_S390_USER_INSTR0:
+               VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
+               kvm->arch.user_instr0 = 1;
+               icpt_operexc_on_all_vcpus(kvm);
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
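A minimal userspace sketch of enabling the capability (vm_fd is an assumed open VM file descriptor; error handling omitted):

    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    struct kvm_enable_cap cap = { .cap = KVM_CAP_S390_USER_INSTR0 };

    /* VM-wide ioctl; running VCPUs pick up ICTL_OPEREXC through the
     * KVM_REQ_ICPT_OPEREXC request synced by the handler above. */
    ioctl(vm_fd, KVM_ENABLE_CAP, &cap);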
@@ -1836,6 +1853,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
                vcpu->arch.gmap = vcpu->kvm->arch.gmap;
                sca_add_vcpu(vcpu);
        }
+       if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
+               vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
        /* make vcpu_load load the right gmap on the first trigger */
        vcpu->arch.enabled_gmap = vcpu->arch.gmap;
 }
@@ -1923,8 +1942,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        }
        vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
        vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
-       if (test_kvm_facility(vcpu->kvm, 74))
-               vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
 
        if (vcpu->kvm->arch.use_cmma) {
                rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -2369,6 +2386,11 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
                goto retry;
        }
 
+       if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
+               vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
+               goto retry;
+       }
+
        /* nothing to do, just clear the request */
        clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 
@@ -2623,14 +2645,14 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                 * guest_enter and guest_exit should be no uaccess.
                 */
                local_irq_disable();
-               __kvm_guest_enter();
+               guest_enter_irqoff();
                __disable_cpu_timer_accounting(vcpu);
                local_irq_enable();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                local_irq_disable();
                __enable_cpu_timer_accounting(vcpu);
-               __kvm_guest_exit();
+               guest_exit_irqoff();
                local_irq_enable();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
index 031f451bb2cf79a2550b14eb6bd2701a0e29d765..b8432862a81715761fbe274071d0988021b61a54 100644 (file)
@@ -238,6 +238,8 @@ static inline void kvm_s390_forward_psw(struct kvm_vcpu *vcpu, int ilen)
 }
 static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
 {
+       /* don't inject PER events if we re-execute the instruction */
+       vcpu->arch.sie_block->icptstatus &= ~0x02;
        kvm_s390_rewind_psw(vcpu, kvm_s390_get_ilen(vcpu));
 }
 
@@ -377,6 +379,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu,
                            struct kvm_guest_debug *dbg);
 void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu);
 void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu);
 void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu);
 
 /* support for Basic/Extended SCA handling */
index c77ad2dc334ff7f3b2ca05b104df41d84edf0a6b..46160388e9964ba201a5c61768cc83eaa19eb4bb 100644 (file)
@@ -1185,7 +1185,15 @@ static int handle_sckpf(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static int handle_ptff(struct kvm_vcpu *vcpu)
+{
+       /* we don't emulate any control instructions yet */
+       kvm_s390_set_psw_cc(vcpu, 3);
+       return 0;
+}
+
 static const intercept_handler_t x01_handlers[256] = {
+       [0x04] = handle_ptff,
        [0x07] = handle_sckpf,
 };
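As a sketch of the dispatch (mirroring the existing 0x01 intercept handler): the low byte of the 0x01xx opcode indexes x01_handlers, so PTFF (opcode 0x0104) reaches handle_ptff and SCKPF (0x0107) reaches handle_sckpf.

    /* handler = x01_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
     * return handler ? handler(vcpu) : -EOPNOTSUPP;
     */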
 
index 6895e7b3be123767d4829072ff3ba39b53350cac..c106488b41371b2c8cc188df7ba5e8e5d7c06f31 100644 (file)
@@ -765,13 +765,13 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        local_irq_disable();
-       kvm_guest_enter();
+       guest_enter_irqoff();
        local_irq_enable();
 
        rc = sie64a(scb_s, vcpu->run->s.regs.gprs);
 
        local_irq_disable();
-       kvm_guest_exit();
+       guest_exit_irqoff();
        local_irq_enable();
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 
index 360c5171ea1a89412d0c308f5c16bfbe36e39e0e..9fcb197aa5cee07a050f0bef1d2b3704dfac7a9d 100644 (file)
@@ -34,8 +34,9 @@
 #include <asm/asm.h>
 #include <asm/kvm_page_track.h>
 
-#define KVM_MAX_VCPUS 255
-#define KVM_SOFT_MAX_VCPUS 160
+#define KVM_MAX_VCPUS 288
+#define KVM_SOFT_MAX_VCPUS 240
+#define KVM_MAX_VCPU_ID 1023
 #define KVM_USER_MEM_SLOTS 509
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
@@ -598,6 +599,7 @@ struct kvm_vcpu_arch {
        u64 mcg_cap;
        u64 mcg_status;
        u64 mcg_ctl;
+       u64 mcg_ext_ctl;
        u64 *mce_banks;
 
        /* Cache MMIO info */
@@ -681,9 +683,12 @@ struct kvm_arch_memory_slot {
 struct kvm_apic_map {
        struct rcu_head rcu;
        u8 mode;
-       struct kvm_lapic *phys_map[256];
-       /* first index is cluster id second is cpu id in a cluster */
-       struct kvm_lapic *logical_map[16][16];
+       u32 max_apic_id;
+       union {
+               struct kvm_lapic *xapic_flat_map[8];
+               struct kvm_lapic *xapic_cluster_map[16][4];
+       };
+       struct kvm_lapic *phys_map[];
 };
 
 /* Hyper-V emulation context */
@@ -778,6 +783,9 @@ struct kvm_arch {
        u32 ldr_mode;
        struct page *avic_logical_id_table_page;
        struct page *avic_physical_id_table_page;
+
+       bool x2apic_format;
+       bool x2apic_broadcast_quirk_disabled;
 };
 
 struct kvm_vm_stat {
@@ -1008,6 +1016,8 @@ struct kvm_x86_ops {
 
        int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
        void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
+
+       void (*setup_mce)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
@@ -1028,7 +1038,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu);
 void kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-               u64 dirty_mask, u64 nx_mask, u64 x_mask);
+               u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask);
 
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@ -1082,6 +1092,8 @@ extern u64  kvm_max_tsc_scaling_ratio;
 /* 1ull << kvm_tsc_scaling_ratio_frac_bits */
 extern u64  kvm_default_tsc_scaling_ratio;
 
+extern u64 kvm_mce_cap_supported;
+
 enum emulation_result {
        EMULATE_DONE,         /* no further processing */
        EMULATE_USER_EXIT,    /* kvm_run ready for userspace exit */
@@ -1356,7 +1368,7 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
 bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
                             struct kvm_vcpu **dest_vcpu);
 
-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
                     struct kvm_lapic_irq *irq);
 
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
index dfb4c64768771f8fa21f622eb6046e27e1ab3ed6..25810b144b58d979c5ba9355fc7b0907494f5869 100644 (file)
@@ -110,13 +110,17 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
        return r;
 }
 
-void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
+void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
                     struct kvm_lapic_irq *irq)
 {
-       trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data);
+       trace_kvm_msi_set_irq(e->msi.address_lo | (kvm->arch.x2apic_format ?
+                                            (u64)e->msi.address_hi << 32 : 0),
+                             e->msi.data);
 
        irq->dest_id = (e->msi.address_lo &
                        MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+       if (kvm->arch.x2apic_format)
+               irq->dest_id |= MSI_ADDR_EXT_DEST_ID(e->msi.address_hi);
        irq->vector = (e->msi.data &
                        MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT;
        irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo;
@@ -129,15 +133,24 @@ void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e,
 }
 EXPORT_SYMBOL_GPL(kvm_set_msi_irq);
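A worked example with hypothetical register values, assuming x2apic_format is enabled: with address_lo = 0xfeebc000, the classic 8-bit destination field (bits 19-12) yields 0xbc, and address_hi contributes its bits 31-8 unshifted:

    /* dest_id  = (0xfeebc000 & MSI_ADDR_DEST_ID_MASK) >> 12  = 0x000000bc
     * dest_id |=  0x00012300 & 0xffffff00                    = 0x000123bc
     * bits 7-0 of address_hi must be zero for the route to be valid,
     * see kvm_msi_route_invalid() below.
     */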
 
+static inline bool kvm_msi_route_invalid(struct kvm *kvm,
+               struct kvm_kernel_irq_routing_entry *e)
+{
+       return kvm->arch.x2apic_format && (e->msi.address_hi & 0xff);
+}
+
 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
                struct kvm *kvm, int irq_source_id, int level, bool line_status)
 {
        struct kvm_lapic_irq irq;
 
+       if (kvm_msi_route_invalid(kvm, e))
+               return -EINVAL;
+
        if (!level)
                return -1;
 
-       kvm_set_msi_irq(e, &irq);
+       kvm_set_msi_irq(kvm, e, &irq);
 
        return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL);
 }
@@ -153,7 +166,10 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
        if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
                return -EWOULDBLOCK;
 
-       kvm_set_msi_irq(e, &irq);
+       if (kvm_msi_route_invalid(kvm, e))
+               return -EINVAL;
+
+       kvm_set_msi_irq(kvm, e, &irq);
 
        if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
                return r;
@@ -248,7 +264,8 @@ static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
        return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
 }
 
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+                         struct kvm_kernel_irq_routing_entry *e,
                          const struct kvm_irq_routing_entry *ue)
 {
        int r = -EINVAL;
@@ -285,6 +302,9 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
                e->msi.address_lo = ue->u.msi.address_lo;
                e->msi.address_hi = ue->u.msi.address_hi;
                e->msi.data = ue->u.msi.data;
+
+               if (kvm_msi_route_invalid(kvm, e))
+                       goto out;
                break;
        case KVM_IRQ_ROUTING_HV_SINT:
                e->set = kvm_hv_set_sint;
@@ -388,21 +408,16 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
                               kvm->arch.nr_reserved_ioapic_pins);
        for (i = 0; i < nr_ioapic_pins; ++i) {
                hlist_for_each_entry(entry, &table->map[i], link) {
-                       u32 dest_id, dest_mode;
-                       bool level;
+                       struct kvm_lapic_irq irq;
 
                        if (entry->type != KVM_IRQ_ROUTING_MSI)
                                continue;
-                       dest_id = (entry->msi.address_lo >> 12) & 0xff;
-                       dest_mode = (entry->msi.address_lo >> 2) & 0x1;
-                       level = entry->msi.data & MSI_DATA_TRIGGER_LEVEL;
-                       if (level && kvm_apic_match_dest(vcpu, NULL, 0,
-                                               dest_id, dest_mode)) {
-                               u32 vector = entry->msi.data & 0xff;
-
-                               __set_bit(vector,
-                                         ioapic_handled_vectors);
-                       }
+
+                       kvm_set_msi_irq(vcpu->kvm, entry, &irq);
+
+                       if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0,
+                                               irq.dest_id, irq.dest_mode))
+                               __set_bit(irq.vector, ioapic_handled_vectors);
                }
        }
        srcu_read_unlock(&kvm->irq_srcu, idx);
index fdc05ae08bac9401f2da25459ba1dca6a3268cb2..6895fd28aae97c8d1b11c908ccfd7162011cc3ab 100644 (file)
@@ -115,26 +115,43 @@ static inline int apic_enabled(struct kvm_lapic *apic)
        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
-/* The logical map is definitely wrong if we have multiple
- * modes at the same time.  (Physical map is always right.)
- */
-static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
-{
-       return !(map->mode & (map->mode - 1));
+static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
+               u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
+       switch (map->mode) {
+       case KVM_APIC_MODE_X2APIC: {
+               u32 offset = (dest_id >> 16) * 16;
+               u32 max_apic_id = map->max_apic_id;
+
+               if (offset <= max_apic_id) {
+                       u8 cluster_size = min(max_apic_id - offset + 1, 16U);
+
+                       *cluster = &map->phys_map[offset];
+                       *mask = dest_id & (0xffff >> (16 - cluster_size));
+               } else {
+                       *mask = 0;
+               }
+
+               return true;
+               }
+       case KVM_APIC_MODE_XAPIC_FLAT:
+               *cluster = map->xapic_flat_map;
+               *mask = dest_id & 0xff;
+               return true;
+       case KVM_APIC_MODE_XAPIC_CLUSTER:
+               *cluster = map->xapic_cluster_map[dest_id >> 4];
+               *mask = dest_id & 0xf;
+               return true;
+       default:
+               /* Not optimized. */
+               return false;
+       }
 }
 
-static inline void
-apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+static void kvm_apic_map_free(struct rcu_head *rcu)
 {
-       unsigned lid_bits;
-
-       BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER !=  4);
-       BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT    !=  8);
-       BUILD_BUG_ON(KVM_APIC_MODE_X2APIC        != 16);
-       lid_bits = map->mode;
+       struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
 
-       *cid = dest_id >> lid_bits;
-       *lid = dest_id & ((1 << lid_bits) - 1);
+       kvfree(map);
 }
 
 static void recalculate_apic_map(struct kvm *kvm)
@@ -142,17 +159,26 @@ static void recalculate_apic_map(struct kvm *kvm)
        struct kvm_apic_map *new, *old = NULL;
        struct kvm_vcpu *vcpu;
        int i;
-
-       new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+       u32 max_id = 255;
 
        mutex_lock(&kvm->arch.apic_map_lock);
 
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               if (kvm_apic_present(vcpu))
+                       max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
+
+       new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
+                          sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
+
        if (!new)
                goto out;
 
+       new->max_apic_id = max_id;
+
        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvm_lapic *apic = vcpu->arch.apic;
-               u16 cid, lid;
+               struct kvm_lapic **cluster;
+               u16 mask;
                u32 ldr, aid;
 
                if (!kvm_apic_present(vcpu))
@@ -161,7 +187,7 @@ static void recalculate_apic_map(struct kvm *kvm)
                aid = kvm_apic_id(apic);
                ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 
-               if (aid < ARRAY_SIZE(new->phys_map))
+               if (aid <= new->max_apic_id)
                        new->phys_map[aid] = apic;
 
                if (apic_x2apic_mode(apic)) {
@@ -174,13 +200,11 @@ static void recalculate_apic_map(struct kvm *kvm)
                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
                }
 
-               if (!kvm_apic_logical_map_valid(new))
+               if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
                        continue;
 
-               apic_logical_id(new, ldr, &cid, &lid);
-
-               if (lid && cid < ARRAY_SIZE(new->logical_map))
-                       new->logical_map[cid][ffs(lid) - 1] = apic;
+               if (mask)
+                       cluster[ffs(mask) - 1] = apic;
        }
 out:
        old = rcu_dereference_protected(kvm->arch.apic_map,
@@ -189,7 +213,7 @@ static void recalculate_apic_map(struct kvm *kvm)
        mutex_unlock(&kvm->arch.apic_map_lock);
 
        if (old)
-               kfree_rcu(old, rcu);
+               call_rcu(&old->rcu, kvm_apic_map_free);
 
        kvm_make_scan_ioapic_request(kvm);
 }
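For scale, a sizing sketch of the flexible phys_map[] allocation, assuming 64-bit pointers:

    /* default (max_id = 255):
     *   sizeof(struct kvm_apic_map) + 8 * 256  ~= header + 2 KiB
     * A guest using large 32-bit x2APIC IDs grows this linearly, which
     * is why kvm_kvzalloc (kmalloc with vmalloc fallback) replaces
     * kzalloc and call_rcu/kvfree replaces kfree_rcu above.
     */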
@@ -210,7 +234,7 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
        }
 }
 
-static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
 {
        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
        recalculate_apic_map(apic->vcpu->kvm);
@@ -222,11 +246,11 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
        recalculate_apic_map(apic->vcpu->kvm);
 }
 
-static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u8 id)
+static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
 {
        u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
 
-       kvm_lapic_set_reg(apic, APIC_ID, id << 24);
+       kvm_lapic_set_reg(apic, APIC_ID, id);
        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
        recalculate_apic_map(apic->vcpu->kvm);
 }
@@ -599,17 +623,30 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
        }
 }
 
-/* KVM APIC implementation has two quirks
- *  - dest always begins at 0 while xAPIC MDA has offset 24,
- *  - IOxAPIC messages have to be delivered (directly) to x2APIC.
+/* The KVM local APIC implementation has two quirks:
+ *
+ *  - the xAPIC MDA stores the destination at bits 24-31, while this
+ *    is not true of struct kvm_lapic_irq's dest_id field.  This is
+ *    just a quirk in the API and is not problematic.
+ *
+ *  - in-kernel IOAPIC messages have to be delivered directly to
+ *    x2APIC, because the kernel does not support interrupt remapping.
+ *    In order to support broadcast without interrupt remapping, x2APIC
+ *    rewrites the destination of non-IPI messages from APIC_BROADCAST
+ *    to X2APIC_BROADCAST.
+ *
+ * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
+ * important when userspace wants to use x2APIC-format MSIs, because
+ * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
  */
-static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
-                                              struct kvm_lapic *target)
+static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
+               struct kvm_lapic *source, struct kvm_lapic *target)
 {
        bool ipi = source != NULL;
        bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
 
-       if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+       if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
+           !ipi && dest_id == APIC_BROADCAST && x2apic_mda)
                return X2APIC_BROADCAST;
 
        return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
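Concretely, SET_APIC_DEST_FIELD shifts an xAPIC ID into MDA bits 31-24; a quick sketch:

    /* xAPIC:  dest_id 0x0f       -> mda = 0x0f000000
     * x2APIC: dest_id used as-is -> mda = dest_id
     * non-IPI broadcast to an x2APIC target (quirk enabled):
     *         dest_id 0xff (APIC_BROADCAST) -> mda = X2APIC_BROADCAST
     */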
@@ -619,7 +656,7 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                           int short_hand, unsigned int dest, int dest_mode)
 {
        struct kvm_lapic *target = vcpu->arch.apic;
-       u32 mda = kvm_apic_mda(dest, source, target);
+       u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 
        apic_debug("target %p, source %p, dest 0x%x, "
                   "dest_mode 0x%x, short_hand 0x%x\n",
@@ -671,102 +708,126 @@ static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
        }
 }
 
-bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
-               struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
+static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
+               struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
 {
-       struct kvm_apic_map *map;
-       unsigned long bitmap = 1;
-       struct kvm_lapic **dst;
-       int i;
-       bool ret, x2apic_ipi;
+       if (kvm->arch.x2apic_broadcast_quirk_disabled) {
+               if ((irq->dest_id == APIC_BROADCAST &&
+                               map->mode != KVM_APIC_MODE_X2APIC))
+                       return true;
+               if (irq->dest_id == X2APIC_BROADCAST)
+                       return true;
+       } else {
+               bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
+               if (irq->dest_id == (x2apic_ipi ?
+                                    X2APIC_BROADCAST : APIC_BROADCAST))
+                       return true;
+       }
 
-       *r = -1;
+       return false;
+}
 
-       if (irq->shorthand == APIC_DEST_SELF) {
-               *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
-               return true;
-       }
+/* Return true if the interrupt can be handled by using *bitmap as an index
+ * mask for valid destinations in the *dst array.
+ * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
+ * Note: we may have zero kvm_lapic destinations when we return true, which
+ * means that the interrupt should be dropped.  In this case, *bitmap would be
+ * zero and *dst undefined.
+ */
+static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
+               struct kvm_lapic **src, struct kvm_lapic_irq *irq,
+               struct kvm_apic_map *map, struct kvm_lapic ***dst,
+               unsigned long *bitmap)
+{
+       int i, lowest;
 
-       if (irq->shorthand)
+       if (irq->shorthand == APIC_DEST_SELF && src) {
+               *dst = src;
+               *bitmap = 1;
+               return true;
+       } else if (irq->shorthand)
                return false;
 
-       x2apic_ipi = src && apic_x2apic_mode(src);
-       if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+       if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
                return false;
 
-       ret = true;
-       rcu_read_lock();
-       map = rcu_dereference(kvm->arch.apic_map);
-
-       if (!map) {
-               ret = false;
-               goto out;
+       if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+               if (irq->dest_id > map->max_apic_id) {
+                       *bitmap = 0;
+               } else {
+                       *dst = &map->phys_map[irq->dest_id];
+                       *bitmap = 1;
+               }
+               return true;
        }
 
-       if (irq->dest_mode == APIC_DEST_PHYSICAL) {
-               if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
-                       goto out;
+       *bitmap = 0;
+       if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
+                               (u16 *)bitmap))
+               return false;
 
-               dst = &map->phys_map[irq->dest_id];
-       } else {
-               u16 cid;
+       if (!kvm_lowest_prio_delivery(irq))
+               return true;
 
-               if (!kvm_apic_logical_map_valid(map)) {
-                       ret = false;
-                       goto out;
+       if (!kvm_vector_hashing_enabled()) {
+               lowest = -1;
+               for_each_set_bit(i, bitmap, 16) {
+                       if (!(*dst)[i])
+                               continue;
+                       if (lowest < 0)
+                               lowest = i;
+                       else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
+                                               (*dst)[lowest]->vcpu) < 0)
+                               lowest = i;
                }
+       } else {
+               if (!*bitmap)
+                       return true;
 
-               apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+               lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
+                               bitmap, 16);
 
-               if (cid >= ARRAY_SIZE(map->logical_map))
-                       goto out;
+               if (!(*dst)[lowest]) {
+                       kvm_apic_disabled_lapic_found(kvm);
+                       *bitmap = 0;
+                       return true;
+               }
+       }
 
-               dst = map->logical_map[cid];
+       *bitmap = (lowest >= 0) ? 1 << lowest : 0;
 
-               if (!kvm_lowest_prio_delivery(irq))
-                       goto set_irq;
+       return true;
+}
 
-               if (!kvm_vector_hashing_enabled()) {
-                       int l = -1;
-                       for_each_set_bit(i, &bitmap, 16) {
-                               if (!dst[i])
-                                       continue;
-                               if (l < 0)
-                                       l = i;
-                               else if (kvm_apic_compare_prio(dst[i]->vcpu,
-                                                       dst[l]->vcpu) < 0)
-                                       l = i;
-                       }
-                       bitmap = (l >= 0) ? 1 << l : 0;
-               } else {
-                       int idx;
-                       unsigned int dest_vcpus;
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+               struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
+{
+       struct kvm_apic_map *map;
+       unsigned long bitmap;
+       struct kvm_lapic **dst = NULL;
+       int i;
+       bool ret;
 
-                       dest_vcpus = hweight16(bitmap);
-                       if (dest_vcpus == 0)
-                               goto out;
+       *r = -1;
 
-                       idx = kvm_vector_to_index(irq->vector,
-                               dest_vcpus, &bitmap, 16);
+       if (irq->shorthand == APIC_DEST_SELF) {
+               *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
+               return true;
+       }
 
-                       if (!dst[idx]) {
-                               kvm_apic_disabled_lapic_found(kvm);
-                               goto out;
-                       }
+       rcu_read_lock();
+       map = rcu_dereference(kvm->arch.apic_map);
 
-                       bitmap = (idx >= 0) ? 1 << idx : 0;
+       ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
+       if (ret)
+               for_each_set_bit(i, &bitmap, 16) {
+                       if (!dst[i])
+                               continue;
+                       if (*r < 0)
+                               *r = 0;
+                       *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
                }
-       }
 
-set_irq:
-       for_each_set_bit(i, &bitmap, 16) {
-               if (!dst[i])
-                       continue;
-               if (*r < 0)
-                       *r = 0;
-               *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
-       }
-out:
        rcu_read_unlock();
        return ret;
 }
@@ -789,8 +850,9 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
                        struct kvm_vcpu **dest_vcpu)
 {
        struct kvm_apic_map *map;
+       unsigned long bitmap;
+       struct kvm_lapic **dst = NULL;
        bool ret = false;
-       struct kvm_lapic *dst = NULL;
 
        if (irq->shorthand)
                return false;
@@ -798,69 +860,16 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);
 
-       if (!map)
-               goto out;
-
-       if (irq->dest_mode == APIC_DEST_PHYSICAL) {
-               if (irq->dest_id == 0xFF)
-                       goto out;
-
-               if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
-                       goto out;
-
-               dst = map->phys_map[irq->dest_id];
-               if (dst && kvm_apic_present(dst->vcpu))
-                       *dest_vcpu = dst->vcpu;
-               else
-                       goto out;
-       } else {
-               u16 cid;
-               unsigned long bitmap = 1;
-               int i, r = 0;
-
-               if (!kvm_apic_logical_map_valid(map))
-                       goto out;
-
-               apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
-
-               if (cid >= ARRAY_SIZE(map->logical_map))
-                       goto out;
-
-               if (kvm_vector_hashing_enabled() &&
-                               kvm_lowest_prio_delivery(irq)) {
-                       int idx;
-                       unsigned int dest_vcpus;
+       if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
+                       hweight16(bitmap) == 1) {
+               unsigned long i = find_first_bit(&bitmap, 16);
 
-                       dest_vcpus = hweight16(bitmap);
-                       if (dest_vcpus == 0)
-                               goto out;
-
-                       idx = kvm_vector_to_index(irq->vector, dest_vcpus,
-                                                 &bitmap, 16);
-
-                       dst = map->logical_map[cid][idx];
-                       if (!dst) {
-                               kvm_apic_disabled_lapic_found(kvm);
-                               goto out;
-                       }
-
-                       *dest_vcpu = dst->vcpu;
-               } else {
-                       for_each_set_bit(i, &bitmap, 16) {
-                               dst = map->logical_map[cid][i];
-                               if (++r == 2)
-                                       goto out;
-                       }
-
-                       if (dst && kvm_apic_present(dst->vcpu))
-                               *dest_vcpu = dst->vcpu;
-                       else
-                               goto out;
+               if (dst[i]) {
+                       *dest_vcpu = dst[i]->vcpu;
+                       ret = true;
                }
        }
 
-       ret = true;
-out:
        rcu_read_unlock();
        return ret;
 }
@@ -1127,12 +1136,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
                return 0;
 
        switch (offset) {
-       case APIC_ID:
-               if (apic_x2apic_mode(apic))
-                       val = kvm_apic_id(apic);
-               else
-                       val = kvm_apic_id(apic) << 24;
-               break;
        case APIC_ARBPRI:
                apic_debug("Access APIC ARBPRI register which is for P6\n");
                break;
@@ -1349,41 +1352,52 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
 
+static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+{
+       kvm_x86_ops->cancel_hv_timer(apic->vcpu);
+       apic->lapic_timer.hv_timer_in_use = false;
+}
+
 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
 
        WARN_ON(!apic->lapic_timer.hv_timer_in_use);
        WARN_ON(swait_active(&vcpu->wq));
-       kvm_x86_ops->cancel_hv_timer(vcpu);
-       apic->lapic_timer.hv_timer_in_use = false;
+       cancel_hv_tscdeadline(apic);
        apic_timer_expired(apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
 
+static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+{
+       u64 tscdeadline = apic->lapic_timer.tscdeadline;
+
+       if (atomic_read(&apic->lapic_timer.pending) ||
+               kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
+               if (apic->lapic_timer.hv_timer_in_use)
+                       cancel_hv_tscdeadline(apic);
+       } else {
+               apic->lapic_timer.hv_timer_in_use = true;
+               hrtimer_cancel(&apic->lapic_timer.timer);
+
+               /* In case the sw timer triggered in the window */
+               if (atomic_read(&apic->lapic_timer.pending))
+                       cancel_hv_tscdeadline(apic);
+       }
+       trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
+                       apic->lapic_timer.hv_timer_in_use);
+       return apic->lapic_timer.hv_timer_in_use;
+}
+
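The return value encodes a fallback contract; a sketch of how callers are expected to use it (mirroring start_apic_timer further down):

    /* if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
     *         start_sw_tscdeadline(apic);   -- software hrtimer fallback
     * On every failure path start_hv_tscdeadline() cancels itself, so
     * hv_timer_in_use is false whenever it returns false.
     */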
 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
 
        WARN_ON(apic->lapic_timer.hv_timer_in_use);
 
-       if (apic_lvtt_tscdeadline(apic) &&
-           !atomic_read(&apic->lapic_timer.pending)) {
-               u64 tscdeadline = apic->lapic_timer.tscdeadline;
-
-               if (!kvm_x86_ops->set_hv_timer(vcpu, tscdeadline)) {
-                       apic->lapic_timer.hv_timer_in_use = true;
-                       hrtimer_cancel(&apic->lapic_timer.timer);
-
-                       /* In case the sw timer triggered in the window */
-                       if (atomic_read(&apic->lapic_timer.pending)) {
-                               apic->lapic_timer.hv_timer_in_use = false;
-                               kvm_x86_ops->cancel_hv_timer(apic->vcpu);
-                       }
-               }
-               trace_kvm_hv_timer_state(vcpu->vcpu_id,
-                               apic->lapic_timer.hv_timer_in_use);
-       }
+       if (apic_lvtt_tscdeadline(apic))
+               start_hv_tscdeadline(apic);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
 
@@ -1395,8 +1409,7 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
        if (!apic->lapic_timer.hv_timer_in_use)
                return;
 
-       kvm_x86_ops->cancel_hv_timer(vcpu);
-       apic->lapic_timer.hv_timer_in_use = false;
+       cancel_hv_tscdeadline(apic);
 
        if (atomic_read(&apic->lapic_timer.pending))
                return;
@@ -1451,15 +1464,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
                           ktime_to_ns(ktime_add_ns(now,
                                        apic->lapic_timer.period)));
        } else if (apic_lvtt_tscdeadline(apic)) {
-               /* lapic timer in tsc deadline mode */
-               u64 tscdeadline = apic->lapic_timer.tscdeadline;
-
-               if (kvm_x86_ops->set_hv_timer &&
-                   !kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
-                       apic->lapic_timer.hv_timer_in_use = true;
-                       trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
-                                       apic->lapic_timer.hv_timer_in_use);
-               } else
+               if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
                        start_sw_tscdeadline(apic);
        }
 }
@@ -1488,7 +1493,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
        switch (reg) {
        case APIC_ID:           /* Local APIC ID */
                if (!apic_x2apic_mode(apic))
-                       kvm_apic_set_id(apic, val >> 24);
+                       kvm_apic_set_xapic_id(apic, val >> 24);
                else
                        ret = 1;
                break;
@@ -1749,9 +1754,10 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 
        /* update jump label if enable bit changes */
        if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
-               if (value & MSR_IA32_APICBASE_ENABLE)
+               if (value & MSR_IA32_APICBASE_ENABLE) {
+                       kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
                        static_key_slow_dec_deferred(&apic_hw_disabled);
-               else
+               } else
                        static_key_slow_inc(&apic_hw_disabled.key);
                recalculate_apic_map(vcpu->kvm);
        }
@@ -1791,8 +1797,11 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
        /* Stop the timer in case it's a reset to an active apic */
        hrtimer_cancel(&apic->lapic_timer.timer);
 
-       if (!init_event)
-               kvm_apic_set_id(apic, vcpu->vcpu_id);
+       if (!init_event) {
+               kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
+                                        MSR_IA32_APICBASE_ENABLE);
+               kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
+       }
        kvm_apic_set_version(apic->vcpu);
 
        for (i = 0; i < KVM_APIC_LVT_NUM; i++)
@@ -1931,9 +1940,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
         * thinking that APIC state has changed.
         */
        vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
-       kvm_lapic_set_base(vcpu,
-                       APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
-
        static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
        kvm_lapic_reset(vcpu, false);
        kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
@@ -2013,17 +2019,48 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
        return vector;
 }
 
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
-               struct kvm_lapic_state *s)
+static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
+               struct kvm_lapic_state *s, bool set)
+{
+       if (apic_x2apic_mode(vcpu->arch.apic)) {
+               u32 *id = (u32 *)(s->regs + APIC_ID);
+
+               if (vcpu->kvm->arch.x2apic_format) {
+                       if (*id != vcpu->vcpu_id)
+                               return -EINVAL;
+               } else {
+                       if (set)
+                               *id >>= 24;
+                       else
+                               *id <<= 24;
+               }
+       }
+
+       return 0;
+}
+
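A worked example of the compatibility conversion for a VCPU in x2APIC mode with vcpu_id 1, assuming x2apic_format is not enabled:

    /* KVM_GET_LAPIC (set == false): internal id 0x01 -> *id <<= 24,
     *                               userspace sees 0x01000000
     * KVM_SET_LAPIC (set == true):  userspace 0x01000000 -> *id >>= 24,
     *                               internal id restored to 0x01
     * With x2apic_format enabled, the register keeps the full 32-bit ID
     * and must equal vcpu_id, otherwise -EINVAL.
     */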
+int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
+{
+       memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
+       return kvm_apic_state_fixup(vcpu, s, false);
+}
+
+int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
+       int r;
+
 
        kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
        /* set SPIV separately to get count of SW disabled APICs right */
        apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
+
+       r = kvm_apic_state_fixup(vcpu, s, true);
+       if (r)
+               return r;
        memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
-       /* call kvm_apic_set_id() to put apic into apic_map */
-       kvm_apic_set_id(apic, kvm_apic_id(apic));
+
+       recalculate_apic_map(vcpu->kvm);
        kvm_apic_set_version(vcpu);
 
        apic_update_ppr(apic);
@@ -2049,6 +2086,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
                kvm_rtc_eoi_tracking_restore_one(vcpu);
 
        vcpu->arch.apic_arb_prio = 0;
+
+       return 0;
 }
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
index 336ba51bb16ecc98d5c330bdb016c02fca9efd9f..f60d01c29d5107a49b37713addffed27c30097be 100644 (file)
@@ -81,8 +81,8 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
-               struct kvm_lapic_state *s);
+int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
+int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -200,9 +200,15 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
        return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
 }
 
-static inline int kvm_apic_id(struct kvm_lapic *apic)
+static inline u32 kvm_apic_id(struct kvm_lapic *apic)
 {
-       return (kvm_lapic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+       /* To avoid a race between the apic_base update and the subsequent
+        * APIC_ID update when switching to x2APIC mode, x2APIC mode always
+        * returns the initial x2APIC ID (the vcpu_id).
+        */
+       if (apic_x2apic_mode(apic))
+               return apic->vcpu->vcpu_id;
+
+       return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 }
 
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
index 837bf23c5b067c02a9ca532408495c67012b073d..3041902ec827d851f87c91b74aa9cd22022a8629 100644 (file)
@@ -175,6 +175,7 @@ static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mmio_mask;
+static u64 __read_mostly shadow_present_mask;
 
 static void mmu_spte_set(u64 *sptep, u64 spte);
 static void mmu_free_roots(struct kvm_vcpu *vcpu);
@@ -282,13 +283,14 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte)
 }
 
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
-               u64 dirty_mask, u64 nx_mask, u64 x_mask)
+               u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask)
 {
        shadow_user_mask = user_mask;
        shadow_accessed_mask = accessed_mask;
        shadow_dirty_mask = dirty_mask;
        shadow_nx_mask = nx_mask;
        shadow_x_mask = x_mask;
+       shadow_present_mask = p_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
@@ -304,7 +306,7 @@ static int is_nx(struct kvm_vcpu *vcpu)
 
 static int is_shadow_present_pte(u64 pte)
 {
-       return pte & PT_PRESENT_MASK && !is_mmio_spte(pte);
+       return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte);
 }
 
 static int is_large_pte(u64 pte)
@@ -2245,10 +2247,9 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
 {
        u64 spte;
 
-       BUILD_BUG_ON(VMX_EPT_READABLE_MASK != PT_PRESENT_MASK ||
-                       VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
+       BUILD_BUG_ON(VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK);
 
-       spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK |
+       spte = __pa(sp->spt) | shadow_present_mask | PT_WRITABLE_MASK |
               shadow_user_mask | shadow_x_mask | shadow_accessed_mask;
 
        mmu_spte_set(sptep, spte);
@@ -2515,13 +2516,19 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
                    bool can_unsync, bool host_writable)
 {
-       u64 spte;
+       u64 spte = 0;
        int ret = 0;
 
        if (set_mmio_spte(vcpu, sptep, gfn, pfn, pte_access))
                return 0;
 
-       spte = PT_PRESENT_MASK;
+       /*
+        * For the EPT case, shadow_present_mask is 0 if hardware
+        * supports exec-only page table entries.  In that case,
+        * ACC_USER_MASK and shadow_user_mask are used to represent
+        * read access.  See FNAME(gpte_access) in paging_tmpl.h.
+        */
+       spte |= shadow_present_mask;
        if (!speculative)
                spte |= shadow_accessed_mask;
 
@@ -3189,7 +3196,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
                MMU_WARN_ON(VALID_PAGE(root));
                if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
                        pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
-                       if (!is_present_gpte(pdptr)) {
+                       if (!(pdptr & PT_PRESENT_MASK)) {
                                vcpu->arch.mmu.pae_root[i] = 0;
                                continue;
                        }
@@ -3914,9 +3921,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
                                 *   clearer.
                                 */
                                smap = cr4_smap && u && !uf && !ff;
-                       } else
-                               /* Not really needed: no U/S accesses on ept  */
-                               u = 1;
+                       }
 
                        fault = (ff && !x) || (uf && !u) || (wf && !w) ||
                                (smapf && smap);
index 66b33b96a31b47138c9fef94c20e034d07678cb7..ddc56e91f2e491ff26631ccc432b2cf15cec05a0 100644 (file)
@@ -93,11 +93,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
        return kvm_mmu_load(vcpu);
 }
 
-static inline int is_present_gpte(unsigned long pte)
-{
-       return pte & PT_PRESENT_MASK;
-}
-
 /*
  * Currently, we have two sorts of write-protection, a) the first one
  * write-protects guest page to sync the guest modification, b) another one is
index bc019f70e0b6bb374d851bdb3be32ea09e49768b..a01105485315ab56faffb79489f2eb55ba117c29 100644 (file)
@@ -131,7 +131,7 @@ static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
 static inline int FNAME(is_present_gpte)(unsigned long pte)
 {
 #if PTTYPE != PTTYPE_EPT
-       return is_present_gpte(pte);
+       return pte & PT_PRESENT_MASK;
 #else
        return pte & 7;
 #endif
@@ -181,13 +181,19 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
        return true;
 }
 
+/*
+ * For PTTYPE_EPT, a page table can be executable but not readable
+ * on supported processors. Therefore, set_spte does not automatically
+ * set bit 0 if execute-only is supported. Here, we repurpose ACC_USER_MASK
+ * to signify readability, since it isn't otherwise used in the EPT case.
+ */
 static inline unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, u64 gpte)
 {
        unsigned access;
 #if PTTYPE == PTTYPE_EPT
        access = ((gpte & VMX_EPT_WRITABLE_MASK) ? ACC_WRITE_MASK : 0) |
                ((gpte & VMX_EPT_EXECUTABLE_MASK) ? ACC_EXEC_MASK : 0) |
-               ACC_USER_MASK;
+               ((gpte & VMX_EPT_READABLE_MASK) ? ACC_USER_MASK : 0);
 #else
        BUILD_BUG_ON(ACC_EXEC_MASK != PT_PRESENT_MASK);
        BUILD_BUG_ON(ACC_EXEC_MASK != 1);
index 5ff2927781100f08150553bc051f77c6eaaa7aaa..5bfdbbf1ce793df870326dc7eaa609ec17a6a2f6 100644 (file)
@@ -4935,6 +4935,12 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
 static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
 {
        local_irq_enable();
+       /*
+        * We must execute an instruction with interrupts enabled, so that
+        * a pending timer interrupt isn't delayed by the interrupt shadow.
+        */
+       asm("nop");
+       local_irq_disable();
 }
 
 static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
index e185649fb8b72c2a1bbac60edd8eed0540cc98a2..b61cdadf8623d6ae807b7473d51213eb1eca50d3 100644 (file)
@@ -428,7 +428,6 @@ struct nested_vmx {
        struct pi_desc *pi_desc;
        bool pi_pending;
        u16 posted_intr_nv;
-       u64 msr_ia32_feature_control;
 
        struct hrtimer preemption_timer;
        bool preemption_timer_expired;
@@ -612,6 +611,14 @@ struct vcpu_vmx {
        bool guest_pkru_valid;
        u32 guest_pkru;
        u32 host_pkru;
+
+       /*
+        * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
+        * msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
+        * in msr_ia32_feature_control_valid_bits.
+        */
+       u64 msr_ia32_feature_control;
+       u64 msr_ia32_feature_control_valid_bits;
 };
 
 enum segment_cache_field {
@@ -1105,7 +1112,7 @@ static inline bool cpu_has_broken_vmx_preemption_timer(void)
 
        /* Clear the reserved bits */
        eax &= ~(0x3U << 14 | 0xfU << 28);
-       for (i = 0; i < sizeof(vmx_preemption_cpu_tfms)/sizeof(u32); i++)
+       for (i = 0; i < ARRAY_SIZE(vmx_preemption_cpu_tfms); i++)
                if (eax == vmx_preemption_cpu_tfms[i])
                        return true;
 
@@ -1114,9 +1121,6 @@ static inline bool cpu_has_broken_vmx_preemption_timer(void)
 
 static inline bool cpu_has_vmx_preemption_timer(void)
 {
-       if (cpu_has_broken_vmx_preemption_timer())
-               return false;
-
        return vmcs_config.pin_based_exec_ctrl &
                PIN_BASED_VMX_PREEMPTION_TIMER;
 }
@@ -1668,6 +1672,11 @@ static __always_inline void vmcs_set_bits(unsigned long field, u32 mask)
        __vmcs_writel(field, __vmcs_readl(field) | mask);
 }
 
+static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
+{
+       vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
+}
+
 static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
 {
        vmcs_write32(VM_ENTRY_CONTROLS, val);
@@ -1696,6 +1705,11 @@ static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
        vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
 }
 
+static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
+{
+       vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
+}
+
 static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
 {
        vmcs_write32(VM_EXIT_CONTROLS, val);
@@ -2780,6 +2794,9 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
                         VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
                         VMX_EPT_INVEPT_BIT;
+               if (cpu_has_vmx_ept_execute_only())
+                       vmx->nested.nested_vmx_ept_caps |=
+                               VMX_EPT_EXECUTE_ONLY_BIT;
                vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
                /*
                 * For nested guests, we don't do anything specific
@@ -2928,6 +2945,14 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
        return 0;
 }
 
+static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
+                                                uint64_t val)
+{
+       uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
+
+       return !(val & ~valid_bits);
+}
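An example of the check with hypothetical values, assuming nested VMX is exposed so VMXON_ENABLED_OUTSIDE_SMX is in the valid set (see vmx_cpuid_update below):

    /* valid_bits = LOCKED | VMXON_ENABLED_OUTSIDE_SMX
     * val = LOCKED | VMXON_ENABLED_OUTSIDE_SMX -> accepted
     * val = LOCKED | LMCE                      -> rejected (bit not valid)
     */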
+
 /*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
@@ -2969,10 +2994,15 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        return 1;
                msr_info->data = vmcs_read64(GUEST_BNDCFGS);
                break;
-       case MSR_IA32_FEATURE_CONTROL:
-               if (!nested_vmx_allowed(vcpu))
+       case MSR_IA32_MCG_EXT_CTL:
+               if (!msr_info->host_initiated &&
+                   !(to_vmx(vcpu)->msr_ia32_feature_control &
+                     FEATURE_CONTROL_LMCE))
                        return 1;
-               msr_info->data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+               msr_info->data = vcpu->arch.mcg_ext_ctl;
+               break;
+       case MSR_IA32_FEATURE_CONTROL:
+               msr_info->data = to_vmx(vcpu)->msr_ia32_feature_control;
                break;
        case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
                if (!nested_vmx_allowed(vcpu))
@@ -3062,12 +3092,20 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        case MSR_IA32_TSC_ADJUST:
                ret = kvm_set_msr_common(vcpu, msr_info);
                break;
+       case MSR_IA32_MCG_EXT_CTL:
+               if ((!msr_info->host_initiated &&
+                    !(to_vmx(vcpu)->msr_ia32_feature_control &
+                      FEATURE_CONTROL_LMCE)) ||
+                   (data & ~MCG_EXT_CTL_LMCE_EN))
+                       return 1;
+               vcpu->arch.mcg_ext_ctl = data;
+               break;
        case MSR_IA32_FEATURE_CONTROL:
-               if (!nested_vmx_allowed(vcpu) ||
-                   (to_vmx(vcpu)->nested.msr_ia32_feature_control &
+               if (!vmx_feature_control_msr_valid(vcpu, data) ||
+                   (to_vmx(vcpu)->msr_ia32_feature_control &
                     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
                        return 1;
-               vmx->nested.msr_ia32_feature_control = data;
+               vmx->msr_ia32_feature_control = data;
                if (msr_info->host_initiated && data == 0)
                        vmx_leave_nested(vcpu);
                break;
@@ -3362,12 +3400,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                      vmx_capability.ept, vmx_capability.vpid);
        }
 
-       min = VM_EXIT_SAVE_DEBUG_CONTROLS;
+       min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
 #ifdef CONFIG_X86_64
        min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
 #endif
        opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT |
-               VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS;
+               VM_EXIT_CLEAR_BNDCFGS;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
                                &_vmexit_control) < 0)
                return -EIO;
@@ -3379,9 +3417,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                                &_pin_based_exec_control) < 0)
                return -EIO;
 
+       if (cpu_has_broken_vmx_preemption_timer())
+               _pin_based_exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
        if (!(_cpu_based_2nd_exec_control &
-               SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) ||
-               !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT))
+               SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
                _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
 
        min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
@@ -6081,12 +6120,14 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
        trace_kvm_page_fault(gpa, exit_qualification);
 
-       /* It is a write fault? */
-       error_code = exit_qualification & PFERR_WRITE_MASK;
+       /* Is it a read fault? */
+       error_code = (exit_qualification << 2) & PFERR_USER_MASK;
+       /* Is it a write fault? */
+       error_code |= exit_qualification & PFERR_WRITE_MASK;
        /* Is it a fetch fault? */
        error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
        /* Is the EPT page-table entry present? */
-       error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
+       error_code |= (exit_qualification & 0x38) != 0;
 
        vcpu->arch.exit_qualification = exit_qualification;
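The bit arithmetic maps EPT exit-qualification bits onto page-fault error-code bits; spelled out:

    /* exit_qualification bit 0 (read)  << 2 -> PFERR_USER_MASK    (bit 2)
     * exit_qualification bit 1 (write)      -> PFERR_WRITE_MASK   (bit 1)
     * exit_qualification bit 2 (fetch) << 2 -> PFERR_FETCH_MASK   (bit 4)
     * bits 3-5 (entry was R/W/X-able) != 0  -> PFERR_PRESENT_MASK (bit 0)
     */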
 
@@ -6420,9 +6461,6 @@ static __init int hardware_setup(void)
        for (msr = 0x800; msr <= 0x8ff; msr++)
                vmx_disable_intercept_msr_read_x2apic(msr);
 
-       /* According SDM, in x2apic mode, the whole id reg is used.  But in
-        * KVM, it only use the highest eight bits. Need to intercept it */
-       vmx_enable_intercept_msr_read_x2apic(0x802);
        /* TMCCT */
        vmx_enable_intercept_msr_read_x2apic(0x839);
        /* TPR */
@@ -6433,10 +6471,12 @@ static __init int hardware_setup(void)
        vmx_disable_intercept_msr_write_x2apic(0x83f);
 
        if (enable_ept) {
-               kvm_mmu_set_mask_ptes(0ull,
+               kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
                        (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
                        (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-                       0ull, VMX_EPT_EXECUTABLE_MASK);
+                       0ull, VMX_EPT_EXECUTABLE_MASK,
+                       cpu_has_vmx_ept_execute_only() ?
+                                     0ull : VMX_EPT_READABLE_MASK);
                ept_set_mmio_spte_mask();
                kvm_enable_tdp();
        } else
@@ -6471,6 +6511,8 @@ static __init int hardware_setup(void)
 
        kvm_set_posted_intr_wakeup_handler(wakeup_handler);
 
+       kvm_mce_cap_supported |= MCG_LMCE_P;
+
        return alloc_kvm_area();
 
 out8:
@@ -6939,7 +6981,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
+       if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
                        != VMXON_NEEDED_FEATURES) {
                kvm_inject_gp(vcpu, 0);
                return 1;
@@ -8012,6 +8054,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
        case EXIT_REASON_PCOMMIT:
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
+       case EXIT_REASON_PREEMPTION_TIMER:
+               return false;
        default:
                return true;
        }
@@ -8545,7 +8589,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
                        "push %[sp]\n\t"
 #endif
                        "pushf\n\t"
-                       "orl $0x200, (%%" _ASM_SP ")\n\t"
                        __ASM_SIZE(push) " $%c[cs]\n\t"
                        "call *%[entry]\n\t"
                        :
@@ -8558,8 +8601,7 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
                        [ss]"i"(__KERNEL_DS),
                        [cs]"i"(__KERNEL_CS)
                        );
-       } else
-               local_irq_enable();
+       }
 }
 
 static bool vmx_has_high_real_mode_segbase(void)
@@ -9056,6 +9098,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
                        goto free_vmcs;
        }
 
+       vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+
        return &vmx->vcpu;
 
 free_vmcs:
@@ -9203,6 +9247,13 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
                        vmx->nested.nested_vmx_secondary_ctls_high &=
                                ~SECONDARY_EXEC_PCOMMIT;
        }
+
+       if (nested_vmx_allowed(vcpu))
+               to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+                       FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+       else
+               to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+                       ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
 }
 
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9759,9 +9810,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
        exec_control = vmcs12->pin_based_vm_exec_control;
-       exec_control |= vmcs_config.pin_based_exec_ctrl;
+
+       /* Preemption timer setting is only taken from vmcs01.  */
        exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+       exec_control |= vmcs_config.pin_based_exec_ctrl;
+       if (vmx->hv_deadline_tsc == -1)
+               exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
 
+       /* Posted interrupts setting is only taken from vmcs12.  */
        if (nested_cpu_has_posted_intr(vmcs12)) {
                /*
                 * Note that we use L0's vector here and in
@@ -10680,8 +10736,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
                                       vmcs12->vm_exit_intr_error_code,
                                       KVM_ISA_VMX);
 
-       vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
-       vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
+       vm_entry_controls_reset_shadow(vmx);
+       vm_exit_controls_reset_shadow(vmx);
        vmx_segment_cache_clear(vmx);
 
        /* if no vmcs02 cache requested, remove the one we used */
@@ -10690,8 +10746,14 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 
        load_vmcs12_host_state(vcpu, vmcs12);
 
-       /* Update TSC_OFFSET if TSC was changed while L2 ran */
+       /* Update any VMCS fields that might have changed while L2 ran */
        vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+       if (vmx->hv_deadline_tsc == -1)
+               vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+                               PIN_BASED_VMX_PREEMPTION_TIMER);
+       else
+               vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+                             PIN_BASED_VMX_PREEMPTION_TIMER);
 
        /* This is needed for same reason as it was needed in prepare_vmcs02 */
        vmx->host_rsp = 0;
@@ -10793,9 +10855,9 @@ static inline int u64_shl_div_u64(u64 a, unsigned int shift,
 static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
-       u64 tscl = rdtsc(), delta_tsc;
-
-       delta_tsc = guest_deadline_tsc - kvm_read_l1_tsc(vcpu, tscl);
+       u64 tscl = rdtsc();
+       u64 guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
+       u64 delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
 
        /* Convert to host delta tsc if tsc scaling is enabled */
        if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio &&
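
The max() above guards the unsigned subtraction: if the guest programmed a deadline that has already passed, the old code would underflow and arm the preemption timer absurdly far in the future, whereas the new code clamps delta_tsc to zero. A minimal illustration:

#include <stdint.h>
#include <stdio.h>

static uint64_t delta_tsc(uint64_t deadline, uint64_t now)
{
        uint64_t d = deadline > now ? deadline : now;  /* max(deadline, now) */
        return d - now;
}

int main(void)
{
        /* expired deadline: clamped to 0 instead of wrapping to ~2^64 */
        printf("%llu\n", (unsigned long long)delta_tsc(100, 250));
        /* pending deadline: plain difference */
        printf("%llu\n", (unsigned long long)delta_tsc(250, 100));
        return 0;
}
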
@@ -11042,7 +11104,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
                 * We will support full lowest-priority interrupt later.
                 */
 
-               kvm_set_msi_irq(e, &irq);
+               kvm_set_msi_irq(kvm, e, &irq);
                if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
                        /*
                         * Make sure the IRTE is in remapped mode if
@@ -11087,6 +11149,16 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
        return ret;
 }
 
+static void vmx_setup_mce(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->arch.mcg_cap & MCG_LMCE_P)
+               to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+                       FEATURE_CONTROL_LMCE;
+       else
+               to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+                       ~FEATURE_CONTROL_LMCE;
+}
+
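
vmx_setup_mce() only widens or narrows the set of IA32_FEATURE_CONTROL bits the guest may legally set; the MSR write path can then reject any value touching bits outside the mask. A minimal sketch of that predicate (the names and the bit positions in main() are illustrative, not taken from this patch):

#include <stdint.h>
#include <stdio.h>

/* illustrative check: a guest WRMSR is accepted only if it sets no bits
 * outside the valid-bits mask maintained by vmx_setup_mce() and
 * vmx_cpuid_update() */
static int feature_control_write_ok(uint64_t data, uint64_t valid_bits)
{
        return (data & ~valid_bits) == 0;
}

int main(void)
{
        uint64_t valid = (1ULL << 0) | (1ULL << 20);   /* LOCKED | LMCE, say */

        printf("%d\n", feature_control_write_ok((1ULL << 20) | 1, valid)); /* 1 */
        printf("%d\n", feature_control_write_ok(1ULL << 2, valid));        /* 0 */
        return 0;
}
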
 static struct kvm_x86_ops vmx_x86_ops = {
        .cpu_has_kvm_support = cpu_has_kvm_support,
        .disabled_by_bios = vmx_disabled_by_bios,
@@ -11216,6 +11288,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
        .set_hv_timer = vmx_set_hv_timer,
        .cancel_hv_timer = vmx_cancel_hv_timer,
 #endif
+
+       .setup_mce = vmx_setup_mce,
 };
 
 static int __init vmx_init(void)
index 299219630c9470e5e87c04553834b250b7d44755..a27b33033700aadd08637b6cfb63981858b48ee6 100644 (file)
@@ -70,7 +70,8 @@
 
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
-#define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)
+u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P;
+EXPORT_SYMBOL_GPL(kvm_mce_cap_supported);
 
 #define emul_to_vcpu(ctxt) \
        container_of(ctxt, struct kvm_vcpu, arch.emulate_ctxt)
@@ -89,6 +90,9 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
+#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
+                                    KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
 static void enter_smm(struct kvm_vcpu *vcpu);
@@ -539,7 +543,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
                goto out;
        }
        for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-               if (is_present_gpte(pdpte[i]) &&
+               if ((pdpte[i] & PT_PRESENT_MASK) &&
                    (pdpte[i] &
                     vcpu->arch.mmu.guest_rsvd_check.rsvd_bits_mask[0][2])) {
                        ret = 0;
@@ -984,6 +988,7 @@ static u32 emulated_msrs[] = {
        MSR_IA32_MISC_ENABLE,
        MSR_IA32_MCG_STATUS,
        MSR_IA32_MCG_CTL,
+       MSR_IA32_MCG_EXT_CTL,
        MSR_IA32_SMBASE,
 };
 
@@ -1163,7 +1168,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
        int version;
        int r;
        struct pvclock_wall_clock wc;
-       struct timespec boot;
+       struct timespec64 boot;
 
        if (!wall_clock)
                return;
@@ -1186,13 +1191,13 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
         * wall clock specified here.  guest system time equals host
         * system time for us, thus we must fill in host boot time here.
         */
-       getboottime(&boot);
+       getboottime64(&boot);
 
        if (kvm->arch.kvmclock_offset) {
-               struct timespec ts = ns_to_timespec(kvm->arch.kvmclock_offset);
-               boot = timespec_sub(boot, ts);
+               struct timespec64 ts = ns_to_timespec64(kvm->arch.kvmclock_offset);
+               boot = timespec64_sub(boot, ts);
        }
-       wc.sec = boot.tv_sec;
+       wc.sec = (u32)boot.tv_sec; /* overflow in 2106 guest time */
        wc.nsec = boot.tv_nsec;
        wc.version = version;
 
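
The switch to timespec64 keeps the host-side arithmetic y2038-safe; the remaining (u32) cast is why the comment flags 2106, since the pvclock wall-clock seconds field is only 32 bits wide. Illustration only:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* 2^32 seconds after the epoch falls in February 2106 */
        int64_t tv_sec = (1LL << 32) + 12345;   /* a timespec64 survives this */
        uint32_t wc_sec = (uint32_t)tv_sec;     /* ...but the pvclock field wraps */

        printf("%u\n", wc_sec);                 /* prints 12345 */
        return 0;
}
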
@@ -2623,6 +2628,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_TSC_CONTROL:
                r = kvm_has_tsc_control;
                break;
+       case KVM_CAP_X2APIC_API:
+               r = KVM_X2APIC_API_VALID_FLAGS;
+               break;
        default:
                r = 0;
                break;
@@ -2685,11 +2693,9 @@ long kvm_arch_dev_ioctl(struct file *filp,
                break;
        }
        case KVM_X86_GET_MCE_CAP_SUPPORTED: {
-               u64 mce_cap;
-
-               mce_cap = KVM_MCE_CAP_SUPPORTED;
                r = -EFAULT;
-               if (copy_to_user(argp, &mce_cap, sizeof mce_cap))
+               if (copy_to_user(argp, &kvm_mce_cap_supported,
+                                sizeof(kvm_mce_cap_supported)))
                        goto out;
                r = 0;
                break;
@@ -2779,15 +2785,17 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
        if (vcpu->arch.apicv_active)
                kvm_x86_ops->sync_pir_to_irr(vcpu);
 
-       memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
-
-       return 0;
+       return kvm_apic_get_state(vcpu, s);
 }
 
 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
                                    struct kvm_lapic_state *s)
 {
-       kvm_apic_post_state_restore(vcpu, s);
+       int r;
+
+       r = kvm_apic_set_state(vcpu, s);
+       if (r)
+               return r;
        update_cr8_intercept(vcpu);
 
        return 0;
@@ -2872,7 +2880,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
        r = -EINVAL;
        if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
                goto out;
-       if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
+       if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
                goto out;
        r = 0;
        vcpu->arch.mcg_cap = mcg_cap;
@@ -2882,6 +2890,9 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
        /* Init IA32_MCi_CTL to all 1s */
        for (bank = 0; bank < bank_num; bank++)
                vcpu->arch.mce_banks[bank*4] = ~(u64)0;
+
+       if (kvm_x86_ops->setup_mce)
+               kvm_x86_ops->setup_mce(vcpu);
 out:
        return r;
 }
@@ -3794,6 +3805,18 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                mutex_unlock(&kvm->lock);
                break;
        }
+       case KVM_CAP_X2APIC_API:
+               r = -EINVAL;
+               if (cap->args[0] & ~KVM_X2APIC_API_VALID_FLAGS)
+                       break;
+
+               if (cap->args[0] & KVM_X2APIC_API_USE_32BIT_IDS)
+                       kvm->arch.x2apic_format = true;
+               if (cap->args[0] & KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
+                       kvm->arch.x2apic_broadcast_quirk_disabled = true;
+
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
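
For completeness, a minimal userspace sketch (assuming a 4.8-era <linux/kvm.h>) of how a VMM might opt in to both x2APIC API flags on a VM file descriptor:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_x2apic_api(int vm_fd)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_X2APIC_API;
        cap.args[0] = KVM_X2APIC_API_USE_32BIT_IDS |
                      KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK;
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);  /* 0 on success */
}
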
@@ -5875,8 +5898,8 @@ int kvm_arch_init(void *opaque)
        kvm_x86_ops = ops;
 
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
-                       PT_DIRTY_MASK, PT64_NX_MASK, 0);
-
+                       PT_DIRTY_MASK, PT64_NX_MASK, 0,
+                       PT_PRESENT_MASK);
        kvm_timer_init();
 
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@ -6655,7 +6678,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        trace_kvm_entry(vcpu->vcpu_id);
        wait_lapic_expire(vcpu);
-       __kvm_guest_enter();
+       guest_enter_irqoff();
 
        if (unlikely(vcpu->arch.switch_db_regs)) {
                set_debugreg(0, 7);
@@ -6706,16 +6729,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
        ++vcpu->stat.exits;
 
-       /*
-        * We must have an instruction between local_irq_enable() and
-        * kvm_guest_exit(), so the timer interrupt isn't delayed by
-        * the interrupt shadow.  The stat.exits increment will do nicely.
-        * But we need to prevent reordering, hence this barrier():
-        */
-       barrier();
-
-       kvm_guest_exit();
+       guest_exit_irqoff();
 
+       local_irq_enable();
        preempt_enable();
 
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -7911,7 +7927,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kfree(kvm->arch.vpic);
        kfree(kvm->arch.vioapic);
        kvm_free_vcpus(kvm);
-       kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
+       kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
        kvm_mmu_uninit_vm(kvm);
 }
 
index d259274238db361a5b8d41116d03fc4e5642feda..ff4a32d24d56753788768f15ab0bd9ee102fd602 100644 (file)
@@ -84,7 +84,8 @@ static inline void context_tracking_init(void) { }
 
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static inline void guest_enter(void)
+/* must be called with irqs disabled */
+static inline void guest_enter_irqoff(void)
 {
        if (vtime_accounting_cpu_enabled())
                vtime_guest_enter(current);
@@ -93,9 +94,19 @@ static inline void guest_enter(void)
 
        if (context_tracking_is_enabled())
                __context_tracking_enter(CONTEXT_GUEST);
+
+       /* KVM does not hold any references to RCU-protected data when it
+        * switches the CPU into guest mode. In fact, switching to guest mode
+        * is very similar to exiting to userspace from RCU's point of view. In
+        * addition, the CPU may stay in guest mode for quite a long time (up
+        * to one time slice). Let's treat guest mode as a quiescent state,
+        * just like we do with user-mode execution.
+        */
+       if (!context_tracking_cpu_is_enabled())
+               rcu_virt_note_context_switch(smp_processor_id());
 }
 
-static inline void guest_exit(void)
+static inline void guest_exit_irqoff(void)
 {
        if (context_tracking_is_enabled())
                __context_tracking_exit(CONTEXT_GUEST);
@@ -107,7 +118,7 @@ static inline void guest_exit(void)
 }
 
 #else
-static inline void guest_enter(void)
+static inline void guest_enter_irqoff(void)
 {
        /*
         * This is running in ioctl context so its safe
@@ -116,9 +127,10 @@ static inline void guest_enter(void)
         */
        vtime_account_system(current);
        current->flags |= PF_VCPU;
+       rcu_virt_note_context_switch(smp_processor_id());
 }
 
-static inline void guest_exit(void)
+static inline void guest_exit_irqoff(void)
 {
        /* Flush the guest cputime we spent on the guest */
        vtime_account_system(current);
@@ -126,4 +138,22 @@ static inline void guest_exit(void)
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
 
+static inline void guest_enter(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       guest_enter_irqoff();
+       local_irq_restore(flags);
+}
+
+static inline void guest_exit(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       guest_exit_irqoff();
+       local_irq_restore(flags);
+}
+
 #endif
index 614a98137c5f73c09c4f4d6b8edfaf3191467c7f..aafd702f3e212f9f3bf8a9cbca752382f2639ce9 100644 (file)
@@ -877,45 +877,6 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
 }
 #endif
 
-/* must be called with irqs disabled */
-static inline void __kvm_guest_enter(void)
-{
-       guest_enter();
-       /* KVM does not hold any references to rcu protected data when it
-        * switches CPU into a guest mode. In fact switching to a guest mode
-        * is very similar to exiting to userspace from rcu point of view. In
-        * addition CPU may stay in a guest mode for quite a long time (up to
-        * one time slice). Lets treat guest mode as quiescent state, just like
-        * we do with user-mode execution.
-        */
-       if (!context_tracking_cpu_is_enabled())
-               rcu_virt_note_context_switch(smp_processor_id());
-}
-
-/* must be called with irqs disabled */
-static inline void __kvm_guest_exit(void)
-{
-       guest_exit();
-}
-
-static inline void kvm_guest_enter(void)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __kvm_guest_enter();
-       local_irq_restore(flags);
-}
-
-static inline void kvm_guest_exit(void)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       __kvm_guest_exit();
-       local_irq_restore(flags);
-}
-
 /*
  * search_memslots() and __gfn_to_memslot() are here because they are
  * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c.
@@ -1052,7 +1013,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
                        const struct kvm_irq_routing_entry *entries,
                        unsigned nr,
                        unsigned flags);
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+                         struct kvm_kernel_irq_routing_entry *e,
                          const struct kvm_irq_routing_entry *ue);
 void kvm_free_irq_routing(struct kvm *kvm);
 
index f28292d73ddba75eddf58c4e909967b77c041bdf..8ade3eb6c640e0725e2c1fe0d930e6309cd0801f 100644 (file)
@@ -151,8 +151,9 @@ TRACE_EVENT(kvm_msi_set_irq,
                __entry->data           = data;
        ),
 
-       TP_printk("dst %u vec %u (%s|%s|%s%s)",
-                 (u8)(__entry->address >> 12), (u8)__entry->data,
+       TP_printk("dst %llx vec %u (%s|%s|%s%s)",
+                 (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00),
+                 (u8)__entry->data,
                  __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode),
                  (__entry->address & (1<<2)) ? "logical" : "physical",
                  (__entry->data & (1<<15)) ? "level" : "edge",
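
The widened trace format reassembles a 32-bit destination from the two MSI address words: bits 7-0 still come from address_lo[19:12], while address_hi[31:8] (carried in the upper half of __entry->address) supplies bits 31-8, matching the KVM_X2APIC_API_USE_32BIT_IDS layout documented for struct kvm_irq_routing_msi. A standalone sketch:

#include <stdint.h>
#include <stdio.h>

static uint32_t msi_dest_id(uint64_t address)   /* address_hi:address_lo */
{
        return (uint8_t)(address >> 12) |
               (uint32_t)((address >> 32) & 0xffffff00);
}

int main(void)
{
        uint64_t addr = ((uint64_t)0x00012300 << 32) | 0xfee45000;

        printf("%#x\n", msi_dest_id(addr));  /* prints 0x12345 */
        return 0;
}
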
index d8c4c324cfae533632479324f1656eb2e054f690..8f2756c263d47529110d27e89dc96fcdaa98d1ec 100644 (file)
@@ -866,7 +866,9 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_PMU_V3 126
 #define KVM_CAP_VCPU_ATTRIBUTES 127
 #define KVM_CAP_MAX_VCPU_ID 128
-#define KVM_CAP_MSI_DEVID 129
+#define KVM_CAP_X2APIC_API 129
+#define KVM_CAP_S390_USER_INSTR0 130
+#define KVM_CAP_MSI_DEVID 131
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1318,4 +1320,7 @@ struct kvm_assigned_msix_entry {
        __u16 padding[3];
 };
 
+#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
+
 #endif /* __LINUX_KVM_H */
index c057784c844456f237adc9065bb4c9a3c230fdc6..e3ac22f90fa4bee95da1b62b6f6c5a66e18a35ab 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -720,6 +720,7 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(fixup_user_fault);
 
 static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
                                                struct mm_struct *mm,
index 8db197bb6c7a92d6508c66cd6a4cf3b9afc84562..df99e9c3b64d314dfee121b6bf6a753be10aeb9b 100644 (file)
@@ -135,7 +135,8 @@ void kvm_free_irq_routing(struct kvm *kvm)
        free_irq_routing_table(rt);
 }
 
-static int setup_routing_entry(struct kvm_irq_routing_table *rt,
+static int setup_routing_entry(struct kvm *kvm,
+                              struct kvm_irq_routing_table *rt,
                               struct kvm_kernel_irq_routing_entry *e,
                               const struct kvm_irq_routing_entry *ue)
 {
@@ -154,7 +155,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 
        e->gsi = ue->gsi;
        e->type = ue->type;
-       r = kvm_set_routing_entry(e, ue);
+       r = kvm_set_routing_entry(kvm, e, ue);
        if (r)
                goto out;
        if (e->type == KVM_IRQ_ROUTING_IRQCHIP)
@@ -211,7 +212,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
                        kfree(e);
                        goto out;
                }
-               r = setup_routing_entry(new, e, ue);
+               r = setup_routing_entry(kvm, new, e, ue);
                if (r) {
                        kfree(e);
                        goto out;
index bd2eb92c5d0eb3800c9fb5f7aa437d4a89a70672..61b31a5f76c856f2d7e7f09c6b68ff5ac2fea526 100644 (file)
@@ -1442,6 +1442,52 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
        return true;
 }
 
+static int hva_to_pfn_remapped(struct vm_area_struct *vma,
+                              unsigned long addr, bool *async,
+                              bool write_fault, kvm_pfn_t *p_pfn)
+{
+       unsigned long pfn;
+       int r;
+
+       r = follow_pfn(vma, addr, &pfn);
+       if (r) {
+               /*
+                * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
+                * not call the fault handler, so do it here.
+                */
+               bool unlocked = false;
+               r = fixup_user_fault(current, current->mm, addr,
+                                    (write_fault ? FAULT_FLAG_WRITE : 0),
+                                    &unlocked);
+               if (unlocked)
+                       return -EAGAIN;
+               if (r)
+                       return r;
+
+               r = follow_pfn(vma, addr, &pfn);
+               if (r)
+                       return r;
+
+       }
+
+       /*
+        * Get a reference here because callers of *hva_to_pfn* and
+        * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
+        * returned pfn.  This is only needed if the VMA has VM_MIXEDMAP
+        * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+        * simply do nothing for reserved pfns.
+        *
+        * Whoever called remap_pfn_range is also going to call e.g.
+        * unmap_mapping_range before the underlying pages are freed,
+        * causing a call to our MMU notifier.
+        */
+       kvm_get_pfn(pfn);
+
+       *p_pfn = pfn;
+       return 0;
+}
+
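
The -EAGAIN return encodes a retry contract: fixup_user_fault() reporting "unlocked" means mmap_sem was dropped, so the vma the caller looked up may be stale and the whole lookup must be redone (the retry: label in hva_to_pfn below). A toy model of that loop, with a hypothetical stand-in for the translation step:

#include <errno.h>
#include <stdio.h>

/* hypothetical stand-in: fails with -EAGAIN once, as if the first
 * attempt had to drop mmap_sem to fault the page in */
static int lookup_and_translate(int *attempts)
{
        return (*attempts)++ == 0 ? -EAGAIN : 0;
}

int main(void)
{
        int attempts = 0, r;

        do {
                r = lookup_and_translate(&attempts);  /* redo vma lookup each time */
        } while (r == -EAGAIN);
        printf("resolved after %d attempt(s)\n", attempts);
        return r;
}
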
 /*
  * Pin guest page in memory and return its pfn.
  * @addr: host virtual address which maps memory to the guest
@@ -1461,7 +1507,7 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 {
        struct vm_area_struct *vma;
        kvm_pfn_t pfn = 0;
-       int npages;
+       int npages, r;
 
        /* we can do it either atomically or asynchronously, not both */
        BUG_ON(atomic && async);
@@ -1483,14 +1529,17 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
                goto exit;
        }
 
+retry:
        vma = find_vma_intersection(current->mm, addr, addr + 1);
 
        if (vma == NULL)
                pfn = KVM_PFN_ERR_FAULT;
-       else if ((vma->vm_flags & VM_PFNMAP)) {
-               pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
-                       vma->vm_pgoff;
-               BUG_ON(!kvm_is_reserved_pfn(pfn));
+       else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
+               r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
+               if (r == -EAGAIN)
+                       goto retry;
+               if (r < 0)
+                       pfn = KVM_PFN_ERR_FAULT;
        } else {
                if (async && vma_is_valid(vma, write_fault))
                        *async = true;