Merge branch 'x86/cpufeature' of git://git.kernel.org/pub/scm/linux/kernel/git/tip...
author    Radim Krčmář <rkrcmar@redhat.com>
Tue, 17 Jan 2017 16:53:01 +0000 (17:53 +0100)
committer Radim Krčmář <rkrcmar@redhat.com>
Tue, 17 Jan 2017 16:53:01 +0000 (17:53 +0100)
For AVX512_VPOPCNTDQ.
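
The branch is merged so that the AVX512_VPOPCNTDQ cpufeature definition becomes available to the KVM tree. As a hedged aside, not part of this commit: the CPU reports the feature in CPUID.(EAX=07H, ECX=0):ECX bit 14, so a minimal user-space probe could look like the sketch below; the helper name and the max-leaf check are ours, not the kernel's.

/*
 * Illustrative sketch only, not from this merge: probe for
 * AVX512_VPOPCNTDQ from user space via GCC's <cpuid.h>.
 */
#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

static bool has_avx512_vpopcntdq(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Make sure leaf 7 exists before querying it. */
        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx) || eax < 7)
                return false;

        /* Leaf 7, subleaf 0: structured extended feature flags. */
        __cpuid_count(7, 0, eax, ebx, ecx, edx);
        return ecx & (1u << 14);        /* ECX bit 14 = AVX512_VPOPCNTDQ */
}

int main(void)
{
        printf("AVX512_VPOPCNTDQ: %s\n", has_avx512_vpopcntdq() ? "yes" : "no");
        return 0;
}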

arch/x86/kvm/emulate.c
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/x86.c

diff --combined arch/x86/kvm/emulate.c
index 2b8349a2b14be0a558710089cb5d688a42ac1465,cedbba0f3402d2343ce069fe3ed6a07f44f68907..45c7306c8780b23494f22d8a775f1486ea29eafc
  #define NearBranch  ((u64)1 << 52)  /* Near branches */
  #define No16      ((u64)1 << 53)  /* No 16 bit operand */
  #define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
 +#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */
  
  #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
  
@@@ -819,6 -818,20 +819,20 @@@ static int segmented_read_std(struct x8
        return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
  }
  
+ static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
+                              struct segmented_address addr,
+                              void *data,
+                              unsigned int size)
+ {
+       int rc;
+       ulong linear;
+       rc = linearize(ctxt, addr, size, true, &linear);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+ }
+ 
  /*
   * Prefetch the remaining bytes of the instruction without crossing page
   * boundary if they are not in fetch_cache yet.
@@@ -1572,7 -1585,6 +1586,6 @@@ static int write_segment_descriptor(str
                                    &ctxt->exception);
  }
  
- /* Does not support long mode */
  static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, int seg, u8 cpl,
                                     enum x86_transfer_type transfer,
  
        rpl = selector & 3;
  
-       /* NULL selector is not valid for TR, CS and SS (except for long mode) */
-       if ((seg == VCPU_SREG_CS
-            || (seg == VCPU_SREG_SS
-                && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
-            || seg == VCPU_SREG_TR)
-           && null_selector)
-               goto exception;
        /* TR should be in GDT only */
        if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
                goto exception;
  
-       if (null_selector) /* for NULL selector skip all following checks */
+       /* NULL selector is not valid for TR, CS and (except for long mode) SS */
+       if (null_selector) {
+               if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
+                       goto exception;
+               if (seg == VCPU_SREG_SS) {
+                       if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
+                               goto exception;
+                       /*
+                        * ctxt->ops->set_segment expects the CPL to be in
+                        * SS.DPL, so fake an expand-up 32-bit data segment.
+                        */
+                       seg_desc.type = 3;
+                       seg_desc.p = 1;
+                       seg_desc.s = 1;
+                       seg_desc.dpl = cpl;
+                       seg_desc.d = 1;
+                       seg_desc.g = 1;
+               }
+               /* Skip all following checks */
                goto load;
+       }
  
        ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
        if (ret != X86EMUL_CONTINUE)
@@@ -1738,6 -1764,21 +1765,21 @@@ static int load_segment_descriptor(stru
                                   u16 selector, int seg)
  {
        u8 cpl = ctxt->ops->cpl(ctxt);
+       /*
+        * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
+        * they can load it at CPL<3 (Intel's manual says only LSS can,
+        * but it's wrong).
+        *
+        * However, the Intel manual says that putting IST=1/DPL=3 in
+        * an interrupt gate will result in SS=3 (the AMD manual instead
+        * says it doesn't), so allow SS=3 in __load_segment_descriptor
+        * and only forbid it here.
+        */
+       if (seg == VCPU_SREG_SS && selector == 3 &&
+           ctxt->mode == X86EMUL_MODE_PROT64)
+               return emulate_exception(ctxt, GP_VECTOR, 0, true);
        return __load_segment_descriptor(ctxt, selector, seg, cpl,
                                         X86_TRANSFER_NONE, NULL);
  }
@@@ -3686,8 -3727,8 +3728,8 @@@ static int emulate_store_desc_ptr(struc
        }
        /* Disable writeback. */
        ctxt->dst.type = OP_NONE;
-       return segmented_write(ctxt, ctxt->dst.addr.mem,
-                              &desc_ptr, 2 + ctxt->op_bytes);
+       return segmented_write_std(ctxt, ctxt->dst.addr.mem,
+                                  &desc_ptr, 2 + ctxt->op_bytes);
  }
  
  static int em_sgdt(struct x86_emulate_ctxt *ctxt)
@@@ -3933,7 -3974,7 +3975,7 @@@ static int em_fxsave(struct x86_emulate
        else
                size = offsetof(struct fxregs_state, xmm_space[0]);
  
-       return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+       return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
  }
  
  static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
@@@ -3975,7 -4016,7 +4017,7 @@@ static int em_fxrstor(struct x86_emulat
        if (rc != X86EMUL_CONTINUE)
                return rc;
  
-       rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
+       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
        if (rc != X86EMUL_CONTINUE)
                return rc;
  
@@@ -4257,7 -4298,7 +4299,7 @@@ static const struct opcode group1[] = 
  };
  
  static const struct opcode group1A[] = {
 -      I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
 +      I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
  };
  
  static const struct opcode group2[] = {
@@@ -4295,7 -4336,7 +4337,7 @@@ static const struct opcode group5[] = 
        I(SrcMemFAddr | ImplicitOps,            em_call_far),
        I(SrcMem | NearBranch,                  em_jmp_abs),
        I(SrcMemFAddr | ImplicitOps,            em_jmp_far),
 -      I(SrcMem | Stack,                       em_push), D(Undefined),
 +      I(SrcMem | Stack | TwoMemOp,            em_push), D(Undefined),
  };
  
  static const struct opcode group6[] = {
@@@ -4515,8 -4556,8 +4557,8 @@@ static const struct opcode opcode_table
        /* 0xA0 - 0xA7 */
        I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
        I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
 -      I2bv(SrcSI | DstDI | Mov | String, em_mov),
 -      F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
 +      I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
 +      F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
        /* 0xA8 - 0xAF */
        F2bv(DstAcc | SrcImm | NoWrite, em_test),
        I2bv(SrcAcc | DstDI | Mov | String, em_mov),
@@@ -5630,14 -5671,3 +5672,14 @@@ void emulator_writeback_register_cache(
  {
        writeback_registers(ctxt);
  }
 +
 +bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
 +{
 +      if (ctxt->rep_prefix && (ctxt->d & String))
 +              return false;
 +
 +      if (ctxt->d & TwoMemOp)
 +              return false;
 +
 +      return true;
 +}
diff --combined arch/x86/kvm/lapic.c
index 10a745faa659d11e8a2eba1e3c51c75a13146abf,2f6ef5121a4ca36ac73639e5ffc6aa86624067d2..33b799fd3a6ed25d7eb4ba812dffd126d03b55b9
@@@ -115,16 -115,6 +115,16 @@@ static inline int apic_enabled(struct k
        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
  
 +static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
 +{
 +      return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 +}
 +
 +static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 +{
 +      return apic->vcpu->vcpu_id;
 +}
 +
  static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
        switch (map->mode) {
@@@ -169,13 -159,13 +169,13 @@@ static void recalculate_apic_map(struc
        struct kvm_apic_map *new, *old = NULL;
        struct kvm_vcpu *vcpu;
        int i;
 -      u32 max_id = 255;
 +      u32 max_id = 255; /* enough space for any xAPIC ID */
  
        mutex_lock(&kvm->arch.apic_map_lock);
  
        kvm_for_each_vcpu(i, vcpu, kvm)
                if (kvm_apic_present(vcpu))
 -                      max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
 +                      max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
  
        new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
                struct kvm_lapic *apic = vcpu->arch.apic;
                struct kvm_lapic **cluster;
                u16 mask;
 -              u32 ldr, aid;
 +              u32 ldr;
 +              u8 xapic_id;
 +              u32 x2apic_id;
  
                if (!kvm_apic_present(vcpu))
                        continue;
  
 -              aid = kvm_apic_id(apic);
 -              ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 +              xapic_id = kvm_xapic_id(apic);
 +              x2apic_id = kvm_x2apic_id(apic);
  
 -              if (aid <= new->max_apic_id)
 -                      new->phys_map[aid] = apic;
 +              /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
 +              if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
 +                              x2apic_id <= new->max_apic_id)
 +                      new->phys_map[x2apic_id] = apic;
 +              /*
 +               * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap around,
 +               * so prevent them from masking VCPUs with APIC ID <= 0xff.
 +               */
 +              if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
 +                      new->phys_map[xapic_id] = apic;
 +
 +              ldr = kvm_lapic_get_reg(apic, APIC_LDR);
  
                if (apic_x2apic_mode(apic)) {
                        new->mode |= KVM_APIC_MODE_X2APIC;
@@@ -272,8 -250,6 +272,8 @@@ static inline void kvm_apic_set_x2apic_
  {
        u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
  
 +      WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 +
        kvm_lapic_set_reg(apic, APIC_ID, id);
        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
        recalculate_apic_map(apic->vcpu->kvm);
@@@ -570,15 -546,7 +570,15 @@@ static void pv_eoi_clr_pending(struct k
        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
  }
  
 -static void apic_update_ppr(struct kvm_lapic *apic)
 +static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 +{
 +      int highest_irr = apic_find_highest_irr(apic);
 +      if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 +              return -1;
 +      return highest_irr;
 +}
 +
 +static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
  {
        u32 tpr, isrv, ppr, old_ppr;
        int isr;
        apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
                   apic, ppr, isr, isrv);
  
 -      if (old_ppr != ppr) {
 +      *new_ppr = ppr;
 +      if (old_ppr != ppr)
                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 -              if (ppr < old_ppr)
 -                      kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 -      }
 +
 +      return ppr < old_ppr;
 +}
 +
 +static void apic_update_ppr(struct kvm_lapic *apic)
 +{
 +      u32 ppr;
 +
 +      if (__apic_update_ppr(apic, &ppr) &&
 +          apic_has_interrupt_for_ppr(apic, ppr) != -1)
 +              kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 +}
 +
 +void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
 +{
 +      apic_update_ppr(vcpu->arch.apic);
  }
 +EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
  
  static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
  {
  
  static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
  {
 -      if (apic_x2apic_mode(apic))
 -              return mda == X2APIC_BROADCAST;
 -
 -      return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
 +      return mda == (apic_x2apic_mode(apic) ?
 +                      X2APIC_BROADCAST : APIC_BROADCAST);
  }
  
  static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
                return true;
  
        if (apic_x2apic_mode(apic))
 -              return mda == kvm_apic_id(apic);
 +              return mda == kvm_x2apic_id(apic);
 +
 +      /*
 +       * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
 +       * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
 +       * this allows unique addressing of VCPUs with APIC ID over 0xff.
 +       * The 0xff condition is needed because the xAPIC ID is writeable.
 +       */
 +      if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
 +              return true;
  
 -      return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
 +      return mda == kvm_xapic_id(apic);
  }
  
  static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
                       && (logical_id & mda & 0xffff) != 0;
  
        logical_id = GET_APIC_LOGICAL_ID(logical_id);
 -      mda = GET_APIC_DEST_FIELD(mda);
  
        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
        case APIC_DFR_FLAT:
  
  /* The KVM local APIC implementation has two quirks:
   *
 - *  - the xAPIC MDA stores the destination at bits 24-31, while this
 - *    is not true of struct kvm_lapic_irq's dest_id field.  This is
 - *    just a quirk in the API and is not problematic.
 + *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 + *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 + *    KVM doesn't do that aliasing.
   *
   *  - in-kernel IOAPIC messages have to be delivered directly to
   *    x2APIC, because the kernel does not support interrupt remapping.
@@@ -698,12 -645,13 +698,12 @@@ static u32 kvm_apic_mda(struct kvm_vcp
                struct kvm_lapic *source, struct kvm_lapic *target)
  {
        bool ipi = source != NULL;
 -      bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
  
        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 -          !ipi && dest_id == APIC_BROADCAST && x2apic_mda)
 +          !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
                return X2APIC_BROADCAST;
  
 -      return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
 +      return dest_id;
  }
  
  bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
@@@ -1959,9 -1907,9 +1959,9 @@@ void kvm_lapic_reset(struct kvm_vcpu *v
        vcpu->arch.apic_arb_prio = 0;
        vcpu->arch.apic_attention = 0;
  
 -      apic_debug("%s: vcpu=%p, id=%d, base_msr="
 +      apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
                   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
 -                 vcpu, kvm_apic_id(apic),
 +                 vcpu, kvm_lapic_get_reg(apic, APIC_ID),
                   vcpu->arch.apic_base, apic->base_address);
  }
  
@@@ -2073,13 -2021,17 +2073,13 @@@ nomem
  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
  {
        struct kvm_lapic *apic = vcpu->arch.apic;
 -      int highest_irr;
 +      u32 ppr;
  
        if (!apic_enabled(apic))
                return -1;
  
 -      apic_update_ppr(apic);
 -      highest_irr = apic_find_highest_irr(apic);
 -      if ((highest_irr == -1) ||
 -          ((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI)))
 -              return -1;
 -      return highest_irr;
 +      __apic_update_ppr(apic, &ppr);
 +      return apic_has_interrupt_for_ppr(apic, ppr);
  }
  
  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
@@@ -2115,7 -2067,6 +2115,7 @@@ int kvm_get_apic_interrupt(struct kvm_v
  {
        int vector = kvm_apic_has_interrupt(vcpu);
        struct kvm_lapic *apic = vcpu->arch.apic;
 +      u32 ppr;
  
        if (vector == -1)
                return -1;
         * because the processor would deliver it through the IDT.
         */
  
 -      apic_set_isr(vector, apic);
 -      apic_update_ppr(apic);
        apic_clear_irr(vector, apic);
 -
        if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
 -              apic_clear_isr(vector, apic);
 +              /*
 +               * For auto-EOI interrupts, there might be another pending
 +               * interrupt above PPR, so check whether to raise another
 +               * KVM_REQ_EVENT.
 +               */
                apic_update_ppr(apic);
 +      } else {
 +              /*
 +               * For normal interrupts, PPR has been raised and there cannot
 +               * be a higher-priority pending interrupt---except if there was
 +               * a concurrent interrupt injection, but that would have
 +               * triggered KVM_REQ_EVENT already.
 +               */
 +              apic_set_isr(vector, apic);
 +              __apic_update_ppr(apic, &ppr);
        }
  
        return vector;
@@@ -2485,3 -2426,9 +2485,9 @@@ void kvm_lapic_init(void
        jump_label_rate_limit(&apic_hw_disabled, HZ);
        jump_label_rate_limit(&apic_sw_disabled, HZ);
  }
+ void kvm_lapic_exit(void)
+ {
+       static_key_deferred_flush(&apic_hw_disabled);
+       static_key_deferred_flush(&apic_sw_disabled);
+ }
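
A brief worked illustration of the priority gating that the helpers above factor out (the snippet only models the arithmetic of __apic_update_ppr() and apic_has_interrupt_for_ppr(); it is ours and not part of the commit): with TPR = 0x30 and highest in-service vector 0x51, the PPR becomes 0x50, so a pending vector 0x43 is held back while vector 0x61 would be returned for delivery.

/* Standalone model of the LAPIC PPR gating, for illustration only. */
#include <stdio.h>

static int has_interrupt_for_ppr(int highest_irr, unsigned int ppr)
{
        /* Same shape as apic_has_interrupt_for_ppr(): compare priority classes. */
        if (highest_irr == -1 || ((unsigned int)highest_irr & 0xF0) <= ppr)
                return -1;
        return highest_irr;
}

int main(void)
{
        unsigned int tpr = 0x30, isrv = 0x51, ppr;

        /* PPR is the TPR, unless a higher in-service priority class wins. */
        ppr = ((tpr & 0xf0) >= (isrv & 0xf0)) ? tpr : (isrv & 0xf0);

        printf("ppr=0x%02x, irr 0x43 -> %d\n", ppr, has_interrupt_for_ppr(0x43, ppr));
        printf("ppr=0x%02x, irr 0x61 -> %d\n", ppr, has_interrupt_for_ppr(0x61, ppr));
        return 0;
}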
diff --combined arch/x86/kvm/lapic.h
index 5b5b1ba644cb81f18355f613dac19c1bbacda136,ff8039d616723fd79c493c0a1369658e440224b2..05abd837b78a385f37c758f3e2ca6008f91de473
@@@ -73,7 -73,6 +73,7 @@@ bool kvm_apic_match_dest(struct kvm_vcp
  
  void __kvm_apic_update_irr(u32 *pir, void *regs);
  void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 +void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
                     struct dest_map *dest_map);
  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@@ -111,6 -110,7 +111,7 @@@ static inline bool kvm_hv_vapic_assist_
  
  int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
  void kvm_lapic_init(void);
+ void kvm_lapic_exit(void);
  
  #define VEC_POS(v) ((v) & (32 - 1))
  #define REG_POS(v) (((v) >> 5) << 4)
@@@ -203,6 -203,17 +204,6 @@@ static inline int kvm_lapic_latched_ini
        return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
  }
  
 -static inline u32 kvm_apic_id(struct kvm_lapic *apic)
 -{
 -      /* To avoid a race between apic_base and following APIC_ID update when
 -       * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
 -       */
 -      if (apic_x2apic_mode(apic))
 -              return apic->vcpu->vcpu_id;
 -
 -      return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 -}
 -
  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
  
  void wait_lapic_expire(struct kvm_vcpu *vcpu);
diff --combined arch/x86/kvm/x86.c
index b02af6285887f06af51d13480b1d9a931c8871de,57d8a856cdc5ce938efb73fd8fae144867abde76..6e2c71ea0627e5893fc59bd1cfca30d8b5ae5a1c
@@@ -180,7 -180,6 +180,7 @@@ struct kvm_stats_debugfs_item debugfs_e
        { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
        { "irq_injections", VCPU_STAT(irq_injections) },
        { "nmi_injections", VCPU_STAT(nmi_injections) },
 +      { "req_event", VCPU_STAT(req_event) },
        { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
        { "mmu_pte_write", VM_STAT(mmu_pte_write) },
        { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
        { "mmu_unsync", VM_STAT(mmu_unsync) },
        { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
        { "largepages", VM_STAT(lpages) },
 +      { "max_mmu_page_hash_collisions",
 +              VM_STAT(max_mmu_page_hash_collisions) },
        { NULL }
  };
  
@@@ -3345,6 -3342,8 +3345,8 @@@ static int kvm_vcpu_ioctl_enable_cap(st
  
        switch (cap->cap) {
        case KVM_CAP_HYPERV_SYNIC:
+               if (!irqchip_in_kernel(vcpu->kvm))
+                       return -EINVAL;
                return kvm_hv_activate_synic(vcpu);
        default:
                return -EINVAL;
@@@ -3897,7 -3896,7 +3899,7 @@@ static int kvm_vm_ioctl_enable_cap(stru
                        goto split_irqchip_unlock;
                /* Pairs with irqchip_in_kernel. */
                smp_wmb();
 -              kvm->arch.irqchip_split = true;
 +              kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
                kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
                r = 0;
  split_irqchip_unlock:
@@@ -3960,41 -3959,40 +3962,41 @@@ long kvm_arch_vm_ioctl(struct file *fil
                r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
                break;
        case KVM_CREATE_IRQCHIP: {
 -              struct kvm_pic *vpic;
 -
                mutex_lock(&kvm->lock);
 +
                r = -EEXIST;
 -              if (kvm->arch.vpic)
 +              if (irqchip_in_kernel(kvm))
                        goto create_irqchip_unlock;
 +
                r = -EINVAL;
                if (kvm->created_vcpus)
                        goto create_irqchip_unlock;
 -              r = -ENOMEM;
 -              vpic = kvm_create_pic(kvm);
 -              if (vpic) {
 -                      r = kvm_ioapic_init(kvm);
 -                      if (r) {
 -                              mutex_lock(&kvm->slots_lock);
 -                              kvm_destroy_pic(vpic);
 -                              mutex_unlock(&kvm->slots_lock);
 -                              goto create_irqchip_unlock;
 -                      }
 -              } else
 +
 +              r = kvm_pic_init(kvm);
 +              if (r)
 +                      goto create_irqchip_unlock;
 +
 +              r = kvm_ioapic_init(kvm);
 +              if (r) {
 +                      mutex_lock(&kvm->slots_lock);
 +                      kvm_pic_destroy(kvm);
 +                      mutex_unlock(&kvm->slots_lock);
                        goto create_irqchip_unlock;
 +              }
 +
                r = kvm_setup_default_irq_routing(kvm);
                if (r) {
                        mutex_lock(&kvm->slots_lock);
                        mutex_lock(&kvm->irq_lock);
                        kvm_ioapic_destroy(kvm);
 -                      kvm_destroy_pic(vpic);
 +                      kvm_pic_destroy(kvm);
                        mutex_unlock(&kvm->irq_lock);
                        mutex_unlock(&kvm->slots_lock);
                        goto create_irqchip_unlock;
                }
 -              /* Write kvm->irq_routing before kvm->arch.vpic.  */
 +              /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
                smp_wmb();
 -              kvm->arch.vpic = vpic;
 +              kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
        create_irqchip_unlock:
                mutex_unlock(&kvm->lock);
                break;
                }
  
                r = -ENXIO;
 -              if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
 +              if (!irqchip_kernel(kvm))
                        goto get_irqchip_out;
                r = kvm_vm_ioctl_get_irqchip(kvm, chip);
                if (r)
                }
  
                r = -ENXIO;
 -              if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
 +              if (!irqchip_kernel(kvm))
                        goto set_irqchip_out;
                r = kvm_vm_ioctl_set_irqchip(kvm, chip);
                if (r)
  }
  EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
  
 +static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 +                          gpa_t gpa, bool write)
 +{
 +      /* For APIC access vmexit */
 +      if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 +              return 1;
 +
 +      if (vcpu_match_mmio_gpa(vcpu, gpa)) {
 +              trace_vcpu_match_mmio(gva, gpa, write, true);
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
  static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
                                gpa_t *gpa, struct x86_exception *exception,
                                bool write)
        if (*gpa == UNMAPPED_GVA)
                return -1;
  
 -      /* For APIC access vmexit */
 -      if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 -              return 1;
 -
 -      if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
 -              trace_vcpu_match_mmio(gva, *gpa, write, true);
 -              return 1;
 -      }
 -
 -      return 0;
 +      return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
  }
  
  int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@@ -4601,22 -4593,6 +4603,22 @@@ static int emulator_read_write_onepage(
        int handled, ret;
        bool write = ops->write;
        struct kvm_mmio_fragment *frag;
 +      struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 +
 +      /*
 +       * If the exit was due to a NPF we may already have a GPA.
 +       * If the GPA is present, use it to avoid the GVA to GPA table walk.
 +       * Note, this cannot be used on string operations since a rep
 +       * string operation will only have the initial GPA from when the
 +       * NPF occurred.
 +       */
 +      if (vcpu->arch.gpa_available &&
 +          emulator_can_use_gpa(ctxt) &&
 +          vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
 +          (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
 +              gpa = exception->address;
 +              goto mmio;
 +      }
  
        ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
  
@@@ -5633,9 -5609,6 +5635,9 @@@ int x86_emulate_instruction(struct kvm_
        }
  
  restart:
 +      /* Save the faulting GPA (cr2) in the address field */
 +      ctxt->exception.address = cr2;
 +
        r = x86_emulate_insn(ctxt);
  
        if (r == EMULATION_INTERCEPTED)
@@@ -6051,7 -6024,7 +6053,7 @@@ int kvm_arch_init(void *opaque
  
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                        PT_DIRTY_MASK, PT64_NX_MASK, 0,
 -                      PT_PRESENT_MASK);
 +                      PT_PRESENT_MASK, 0);
        kvm_timer_init();
  
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@@ -6074,6 -6047,7 +6076,7 @@@ out
  
  void kvm_arch_exit(void)
  {
+       kvm_lapic_exit();
        perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
  
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@@ -6757,7 -6731,6 +6760,7 @@@ static int vcpu_enter_guest(struct kvm_
        }
  
        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
 +              ++vcpu->stat.req_event;
                kvm_apic_accept_events(vcpu);
                if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
                        r = 1;