Merge branch 'x86/cpufeature' of git://git.kernel.org/pub/scm/linux/kernel/git/tip...
author    Radim Krčmář <rkrcmar@redhat.com>
Tue, 17 Jan 2017 16:53:01 +0000 (17:53 +0100)
committer Radim Krčmář <rkrcmar@redhat.com>
Tue, 17 Jan 2017 16:53:01 +0000 (17:53 +0100)
For AVX512_VPOPCNTDQ.
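
The branch is merged so that the AVX512_VPOPCNTDQ cpufeature definition becomes available to the KVM tree. As a hedged aside, not part of this commit: the CPU reports the feature in CPUID.(EAX=07H, ECX=0):ECX bit 14, so a minimal user-space probe could look like the sketch below; the helper name and the max-leaf check are ours, not the kernel's.

/*
 * Illustrative sketch only, not from this merge: probe for
 * AVX512_VPOPCNTDQ from user space via GCC's <cpuid.h>.
 */
#include <cpuid.h>
#include <stdbool.h>
#include <stdio.h>

static bool has_avx512_vpopcntdq(void)
{
        unsigned int eax, ebx, ecx, edx;

        /* Make sure leaf 7 exists before querying it. */
        if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx) || eax < 7)
                return false;

        /* Leaf 7, subleaf 0: structured extended feature flags. */
        __cpuid_count(7, 0, eax, ebx, ecx, edx);
        return ecx & (1u << 14);        /* ECX bit 14 = AVX512_VPOPCNTDQ */
}

int main(void)
{
        printf("AVX512_VPOPCNTDQ: %s\n", has_avx512_vpopcntdq() ? "yes" : "no");
        return 0;
}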

arch/x86/kvm/emulate.c
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/x86.c

diff --combined arch/x86/kvm/emulate.c
index 2b8349a2b14be0a558710089cb5d688a42ac1465,cedbba0f3402d2343ce069fe3ed6a07f44f68907..45c7306c8780b23494f22d8a775f1486ea29eafc
  #define NearBranch  ((u64)1 << 52)  /* Near branches */
  #define No16      ((u64)1 << 53)  /* No 16 bit operand */
  #define IncSP       ((u64)1 << 54)  /* SP is incremented before ModRM calc */
 +#define TwoMemOp    ((u64)1 << 55)  /* Instruction has two memory operands */
  
  #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
  
@@@ -819,6 -818,20 +819,20 @@@ static int segmented_read_std(struct x8
        return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
  }
  
+ static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
+                              struct segmented_address addr,
+                              void *data,
+                              unsigned int size)
+ {
+       int rc;
+       ulong linear;
+       rc = linearize(ctxt, addr, size, true, &linear);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+       return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+ }
+ 
  /*
   * Prefetch the remaining bytes of the instruction without crossing page
   * boundary if they are not in fetch_cache yet.
@@@ -1572,7 -1585,6 +1586,6 @@@ static int write_segment_descriptor(str
                                    &ctxt->exception);
  }
  
- /* Does not support long mode */
  static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                     u16 selector, int seg, u8 cpl,
                                     enum x86_transfer_type transfer,
  
        rpl = selector & 3;
  
-       /* NULL selector is not valid for TR, CS and SS (except for long mode) */
-       if ((seg == VCPU_SREG_CS
-            || (seg == VCPU_SREG_SS
-                && (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl))
-            || seg == VCPU_SREG_TR)
-           && null_selector)
-               goto exception;
        /* TR should be in GDT only */
        if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
                goto exception;
  
-       if (null_selector) /* for NULL selector skip all following checks */
+       /* NULL selector is not valid for TR, CS and (except for long mode) SS */
+       if (null_selector) {
+               if (seg == VCPU_SREG_CS || seg == VCPU_SREG_TR)
+                       goto exception;
+               if (seg == VCPU_SREG_SS) {
+                       if (ctxt->mode != X86EMUL_MODE_PROT64 || rpl != cpl)
+                               goto exception;
+                       /*
+                        * ctxt->ops->set_segment expects the CPL to be in
+                        * SS.DPL, so fake an expand-up 32-bit data segment.
+                        */
+                       seg_desc.type = 3;
+                       seg_desc.p = 1;
+                       seg_desc.s = 1;
+                       seg_desc.dpl = cpl;
+                       seg_desc.d = 1;
+                       seg_desc.g = 1;
+               }
+               /* Skip all following checks */
                goto load;
+       }
  
        ret = read_segment_descriptor(ctxt, selector, &seg_desc, &desc_addr);
        if (ret != X86EMUL_CONTINUE)
@@@ -1738,6 -1764,21 +1765,21 @@@ static int load_segment_descriptor(stru
                                   u16 selector, int seg)
  {
        u8 cpl = ctxt->ops->cpl(ctxt);
+       /*
+        * None of MOV, POP and LSS can load a NULL selector in CPL=3, but
+        * they can load it at CPL<3 (Intel's manual says only LSS can,
+        * but it's wrong).
+        *
+        * However, the Intel manual says that putting IST=1/DPL=3 in
+        * an interrupt gate will result in SS=3 (the AMD manual instead
+        * says it doesn't), so allow SS=3 in __load_segment_descriptor
+        * and only forbid it here.
+        */
+       if (seg == VCPU_SREG_SS && selector == 3 &&
+           ctxt->mode == X86EMUL_MODE_PROT64)
+               return emulate_exception(ctxt, GP_VECTOR, 0, true);
        return __load_segment_descriptor(ctxt, selector, seg, cpl,
                                         X86_TRANSFER_NONE, NULL);
  }
@@@ -3686,8 -3727,8 +3728,8 @@@ static int emulate_store_desc_ptr(struc
        }
        /* Disable writeback. */
        ctxt->dst.type = OP_NONE;
-       return segmented_write(ctxt, ctxt->dst.addr.mem,
-                              &desc_ptr, 2 + ctxt->op_bytes);
+       return segmented_write_std(ctxt, ctxt->dst.addr.mem,
+                                  &desc_ptr, 2 + ctxt->op_bytes);
  }
  
  static int em_sgdt(struct x86_emulate_ctxt *ctxt)
@@@ -3933,7 -3974,7 +3975,7 @@@ static int em_fxsave(struct x86_emulate
        else
                size = offsetof(struct fxregs_state, xmm_space[0]);
  
-       return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+       return segmented_write_std(ctxt, ctxt->memop.addr.mem, &fx_state, size);
  }
  
  static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
@@@ -3975,7 -4016,7 +4017,7 @@@ static int em_fxrstor(struct x86_emulat
        if (rc != X86EMUL_CONTINUE)
                return rc;
  
-       rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
+       rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
        if (rc != X86EMUL_CONTINUE)
                return rc;
  
@@@ -4257,7 -4298,7 +4299,7 @@@ static const struct opcode group1[] = 
  };
  
  static const struct opcode group1A[] = {
 -      I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N,
 +      I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N,
  };
  
  static const struct opcode group2[] = {
@@@ -4295,7 -4336,7 +4337,7 @@@ static const struct opcode group5[] = 
        I(SrcMemFAddr | ImplicitOps,            em_call_far),
        I(SrcMem | NearBranch,                  em_jmp_abs),
        I(SrcMemFAddr | ImplicitOps,            em_jmp_far),
 -      I(SrcMem | Stack,                       em_push), D(Undefined),
 +      I(SrcMem | Stack | TwoMemOp,            em_push), D(Undefined),
  };
  
  static const struct opcode group6[] = {
@@@ -4515,8 -4556,8 +4557,8 @@@ static const struct opcode opcode_table
        /* 0xA0 - 0xA7 */
        I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
        I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
 -      I2bv(SrcSI | DstDI | Mov | String, em_mov),
 -      F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
 +      I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov),
 +      F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r),
        /* 0xA8 - 0xAF */
        F2bv(DstAcc | SrcImm | NoWrite, em_test),
        I2bv(SrcAcc | DstDI | Mov | String, em_mov),
@@@ -5630,14 -5671,3 +5672,14 @@@ void emulator_writeback_register_cache(
  {
        writeback_registers(ctxt);
  }
 +
 +bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt)
 +{
 +      if (ctxt->rep_prefix && (ctxt->d & String))
 +              return false;
 +
 +      if (ctxt->d & TwoMemOp)
 +              return false;
 +
 +      return true;
 +}
diff --combined arch/x86/kvm/lapic.c
index 10a745faa659d11e8a2eba1e3c51c75a13146abf,2f6ef5121a4ca36ac73639e5ffc6aa86624067d2..33b799fd3a6ed25d7eb4ba812dffd126d03b55b9
@@@ -115,16 -115,6 +115,16 @@@ static inline int apic_enabled(struct k
        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
  
 +static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
 +{
 +      return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 +}
 +
 +static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 +{
 +      return apic->vcpu->vcpu_id;
 +}
 +
  static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
        switch (map->mode) {
@@@ -169,13 -159,13 +169,13 @@@ static void recalculate_apic_map(struc
        struct kvm_apic_map *new, *old = NULL;
        struct kvm_vcpu *vcpu;
        int i;
 -      u32 max_id = 255;
 +      u32 max_id = 255; /* enough space for any xAPIC ID */
  
        mutex_lock(&kvm->arch.apic_map_lock);
  
        kvm_for_each_vcpu(i, vcpu, kvm)
                if (kvm_apic_present(vcpu))
 -                      max_id = max(max_id, kvm_apic_id(vcpu->arch.apic));
 +                      max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
  
        new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1));
                struct kvm_lapic *apic = vcpu->arch.apic;
                struct kvm_lapic **cluster;
                u16 mask;
 -              u32 ldr, aid;
 +              u32 ldr;
 +              u8 xapic_id;
 +              u32 x2apic_id;
  
                if (!kvm_apic_present(vcpu))
                        continue;
  
 -              aid = kvm_apic_id(apic);
 -              ldr = kvm_lapic_get_reg(apic, APIC_LDR);
 +              xapic_id = kvm_xapic_id(apic);
 +              x2apic_id = kvm_x2apic_id(apic);
  
 -              if (aid <= new->max_apic_id)
 -                      new->phys_map[aid] = apic;
 +              /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
 +              if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
 +                              x2apic_id <= new->max_apic_id)
 +                      new->phys_map[x2apic_id] = apic;
 +              /*
 +               * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap around,
 +               * so prevent them from masking VCPUs with APIC ID <= 0xff.
 +               */
 +              if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
 +                      new->phys_map[xapic_id] = apic;
 +
 +              ldr = kvm_lapic_get_reg(apic, APIC_LDR);
  
                if (apic_x2apic_mode(apic)) {
                        new->mode |= KVM_APIC_MODE_X2APIC;
@@@ -272,8 -250,6 +272,8 @@@ static inline void kvm_apic_set_x2apic_
  {
        u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
  
 +      WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
 +
        kvm_lapic_set_reg(apic, APIC_ID, id);
        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
        recalculate_apic_map(apic->vcpu->kvm);
@@@ -570,15 -546,7 +570,15 @@@ static void pv_eoi_clr_pending(struct k
        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
  }
  
 -static void apic_update_ppr(struct kvm_lapic *apic)
 +static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
 +{
 +      int highest_irr = apic_find_highest_irr(apic);
 +      if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
 +              return -1;
 +      return highest_irr;
 +}
 +
 +static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
  {
        u32 tpr, isrv, ppr, old_ppr;
        int isr;
        apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
                   apic, ppr, isr, isrv);
  
 -      if (old_ppr != ppr) {
 +      *new_ppr = ppr;
 +      if (old_ppr != ppr)
                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
 -              if (ppr < old_ppr)
 -                      kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 -      }
 +
 +      return ppr < old_ppr;
 +}
 +
 +static void apic_update_ppr(struct kvm_lapic *apic)
 +{
 +      u32 ppr;
 +
 +      if (__apic_update_ppr(apic, &ppr) &&
 +          apic_has_interrupt_for_ppr(apic, ppr) != -1)
 +              kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
 +}
 +
 +void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
 +{
 +      apic_update_ppr(vcpu->arch.apic);
  }
 +EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
  
  static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
  {
  
  static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
  {
 -      if (apic_x2apic_mode(apic))
 -              return mda == X2APIC_BROADCAST;
 -
 -      return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
 +      return mda == (apic_x2apic_mode(apic) ?
 +                      X2APIC_BROADCAST : APIC_BROADCAST);
  }
  
  static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
                return true;
  
        if (apic_x2apic_mode(apic))
 -              return mda == kvm_apic_id(apic);
 +              return mda == kvm_x2apic_id(apic);
 +
 +      /*
 +       * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
 +       * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
 +       * this allows unique addressing of VCPUs with APIC ID over 0xff.
 +       * The 0xff condition is needed because the xAPIC ID is writeable.
 +       */
 +      if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
 +              return true;
  
 -      return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
 +      return mda == kvm_xapic_id(apic);
  }
  
  static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
                       && (logical_id & mda & 0xffff) != 0;
  
        logical_id = GET_APIC_LOGICAL_ID(logical_id);
 -      mda = GET_APIC_DEST_FIELD(mda);
  
        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
        case APIC_DFR_FLAT:
  
  /* The KVM local APIC implementation has two quirks:
   *
 - *  - the xAPIC MDA stores the destination at bits 24-31, while this
 - *    is not true of struct kvm_lapic_irq's dest_id field.  This is
 - *    just a quirk in the API and is not problematic.
 + *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 + *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 + *    KVM doesn't do that aliasing.
   *
   *  - in-kernel IOAPIC messages have to be delivered directly to
   *    x2APIC, because the kernel does not support interrupt remapping.
@@@ -698,12 -645,13 +698,12 @@@ static u32 kvm_apic_mda(struct kvm_vcp
                struct kvm_lapic *source, struct kvm_lapic *target)
  {
        bool ipi = source != NULL;
 -      bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
  
        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
 -          !ipi && dest_id == APIC_BROADCAST && x2apic_mda)
 +          !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
                return X2APIC_BROADCAST;
  
 -      return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
 +      return dest_id;
  }
  
  bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
@@@ -1959,9 -1907,9 +1959,9 @@@ void kvm_lapic_reset(struct kvm_vcpu *v
        vcpu->arch.apic_arb_prio = 0;
        vcpu->arch.apic_attention = 0;
  
 -      apic_debug("%s: vcpu=%p, id=%d, base_msr="
 +      apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
                   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
 -                 vcpu, kvm_apic_id(apic),
 +                 vcpu, kvm_lapic_get_reg(apic, APIC_ID),
                   vcpu->arch.apic_base, apic->base_address);
  }
  
@@@ -2073,13 -2021,17 +2073,13 @@@ nomem
  int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
  {
        struct kvm_lapic *apic = vcpu->arch.apic;
 -      int highest_irr;
 +      u32 ppr;
  
        if (!apic_enabled(apic))
                return -1;
  
 -      apic_update_ppr(apic);
 -      highest_irr = apic_find_highest_irr(apic);
 -      if ((highest_irr == -1) ||
 -          ((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI)))
 -              return -1;
 -      return highest_irr;
 +      __apic_update_ppr(apic, &ppr);
 +      return apic_has_interrupt_for_ppr(apic, ppr);
  }
  
  int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
@@@ -2115,7 -2067,6 +2115,7 @@@ int kvm_get_apic_interrupt(struct kvm_v
  {
        int vector = kvm_apic_has_interrupt(vcpu);
        struct kvm_lapic *apic = vcpu->arch.apic;
 +      u32 ppr;
  
        if (vector == -1)
                return -1;
         * because the processor would deliver it through the IDT.
         */
  
 -      apic_set_isr(vector, apic);
 -      apic_update_ppr(apic);
        apic_clear_irr(vector, apic);
 -
        if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
 -              apic_clear_isr(vector, apic);
 +              /*
 +               * For auto-EOI interrupts, there might be another pending
 +               * interrupt above PPR, so check whether to raise another
 +               * KVM_REQ_EVENT.
 +               */
                apic_update_ppr(apic);
 +      } else {
 +              /*
 +               * For normal interrupts, PPR has been raised and there cannot
 +               * be a higher-priority pending interrupt---except if there was
 +               * a concurrent interrupt injection, but that would have
 +               * triggered KVM_REQ_EVENT already.
 +               */
 +              apic_set_isr(vector, apic);
 +              __apic_update_ppr(apic, &ppr);
        }
  
        return vector;
@@@ -2485,3 -2426,9 +2485,9 @@@ void kvm_lapic_init(void
        jump_label_rate_limit(&apic_hw_disabled, HZ);
        jump_label_rate_limit(&apic_sw_disabled, HZ);
  }
+ void kvm_lapic_exit(void)
+ {
+       static_key_deferred_flush(&apic_hw_disabled);
+       static_key_deferred_flush(&apic_sw_disabled);
+ }
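
A brief worked illustration of the priority gating that the helpers above factor out (the snippet only models the arithmetic of __apic_update_ppr() and apic_has_interrupt_for_ppr(); it is ours and not part of the commit): with TPR = 0x30 and highest in-service vector 0x51, the PPR becomes 0x50, so a pending vector 0x43 is held back while vector 0x61 would be returned for delivery.

/* Standalone model of the LAPIC PPR gating, for illustration only. */
#include <stdio.h>

static int has_interrupt_for_ppr(int highest_irr, unsigned int ppr)
{
        /* Same shape as apic_has_interrupt_for_ppr(): compare priority classes. */
        if (highest_irr == -1 || ((unsigned int)highest_irr & 0xF0) <= ppr)
                return -1;
        return highest_irr;
}

int main(void)
{
        unsigned int tpr = 0x30, isrv = 0x51, ppr;

        /* PPR is the TPR, unless a higher in-service priority class wins. */
        ppr = ((tpr & 0xf0) >= (isrv & 0xf0)) ? tpr : (isrv & 0xf0);

        printf("ppr=0x%02x, irr 0x43 -> %d\n", ppr, has_interrupt_for_ppr(0x43, ppr));
        printf("ppr=0x%02x, irr 0x61 -> %d\n", ppr, has_interrupt_for_ppr(0x61, ppr));
        return 0;
}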
diff --combined arch/x86/kvm/lapic.h
index 5b5b1ba644cb81f18355f613dac19c1bbacda136,ff8039d616723fd79c493c0a1369658e440224b2..05abd837b78a385f37c758f3e2ca6008f91de473
@@@ -73,7 -73,6 +73,7 @@@ bool kvm_apic_match_dest(struct kvm_vcp
  
  void __kvm_apic_update_irr(u32 *pir, void *regs);
  void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 +void kvm_apic_update_ppr(struct kvm_vcpu *vcpu);
  int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
                     struct dest_map *dest_map);
  int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@@ -111,6 -110,7 +111,7 @@@ static inline bool kvm_hv_vapic_assist_
  
  int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
  void kvm_lapic_init(void);
+ void kvm_lapic_exit(void);
  
  #define VEC_POS(v) ((v) & (32 - 1))
  #define REG_POS(v) (((v) >> 5) << 4)
@@@ -203,6 -203,17 +204,6 @@@ static inline int kvm_lapic_latched_ini
        return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
  }
  
 -static inline u32 kvm_apic_id(struct kvm_lapic *apic)
 -{
 -      /* To avoid a race between apic_base and following APIC_ID update when
 -       * switching to x2apic_mode, the x2apic mode returns initial x2apic id.
 -       */
 -      if (apic_x2apic_mode(apic))
 -              return apic->vcpu->vcpu_id;
 -
 -      return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
 -}
 -
  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
  
  void wait_lapic_expire(struct kvm_vcpu *vcpu);
diff --combined arch/x86/kvm/x86.c
index b02af6285887f06af51d13480b1d9a931c8871de,57d8a856cdc5ce938efb73fd8fae144867abde76..6e2c71ea0627e5893fc59bd1cfca30d8b5ae5a1c
@@@ -180,7 -180,6 +180,7 @@@ struct kvm_stats_debugfs_item debugfs_e
        { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
        { "irq_injections", VCPU_STAT(irq_injections) },
        { "nmi_injections", VCPU_STAT(nmi_injections) },
 +      { "req_event", VCPU_STAT(req_event) },
        { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
        { "mmu_pte_write", VM_STAT(mmu_pte_write) },
        { "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
        { "mmu_unsync", VM_STAT(mmu_unsync) },
        { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
        { "largepages", VM_STAT(lpages) },
 +      { "max_mmu_page_hash_collisions",
 +              VM_STAT(max_mmu_page_hash_collisions) },
        { NULL }
  };
  
@@@ -3345,6 -3342,8 +3345,8 @@@ static int kvm_vcpu_ioctl_enable_cap(st
  
        switch (cap->cap) {
        case KVM_CAP_HYPERV_SYNIC:
+               if (!irqchip_in_kernel(vcpu->kvm))
+                       return -EINVAL;
                return kvm_hv_activate_synic(vcpu);
        default:
                return -EINVAL;
@@@ -3897,7 -3896,7 +3899,7 @@@ static int kvm_vm_ioctl_enable_cap(stru
                        goto split_irqchip_unlock;
                /* Pairs with irqchip_in_kernel. */
                smp_wmb();
 -              kvm->arch.irqchip_split = true;
 +              kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
                kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
                r = 0;
  split_irqchip_unlock:
@@@ -3960,41 -3959,40 +3962,41 @@@ long kvm_arch_vm_ioctl(struct file *fil
                r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
                break;
        case KVM_CREATE_IRQCHIP: {
 -              struct kvm_pic *vpic;
 -
                mutex_lock(&kvm->lock);
 +
                r = -EEXIST;
 -              if (kvm->arch.vpic)
 +              if (irqchip_in_kernel(kvm))
                        goto create_irqchip_unlock;
 +
                r = -EINVAL;
                if (kvm->created_vcpus)
                        goto create_irqchip_unlock;
 -              r = -ENOMEM;
 -              vpic = kvm_create_pic(kvm);
 -              if (vpic) {
 -                      r = kvm_ioapic_init(kvm);
 -                      if (r) {
 -                              mutex_lock(&kvm->slots_lock);
 -                              kvm_destroy_pic(vpic);
 -                              mutex_unlock(&kvm->slots_lock);
 -                              goto create_irqchip_unlock;
 -                      }
 -              } else
 +
 +              r = kvm_pic_init(kvm);
 +              if (r)
 +                      goto create_irqchip_unlock;
 +
 +              r = kvm_ioapic_init(kvm);
 +              if (r) {
 +                      mutex_lock(&kvm->slots_lock);
 +                      kvm_pic_destroy(kvm);
 +                      mutex_unlock(&kvm->slots_lock);
                        goto create_irqchip_unlock;
 +              }
 +
                r = kvm_setup_default_irq_routing(kvm);
                if (r) {
                        mutex_lock(&kvm->slots_lock);
                        mutex_lock(&kvm->irq_lock);
                        kvm_ioapic_destroy(kvm);
 -                      kvm_destroy_pic(vpic);
 +                      kvm_pic_destroy(kvm);
                        mutex_unlock(&kvm->irq_lock);
                        mutex_unlock(&kvm->slots_lock);
                        goto create_irqchip_unlock;
                }
 -              /* Write kvm->irq_routing before kvm->arch.vpic.  */
 +              /* Write kvm->irq_routing before enabling irqchip_in_kernel. */
                smp_wmb();
 -              kvm->arch.vpic = vpic;
 +              kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL;
        create_irqchip_unlock:
                mutex_unlock(&kvm->lock);
                break;
                }
  
                r = -ENXIO;
 -              if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
 +              if (!irqchip_kernel(kvm))
                        goto get_irqchip_out;
                r = kvm_vm_ioctl_get_irqchip(kvm, chip);
                if (r)
                }
  
                r = -ENXIO;
 -              if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
 +              if (!irqchip_kernel(kvm))
                        goto set_irqchip_out;
                r = kvm_vm_ioctl_set_irqchip(kvm, chip);
                if (r)
  }
  EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
  
 +static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 +                          gpa_t gpa, bool write)
 +{
 +      /* For APIC access vmexit */
 +      if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 +              return 1;
 +
 +      if (vcpu_match_mmio_gpa(vcpu, gpa)) {
 +              trace_vcpu_match_mmio(gva, gpa, write, true);
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
  static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
                                gpa_t *gpa, struct x86_exception *exception,
                                bool write)
        if (*gpa == UNMAPPED_GVA)
                return -1;
  
 -      /* For APIC access vmexit */
 -      if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
 -              return 1;
 -
 -      if (vcpu_match_mmio_gpa(vcpu, *gpa)) {
 -              trace_vcpu_match_mmio(gva, *gpa, write, true);
 -              return 1;
 -      }
 -
 -      return 0;
 +      return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
  }
  
  int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
@@@ -4601,22 -4593,6 +4603,22 @@@ static int emulator_read_write_onepage(
        int handled, ret;
        bool write = ops->write;
        struct kvm_mmio_fragment *frag;
 +      struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 +
 +      /*
 +       * If the exit was due to a NPF we may already have a GPA.
 +       * If the GPA is present, use it to avoid the GVA to GPA table walk.
 +       * Note, this cannot be used on string operations since a rep
 +       * string operation will only have the initial GPA from when the
 +       * NPF occurred.
 +       */
 +      if (vcpu->arch.gpa_available &&
 +          emulator_can_use_gpa(ctxt) &&
 +          vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) &&
 +          (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) {
 +              gpa = exception->address;
 +              goto mmio;
 +      }
  
        ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
  
@@@ -5633,9 -5609,6 +5635,9 @@@ int x86_emulate_instruction(struct kvm_
        }
  
  restart:
 +      /* Save the faulting GPA (cr2) in the address field */
 +      ctxt->exception.address = cr2;
 +
        r = x86_emulate_insn(ctxt);
  
        if (r == EMULATION_INTERCEPTED)
@@@ -6051,7 -6024,7 +6053,7 @@@ int kvm_arch_init(void *opaque
  
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                        PT_DIRTY_MASK, PT64_NX_MASK, 0,
 -                      PT_PRESENT_MASK);
 +                      PT_PRESENT_MASK, 0);
        kvm_timer_init();
  
        perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@@ -6074,6 -6047,7 +6076,7 @@@ out
  
  void kvm_arch_exit(void)
  {
+       kvm_lapic_exit();
        perf_unregister_guest_info_callbacks(&kvm_guest_cbs);
  
        if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@@ -6757,7 -6731,6 +6760,7 @@@ static int vcpu_enter_guest(struct kvm_
        }
  
        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
 +              ++vcpu->stat.req_event;
                kvm_apic_accept_events(vcpu);
                if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
                        r = 1;