Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 11 Jun 2014 01:54:22 +0000 (18:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 11 Jun 2014 01:54:22 +0000 (18:54 -0700)
Pull powerpc updates from Ben Herrenschmidt:
 "Here is the bulk of the powerpc changes for this merge window.  It got
  a bit delayed in part because I wasn't paying attention, and in part
  because I discovered I had a core PCI change without a PCI maintainer
  ack in it.  Bjorn eventually agreed it was ok to merge it though we'll
  probably improve it later and I didn't want to rebase to add his ack.

  There is going to be a bit more next week, essentially fixes that I
  still want to sort through and test.

  The biggest item this time is the support for building the ppc64 LE
  kernel with our new v2 ABI.  We previously supported v2 userspace, but
  the kernel itself was a tougher nut to crack.  That is now sorted,
  mostly thanks to Anton and Rusty.
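
  [ A minimal compile-time sketch of the ABI split, not from the series
    itself: powerpc64 toolchains define _CALL_ELF to 2 for the new ELFv2
    ABI, which drops ELFv1's function descriptors; the sections.h hunk
    below keys off the same macro. ]

      /* Sketch: report which powerpc64 ELF ABI the compiler targets. */
      #include <stdio.h>

      int main(void)
      {
      #if defined(_CALL_ELF) && _CALL_ELF == 2
              puts("ELFv2: no function descriptors, dual entry points");
      #else
              puts("ELFv1 (or not ppc64): function descriptors in .opd");
      #endif
              return 0;
      }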

  We also have a fairly big series from Cedric that adds support for a
  64-bit LE zImage boot wrapper.  This was made harder by the fact that
  traditionally our zImage wrapper was always 32-bit, but our new LE
  toolchains don't really support 32-bit anymore (it's somewhat there
  but not really "supported"), so we didn't want to rely on it.  This
  meant more churn than just endian fixes.

  This also brings some more LE bits, such as the ability to run in LE
  mode without a hypervisor (i.e. under OPAL firmware) by making the
  right OPAL call to reinitialize the CPU so it takes HV interrupts in
  the right mode, plus the usual pile of endian fixes.
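
  [ A hedged sketch of the firmware call involved: opal_reinit_cpus()
    with the OPAL_REINIT_CPUS_HILE_LE flag, per the OPAL API.  The stub
    below stands in for the real OPAL entry point so the sketch is
    self-contained. ]

      #include <stdint.h>
      #include <stdio.h>

      #define OPAL_SUCCESS             0
      #define OPAL_REINIT_CPUS_HILE_BE (1ull << 0) /* HV interrupts BE */
      #define OPAL_REINIT_CPUS_HILE_LE (1ull << 1) /* HV interrupts LE */

      /* Stub: the real call traps into OPAL firmware. */
      static int64_t opal_reinit_cpus(uint64_t flags)
      {
              printf("opal_reinit_cpus(0x%llx)\n",
                     (unsigned long long)flags);
              return OPAL_SUCCESS;
      }

      int main(void)
      {
              /* A little-endian host asks for HV interrupts in LE mode. */
              if (opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE) != OPAL_SUCCESS)
                      fprintf(stderr, "reinit failed, interrupts stay BE\n");
              return 0;
      }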

  There's another series from Gavin adding EEH improvements (one day we
  *will* have a release with fewer than 20 EEH patches, I promise!).

  Another highlight is the support for the "split core" functionality on
  P8 by Michael.  This allows a P8 core to be split into "subcores" of 4
  threads each, which lets those subcores run different guests under KVM
  (the HW still doesn't support a partition per thread).
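
  [ A toy illustration of the subcore arithmetic, with made-up values;
    the book3s_hv.c hunks below switch the real code from
    threads_per_core to threads_per_subcore. ]

      #include <stdio.h>

      int main(void)
      {
              int threads_per_core = 8;    /* POWER8 SMT8 core */
              int threads_per_subcore = 4; /* core split in two */

              /* Group vcpu ids into vcores the way the KVM HV code does:
               * core = id / threads_per_subcore. */
              for (int id = 0; id < threads_per_core; id++) {
                      int core = id / threads_per_subcore;
                      printf("vcpu %d -> vcore %d (first vcpuid %d)\n",
                             id, core, core * threads_per_subcore);
              }
              return 0;
      }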

  And then the usual misc bits and fixes ..."

[ Further delayed by gmail deciding that BenH is a dirty spammer.
  Google knows.  ]

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (155 commits)
  powerpc/powernv: Add missing include to LPC code
  selftests/powerpc: Test the THP bug we fixed in the previous commit
  powerpc/mm: Check paca psize is up to date for huge mappings
  powerpc/powernv: Pass buffer size to OPAL validate flash call
  powerpc/pseries: hcall functions are exported to modules, need _GLOBAL_TOC()
  powerpc: Exported functions __clear_user and copy_page use r2 so need _GLOBAL_TOC()
  powerpc/powernv: Set memory_block_size_bytes to 256MB
  powerpc: Allow ppc_md platform hook to override memory_block_size_bytes
  powerpc/powernv: Fix endian issues in memory error handling code
  powerpc/eeh: Skip eeh sysfs when eeh is disabled
  powerpc: 64bit sendfile is capped at 2GB
  powerpc/powernv: Provide debugfs access to the LPC bus via OPAL
  powerpc/serial: Use saner flags when creating legacy ports
  powerpc: Add cpu family documentation
  powerpc/xmon: Fix up xmon format strings
  powerpc/powernv: Add calls to support little endian host
  powerpc: Document sysfs DSCR interface
  powerpc: Fix regression of per-CPU DSCR setting
  powerpc: Split __SYSFS_SPRSETUP macro
  arch: powerpc/fadump: Cleaning up inconsistent NULL checks
  ...

20 files changed:
Documentation/devicetree/bindings/vendor-prefixes.txt
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/sections.h
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/topology.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/epapr_paravirt.c
arch/powerpc/kernel/fadump.c
arch/powerpc/kernel/pci-common.c
arch/powerpc/kernel/pci_of_scan.c
arch/powerpc/kernel/smp.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/slb.c
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/pseries/setup.c
drivers/pci/pci.c

index 5261271046ce4711c4efc7be33f0e44beddb9000,1a6793b9524fcea4de98aaca3cf267560fa90fb6..4d7f3758d1b43fd1c4e938e27bd0525f667025d7
@@@ -13,7 -13,6 +13,7 @@@ allwinner     Allwinner Technology Co., Ltd
  altr  Altera Corp.
  amcc  Applied Micro Circuits Corporation (APM, formally AMCC)
  amd   Advanced Micro Devices (AMD), Inc.
 +ams   AMS AG
  amstaos       AMS-Taos Inc.
  apm   Applied Micro Circuits Corporation (APM)
  arm   ARM Ltd.
@@@ -74,15 -73,12 +74,15 @@@ lantiq     Lantiq Semiconducto
  lg    LG Corporation
  linux Linux-specific binding
  lsi   LSI Corp. (LSI Logic)
 +lltc  Linear Technology Corporation
  marvell       Marvell Technology Group Ltd.
  maxim Maxim Integrated Products
 +micrel        Micrel Inc.
  microchip     Microchip Technology Inc.
  mosaixtech    Mosaix Technologies, Inc.
  moxa  Moxa
  mpl   MPL AG
 +mundoreader   Mundo Reader S.L.
  mxicy Macronix International Co., Ltd.
  national      National Semiconductor
  neonode               Neonode Inc.
@@@ -102,7 -98,6 +102,7 @@@ powervr     PowerVR (deprecated, use img
  qca   Qualcomm Atheros, Inc.
  qcom  Qualcomm Technologies, Inc
  qnap  QNAP Systems, Inc.
 +radxa Radxa
  raidsonic     RaidSonic Technology GmbH
  ralink        Mediatek/Ralink Technology Corp.
  ramtron       Ramtron International
@@@ -128,12 -123,10 +128,12 @@@ stericsson      ST-Ericsso
  synology      Synology, Inc.
  ti    Texas Instruments
  tlm   Trusted Logic Mobility
 +toradex       Toradex AG
  toshiba       Toshiba Corporation
  toumaz        Toumaz
  usi   Universal Scientifc Industrial Co., Ltd.
  v3    V3 Semiconductor
 +variscite     Variscite Ltd.
  via   VIA Technologies, Inc.
  voipac        Voipac Technologies s.r.o.
  winbond Winbond Electronics corp.
@@@ -142,3 -135,4 +142,4 @@@ wm Wondermedia Technologies, Inc
  xes   Extreme Engineering Solutions (X-ES)
  xlnx  Xilinx
  zyxel ZyXEL Communications Corp.
+ zarlink       Zarlink Semiconductor
index 4a7cc453be0b0b8193a52a9900d71eea3d2d27fc,2c8e39951ab52078fde196f7cde6faf943893f17..9c89cdd067a643a6a0572042e90f0d77066dc1d4
@@@ -337,6 -337,10 +337,10 @@@ static inline void kvmppc_fast_vcpu_kic
        vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu);
  }
  
+ extern void kvm_hv_vm_activated(void);
+ extern void kvm_hv_vm_deactivated(void);
+ extern bool kvm_hv_mode_active(void);
  #else
  static inline void __init kvm_cma_reserve(void)
  {}
@@@ -356,6 -360,9 +360,9 @@@ static inline void kvmppc_fast_vcpu_kic
  {
        kvm_vcpu_kick(vcpu);
  }
+ static inline bool kvm_hv_mode_active(void)           { return false; }
  #endif
  
  #ifdef CONFIG_KVM_XICS
@@@ -448,84 -455,6 +455,84 @@@ static inline void kvmppc_mmu_flush_ica
        }
  }
  
 +/*
 + * Shared struct helpers. The shared struct can be little or big endian,
 + * depending on the guest endianness. So expose helpers to all of them.
 + */
 +static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu)
 +{
 +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
 +      /* Only Book3S_64 PR supports bi-endian for now */
 +      return vcpu->arch.shared_big_endian;
 +#elif defined(CONFIG_PPC_BOOK3S_64) && defined(__LITTLE_ENDIAN__)
 +      /* Book3s_64 HV on little endian is always little endian */
 +      return false;
 +#else
 +      return true;
 +#endif
 +}
 +
 +#define SHARED_WRAPPER_GET(reg, size)                                 \
 +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \
 +{                                                                     \
 +      if (kvmppc_shared_big_endian(vcpu))                             \
 +             return be##size##_to_cpu(vcpu->arch.shared->reg);        \
 +      else                                                            \
 +             return le##size##_to_cpu(vcpu->arch.shared->reg);        \
 +}                                                                     \
 +
 +#define SHARED_WRAPPER_SET(reg, size)                                 \
 +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val)       \
 +{                                                                     \
 +      if (kvmppc_shared_big_endian(vcpu))                             \
 +             vcpu->arch.shared->reg = cpu_to_be##size(val);           \
 +      else                                                            \
 +             vcpu->arch.shared->reg = cpu_to_le##size(val);           \
 +}                                                                     \
 +
 +#define SHARED_WRAPPER(reg, size)                                     \
 +      SHARED_WRAPPER_GET(reg, size)                                   \
 +      SHARED_WRAPPER_SET(reg, size)                                   \
 +
 +SHARED_WRAPPER(critical, 64)
 +SHARED_WRAPPER(sprg0, 64)
 +SHARED_WRAPPER(sprg1, 64)
 +SHARED_WRAPPER(sprg2, 64)
 +SHARED_WRAPPER(sprg3, 64)
 +SHARED_WRAPPER(srr0, 64)
 +SHARED_WRAPPER(srr1, 64)
 +SHARED_WRAPPER(dar, 64)
 +SHARED_WRAPPER_GET(msr, 64)
 +static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val)
 +{
 +      if (kvmppc_shared_big_endian(vcpu))
 +             vcpu->arch.shared->msr = cpu_to_be64(val);
 +      else
 +             vcpu->arch.shared->msr = cpu_to_le64(val);
 +}
 +SHARED_WRAPPER(dsisr, 32)
 +SHARED_WRAPPER(int_pending, 32)
 +SHARED_WRAPPER(sprg4, 64)
 +SHARED_WRAPPER(sprg5, 64)
 +SHARED_WRAPPER(sprg6, 64)
 +SHARED_WRAPPER(sprg7, 64)
 +
 +static inline u32 kvmppc_get_sr(struct kvm_vcpu *vcpu, int nr)
 +{
 +      if (kvmppc_shared_big_endian(vcpu))
 +             return be32_to_cpu(vcpu->arch.shared->sr[nr]);
 +      else
 +             return le32_to_cpu(vcpu->arch.shared->sr[nr]);
 +}
 +
 +static inline void kvmppc_set_sr(struct kvm_vcpu *vcpu, int nr, u32 val)
 +{
 +      if (kvmppc_shared_big_endian(vcpu))
 +             vcpu->arch.shared->sr[nr] = cpu_to_be32(val);
 +      else
 +             vcpu->arch.shared->sr[nr] = cpu_to_le32(val);
 +}
 +
  /*
   * Please call after prepare_to_enter. This function puts the lazy ee and irq
   * disabled tracking state back to normal mode, without actually enabling
@@@ -563,7 -492,7 +570,7 @@@ static inline ulong kvmppc_get_ea_index
        msr_64bit = MSR_SF;
  #endif
  
 -      if (!(vcpu->arch.shared->msr & msr_64bit))
 +      if (!(kvmppc_get_msr(vcpu) & msr_64bit))
                ea = (uint32_t)ea;
  
        return ea;
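
[ An aside on the SHARED_WRAPPER helpers above: this is a reconstruction
  of what SHARED_WRAPPER(srr0, 64) expands to, using stand-in types and
  byteswap stubs (assuming a little-endian host) so it compiles on its
  own. ]

    #include <stdbool.h>
    #include <stdint.h>
    #include <byteswap.h>

    struct kvm_vcpu_arch_shared { uint64_t srr0; };
    struct kvm_vcpu { struct { struct kvm_vcpu_arch_shared *shared; } arch; };

    /* Stand-in: pretend the guest's shared page is big-endian. */
    static bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu)
    {
            (void)vcpu;
            return true;
    }

    /* On an LE host, be64_to_cpu() swaps and le64_to_cpu() is a no-op. */
    static uint64_t be64_to_cpu(uint64_t x) { return bswap_64(x); }
    static uint64_t le64_to_cpu(uint64_t x) { return x; }
    static uint64_t cpu_to_be64(uint64_t x) { return bswap_64(x); }
    static uint64_t cpu_to_le64(uint64_t x) { return x; }

    /* Expansion of SHARED_WRAPPER_GET(srr0, 64)... */
    static uint64_t kvmppc_get_srr0(struct kvm_vcpu *vcpu)
    {
            if (kvmppc_shared_big_endian(vcpu))
                    return be64_to_cpu(vcpu->arch.shared->srr0);
            else
                    return le64_to_cpu(vcpu->arch.shared->srr0);
    }

    /* ...and of SHARED_WRAPPER_SET(srr0, 64). */
    static void kvmppc_set_srr0(struct kvm_vcpu *vcpu, uint64_t val)
    {
            if (kvmppc_shared_big_endian(vcpu))
                    vcpu->arch.shared->srr0 = cpu_to_be64(val);
            else
                    vcpu->arch.shared->srr0 = cpu_to_le64(val);
    }

    int main(void)
    {
            struct kvm_vcpu_arch_shared sh = { 0 };
            struct kvm_vcpu vcpu = { { &sh } };

            kvmppc_set_srr0(&vcpu, 0x1234);      /* stored byte-swapped */
            return kvmppc_get_srr0(&vcpu) == 0x1234 ? 0 : 1;
    }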
index 4852bcf270f37a30b68519e8b0ce3b52c5fe7d8c,2cd799b382ec4407b6dc5393507ab550672dd91d..bffd89d27301db29594688225eb83b1033f5d2e2
  #define SPRN_TEXASR   0x82    /* Transaction EXception & Summary */
  #define   TEXASR_FS   __MASK(63-36)   /* Transaction Failure Summary */
  #define SPRN_TEXASRU  0x83    /* ''      ''      ''    Upper 32  */
+ #define   TEXASR_FS     __MASK(63-36) /* TEXASR Failure Summary */
  #define SPRN_TFHAR    0x80    /* Transaction Failure Handler Addr */
  #define SPRN_CTRLF    0x088
  #define SPRN_CTRLT    0x098
  #define   CTRL_TE     0x00c00000      /* thread enable */
  #define   CTRL_RUNLATCH       0x1
  #define SPRN_DAWR     0xB4
+ #define SPRN_RPR      0xBA    /* Relative Priority Register */
  #define SPRN_CIABR    0xBB
  #define   CIABR_PRIV          0x3
  #define   CIABR_PRIV_USER     1
  #define SPRN_HSRR1    0x13B   /* Hypervisor Save/Restore 1 */
  #define SPRN_IC               0x350   /* Virtual Instruction Count */
  #define SPRN_VTB      0x351   /* Virtual Time Base */
+ #define SPRN_LDBAR    0x352   /* LD Base Address Register */
  #define SPRN_PMICR    0x354   /* Power Management Idle Control Reg */
  #define SPRN_PMSR     0x355   /* Power Management Status Reg */
+ #define SPRN_PMMAR    0x356   /* Power Management Memory Activity Register */
  #define SPRN_PMCR     0x374   /* Power Management Control Register */
  
  /* HFSCR and FSCR bit numbers are the same */
  #define HID0_BTCD     (1<<1)          /* Branch target cache disable */
  #define HID0_NOPDST   (1<<1)          /* No-op dst, dstt, etc. instr. */
  #define HID0_NOPTI    (1<<0)          /* No-op dcbt and dcbst instr. */
+ /* POWER8 HID0 bits */
+ #define HID0_POWER8_4LPARMODE __MASK(61)
+ #define HID0_POWER8_2LPARMODE __MASK(57)
+ #define HID0_POWER8_1TO2LPAR  __MASK(52)
+ #define HID0_POWER8_1TO4LPAR  __MASK(51)
+ #define HID0_POWER8_DYNLPARDIS        __MASK(48)
  
  #define SPRN_HID1     0x3F1           /* Hardware Implementation Register 1 */
  #ifdef CONFIG_6xx
  #define   MMCR0_PROBLEM_DISABLE MMCR0_FCP
  #define   MMCR0_FCM1  0x10000000UL /* freeze counters while MSR mark = 1 */
  #define   MMCR0_FCM0  0x08000000UL /* freeze counters while MSR mark = 0 */
 -#define   MMCR0_PMXE  0x04000000UL /* performance monitor exception enable */
 -#define   MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */
 +#define   MMCR0_PMXE  ASM_CONST(0x04000000) /* perf mon exception enable */
 +#define   MMCR0_FCECE ASM_CONST(0x02000000) /* freeze ctrs on enabled cond or event */
  #define   MMCR0_TBEE  0x00400000UL /* time base exception enable */
  #define   MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */
  #define   MMCR0_EBE   0x00100000UL /* Event based branch enable */
  #define   MMCR0_PMCC  0x000c0000UL /* PMC control */
  #define   MMCR0_PMCC_U6       0x00080000UL /* PMC1-6 are R/W by user (PR) */
  #define   MMCR0_PMC1CE        0x00008000UL /* PMC1 count enable*/
 -#define   MMCR0_PMCjCE        0x00004000UL /* PMCj count enable*/
 +#define   MMCR0_PMCjCE        ASM_CONST(0x00004000) /* PMCj count enable*/
  #define   MMCR0_TRIGGER       0x00002000UL /* TRIGGER enable */
 -#define   MMCR0_PMAO_SYNC 0x00000800UL /* PMU interrupt is synchronous */
 -#define   MMCR0_PMAO  0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */
 +#define   MMCR0_PMAO_SYNC ASM_CONST(0x00000800) /* PMU intr is synchronous */
 +#define   MMCR0_C56RUN        ASM_CONST(0x00000100) /* PMC5/6 count when RUN=0 */
 +/* performance monitor alert has occurred, set to 0 after handling exception */
 +#define   MMCR0_PMAO  ASM_CONST(0x00000080)
  #define   MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */
  #define   MMCR0_FC56  0x00000010UL /* freeze counters 5 and 6 */
  #define   MMCR0_FCTI  0x00000008UL /* freeze counters in tags inactive mode */
index 52179033067262f8f107be29c16b1fba6b8d4cee,d1bb96d5a298b3f82a068ba0dd9ec9b41095a695..a5e930aca804341a21cd90a0c8f2b4367de18e3f
@@@ -39,17 -39,7 +39,18 @@@ static inline int overlaps_kernel_text(
                (unsigned long)_stext < end;
  }
  
 +static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end)
 +{
 +#ifdef CONFIG_KVM_GUEST
 +      extern char kvm_tmp[];
 +      return start < (unsigned long)kvm_tmp &&
 +              (unsigned long)&kvm_tmp[1024 * 1024] < end;
 +#else
 +      return 0;
 +#endif
 +}
 +
+ #if !defined(_CALL_ELF) || _CALL_ELF != 2
  #undef dereference_function_descriptor
  static inline void *dereference_function_descriptor(void *ptr)
  {
@@@ -60,6 -50,7 +61,7 @@@
                ptr = p;
        return ptr;
  }
+ #endif
  
  #endif
  
index ea4dc3a89c1f05d91390cdf9c498029617fdf7f4,35f8f2ffae8dccdd147a88bcbd48486ae6dc8fcc..babbeca6850f8f8fd090c59e8559d382243c32d4
@@@ -62,7 -62,7 +62,7 @@@ COMPAT_SYS_SPU(fcntl
  SYSCALL(ni_syscall)
  SYSCALL_SPU(setpgid)
  SYSCALL(ni_syscall)
- SYSX(sys_ni_syscall,sys_olduname, sys_olduname)
+ SYSX(sys_ni_syscall,sys_olduname,sys_olduname)
  SYSCALL_SPU(umask)
  SYSCALL_SPU(chroot)
  COMPAT_SYS(ustat)
@@@ -190,7 -190,7 +190,7 @@@ SYSCALL_SPU(getcwd
  SYSCALL_SPU(capget)
  SYSCALL_SPU(capset)
  COMPAT_SYS(sigaltstack)
- COMPAT_SYS_SPU(sendfile)
+ SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile)
  SYSCALL(ni_syscall)
  SYSCALL(ni_syscall)
  PPC_SYS(vfork)
@@@ -258,7 -258,7 +258,7 @@@ SYSCALL_SPU(tgkill
  COMPAT_SYS_SPU(utimes)
  COMPAT_SYS_SPU(statfs64)
  COMPAT_SYS_SPU(fstatfs64)
- SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64)
+ SYSX(sys_ni_syscall,ppc_fadvise64_64,ppc_fadvise64_64)
  PPC_SYS_SPU(rtas)
  OLDSYS(debug_setcontext)
  SYSCALL(ni_syscall)
@@@ -295,7 -295,7 +295,7 @@@ SYSCALL_SPU(mkdirat
  SYSCALL_SPU(mknodat)
  SYSCALL_SPU(fchownat)
  COMPAT_SYS_SPU(futimesat)
- SYSX_SPU(sys_newfstatat, sys_fstatat64, sys_fstatat64)
+ SYSX_SPU(sys_newfstatat,sys_fstatat64,sys_fstatat64)
  SYSCALL_SPU(unlinkat)
  SYSCALL_SPU(renameat)
  SYSCALL_SPU(linkat)
@@@ -361,4 -361,3 +361,4 @@@ SYSCALL(finit_module
  SYSCALL(ni_syscall) /* sys_kcmp */
  SYSCALL_SPU(sched_setattr)
  SYSCALL_SPU(sched_getattr)
 +SYSCALL_SPU(renameat2)
index 6c8a8c5a37a1cc761386ba89db9e6136fca5f3c2,4d9fab0710e6934329dac9ad34845054ffddf538..5f1048eaa5b6041d1194457bec16509dcabe7907
@@@ -9,26 -9,17 +9,13 @@@ struct device_node
  #ifdef CONFIG_NUMA
  
  /*
 - * Before going off node we want the VM to try and reclaim from the local
 - * node. It does this if the remote distance is larger than RECLAIM_DISTANCE.
 - * With the default REMOTE_DISTANCE of 20 and the default RECLAIM_DISTANCE of
 - * 30, we never reclaim and go off node straight away.
 - *
 - * To fix this we choose a smaller value of RECLAIM_DISTANCE.
 + * If zone_reclaim_mode is enabled, a RECLAIM_DISTANCE of 10 will mean that
 + * all zones on all nodes will be eligible for zone_reclaim().
   */
  #define RECLAIM_DISTANCE 10
  
  #include <asm/mmzone.h>
  
- static inline int cpu_to_node(int cpu)
- {
-       int nid;
-       nid = numa_cpu_lookup_table[cpu];
-       /*
-        * During early boot, the numa-cpu lookup table might not have been
-        * setup for all CPUs yet. In such cases, default to node 0.
-        */
-       return (nid < 0) ? 0 : nid;
- }
  #define parent_node(node)     (node)
  
  #define cpumask_of_node(node) ((node) == -1 ?                         \
index 93e1465c849681729b0b158097562246cf8cd685,cba2697406b70175a0fcb1721e83381c42497fcf..f5995a912213f7cf9ecc9c1a2c083bfa7697f3f4
@@@ -54,7 -54,6 +54,7 @@@
  #endif
  #if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S)
  #include <asm/kvm_book3s.h>
 +#include <asm/kvm_ppc.h>
  #endif
  
  #ifdef CONFIG_PPC32
@@@ -248,6 -247,7 +248,7 @@@ int main(void
  #endif
        DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
        DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
+       DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
        DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
        DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
        DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
        DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
        DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
        DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
 +#ifdef CONFIG_PPC_BOOK3S
        DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar));
 +#endif
        DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
        DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
        DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
        DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
 +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
 +      DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian));
 +#endif
  
        DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
        DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
        DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
        DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
        DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
 -      DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
  #endif
  #ifdef CONFIG_PPC_BOOK3S
        DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
        DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
        DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
        DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
 +      DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
        DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
        DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
        DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
        DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
        DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr));
 +      DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr));
        DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb));
        DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr));
        DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr));
  #ifdef CONFIG_PPC64
        SVCPU_FIELD(SVCPU_SLB, slb);
        SVCPU_FIELD(SVCPU_SLB_MAX, slb_max);
 +      SVCPU_FIELD(SVCPU_SHADOW_FSCR, shadow_fscr);
  #endif
  
        HSTATE_FIELD(HSTATE_HOST_R1, host_r1);
  #ifdef CONFIG_PPC_BOOK3S_64
        HSTATE_FIELD(HSTATE_CFAR, cfar);
        HSTATE_FIELD(HSTATE_PPR, ppr);
 +      HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
  #endif /* CONFIG_PPC_BOOK3S_64 */
  
  #else /* CONFIG_PPC_BOOK3S */
index 60d1a2259dbed01e15b58340c62119b4fb56b085,2d7eeae5b4d04d35560a47f4b68a6fac9db68f84..59e4ba74975d9cf6be38ffa9312d2cbe6572a829
@@@ -30,13 -30,14 +30,14 @@@ extern u32 epapr_ev_idle_start[]
  #endif
  
  bool epapr_paravirt_enabled;
+ static bool __maybe_unused epapr_has_idle;
  
  static int __init early_init_dt_scan_epapr(unsigned long node,
                                           const char *uname,
                                           int depth, void *data)
  {
        const u32 *insts;
 -      unsigned long len;
 +      int len;
        int i;
  
        insts = of_get_flat_dt_prop(node, "hcall-instructions", &len);
@@@ -56,7 -57,7 +57,7 @@@
  
  #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
        if (of_get_flat_dt_prop(node, "has-idle", NULL))
-               ppc_md.power_save = epapr_ev_idle;
+               epapr_has_idle = true;
  #endif
  
        epapr_paravirt_enabled = true;
@@@ -71,3 -72,14 +72,14 @@@ int __init epapr_paravirt_early_init(vo
        return 0;
  }
  
+ static int __init epapr_idle_init(void)
+ {
+ #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
+       if (epapr_has_idle)
+               ppc_md.power_save = epapr_ev_idle;
+ #endif
+       return 0;
+ }
+ postcore_initcall(epapr_idle_init);
index 7213d930918d736b834e5eccb45b66b4fce630c1,d55e8986730a087f69f3596ca8bb85b398a92477..742694c1d85238fe380e4ceecd7443506bb8d0b7
@@@ -55,9 -55,9 +55,9 @@@ int crash_mem_ranges
  int __init early_init_dt_scan_fw_dump(unsigned long node,
                        const char *uname, int depth, void *data)
  {
 -      __be32 *sections;
 +      const __be32 *sections;
        int i, num_sections;
 -      unsigned long size;
 +      int size;
        const int *token;
  
        if (depth != 1 || strcmp(uname, "rtas") != 0)
@@@ -69,7 -69,7 +69,7 @@@
         */
        token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
        if (!token)
-               return 0;
+               return 1;
  
        fw_dump.fadump_supported = 1;
        fw_dump.ibm_configure_kernel_dump = *token;
@@@ -92,7 -92,7 +92,7 @@@
                                        &size);
  
        if (!sections)
-               return 0;
+               return 1;
  
        num_sections = size / (3 * sizeof(u32));
  
                        break;
                }
        }
        return 1;
  }
  
@@@ -645,7 -646,7 +646,7 @@@ static int __init fadump_build_cpu_note
                }
                /* Lower 4 bytes of reg_value contains logical cpu id */
                cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
-               if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
+               if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
                        SKIP_TO_NEXT_CPU(reg_entry);
                        continue;
                }
        }
        fadump_final_note(note_buf);
  
-       pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+       if (fdh) {
+               pr_debug("Updating elfcore header (%llx) with cpu notes\n",
                                                        fdh->elfcorehdr_addr);
-       fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+               fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+       }
        return 0;
  
  error_out:
index 24d342e917903f58aec6e37d56616aab0e1e029d,add166aa806a13e124933e49b35ccfdde305aa36..b49c72fd7f16510a8969f23b81848545d2a6b3f5
@@@ -21,6 -21,7 +21,7 @@@
  #include <linux/string.h>
  #include <linux/init.h>
  #include <linux/bootmem.h>
+ #include <linux/delay.h>
  #include <linux/export.h>
  #include <linux/of_address.h>
  #include <linux/of_pci.h>
@@@ -120,6 -121,25 +121,25 @@@ resource_size_t pcibios_window_alignmen
        return 1;
  }
  
+ void pcibios_reset_secondary_bus(struct pci_dev *dev)
+ {
+       u16 ctrl;
+       if (ppc_md.pcibios_reset_secondary_bus) {
+               ppc_md.pcibios_reset_secondary_bus(dev);
+               return;
+       }
+       pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+       ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+       pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+       msleep(2);
+       ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+       pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+       ssleep(1);
+ }
  static resource_size_t pcibios_io_size(const struct pci_controller *hose)
  {
  #ifdef CONFIG_PPC64
@@@ -201,6 -221,26 +221,6 @@@ struct pci_controller* pci_find_hose_fo
        return NULL;
  }
  
 -static ssize_t pci_show_devspec(struct device *dev,
 -              struct device_attribute *attr, char *buf)
 -{
 -      struct pci_dev *pdev;
 -      struct device_node *np;
 -
 -      pdev = to_pci_dev (dev);
 -      np = pci_device_to_OF_node(pdev);
 -      if (np == NULL || np->full_name == NULL)
 -              return 0;
 -      return sprintf(buf, "%s", np->full_name);
 -}
 -static DEVICE_ATTR(devspec, S_IRUGO, pci_show_devspec, NULL);
 -
 -/* Add sysfs properties */
 -int pcibios_add_platform_entries(struct pci_dev *pdev)
 -{
 -      return device_create_file(&pdev->dev, &dev_attr_devspec);
 -}
 -
  /*
   * Reads the interrupt pin to determine if interrupt is use by card.
   * If the interrupt is used, then gets the interrupt line from the
@@@ -646,60 -686,36 +666,36 @@@ void pci_resource_to_user(const struct 
  void pci_process_bridge_OF_ranges(struct pci_controller *hose,
                                  struct device_node *dev, int primary)
  {
-       const __be32 *ranges;
-       int rlen;
-       int pna = of_n_addr_cells(dev);
-       int np = pna + 5;
        int memno = 0;
-       u32 pci_space;
-       unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size;
        struct resource *res;
+       struct of_pci_range range;
+       struct of_pci_range_parser parser;
  
        printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
               dev->full_name, primary ? "(primary)" : "");
  
-       /* Get ranges property */
-       ranges = of_get_property(dev, "ranges", &rlen);
-       if (ranges == NULL)
+       /* Check for ranges property */
+       if (of_pci_range_parser_init(&parser, dev))
                return;
  
        /* Parse it */
-       while ((rlen -= np * 4) >= 0) {
-               /* Read next ranges element */
-               pci_space = of_read_number(ranges, 1);
-               pci_addr = of_read_number(ranges + 1, 2);
-               cpu_addr = of_translate_address(dev, ranges + 3);
-               size = of_read_number(ranges + pna + 3, 2);
-               ranges += np;
+       for_each_of_pci_range(&parser, &range) {
                /* If we failed translation or got a zero-sized region
                 * (some FW try to feed us with non sensical zero sized regions
                 * such as power3 which look like some kind of attempt at exposing
                 * the VGA memory hole)
                 */
-               if (cpu_addr == OF_BAD_ADDR || size == 0)
+               if (range.cpu_addr == OF_BAD_ADDR || range.size == 0)
                        continue;
  
-               /* Now consume following elements while they are contiguous */
-               for (; rlen >= np * sizeof(u32);
-                    ranges += np, rlen -= np * 4) {
-                       if (of_read_number(ranges, 1) != pci_space)
-                               break;
-                       pci_next = of_read_number(ranges + 1, 2);
-                       cpu_next = of_translate_address(dev, ranges + 3);
-                       if (pci_next != pci_addr + size ||
-                           cpu_next != cpu_addr + size)
-                               break;
-                       size += of_read_number(ranges + pna + 3, 2);
-               }
                /* Act based on address space type */
                res = NULL;
-               switch ((pci_space >> 24) & 0x3) {
-               case 1:         /* PCI IO space */
+               switch (range.flags & IORESOURCE_TYPE_BITS) {
+               case IORESOURCE_IO:
                        printk(KERN_INFO
                               "  IO 0x%016llx..0x%016llx -> 0x%016llx\n",
-                              cpu_addr, cpu_addr + size - 1, pci_addr);
+                              range.cpu_addr, range.cpu_addr + range.size - 1,
+                              range.pci_addr);
  
                        /* We support only one IO range */
                        if (hose->pci_io_size) {
                        }
  #ifdef CONFIG_PPC32
                        /* On 32 bits, limit I/O space to 16MB */
-                       if (size > 0x01000000)
-                               size = 0x01000000;
+                       if (range.size > 0x01000000)
+                               range.size = 0x01000000;
  
                        /* 32 bits needs to map IOs here */
-                       hose->io_base_virt = ioremap(cpu_addr, size);
+                       hose->io_base_virt = ioremap(range.cpu_addr,
+                                               range.size);
  
                        /* Expect trouble if pci_addr is not 0 */
                        if (primary)
                        /* pci_io_size and io_base_phys always represent IO
                         * space starting at 0 so we factor in pci_addr
                         */
-                       hose->pci_io_size = pci_addr + size;
-                       hose->io_base_phys = cpu_addr - pci_addr;
+                       hose->pci_io_size = range.pci_addr + range.size;
+                       hose->io_base_phys = range.cpu_addr - range.pci_addr;
  
                        /* Build resource */
                        res = &hose->io_resource;
-                       res->flags = IORESOURCE_IO;
-                       res->start = pci_addr;
+                       range.cpu_addr = range.pci_addr;
                        break;
-               case 2:         /* PCI Memory space */
-               case 3:         /* PCI 64 bits Memory space */
+               case IORESOURCE_MEM:
                        printk(KERN_INFO
                               " MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
-                              cpu_addr, cpu_addr + size - 1, pci_addr,
-                              (pci_space & 0x40000000) ? "Prefetch" : "");
+                              range.cpu_addr, range.cpu_addr + range.size - 1,
+                              range.pci_addr,
+                              (range.pci_space & 0x40000000) ?
+                              "Prefetch" : "");
  
                        /* We support only 3 memory ranges */
                        if (memno >= 3) {
                                continue;
                        }
                        /* Handles ISA memory hole space here */
-                       if (pci_addr == 0) {
+                       if (range.pci_addr == 0) {
                                if (primary || isa_mem_base == 0)
-                                       isa_mem_base = cpu_addr;
-                               hose->isa_mem_phys = cpu_addr;
-                               hose->isa_mem_size = size;
+                                       isa_mem_base = range.cpu_addr;
+                               hose->isa_mem_phys = range.cpu_addr;
+                               hose->isa_mem_size = range.size;
                        }
  
                        /* Build resource */
-                       hose->mem_offset[memno] = cpu_addr - pci_addr;
+                       hose->mem_offset[memno] = range.cpu_addr -
+                                                       range.pci_addr;
                        res = &hose->mem_resources[memno++];
-                       res->flags = IORESOURCE_MEM;
-                       if (pci_space & 0x40000000)
-                               res->flags |= IORESOURCE_PREFETCH;
-                       res->start = cpu_addr;
                        break;
                }
                if (res != NULL) {
-                       res->name = dev->full_name;
-                       res->end = res->start + size - 1;
-                       res->parent = NULL;
-                       res->sibling = NULL;
-                       res->child = NULL;
+                       of_pci_range_to_resource(&range, dev, res);
                }
        }
  }
index 059e244484fe95cc6cab46729d7771b052bb896a,ea6470c21f4e4e86152093c6126990a73544bf62..44562aa97f1611f5bcf1a26b56a425efa0c7a33e
@@@ -304,6 -304,9 +304,9 @@@ static struct pci_dev *of_scan_pci_dev(
        struct pci_dev *dev = NULL;
        const __be32 *reg;
        int reglen, devfn;
+ #ifdef CONFIG_EEH
+       struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+ #endif
  
        pr_debug("  * %s\n", dn->full_name);
        if (!of_device_is_available(dn))
                return dev;
        }
  
+       /* Device removed permanently ? */
+ #ifdef CONFIG_EEH
+       if (edev && (edev->mode & EEH_DEV_REMOVED))
+               return NULL;
+ #endif
        /* create a new pci_dev for this device */
        dev = of_create_pci_dev(dn, bus, devfn);
        if (!dev)
@@@ -362,7 -371,8 +371,7 @@@ static void __of_scan_bus(struct device
  
        /* Now scan child busses */
        list_for_each_entry(dev, &bus->devices, bus_list) {
 -              if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
 -                  dev->hdr_type == PCI_HEADER_TYPE_CARDBUS) {
 +              if (pci_is_bridge(dev)) {
                        of_scan_pci_bridge(dev);
                }
        }
index 10ffffef041413dc6da76a7c9564e4b79804c72a,6af946e9a9849ee6b3681326a9fadb5b6ca20821..7753af2d261381bcc21bf3e21f8ee4f4880847e9
@@@ -36,6 -36,7 +36,7 @@@
  #include <linux/atomic.h>
  #include <asm/irq.h>
  #include <asm/hw_irq.h>
+ #include <asm/kvm_ppc.h>
  #include <asm/page.h>
  #include <asm/pgtable.h>
  #include <asm/prom.h>
@@@ -390,6 -391,7 +391,7 @@@ void smp_prepare_boot_cpu(void
  #ifdef CONFIG_PPC64
        paca[boot_cpuid].__current = current;
  #endif
+       set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
        current_set[boot_cpuid] = task_thread_info(current);
  }
  
@@@ -457,38 -459,9 +459,9 @@@ int generic_check_cpu_restart(unsigned 
        return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
  }
  
- static atomic_t secondary_inhibit_count;
- /*
-  * Don't allow secondary CPU threads to come online
-  */
- void inhibit_secondary_onlining(void)
+ static bool secondaries_inhibited(void)
  {
-       /*
-        * This makes secondary_inhibit_count stable during cpu
-        * online/offline operations.
-        */
-       get_online_cpus();
-       atomic_inc(&secondary_inhibit_count);
-       put_online_cpus();
- }
- EXPORT_SYMBOL_GPL(inhibit_secondary_onlining);
- /*
-  * Allow secondary CPU threads to come online again
-  */
- void uninhibit_secondary_onlining(void)
- {
-       get_online_cpus();
-       atomic_dec(&secondary_inhibit_count);
-       put_online_cpus();
- }
- EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining);
- static int secondaries_inhibited(void)
- {
-       return atomic_read(&secondary_inhibit_count);
+       return kvm_hv_mode_active();
  }
  
  #else /* HOTPLUG_CPU */
@@@ -517,7 -490,7 +490,7 @@@ int __cpu_up(unsigned int cpu, struct t
         * Don't allow secondary threads to come online if inhibited
         */
        if (threads_per_core > 1 && secondaries_inhibited() &&
-           cpu % threads_per_core != 0)
+           cpu_thread_in_subcore(cpu))
                return -EBUSY;
  
        if (smp_ops == NULL ||
@@@ -750,6 -723,12 +723,12 @@@ void start_secondary(void *unused
        }
        traverse_core_siblings(cpu, true);
  
+       /*
+        * numa_node_id() works after this.
+        */
+       set_numa_node(numa_cpu_lookup_table[cpu]);
+       set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
        smp_wmb();
        notify_cpu_starting(cpu);
        set_cpu_online(cpu, true);
@@@ -766,28 -745,6 +745,28 @@@ int setup_profiling_timer(unsigned int 
        return 0;
  }
  
 +#ifdef CONFIG_SCHED_SMT
 +/* cpumask of CPUs with asymmetric SMT dependency */
 +static const int powerpc_smt_flags(void)
 +{
 +      int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
 +
 +      if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
 +              printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
 +              flags |= SD_ASYM_PACKING;
 +      }
 +      return flags;
 +}
 +#endif
 +
 +static struct sched_domain_topology_level powerpc_topology[] = {
 +#ifdef CONFIG_SCHED_SMT
 +      { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
 +#endif
 +      { cpu_cpu_mask, SD_INIT_NAME(DIE) },
 +      { NULL, },
 +};
 +
  void __init smp_cpus_done(unsigned int max_cpus)
  {
        cpumask_var_t old_mask;
  
        dump_numa_cpu_topology();
  
 -}
 +      set_sched_topology(powerpc_topology);
  
 -int arch_sd_sibling_asym_packing(void)
 -{
 -      if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
 -              printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
 -              return SD_ASYM_PACKING;
 -      }
 -      return 0;
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
index aba05bbb3e744b6e516afbe9093923793762040d,5e86f28c9d2f490cba11dd6d311371e80ae60dd8..7a12edbb61e7c2c9f6851bc0ef01e6b0b398ed83
@@@ -879,9 -879,24 +879,9 @@@ static int kvmppc_get_one_reg_hv(struc
        case KVM_REG_PPC_IAMR:
                *val = get_reg_val(id, vcpu->arch.iamr);
                break;
 -      case KVM_REG_PPC_FSCR:
 -              *val = get_reg_val(id, vcpu->arch.fscr);
 -              break;
        case KVM_REG_PPC_PSPB:
                *val = get_reg_val(id, vcpu->arch.pspb);
                break;
 -      case KVM_REG_PPC_EBBHR:
 -              *val = get_reg_val(id, vcpu->arch.ebbhr);
 -              break;
 -      case KVM_REG_PPC_EBBRR:
 -              *val = get_reg_val(id, vcpu->arch.ebbrr);
 -              break;
 -      case KVM_REG_PPC_BESCR:
 -              *val = get_reg_val(id, vcpu->arch.bescr);
 -              break;
 -      case KVM_REG_PPC_TAR:
 -              *val = get_reg_val(id, vcpu->arch.tar);
 -              break;
        case KVM_REG_PPC_DPDES:
                *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
                break;
@@@ -1076,9 -1091,24 +1076,9 @@@ static int kvmppc_set_one_reg_hv(struc
        case KVM_REG_PPC_IAMR:
                vcpu->arch.iamr = set_reg_val(id, *val);
                break;
 -      case KVM_REG_PPC_FSCR:
 -              vcpu->arch.fscr = set_reg_val(id, *val);
 -              break;
        case KVM_REG_PPC_PSPB:
                vcpu->arch.pspb = set_reg_val(id, *val);
                break;
 -      case KVM_REG_PPC_EBBHR:
 -              vcpu->arch.ebbhr = set_reg_val(id, *val);
 -              break;
 -      case KVM_REG_PPC_EBBRR:
 -              vcpu->arch.ebbrr = set_reg_val(id, *val);
 -              break;
 -      case KVM_REG_PPC_BESCR:
 -              vcpu->arch.bescr = set_reg_val(id, *val);
 -              break;
 -      case KVM_REG_PPC_TAR:
 -              vcpu->arch.tar = set_reg_val(id, *val);
 -              break;
        case KVM_REG_PPC_DPDES:
                vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
                break;
@@@ -1236,7 -1266,7 +1236,7 @@@ static struct kvm_vcpu *kvmppc_core_vcp
        int core;
        struct kvmppc_vcore *vcore;
  
-       core = id / threads_per_core;
+       core = id / threads_per_subcore;
        if (core >= KVM_MAX_VCORES)
                goto out;
  
                goto free_vcpu;
  
        vcpu->arch.shared = &vcpu->arch.shregs;
 +#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 +      /*
 +       * The shared struct is never shared on HV,
 +       * so we can always use host endianness
 +       */
 +#ifdef __BIG_ENDIAN__
 +      vcpu->arch.shared_big_endian = true;
 +#else
 +      vcpu->arch.shared_big_endian = false;
 +#endif
 +#endif
        vcpu->arch.mmcr[0] = MMCR0_FC;
        vcpu->arch.ctrl = CTRL_RUNLATCH;
        /* default to host PVR, since we can't spoof it */
                        init_waitqueue_head(&vcore->wq);
                        vcore->preempt_tb = TB_NIL;
                        vcore->lpcr = kvm->arch.lpcr;
-                       vcore->first_vcpuid = core * threads_per_core;
+                       vcore->first_vcpuid = core * threads_per_subcore;
                        vcore->kvm = kvm;
                }
                kvm->arch.vcores[core] = vcore;
@@@ -1476,16 -1495,19 +1476,19 @@@ static void kvmppc_wait_for_nap(struct 
  static int on_primary_thread(void)
  {
        int cpu = smp_processor_id();
-       int thr = cpu_thread_in_core(cpu);
+       int thr;
  
-       if (thr)
+       /* Are we on a primary subcore? */
+       if (cpu_thread_in_subcore(cpu))
                return 0;
-       while (++thr < threads_per_core)
+       thr = 0;
+       while (++thr < threads_per_subcore)
                if (cpu_online(cpu + thr))
                        return 0;
  
        /* Grab all hw threads so they can't go into the kernel */
-       for (thr = 1; thr < threads_per_core; ++thr) {
+       for (thr = 1; thr < threads_per_subcore; ++thr) {
                if (kvmppc_grab_hwthread(cpu + thr)) {
                        /* Couldn't grab one; let the others go */
                        do {
@@@ -1544,15 -1566,18 +1547,18 @@@ static void kvmppc_run_core(struct kvmp
        }
  
        /*
-        * Make sure we are running on thread 0, and that
-        * secondary threads are offline.
+        * Make sure we are running on primary threads, and that secondary
+        * threads are offline.  Also check if the number of threads in this
+        * guest are greater than the current system threads per guest.
         */
-       if (threads_per_core > 1 && !on_primary_thread()) {
+       if ((threads_per_core > 1) &&
+           ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
                list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                        vcpu->arch.ret = -EBUSY;
                goto out;
        }
  
        vc->pcpu = smp_processor_id();
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                kvmppc_start_thread(vcpu);
        /* wait for secondary threads to finish writing their state to memory */
        if (vc->nap_count < vc->n_woken)
                kvmppc_wait_for_nap(vc);
-       for (i = 0; i < threads_per_core; ++i)
+       for (i = 0; i < threads_per_subcore; ++i)
                kvmppc_release_hwthread(vc->pcpu + i);
        /* prevent other vcpu threads from doing kvmppc_start_thread() now */
        vc->vcore_state = VCORE_EXITING;
@@@ -1930,13 -1955,6 +1936,13 @@@ static void kvmppc_add_seg_page_size(st
         * support pte_enc here
         */
        (*sps)->enc[0].pte_enc = def->penc[linux_psize];
 +      /*
 +       * Add 16MB MPSS support if host supports it
 +       */
 +      if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
 +              (*sps)->enc[1].page_shift = 24;
 +              (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
 +      }
        (*sps)++;
  }
  
@@@ -2305,10 -2323,10 +2311,10 @@@ static int kvmppc_core_init_vm_hv(struc
        spin_lock_init(&kvm->arch.slot_phys_lock);
  
        /*
-        * Don't allow secondary CPU threads to come online
-        * while any KVM VMs exist.
+        * Track that we now have a HV mode VM active. This blocks secondary
+        * CPU threads from coming online.
         */
-       inhibit_secondary_onlining();
+       kvm_hv_vm_activated();
  
        return 0;
  }
@@@ -2324,7 -2342,7 +2330,7 @@@ static void kvmppc_free_vcores(struct k
  
  static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
  {
-       uninhibit_secondary_onlining();
+       kvm_hv_vm_deactivated();
  
        kvmppc_free_vcores(kvm);
        if (kvm->arch.rma) {
index 974793435a2e20e82a22780b3e7ad330c17ccf40,12f4ce5b4f7891e8779e86978bd915a9bf1ebbbe..77356fd25ccc96a8b4fd17746ea08edc0da16060
@@@ -86,12 -86,6 +86,12 @@@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S
        lbz     r4, LPPACA_PMCINUSE(r3)
        cmpwi   r4, 0
        beq     23f                     /* skip if not */
 +BEGIN_FTR_SECTION
 +      ld      r3, HSTATE_MMCR(r13)
 +      andi.   r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
 +      cmpwi   r4, MMCR0_PMAO
 +      beql    kvmppc_fix_pmao
 +END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
        lwz     r3, HSTATE_PMC(r13)
        lwz     r4, HSTATE_PMC + 4(r13)
        lwz     r5, HSTATE_PMC + 8(r13)
@@@ -292,8 -286,7 +292,7 @@@ kvm_start_guest
        beq     kvm_no_guest
  
        /* Set HSTATE_DSCR(r13) to something sensible */
-       LOAD_REG_ADDR(r6, dscr_default)
-       ld      r6, 0(r6)
+       ld      r6, PACA_DSCR(r13)
        std     r6, HSTATE_DSCR(r13)
  
        bl      kvmppc_hv_entry
@@@ -743,12 -736,6 +742,12 @@@ skip_tm
        sldi    r3, r3, 31              /* MMCR0_FC (freeze counters) bit */
        mtspr   SPRN_MMCR0, r3          /* freeze all counters, disable ints */
        isync
 +BEGIN_FTR_SECTION
 +      ld      r3, VCPU_MMCR(r4)
 +      andi.   r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
 +      cmpwi   r5, MMCR0_PMAO
 +      beql    kvmppc_fix_pmao
 +END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
        lwz     r3, VCPU_PMC(r4)        /* always load up guest PMU registers */
        lwz     r5, VCPU_PMC + 4(r4)    /* to prevent information leak */
        lwz     r6, VCPU_PMC + 8(r4)
@@@ -1335,110 -1322,6 +1334,110 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206
        mr      r3, r9
        bl      kvmppc_save_fp
  
 +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 +BEGIN_FTR_SECTION
 +      b       2f
 +END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 +      /* Turn on TM. */
 +      mfmsr   r8
 +      li      r0, 1
 +      rldimi  r8, r0, MSR_TM_LG, 63-MSR_TM_LG
 +      mtmsrd  r8
 +
 +      ld      r5, VCPU_MSR(r9)
 +      rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
 +      beq     1f      /* TM not active in guest. */
 +
 +      li      r3, TM_CAUSE_KVM_RESCHED
 +
 +      /* Clear the MSR RI since r1, r13 are all going to be foobar. */
 +      li      r5, 0
 +      mtmsrd  r5, 1
 +
 +      /* All GPRs are volatile at this point. */
 +      TRECLAIM(R3)
 +
 +      /* Temporarily store r13 and r9 so we have some regs to play with */
 +      SET_SCRATCH0(r13)
 +      GET_PACA(r13)
 +      std     r9, PACATMSCRATCH(r13)
 +      ld      r9, HSTATE_KVM_VCPU(r13)
 +
 +      /* Get a few more GPRs free. */
 +      std     r29, VCPU_GPRS_TM(29)(r9)
 +      std     r30, VCPU_GPRS_TM(30)(r9)
 +      std     r31, VCPU_GPRS_TM(31)(r9)
 +
 +      /* Save away PPR and DSCR soon so don't run with user values. */
 +      mfspr   r31, SPRN_PPR
 +      HMT_MEDIUM
 +      mfspr   r30, SPRN_DSCR
 +      ld      r29, HSTATE_DSCR(r13)
 +      mtspr   SPRN_DSCR, r29
 +
 +      /* Save all but r9, r13 & r29-r31 */
 +      reg = 0
 +      .rept   29
 +      .if (reg != 9) && (reg != 13)
 +      std     reg, VCPU_GPRS_TM(reg)(r9)
 +      .endif
 +      reg = reg + 1
 +      .endr
 +      /* ... now save r13 */
 +      GET_SCRATCH0(r4)
 +      std     r4, VCPU_GPRS_TM(13)(r9)
 +      /* ... and save r9 */
 +      ld      r4, PACATMSCRATCH(r13)
 +      std     r4, VCPU_GPRS_TM(9)(r9)
 +
 +      /* Reload stack pointer and TOC. */
 +      ld      r1, HSTATE_HOST_R1(r13)
 +      ld      r2, PACATOC(r13)
 +
 +      /* Set MSR RI now we have r1 and r13 back. */
 +      li      r5, MSR_RI
 +      mtmsrd  r5, 1
 +
 +      /* Save away checkpointed SPRs. */
 +      std     r31, VCPU_PPR_TM(r9)
 +      std     r30, VCPU_DSCR_TM(r9)
 +      mflr    r5
 +      mfcr    r6
 +      mfctr   r7
 +      mfspr   r8, SPRN_AMR
 +      mfspr   r10, SPRN_TAR
 +      std     r5, VCPU_LR_TM(r9)
 +      stw     r6, VCPU_CR_TM(r9)
 +      std     r7, VCPU_CTR_TM(r9)
 +      std     r8, VCPU_AMR_TM(r9)
 +      std     r10, VCPU_TAR_TM(r9)
 +
 +      /* Restore r12 as trap number. */
 +      lwz     r12, VCPU_TRAP(r9)
 +
 +      /* Save FP/VSX. */
 +      addi    r3, r9, VCPU_FPRS_TM
 +      bl      .store_fp_state
 +      addi    r3, r9, VCPU_VRS_TM
 +      bl      .store_vr_state
 +      mfspr   r6, SPRN_VRSAVE
 +      stw     r6, VCPU_VRSAVE_TM(r9)
 +1:
 +      /*
 +       * We need to save these SPRs after the treclaim so that the software
 +       * error code is recorded correctly in the TEXASR.  Also the user may
 +       * change these outside of a transaction, so they must always be
 +       * context switched.
 +       */
 +      mfspr   r5, SPRN_TFHAR
 +      mfspr   r6, SPRN_TFIAR
 +      mfspr   r7, SPRN_TEXASR
 +      std     r5, VCPU_TFHAR(r9)
 +      std     r6, VCPU_TFIAR(r9)
 +      std     r7, VCPU_TEXASR(r9)
 +2:
 +#endif
 +
        /* Increment yield count if they have a VPA */
        ld      r8, VCPU_VPA(r9)        /* do they have a VPA? */
        cmpdi   r8, 0
  25:
        /* Save PMU registers if requested */
        /* r8 and cr0.eq are live here */
 +BEGIN_FTR_SECTION
 +      /*
 +       * POWER8 seems to have a hardware bug where setting
 +       * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
 +       * when some counters are already negative doesn't seem
 +       * to cause a performance monitor alert (and hence interrupt).
 +       * The effect of this is that when saving the PMU state,
 +       * if there is no PMU alert pending when we read MMCR0
 +       * before freezing the counters, but one becomes pending
 +       * before we read the counters, we lose it.
 +       * To work around this, we need a way to freeze the counters
 +       * before reading MMCR0.  Normally, freezing the counters
 +       * is done by writing MMCR0 (to set MMCR0[FC]) which
 +       * unavoidably writes MMCR0[PMAO] as well.  On POWER8,
 +       * we can also freeze the counters using MMCR2, by writing
 +       * 1s to all the counter freeze condition bits (there are
 +       * 9 bits each for 6 counters).
 +       */
 +      li      r3, -1                  /* set all freeze bits */
 +      clrrdi  r3, r3, 10
 +      mfspr   r10, SPRN_MMCR2
 +      mtspr   SPRN_MMCR2, r3
 +      isync
 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        li      r3, 1
        sldi    r3, r3, 31              /* MMCR0_FC (freeze counters) bit */
        mfspr   r4, SPRN_MMCR0          /* save MMCR0 */
@@@ -1498,9 -1357,6 +1497,9 @@@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206
        std     r4, VCPU_MMCR(r9)
        std     r5, VCPU_MMCR + 8(r9)
        std     r6, VCPU_MMCR + 16(r9)
 +BEGIN_FTR_SECTION
 +      std     r10, VCPU_MMCR + 24(r9)
 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        std     r7, VCPU_SIAR(r9)
        std     r8, VCPU_SDAR(r9)
        mfspr   r3, SPRN_PMC1
@@@ -1524,10 -1380,12 +1523,10 @@@ BEGIN_FTR_SECTIO
        stw     r11, VCPU_PMC + 28(r9)
  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
  BEGIN_FTR_SECTION
 -      mfspr   r4, SPRN_MMCR2
        mfspr   r5, SPRN_SIER
        mfspr   r6, SPRN_SPMC1
        mfspr   r7, SPRN_SPMC2
        mfspr   r8, SPRN_MMCRS
 -      std     r4, VCPU_MMCR + 24(r9)
        std     r5, VCPU_SIER(r9)
        stw     r6, VCPU_PMC + 24(r9)
        stw     r7, VCPU_PMC + 28(r9)
@@@ -1799,7 -1657,7 +1798,7 @@@ kvmppc_hdsi
        /* Search the hash table. */
        mr      r3, r9                  /* vcpu pointer */
        li      r7, 1                   /* data fault */
-       bl      .kvmppc_hpte_hv_fault
+       bl      kvmppc_hpte_hv_fault
        ld      r9, HSTATE_KVM_VCPU(r13)
        ld      r10, VCPU_PC(r9)
        ld      r11, VCPU_MSR(r9)
@@@ -1873,7 -1731,7 +1872,7 @@@ kvmppc_hisi
        mr      r4, r10
        mr      r6, r11
        li      r7, 0                   /* instruction fault */
-       bl      .kvmppc_hpte_hv_fault
+       bl      kvmppc_hpte_hv_fault
        ld      r9, HSTATE_KVM_VCPU(r13)
        ld      r10, VCPU_PC(r9)
        ld      r11, VCPU_MSR(r9)
@@@ -1947,16 -1805,16 +1946,16 @@@ hcall_real_fallback
        .globl  hcall_real_table
  hcall_real_table:
        .long   0               /* 0 - unused */
-       .long   .kvmppc_h_remove - hcall_real_table
-       .long   .kvmppc_h_enter - hcall_real_table
-       .long   .kvmppc_h_read - hcall_real_table
+       .long   DOTSYM(kvmppc_h_remove) - hcall_real_table
+       .long   DOTSYM(kvmppc_h_enter) - hcall_real_table
+       .long   DOTSYM(kvmppc_h_read) - hcall_real_table
        .long   0               /* 0x10 - H_CLEAR_MOD */
        .long   0               /* 0x14 - H_CLEAR_REF */
-       .long   .kvmppc_h_protect - hcall_real_table
-       .long   .kvmppc_h_get_tce - hcall_real_table
-       .long   .kvmppc_h_put_tce - hcall_real_table
+       .long   DOTSYM(kvmppc_h_protect) - hcall_real_table
+       .long   DOTSYM(kvmppc_h_get_tce) - hcall_real_table
+       .long   DOTSYM(kvmppc_h_put_tce) - hcall_real_table
        .long   0               /* 0x24 - H_SET_SPRG0 */
-       .long   .kvmppc_h_set_dabr - hcall_real_table
+       .long   DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
        .long   0               /* 0x2c */
        .long   0               /* 0x30 */
        .long   0               /* 0x34 */
        .long   0               /* 0x5c */
        .long   0               /* 0x60 */
  #ifdef CONFIG_KVM_XICS
-       .long   .kvmppc_rm_h_eoi - hcall_real_table
-       .long   .kvmppc_rm_h_cppr - hcall_real_table
-       .long   .kvmppc_rm_h_ipi - hcall_real_table
+       .long   DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
+       .long   DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
+       .long   DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
        .long   0               /* 0x70 - H_IPOLL */
-       .long   .kvmppc_rm_h_xirr - hcall_real_table
+       .long   DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
  #else
        .long   0               /* 0x64 - H_EOI */
        .long   0               /* 0x68 - H_CPPR */
        .long   0               /* 0xd4 */
        .long   0               /* 0xd8 */
        .long   0               /* 0xdc */
-       .long   .kvmppc_h_cede - hcall_real_table
+       .long   DOTSYM(kvmppc_h_cede) - hcall_real_table
        .long   0               /* 0xe4 */
        .long   0               /* 0xe8 */
        .long   0               /* 0xec */
        .long   0               /* 0x118 */
        .long   0               /* 0x11c */
        .long   0               /* 0x120 */
-       .long   .kvmppc_h_bulk_remove - hcall_real_table
+       .long   DOTSYM(kvmppc_h_bulk_remove) - hcall_real_table
        .long   0               /* 0x128 */
        .long   0               /* 0x12c */
        .long   0               /* 0x130 */
-       .long   .kvmppc_h_set_xdabr - hcall_real_table
+       .long   DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
  hcall_real_table_end:
  
  ignore_hdec:
@@@ -2256,7 -2114,7 +2255,7 @@@ kvm_cede_exit
        /* Try to handle a machine check in real mode */
  machine_check_realmode:
        mr      r3, r9          /* get vcpu pointer */
-       bl      .kvmppc_realmode_machine_check
+       bl      kvmppc_realmode_machine_check
        nop
        cmpdi   r3, 0           /* continue exiting from guest? */
        ld      r9, HSTATE_KVM_VCPU(r13)
        beq     mc_cont
        /* If not, deliver a machine check.  SRR0/1 are already set */
        li      r10, BOOK3S_INTERRUPT_MACHINE_CHECK
 +      ld      r11, VCPU_MSR(r9)
        bl      kvmppc_msr_interrupt
        b       fast_interrupt_c_return
  
@@@ -2469,21 -2326,3 +2468,21 @@@ kvmppc_msr_interrupt
        li      r0, 1
  1:    rldimi  r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
        blr
 +
 +/*
 + * This works around a hardware bug on POWER8E processors, where
 + * writing a 1 to the MMCR0[PMAO] bit doesn't generate a
 + * performance monitor interrupt.  Instead, when we need to have
 + * an interrupt pending, we have to arrange for a counter to overflow.
 + */
 +kvmppc_fix_pmao:
 +      li      r3, 0
 +      mtspr   SPRN_MMCR2, r3
 +      lis     r3, (MMCR0_PMXE | MMCR0_FCECE)@h
 +      ori     r3, r3, MMCR0_PMCjCE | MMCR0_C56RUN
 +      mtspr   SPRN_MMCR0, r3
 +      lis     r3, 0x7fff
 +      ori     r3, r3, 0xffff
 +      mtspr   SPRN_PMC6, r3
 +      isync
 +      blr
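
Rendered as C, the fixup above is short (a sketch; mtspr() and isync() are the kernel's
usual SPR-access helpers): clear MMCR2, enable overflow interrupts with
freeze-on-overflow, and park PMC6 one event short of its 2^31 overflow bit, so the very
next counted event raises the pending PMI that setting MMCR0[PMAO] should have produced.

        mtspr(SPRN_MMCR2, 0);
        mtspr(SPRN_MMCR0, MMCR0_PMXE | MMCR0_FCECE | MMCR0_PMCjCE | MMCR0_C56RUN);
        mtspr(SPRN_PMC6, 0x7fffffff);   /* one below the overflow threshold */
        isync();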
diff --combined arch/powerpc/kvm/powerpc.c
index bab20f4104430489674b5c05e4a1223036e2eac3,27919a8715cf33287c27413e7e843bd66e4284da..61c738ab128383064f86d159cb6bd2dacbcc17b9
@@@ -125,27 -125,6 +125,27 @@@ int kvmppc_prepare_to_enter(struct kvm_
  }
  EXPORT_SYMBOL_GPL(kvmppc_prepare_to_enter);
  
 +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
 +static void kvmppc_swab_shared(struct kvm_vcpu *vcpu)
 +{
 +      struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared;
 +      int i;
 +
 +      shared->sprg0 = swab64(shared->sprg0);
 +      shared->sprg1 = swab64(shared->sprg1);
 +      shared->sprg2 = swab64(shared->sprg2);
 +      shared->sprg3 = swab64(shared->sprg3);
 +      shared->srr0 = swab64(shared->srr0);
 +      shared->srr1 = swab64(shared->srr1);
 +      shared->dar = swab64(shared->dar);
 +      shared->msr = swab64(shared->msr);
 +      shared->dsisr = swab32(shared->dsisr);
 +      shared->int_pending = swab32(shared->int_pending);
 +      for (i = 0; i < ARRAY_SIZE(shared->sr); i++)
 +              shared->sr[i] = swab32(shared->sr[i]);
 +}
 +#endif
 +
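kvmppc_swab_shared() exists because the shared page stays mapped in the guest across an
endianness switch, so its contents have to be converted in place exactly once. After
that, accessors consult shared_big_endian on every read; a sketch of the assumed shape
of kvmppc_get_msr(), which appears a few lines below:

        static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu)
        {
                /* sketch: honour the recorded byte order of the shared page */
                if (vcpu->arch.shared_big_endian)
                        return be64_to_cpu((__force __be64)vcpu->arch.shared->msr);
                return le64_to_cpu((__force __le64)vcpu->arch.shared->msr);
        }
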
  int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
  {
        int nr = kvmppc_get_gpr(vcpu, 11);
        unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6);
        unsigned long r2 = 0;
  
 -      if (!(vcpu->arch.shared->msr & MSR_SF)) {
 +      if (!(kvmppc_get_msr(vcpu) & MSR_SF)) {
                /* 32 bit mode */
                param1 &= 0xffffffff;
                param2 &= 0xffffffff;
        switch (nr) {
        case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
        {
 -              vcpu->arch.magic_page_pa = param1;
 -              vcpu->arch.magic_page_ea = param2;
 +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
 +              /* Book3S can be little endian, find it out here */
 +              int shared_big_endian = true;
 +              if (vcpu->arch.intr_msr & MSR_LE)
 +                      shared_big_endian = false;
 +              if (shared_big_endian != vcpu->arch.shared_big_endian)
 +                      kvmppc_swab_shared(vcpu);
 +              vcpu->arch.shared_big_endian = shared_big_endian;
 +#endif
 +
 +              if (!(param2 & MAGIC_PAGE_FLAG_NOT_MAPPED_NX)) {
 +                      /*
 +                       * Older versions of the Linux magic page code had
 +                       * a bug where they would map their trampoline code
 +                       * NX. If that's the case, remove !PR NX capability.
 +                       */
 +                      vcpu->arch.disable_kernel_nx = true;
 +                      kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 +              }
 +
 +              vcpu->arch.magic_page_pa = param1 & ~0xfffULL;
 +              vcpu->arch.magic_page_ea = param2 & ~0xfffULL;
  
                r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
  
@@@ -416,7 -375,6 +416,7 @@@ int kvm_dev_ioctl_check_extension(long 
        case KVM_CAP_SPAPR_TCE:
        case KVM_CAP_PPC_ALLOC_HTAB:
        case KVM_CAP_PPC_RTAS:
 +      case KVM_CAP_PPC_FIXUP_HCALL:
  #ifdef CONFIG_KVM_XICS
        case KVM_CAP_IRQ_XICS:
  #endif
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
        case KVM_CAP_PPC_SMT:
                if (hv_enabled)
-                       r = threads_per_core;
+                       r = threads_per_subcore;
                else
                        r = 0;
                break;
@@@ -1057,10 -1015,10 +1057,10 @@@ static int kvm_vm_ioctl_get_pvinfo(stru
        u32 inst_nop = 0x60000000;
  #ifdef CONFIG_KVM_BOOKE_HV
        u32 inst_sc1 = 0x44000022;
 -      pvinfo->hcall[0] = inst_sc1;
 -      pvinfo->hcall[1] = inst_nop;
 -      pvinfo->hcall[2] = inst_nop;
 -      pvinfo->hcall[3] = inst_nop;
 +      pvinfo->hcall[0] = cpu_to_be32(inst_sc1);
 +      pvinfo->hcall[1] = cpu_to_be32(inst_nop);
 +      pvinfo->hcall[2] = cpu_to_be32(inst_nop);
 +      pvinfo->hcall[3] = cpu_to_be32(inst_nop);
  #else
        u32 inst_lis = 0x3c000000;
        u32 inst_ori = 0x60000000;
         *    sc
         *    nop
         */
 -      pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask);
 -      pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
 -      pvinfo->hcall[2] = inst_sc;
 -      pvinfo->hcall[3] = inst_nop;
 +      pvinfo->hcall[0] = cpu_to_be32(inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask));
 +      pvinfo->hcall[1] = cpu_to_be32(inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask));
 +      pvinfo->hcall[2] = cpu_to_be32(inst_sc);
 +      pvinfo->hcall[3] = cpu_to_be32(inst_nop);
  #endif
  
        pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
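
The hcall template that KVM_PPC_GET_PVINFO hands to userspace is defined as big-endian,
so a little-endian host must convert each instruction word on the way out, and the
consumer swabs it back before patching it into place. A guest-side sketch (struct and
index are illustrative):

        u32 insn = be32_to_cpu((__force __be32)pvinfo.hcall[i]);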
diff --combined arch/powerpc/mm/hash_utils_64.c
index 350aa58a6f9578da23cafde65b094cb1c2532c55,b435e229c0ef4e5cb5971a38e7bfb9d812394b0e..88fdd9d25077f2e77076ffb3197f6ec63844262e
@@@ -207,10 -207,6 +207,10 @@@ int htab_bolt_mapping(unsigned long vst
                if (overlaps_kernel_text(vaddr, vaddr + step))
                        tprot &= ~HPTE_R_N;
  
 +              /* Make kvm guest trampolines executable */
 +              if (overlaps_kvm_tmp(vaddr, vaddr + step))
 +                      tprot &= ~HPTE_R_N;
 +
                /*
                 * If relocatable, check if it overlaps interrupt vectors that
                 * are copied down to real 0. For relocatable kernel
@@@ -269,9 -265,9 +269,9 @@@ static int __init htab_dt_scan_seg_size
                                         const char *uname, int depth,
                                         void *data)
  {
 -      char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 -      __be32 *prop;
 -      unsigned long size = 0;
 +      const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 +      const __be32 *prop;
 +      int size = 0;
  
        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
@@@ -324,9 -320,9 +324,9 @@@ static int __init htab_dt_scan_page_siz
                                          const char *uname, int depth,
                                          void *data)
  {
 -      char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 -      __be32 *prop;
 -      unsigned long size = 0;
 +      const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 +      const __be32 *prop;
 +      int size = 0;
  
        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
  static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
                                        const char *uname, int depth,
                                        void *data) {
 -      char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 -      __be64 *addr_prop;
 -      __be32 *page_count_prop;
 +      const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 +      const __be64 *addr_prop;
 +      const __be32 *page_count_prop;
        unsigned int expected_pages;
        long unsigned int phys_addr;
        long unsigned int block_size;
@@@ -449,6 -445,24 +449,24 @@@ static void mmu_psize_set_default_penc(
                        mmu_psize_defs[bpsize].penc[apsize] = -1;
  }
  
+ #ifdef CONFIG_PPC_64K_PAGES
+ static bool might_have_hea(void)
+ {
+       /*
+        * The HEA ethernet adapter requires awareness of the
+        * GX bus. Without that awareness we can easily assume
+        * we will never see an HEA ethernet device.
+        */
+ #ifdef CONFIG_IBMEBUS
+       return !cpu_has_feature(CPU_FTR_ARCH_207S);
+ #else
+       return false;
+ #endif
+ }
+ #endif /* #ifdef CONFIG_PPC_64K_PAGES */
  static void __init htab_init_page_sizes(void)
  {
        int rc;
                        mmu_linear_psize = MMU_PAGE_64K;
                if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
                        /*
-                        * Don't use 64k pages for ioremap on pSeries, since
-                        * that would stop us accessing the HEA ethernet.
+                        * When running on pSeries using 64k pages for ioremap
+                        * would stop us accessing the HEA ethernet. So if we
+                        * have the chance of ever seeing one, stay at 4k.
                         */
-                       if (!machine_is(pseries))
+                       if (!might_have_hea() || !machine_is(pseries))
                                mmu_io_psize = MMU_PAGE_64K;
                } else
                        mmu_ci_restrictions = 1;
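
Restated in one line (equivalent by De Morgan, not part of the patch): 4k ioremap is
kept only when the machine is pSeries and an HEA adapter is still possible, i.e.
pre-POWER8 with CONFIG_IBMEBUS enabled.

        bool keep_4k_io = machine_is(pseries) && might_have_hea();
        if (!keep_4k_io)
                mmu_io_psize = MMU_PAGE_64K;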
@@@ -550,8 -565,8 +569,8 @@@ static int __init htab_dt_scan_pftsize(
                                       const char *uname, int depth,
                                       void *data)
  {
 -      char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 -      __be32 *prop;
 +      const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 +      const __be32 *prop;
  
        /* We are scanning "cpu" nodes only */
        if (type == NULL || strcmp(type, "cpu") != 0)
@@@ -607,47 -622,43 +626,43 @@@ int remove_section_mapping(unsigned lon
  }
  #endif /* CONFIG_MEMORY_HOTPLUG */
  
- #define FUNCTION_TEXT(A)      ((*(unsigned long *)(A)))
+ extern u32 htab_call_hpte_insert1[];
+ extern u32 htab_call_hpte_insert2[];
+ extern u32 htab_call_hpte_remove[];
+ extern u32 htab_call_hpte_updatepp[];
+ extern u32 ht64_call_hpte_insert1[];
+ extern u32 ht64_call_hpte_insert2[];
+ extern u32 ht64_call_hpte_remove[];
+ extern u32 ht64_call_hpte_updatepp[];
  
  static void __init htab_finish_init(void)
  {
-       extern unsigned int *htab_call_hpte_insert1;
-       extern unsigned int *htab_call_hpte_insert2;
-       extern unsigned int *htab_call_hpte_remove;
-       extern unsigned int *htab_call_hpte_updatepp;
  #ifdef CONFIG_PPC_HAS_HASH_64K
-       extern unsigned int *ht64_call_hpte_insert1;
-       extern unsigned int *ht64_call_hpte_insert2;
-       extern unsigned int *ht64_call_hpte_remove;
-       extern unsigned int *ht64_call_hpte_updatepp;
        patch_branch(ht64_call_hpte_insert1,
-               FUNCTION_TEXT(ppc_md.hpte_insert),
+               ppc_function_entry(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_insert2,
-               FUNCTION_TEXT(ppc_md.hpte_insert),
+               ppc_function_entry(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_remove,
-               FUNCTION_TEXT(ppc_md.hpte_remove),
+               ppc_function_entry(ppc_md.hpte_remove),
                BRANCH_SET_LINK);
        patch_branch(ht64_call_hpte_updatepp,
-               FUNCTION_TEXT(ppc_md.hpte_updatepp),
+               ppc_function_entry(ppc_md.hpte_updatepp),
                BRANCH_SET_LINK);
  #endif /* CONFIG_PPC_HAS_HASH_64K */
  
        patch_branch(htab_call_hpte_insert1,
-               FUNCTION_TEXT(ppc_md.hpte_insert),
+               ppc_function_entry(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_insert2,
-               FUNCTION_TEXT(ppc_md.hpte_insert),
+               ppc_function_entry(ppc_md.hpte_insert),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_remove,
-               FUNCTION_TEXT(ppc_md.hpte_remove),
+               ppc_function_entry(ppc_md.hpte_remove),
                BRANCH_SET_LINK);
        patch_branch(htab_call_hpte_updatepp,
-               FUNCTION_TEXT(ppc_md.hpte_updatepp),
+               ppc_function_entry(ppc_md.hpte_updatepp),
                BRANCH_SET_LINK);
  }
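
FUNCTION_TEXT() dereferenced the first word of an ABIv1 function descriptor to find the
code address; ppc_function_entry() hides that detail so the same call site works under
ABIv2, where a function pointer already points at text. A simplified sketch of the idea
(the real kernel helper also knows how to skip an ABIv2 global-entry prologue):

        static inline unsigned long ppc_function_entry(void *func)
        {
        #if defined(_CALL_ELF) && _CALL_ELF == 2
                /* ABIv2: the pointer is the entry point itself */
                return (unsigned long)func;
        #else
                /* ABIv1: load the text address out of the descriptor */
                return ((func_descr_t *)func)->entry;
        #endif
        }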
  
@@@ -964,6 -975,22 +979,22 @@@ void hash_failure_debug(unsigned long e
                trap, vsid, ssize, psize, lpsize, pte);
  }
  
+ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
+                            int psize, bool user_region)
+ {
+       if (user_region) {
+               if (psize != get_paca_psize(ea)) {
+                       get_paca()->context = mm->context;
+                       slb_flush_and_rebolt();
+               }
+       } else if (get_paca()->vmalloc_sllp !=
+                  mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+               get_paca()->vmalloc_sllp =
+                       mmu_psize_defs[mmu_vmalloc_psize].sllp;
+               slb_vmalloc_update();
+       }
+ }
  /* Result code is:
   *  0 - handled
   *  1 - normal page fault
@@@ -1085,6 -1112,8 +1116,8 @@@ int hash_page(unsigned long ea, unsigne
                        WARN_ON(1);
                }
  #endif
+               check_paca_psize(ea, mm, psize, user_region);
                goto bail;
        }
  
  #endif
                }
        }
-       if (user_region) {
-               if (psize != get_paca_psize(ea)) {
-                       get_paca()->context = mm->context;
-                       slb_flush_and_rebolt();
-               }
-       } else if (get_paca()->vmalloc_sllp !=
-                  mmu_psize_defs[mmu_vmalloc_psize].sllp) {
-               get_paca()->vmalloc_sllp =
-                       mmu_psize_defs[mmu_vmalloc_psize].sllp;
-               slb_vmalloc_update();
-       }
+       check_paca_psize(ea, mm, psize, user_region);
  #endif /* CONFIG_PPC_64K_PAGES */
  
  #ifdef CONFIG_PPC_HAS_HASH_64K
diff --combined arch/powerpc/mm/slb.c
index 964a5f61488a1a7e726e2e68e10cc8446e0fcb62,4623366f82e93cb311c377dae87330678fae031f..0399a6702958dd933f8581cf8b75cfe16a184bb3
@@@ -97,7 -97,7 +97,7 @@@ static inline void create_shadowed_slbe
  static void __slb_flush_and_rebolt(void)
  {
        /* If you change this make sure you change SLB_NUM_BOLTED
 -       * appropriately too. */
 +       * and PR KVM appropriately too. */
        unsigned long linear_llp, vmalloc_llp, lflags, vflags;
        unsigned long ksp_esid_data, ksp_vsid_data;
  
@@@ -256,10 -256,14 +256,14 @@@ static inline void patch_slb_encoding(u
        patch_instruction(insn_addr, insn);
  }
  
+ extern u32 slb_compare_rr_to_size[];
+ extern u32 slb_miss_kernel_load_linear[];
+ extern u32 slb_miss_kernel_load_io[];
+ extern u32 slb_compare_rr_to_size[];
+ extern u32 slb_miss_kernel_load_vmemmap[];
  void slb_set_size(u16 size)
  {
-       extern unsigned int *slb_compare_rr_to_size;
        if (mmu_slb_size == size)
                return;
  
@@@ -272,11 -276,7 +276,7 @@@ void slb_initialize(void
        unsigned long linear_llp, vmalloc_llp, io_llp;
        unsigned long lflags, vflags;
        static int slb_encoding_inited;
-       extern unsigned int *slb_miss_kernel_load_linear;
-       extern unsigned int *slb_miss_kernel_load_io;
-       extern unsigned int *slb_compare_rr_to_size;
  #ifdef CONFIG_SPARSEMEM_VMEMMAP
-       extern unsigned int *slb_miss_kernel_load_vmemmap;
        unsigned long vmemmap_llp;
  #endif
  
diff --combined arch/powerpc/platforms/powernv/opal.c
index f343183add07c189d02b0701e284f72effd614fc,539243e9dc23bb7a03b1725b2f1b377a5051ecda..199975613fe99fbbc030e34c205283dbb889f2c8
@@@ -57,11 -57,26 +57,26 @@@ static DEFINE_SPINLOCK(opal_notifier_lo
  static uint64_t last_notified_mask = 0x0ul;
  static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
  
+ static void opal_reinit_cores(void)
+ {
+       /* Do the actual re-init, This will clobber all FPRs, VRs, etc...
+        *
+        * It will preserve non volatile GPRs and HSPRG0/1. It will
+        * also restore HIDs and other SPRs to their original value
+        * but it might clobber a bunch.
+        */
+ #ifdef __BIG_ENDIAN__
+       opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE);
+ #else
+       opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE);
+ #endif
+ }
  int __init early_init_dt_scan_opal(unsigned long node,
                                   const char *uname, int depth, void *data)
  {
        const void *basep, *entryp, *sizep;
 -      unsigned long basesz, entrysz, runtimesz;
 +      int basesz, entrysz, runtimesz;
  
        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
                return 0;
        opal.entry = of_read_number(entryp, entrysz/4);
        opal.size = of_read_number(sizep, runtimesz/4);
  
 -      pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%ld)\n",
 +      pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
                 opal.base, basep, basesz);
 -      pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%ld)\n",
 +      pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
                 opal.entry, entryp, entrysz);
 -      pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%ld)\n",
 +      pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
                 opal.size, sizep, runtimesz);
  
        powerpc_firmware_features |= FW_FEATURE_OPAL;
                printk("OPAL V1 detected !\n");
        }
  
+       /* Reinit all cores with the right endian */
+       opal_reinit_cores();
+       /* Restore some bits */
+       if (cur_cpu_spec->cpu_restore)
+               cur_cpu_spec->cpu_restore();
        return 1;
  }
  
  int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
                                   const char *uname, int depth, void *data)
  {
 -      unsigned long i, psize, size;
 +      int i, psize, size;
        const __be32 *prop;
  
        if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
@@@ -359,7 -381,7 +381,7 @@@ int opal_get_chars(uint32_t vtermno, ch
        if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
                return 0;
        len = cpu_to_be64(count);
 -      rc = opal_console_read(vtermno, &len, buf);     
 +      rc = opal_console_read(vtermno, &len, buf);
        if (rc == OPAL_SUCCESS)
                return be64_to_cpu(len);
        return 0;
diff --combined arch/powerpc/platforms/pseries/setup.c
index 099d2df976a2145f49198b599d43353412ea7457,adc21a0e34104b6eff390eb9f582768bdb6ee295..f2f40e64658f054aa650d1fd329a74b5dc478bde
@@@ -510,7 -510,11 +510,11 @@@ static void __init pSeries_setup_arch(v
  static int __init pSeries_init_panel(void)
  {
        /* Manually leave the kernel version on the panel. */
+ #ifdef __BIG_ENDIAN__
        ppc_md.progress("Linux ppc64\n", 0);
+ #else
+       ppc_md.progress("Linux ppc64le\n", 0);
+ #endif
        ppc_md.progress(init_utsname()->version, 0);
  
        return 0;
@@@ -665,7 -669,7 +669,7 @@@ static int __init pseries_probe_fw_feat
                                            void *data)
  {
        const char *prop;
 -      unsigned long len;
 +      int len;
        static int hypertas_found;
        static int vec5_found;
  
  static int __init pSeries_probe(void)
  {
        unsigned long root = of_get_flat_dt_root();
 -      char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);
 +      const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);
  
        if (dtype == NULL)
                return 0;
@@@ -806,4 -810,7 +810,7 @@@ define_machine(pseries) 
  #ifdef CONFIG_KEXEC
        .machine_kexec          = pSeries_machine_kexec,
  #endif
+ #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+       .memory_block_size      = pseries_memory_block_size,
+ #endif
  };
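
The new .memory_block_size field is presumably consumed by the powerpc override of the
generic memory_block_size_bytes(); a sketch of that plumbing under those assumptions
(the name of the generic default constant may differ):

        unsigned long memory_block_size_bytes(void)
        {
                if (ppc_md.memory_block_size)
                        return ppc_md.memory_block_size();

                return MIN_MEMORY_BLOCK_SIZE;   /* generic default */
        }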
diff --combined drivers/pci/pci.c
index 436a76ab4bb1925228709e40bf97aa841551bb9c,633382d227f41ce00ebd2b2f612b7b36f84247df..212c63d780e755eacc91d6ad163faf397fa31728
@@@ -106,7 -106,7 +106,7 @@@ static bool pcie_ari_disabled
   * Given a PCI bus, returns the highest PCI bus number present in the set
   * including the given PCI bus and its list of child PCI buses.
   */
 -unsigned char pci_bus_max_busnr(struct pci_bus* bus)
 +unsigned char pci_bus_max_busnr(struct pci_bus *bus)
  {
        struct pci_bus *tmp;
        unsigned char max, n;
@@@ -1371,7 -1371,7 +1371,7 @@@ static void pcim_release(struct device 
                pci_disable_device(dev);
  }
  
 -static struct pci_devres * get_pci_dr(struct pci_dev *pdev)
 +static struct pci_devres *get_pci_dr(struct pci_dev *pdev)
  {
        struct pci_devres *dr, *new_dr;
  
        return devres_get(&pdev->dev, new_dr, NULL, NULL);
  }
  
 -static struct pci_devres * find_pci_dr(struct pci_dev *pdev)
 +static struct pci_devres *find_pci_dr(struct pci_dev *pdev)
  {
        if (pci_is_managed(pdev))
                return devres_find(&pdev->dev, pcim_release, NULL, NULL);
@@@ -1468,17 -1468,6 +1468,17 @@@ void __weak pcibios_release_device(stru
   */
  void __weak pcibios_disable_device (struct pci_dev *dev) {}
  
 +/**
 + * pcibios_penalize_isa_irq - penalize an ISA IRQ
 + * @irq: ISA IRQ to penalize
 + * @active: IRQ active or not
 + *
 + * Permits the platform to provide architecture-specific functionality when
 + * penalizing ISA IRQs. This is the default implementation. Architecture
 + * implementations can override this.
 + */
 +void __weak pcibios_penalize_isa_irq(int irq, int active) {}
 +
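
The empty __weak body gives architectures a hook for their IRQ-routing penalty
bookkeeping. A hypothetical override, loosely modelled on the x86 behaviour
(pirq_penalize_isa_irq() is assumed here, not defined by this patch):

        void pcibios_penalize_isa_irq(int irq, int active)
        {
                if (irq >= 0)
                        pirq_penalize_isa_irq(irq, active);
        }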
  static void do_pci_disable_device(struct pci_dev *dev)
  {
        u16 pci_command;
@@@ -3078,8 -3067,7 +3078,8 @@@ int pci_wait_for_pending_transaction(st
        if (!pci_is_pcie(dev))
                return 1;
  
 -      return pci_wait_for_pending(dev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_TRPND);
 +      return pci_wait_for_pending(dev, pci_pcie_cap(dev) + PCI_EXP_DEVSTA,
 +                                  PCI_EXP_DEVSTA_TRPND);
  }
  EXPORT_SYMBOL(pci_wait_for_pending_transaction);
  
@@@ -3121,7 -3109,7 +3121,7 @@@ static int pci_af_flr(struct pci_dev *d
                return 0;
  
        /* Wait for Transaction Pending bit clean */
 -      if (pci_wait_for_pending(dev, PCI_AF_STATUS, PCI_AF_STATUS_TP))
 +      if (pci_wait_for_pending(dev, pos + PCI_AF_STATUS, PCI_AF_STATUS_TP))
                goto clear;
  
        dev_err(&dev->dev, "transaction is not cleared; "
@@@ -3179,14 -3167,7 +3179,7 @@@ static int pci_pm_reset(struct pci_dev 
        return 0;
  }
  
- /**
-  * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge.
-  * @dev: Bridge device
-  *
-  * Use the bridge control register to assert reset on the secondary bus.
-  * Devices on the secondary bus are left in power-on state.
-  */
- void pci_reset_bridge_secondary_bus(struct pci_dev *dev)
+ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev)
  {
        u16 ctrl;
  
         */
        ssleep(1);
  }
+ /**
+  * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge.
+  * @dev: Bridge device
+  *
+  * Use the bridge control register to assert reset on the secondary bus.
+  * Devices on the secondary bus are left in power-on state.
+  */
+ void pci_reset_bridge_secondary_bus(struct pci_dev *dev)
+ {
+       pcibios_reset_secondary_bus(dev);
+ }
  EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus);
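
Splitting the register toggle out as __weak pcibios_reset_secondary_bus() lets a
platform substitute its own reset machinery while pci_reset_bridge_secondary_bus()
keeps its external contract. A hypothetical override (all platform_* and generic_*
helpers are invented for illustration):

        void pcibios_reset_secondary_bus(struct pci_dev *dev)
        {
                if (platform_has_bus_reset(dev)) {      /* hypothetical */
                        platform_bus_reset(dev);        /* hypothetical */
                        return;
                }
                /* otherwise replicate the bridge-control toggle shown above */
                generic_bridge_ctl_reset(dev);          /* hypothetical */
        }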
  
  static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
@@@ -3317,27 -3310,8 +3322,27 @@@ static void pci_dev_unlock(struct pci_d
        pci_cfg_access_unlock(dev);
  }
  
 +/**
 + * pci_reset_notify - notify device driver of reset
 + * @dev: device to be notified of reset
 + * @prepare: 'true' if device is about to be reset; 'false' if reset attempt
 + *           completed
 + *
 + * Must be called prior to device access being disabled and after device
 + * access is restored.
 + */
 +static void pci_reset_notify(struct pci_dev *dev, bool prepare)
 +{
 +      const struct pci_error_handlers *err_handler =
 +                      dev->driver ? dev->driver->err_handler : NULL;
 +      if (err_handler && err_handler->reset_notify)
 +              err_handler->reset_notify(dev, prepare);
 +}
 +
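A driver opts in by supplying .reset_notify in its pci_error_handlers; a hypothetical
consumer (all foo_* names invented) that quiesces DMA before the reset and re-arms it
afterwards:

        static void foo_reset_notify(struct pci_dev *pdev, bool prepare)
        {
                struct foo_priv *priv = pci_get_drvdata(pdev);

                if (prepare)
                        foo_quiesce_dma(priv);  /* hypothetical helper */
                else
                        foo_restart_dma(priv);  /* hypothetical helper */
        }

        static const struct pci_error_handlers foo_err_handler = {
                .reset_notify = foo_reset_notify,
        };
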
  static void pci_dev_save_and_disable(struct pci_dev *dev)
  {
 +      pci_reset_notify(dev, true);
 +
        /*
         * Wake-up device prior to save.  PM registers default to D0 after
         * reset and a simple register restore doesn't reliably return
  static void pci_dev_restore(struct pci_dev *dev)
  {
        pci_restore_state(dev);
 +      pci_reset_notify(dev, false);
  }
  
  static int pci_dev_reset(struct pci_dev *dev, int probe)
  
        return rc;
  }
 +
  /**
   * __pci_reset_function - reset a PCI device function
   * @dev: PCI device to reset
@@@ -4158,7 -4130,7 +4163,7 @@@ int pci_set_vga_state(struct pci_dev *d
        u16 cmd;
        int rc;
  
 -      WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) & (command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)));
 +      WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) && (command_bits & ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY)));
  
        /* ARCH specific VGA enables */
        rc = pci_set_vga_state_arch(dev, decode, command_bits, flags);
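
The one-character change from '&' to '&&' in the WARN_ON() above is the whole fix:
PCI_VGA_STATE_CHANGE_DECODES sits in the low flag bits while any unexpected command bit
lies outside PCI_COMMAND_IO|PCI_COMMAND_MEMORY, so the bitwise AND of the two tests
shared no set bits, evaluated to zero, and the warning could never fire. A tiny
demonstration (flag value assumed for illustration):

        unsigned int flags = PCI_VGA_STATE_CHANGE_DECODES;      /* assume bit 0 */
        unsigned int bad_bits = PCI_COMMAND_PARITY;             /* bit 6 */

        WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) & bad_bits);   /* 0: silent */
        WARN_ON((flags & PCI_VGA_STATE_CHANGE_DECODES) && bad_bits);  /* fires */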