Merge tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 7 Apr 2018 19:08:19 +0000 (12:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 7 Apr 2018 19:08:19 +0000 (12:08 -0700)
Pull powerpc updates from Michael Ellerman:
 "Notable changes:

   - Support for 4PB user address space on 64-bit, opt-in via mmap()
     (see the sketch after the quoted message).

   - Removal of POWER4 support, which was accidentally broken in 2016
     and no one noticed, and blocked use of some modern instructions.

   - Workarounds so that the hypervisor can enable Transactional Memory
     on Power9.

   - A series to disable the DAWR (Data Address Watchpoint Register) on
     Power9.

   - More information displayed in the meltdown/spectre_v1/v2 sysfs
     files.

   - A vpermxor (Power8 Altivec) implementation for the raid6 Q
     Syndrome.

   - A big series to make the allocation of our pacas (per cpu area),
     kernel page tables, and per-cpu stacks NUMA aware when using the
     Radix MMU on Power9.

  And as usual many fixes, reworks and cleanups.

  Thanks to: Aaro Koskinen, Alexandre Belloni, Alexey Kardashevskiy,
  Alistair Popple, Andy Shevchenko, Aneesh Kumar K.V, Anshuman Khandual,
  Balbir Singh, Benjamin Herrenschmidt, Christophe Leroy, Christophe
  Lombard, Cyril Bur, Daniel Axtens, Dave Young, Finn Thain, Frederic
  Barrat, Gustavo Romero, Horia Geantă, Jonathan Neuschäfer, Kees Cook,
  Larry Finger, Laurent Dufour, Laurent Vivier, Logan Gunthorpe,
  Madhavan Srinivasan, Mark Greer, Mark Hairgrove, Markus Elfring,
  Mathieu Malaterre, Matt Brown, Matt Evans, Mauricio Faria de Oliveira,
  Michael Neuling, Naveen N. Rao, Nicholas Piggin, Paul Mackerras,
  Philippe Bergheaud, Ram Pai, Rob Herring, Sam Bobroff, Segher
  Boessenkool, Simon Guo, Simon Horman, Stewart Smith, Sukadev
  Bhattiprolu, Suraj Jitindar Singh, Thiago Jung Bauermann, Vaibhav
  Jain, Vaidyanathan Srinivasan, Vasant Hegde, Wei Yongjun"
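
The 4PB support is opt-in: a 64-bit process keeps the existing 128TB
address-space limit by default, and asks for more by passing an mmap()
hint above that boundary on configurations that support it. A minimal
userspace sketch (the 256TB hint and the mapping size are illustrative,
not taken from this series):

    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
            /* A hint above the 128TB default asks the kernel to
             * extend this process's address-space limit. */
            void *hint = (void *)(1UL << 48);       /* 256TB, illustrative */
            void *p = mmap(hint, 1UL << 20, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED) {
                    perror("mmap");
                    return 1;
            }
            printf("mapped at %p\n", p);
            munmap(p, 1UL << 20);
            return 0;
    }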

* tag 'powerpc-4.17-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (207 commits)
  powerpc/64s/idle: Fix restore of AMOR on POWER9 after deep sleep
  powerpc/64s: Fix POWER9 DD2.2 and above in cputable features
  powerpc/64s: Fix pkey support in dt_cpu_ftrs, add CPU_FTR_PKEY bit
  powerpc/64s: Fix dt_cpu_ftrs to have restore_cpu clear unwanted LPCR bits
  Revert "powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead"
  powerpc: iomap.c: introduce io{read|write}64_{lo_hi|hi_lo}
  powerpc: io.h: move iomap.h include so that it can use readq/writeq defs
  cxl: Fix possible deadlock when processing page faults from cxllib
  powerpc/hw_breakpoint: Only disable hw breakpoint if cpu supports it
  powerpc/mm/radix: Update command line parsing for disable_radix
  powerpc/mm/radix: Parse disable_radix commandline correctly.
  powerpc/mm/hugetlb: initialize the pagetable cache correctly for hugetlb
  powerpc/mm/radix: Update pte fragment count from 16 to 256 on radix
  powerpc/mm/keys: Update documentation and remove unnecessary check
  powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead
  powerpc/64s/idle: Consolidate power9_offline_stop()/power9_idle_stop()
  powerpc/powernv: Always stop secondaries before reboot/shutdown
  powerpc: hard disable irqs in smp_send_stop loop
  powerpc: use NMI IPI for smp_send_stop
  powerpc/powernv: Fix SMT4 forcing idle code
  ...

275 files changed:
arch/powerpc/Makefile
arch/powerpc/boot/dts/acadia.dts
arch/powerpc/boot/dts/adder875-redboot.dts
arch/powerpc/boot/dts/adder875-uboot.dts
arch/powerpc/boot/dts/akebono.dts
arch/powerpc/boot/dts/amigaone.dts
arch/powerpc/boot/dts/asp834x-redboot.dts
arch/powerpc/boot/dts/bamboo.dts
arch/powerpc/boot/dts/c2k.dts
arch/powerpc/boot/dts/currituck.dts
arch/powerpc/boot/dts/digsy_mtc.dts
arch/powerpc/boot/dts/ebony.dts
arch/powerpc/boot/dts/eiger.dts
arch/powerpc/boot/dts/ep405.dts
arch/powerpc/boot/dts/fsl/mvme7100.dts
arch/powerpc/boot/dts/fsp2.dts
arch/powerpc/boot/dts/holly.dts
arch/powerpc/boot/dts/hotfoot.dts
arch/powerpc/boot/dts/icon.dts
arch/powerpc/boot/dts/iss4xx-mpic.dts
arch/powerpc/boot/dts/iss4xx.dts
arch/powerpc/boot/dts/katmai.dts
arch/powerpc/boot/dts/klondike.dts
arch/powerpc/boot/dts/ksi8560.dts
arch/powerpc/boot/dts/media5200.dts
arch/powerpc/boot/dts/mpc8272ads.dts
arch/powerpc/boot/dts/mpc866ads.dts
arch/powerpc/boot/dts/mpc885ads.dts
arch/powerpc/boot/dts/mvme5100.dts
arch/powerpc/boot/dts/obs600.dts
arch/powerpc/boot/dts/pq2fads.dts
arch/powerpc/boot/dts/rainier.dts
arch/powerpc/boot/dts/redwood.dts
arch/powerpc/boot/dts/sam440ep.dts
arch/powerpc/boot/dts/sequoia.dts
arch/powerpc/boot/dts/storcenter.dts
arch/powerpc/boot/dts/taishan.dts
arch/powerpc/boot/dts/virtex440-ml507.dts
arch/powerpc/boot/dts/virtex440-ml510.dts
arch/powerpc/boot/dts/walnut.dts
arch/powerpc/boot/dts/warp.dts
arch/powerpc/boot/dts/wii.dts
arch/powerpc/boot/dts/xpedite5200_xmon.dts
arch/powerpc/boot/dts/yosemite.dts
arch/powerpc/boot/libfdt_env.h
arch/powerpc/include/asm/asm-prototypes.h
arch/powerpc/include/asm/barrier.h
arch/powerpc/include/asm/book3s/64/hash-4k.h
arch/powerpc/include/asm/book3s/64/hash-64k.h
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/mmu.h
arch/powerpc/include/asm/book3s/64/pgalloc.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/book3s/64/radix-4k.h
arch/powerpc/include/asm/book3s/64/radix-64k.h
arch/powerpc/include/asm/book3s/64/radix.h
arch/powerpc/include/asm/book3s/64/slice.h [new file with mode: 0644]
arch/powerpc/include/asm/cacheflush.h
arch/powerpc/include/asm/cputable.h
arch/powerpc/include/asm/debug.h
arch/powerpc/include/asm/eeh.h
arch/powerpc/include/asm/eeh_event.h
arch/powerpc/include/asm/epapr_hcalls.h
arch/powerpc/include/asm/hugetlb.h
arch/powerpc/include/asm/hvcall.h
arch/powerpc/include/asm/hw_breakpoint.h
arch/powerpc/include/asm/io.h
arch/powerpc/include/asm/irq.h
arch/powerpc/include/asm/irq_work.h
arch/powerpc/include/asm/kvm_asm.h
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/include/asm/kvm_book3s_asm.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/lppaca.h
arch/powerpc/include/asm/mmu-8xx.h
arch/powerpc/include/asm/mmu.h
arch/powerpc/include/asm/mmu_context.h
arch/powerpc/include/asm/nohash/32/slice.h [new file with mode: 0644]
arch/powerpc/include/asm/nohash/64/slice.h [new file with mode: 0644]
arch/powerpc/include/asm/opal-api.h
arch/powerpc/include/asm/opal.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/page.h
arch/powerpc/include/asm/page_64.h
arch/powerpc/include/asm/perf_event_server.h
arch/powerpc/include/asm/plpar_wrappers.h
arch/powerpc/include/asm/pmc.h
arch/powerpc/include/asm/pnv-pci.h
arch/powerpc/include/asm/powernv.h
arch/powerpc/include/asm/ppc-opcode.h
arch/powerpc/include/asm/ppc_asm.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/security_features.h [new file with mode: 0644]
arch/powerpc/include/asm/setup.h
arch/powerpc/include/asm/slice.h [new file with mode: 0644]
arch/powerpc/include/asm/smp.h
arch/powerpc/include/asm/sparsemem.h
arch/powerpc/include/asm/spinlock.h
arch/powerpc/include/asm/switch_to.h
arch/powerpc/include/asm/synch.h
arch/powerpc/include/asm/thread_info.h
arch/powerpc/include/asm/time.h
arch/powerpc/include/asm/uaccess.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/cpu_setup_6xx.S
arch/powerpc/kernel/cpu_setup_fsl_booke.S
arch/powerpc/kernel/cputable.c
arch/powerpc/kernel/crash.c
arch/powerpc/kernel/dt_cpu_ftrs.c
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/eeh_cache.c
arch/powerpc/kernel/eeh_driver.c
arch/powerpc/kernel/eeh_event.c
arch/powerpc/kernel/entry_64.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/head_64.S
arch/powerpc/kernel/hw_breakpoint.c
arch/powerpc/kernel/idle_book3s.S
arch/powerpc/kernel/iomap.c
arch/powerpc/kernel/kprobes.c
arch/powerpc/kernel/machine_kexec_64.c
arch/powerpc/kernel/machine_kexec_file_64.c
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/nvram_64.c
arch/powerpc/kernel/paca.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/prom_init_check.sh
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/security.c [new file with mode: 0644]
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/setup.h
arch/powerpc/kernel/setup_32.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/signal.h
arch/powerpc/kernel/signal_32.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/sysfs.c
arch/powerpc/kernel/time.c
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/vdso.c
arch/powerpc/kvm/Makefile
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_interrupts.S
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_hv_tm.c [new file with mode: 0644]
arch/powerpc/kvm/book3s_hv_tm_builtin.c [new file with mode: 0644]
arch/powerpc/kvm/emulate.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/lib/Makefile
arch/powerpc/lib/copypage_64.S
arch/powerpc/lib/copypage_power7.S
arch/powerpc/lib/copyuser_64.S
arch/powerpc/lib/copyuser_power7.S
arch/powerpc/lib/feature-fixups.c
arch/powerpc/lib/memcpy_64.S
arch/powerpc/lib/memcpy_power7.S
arch/powerpc/lib/sstep.c
arch/powerpc/mm/8xx_mmu.c
arch/powerpc/mm/copro_fault.c
arch/powerpc/mm/fault.c
arch/powerpc/mm/hash_native_64.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/mm/init_32.c
arch/powerpc/mm/init_64.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/mmu_context_book3s64.c
arch/powerpc/mm/mmu_context_nohash.c
arch/powerpc/mm/mmu_decl.h
arch/powerpc/mm/numa.c
arch/powerpc/mm/pgtable-book3s64.c
arch/powerpc/mm/pgtable-hash64.c
arch/powerpc/mm/pgtable-radix.c
arch/powerpc/mm/pgtable_32.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/pkeys.c
arch/powerpc/mm/slb.c
arch/powerpc/mm/slb_low.S
arch/powerpc/mm/slice.c
arch/powerpc/mm/tlb-radix.c
arch/powerpc/mm/tlb_hash64.c
arch/powerpc/oprofile/cell/spu_task_sync.c
arch/powerpc/oprofile/cell/vma_map.c
arch/powerpc/perf/Makefile
arch/powerpc/perf/core-book3s.c
arch/powerpc/perf/power4-pmu.c [deleted file]
arch/powerpc/perf/power9-events-list.h
arch/powerpc/perf/power9-pmu.c
arch/powerpc/platforms/4xx/msi.c
arch/powerpc/platforms/4xx/ocm.c
arch/powerpc/platforms/85xx/smp.c
arch/powerpc/platforms/8xx/m8xx_setup.c
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/cell/axon_msi.c
arch/powerpc/platforms/cell/smp.c
arch/powerpc/platforms/cell/spider-pci.c
arch/powerpc/platforms/cell/spufs/lscsa_alloc.c
arch/powerpc/platforms/embedded6xx/flipper-pic.c
arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c
arch/powerpc/platforms/embedded6xx/wii.c
arch/powerpc/platforms/powermac/low_i2c.c
arch/powerpc/platforms/powermac/pfunc_core.c
arch/powerpc/platforms/powernv/Makefile
arch/powerpc/platforms/powernv/eeh-powernv.c
arch/powerpc/platforms/powernv/idle.c
arch/powerpc/platforms/powernv/npu-dma.c
arch/powerpc/platforms/powernv/opal-flash.c
arch/powerpc/platforms/powernv/opal-hmi.c
arch/powerpc/platforms/powernv/opal-imc.c
arch/powerpc/platforms/powernv/opal-memory-errors.c
arch/powerpc/platforms/powernv/opal-nvram.c
arch/powerpc/platforms/powernv/opal-psr.c
arch/powerpc/platforms/powernv/opal-sensor-groups.c
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/powerpc/platforms/powernv/opal-xscom.c
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci-cxl.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/powernv/setup.c
arch/powerpc/platforms/powernv/smp.c
arch/powerpc/platforms/powernv/subcore.c
arch/powerpc/platforms/powernv/vas-debug.c
arch/powerpc/platforms/powernv/vas-trace.h [new file with mode: 0644]
arch/powerpc/platforms/powernv/vas-window.c
arch/powerpc/platforms/powernv/vas.c
arch/powerpc/platforms/ps3/mm.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/kexec.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/mobility.c
arch/powerpc/platforms/pseries/pseries.h
arch/powerpc/platforms/pseries/setup.c
arch/powerpc/platforms/pseries/smp.c
arch/powerpc/sysdev/mpic.c
arch/powerpc/sysdev/xics/icp-native.c
arch/powerpc/sysdev/xive/common.c
arch/powerpc/xmon/xmon.c
drivers/macintosh/adb-iop.c
drivers/macintosh/ans-lcd.c
drivers/macintosh/macio-adb.c
drivers/macintosh/rack-meter.c
drivers/macintosh/via-macii.c
drivers/macintosh/via-pmu.c
drivers/macintosh/via-pmu68k.c
drivers/misc/cxl/cxl.h
drivers/misc/cxl/cxllib.c
drivers/misc/cxl/native.c
drivers/misc/cxl/pci.c
drivers/misc/cxl/sysfs.c
drivers/pci/hotplug/pnv_php.c
include/linux/memblock.h
include/linux/raid/pq.h
lib/raid6/.gitignore
lib/raid6/Makefile
lib/raid6/algos.c
lib/raid6/altivec.uc
lib/raid6/test/Makefile
lib/raid6/vpermxor.uc [new file with mode: 0644]
mm/memblock.c
tools/testing/selftests/powerpc/benchmarks/.gitignore
tools/testing/selftests/powerpc/benchmarks/Makefile
tools/testing/selftests/powerpc/benchmarks/exec_target.c [new file with mode: 0644]
tools/testing/selftests/powerpc/benchmarks/fork.c [new file with mode: 0644]
tools/testing/selftests/powerpc/copyloops/Makefile
tools/testing/selftests/powerpc/tm/Makefile
tools/testing/selftests/powerpc/tm/tm-sigreturn.c [new file with mode: 0644]
tools/testing/selftests/powerpc/tm/tm-unavailable.c

index ccd2556bdb530db23d6a1ba9f1ffff907d62fb59..95813df908012235d97acac25c067fa6c42d3cbd 100644 (file)
@@ -141,11 +141,18 @@ AFLAGS-$(CONFIG_PPC64)    += $(call cc-option,-mabi=elfv1)
 endif
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
+
 CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
+CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)
 
 ifeq ($(CONFIG_PPC_BOOK3S_64),y)
-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4)
-CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power4
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=power8
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8)
+else
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,$(call cc-option,-mtune=power5))
+CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mcpu=power5,-mcpu=power4)
+endif
 else
 CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
 endif
@@ -166,11 +173,11 @@ ifdef CONFIG_MPROFILE_KERNEL
 endif
 
 CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
-CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
 CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
 CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
 CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)
 CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8)
+CFLAGS-$(CONFIG_POWER9_CPU) += $(call cc-option,-mcpu=power9)
 
 # Altivec option not allowed with e500mc64 in GCC.
 ifeq ($(CONFIG_ALTIVEC),y)
@@ -243,6 +250,7 @@ endif
 cpu-as-$(CONFIG_4xx)           += -Wa,-m405
 cpu-as-$(CONFIG_ALTIVEC)       += $(call as-option,-Wa$(comma)-maltivec)
 cpu-as-$(CONFIG_E200)          += -Wa,-me200
+cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4
 
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)
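
The hunk above leans on Kbuild's cc-option helper, which probes whether
$(CC) accepts a flag and substitutes a fallback (or nothing) when it
does not. A sketch of the pattern, using one of the lines from this
diff:

    # $(call cc-option,<flag>[,<fallback>]) expands to <flag> if the
    # compiler accepts it, otherwise to <fallback>.
    CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power9,-mtune=power8)
    # ...so an older toolchain without power9 tuning falls back to
    # -mtune=power8 instead of breaking the build.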
index 86266159521edac282d9130c497fae59dde25101..deb52e41ab84b2b9d5ce0a35261f02d5f74ce5ef 100644 (file)
@@ -219,6 +219,6 @@ EBC0: ebc {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
        };
 };
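
This and the many device-tree hunks that follow all make the same
substitution: the legacy, PowerPC-specific linux,stdout-path property
in /chosen is replaced by the generic stdout-path binding that the
common device-tree code parses. The shape of the change on a generic
tree (the /soc/serial@1000 path is hypothetical):

    / {
            chosen {
                    /* old, deprecated spelling:
                     *   linux,stdout-path = "/soc/serial@1000";
                     */
                    stdout-path = "/soc/serial@1000";
            };
    };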
index 083984720b2f624cd4fcf5bf3c6cb5f39364c8bc..7f5ff416848297c9325e5a63ea0f2c53329b2cf7 100644 (file)
@@ -178,6 +178,6 @@ console: serial@a80 {
        };
 
        chosen {
-               linux,stdout-path = &console;
+               stdout-path = &console;
        };
 };
index e4554caf8f8d3b6a40ba27ac8a02c67a39abf724..bd9f33c57737c575e3ea9110aa449126e56be1ec 100644 (file)
@@ -177,6 +177,6 @@ console: serial@a80 {
        };
 
        chosen {
-               linux,stdout-path = &console;
+               stdout-path = &console;
        };
 };
index 746779202a122b3b504a8c751eff8ba7269f18ce..8a7a10139bc9fdc05869a6b68dbec8f3e30c7680 100644 (file)
@@ -410,6 +410,6 @@ PCIE3: pciex@28100000000 {
        };
 
        chosen {
-               linux,stdout-path = &UART0;
+               stdout-path = &UART0;
        };
 };
index 49ac36b16dd78d6b4e68ccfac530602d719ef20e..712430155b995786d22b936a9ff4aff3e0146c26 100644 (file)
@@ -168,6 +168,6 @@ disk@0 {
        };
 
        chosen {
-               linux,stdout-path = "/pci@80000000/isa@7/serial@3f8";
+               stdout-path = "/pci@80000000/isa@7/serial@3f8";
        };
 };
index 9198745f45fb745fba31d8ccfb241ae22853685b..e987b5af9326d6ee2e94c03657b253ecd3a86a07 100644 (file)
@@ -304,7 +304,7 @@ ipic: pic@700 {
 
        chosen {
                bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2";
-               linux,stdout-path = &serial0;
+               stdout-path = &serial0;
        };
 
 };
index aa68911f6560a21449d19c283d0c0388016c5429..538e42b1120d861b28385bdf6468ba4c0a1e0b2c 100644 (file)
@@ -295,6 +295,6 @@ PCI0: pci@ec000000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
        };
 };
index 27f169e3ade947a769e74bb34c0d8e07acd5cec4..c5beb72d18b7accca1334487890e0165d1c0b6e3 100644 (file)
@@ -361,6 +361,6 @@ partition@7800000 {
                };
        };
        chosen {
-               linux,stdout-path = &MPSC0;
+               stdout-path = &MPSC0;
        };
 };
index f2ad5815f08d7a241e89dc1c03bb6960448bd486..a04a4fcfde637937dbe21ae19e6a0ea0e55c27d2 100644 (file)
@@ -237,6 +237,6 @@ PCIE2: pciex@38100000000 {          // 2xGBIF0
        };
 
        chosen {
-               linux,stdout-path = &UART0;
+               stdout-path = &UART0;
        };
 };
index c280e75c86bfdd76de4c7ed859ca501044b9c064..c3922fc03e0b15261b9da2b6eaf11516b1a86a75 100644 (file)
@@ -78,7 +78,7 @@ eeprom@50 {
                        };
 
                        rtc@56 {
-                               compatible = "mc,rv3029c2";
+                               compatible = "microcrystal,rv3029";
                                reg = <0x56>;
                        };
 
index ec2d142291b4981028f7af4bcfa23cb1cdeef3b7..5d11e6ea74050b2bc62fee34f89c88413a516673 100644 (file)
@@ -332,6 +332,6 @@ PCIX0: pci@20ec00000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@40000200";
+               stdout-path = "/plb/opb/serial@40000200";
        };
 };
index 48bcf7187924a3565e4777ac968ad98f6528c44d..7a1231d9d6f04cb66f6e7f6917013819364601c6 100644 (file)
@@ -421,7 +421,7 @@ EMAC3: ethernet@ef600d00 {
 
        };
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600200";
+               stdout-path = "/plb/opb/serial@ef600200";
        };
 
 };
index 53ef06cc213401680a4c9fe5c814b9a2cc411172..4ac9c5ab6e6b89a6d18754990141b530abf3fad9 100644 (file)
@@ -225,6 +225,6 @@ PCI0: pci@ec000000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
        };
 };
index e2d306ad37a6a4c2feb2b113f0773b8ecd6c3190..721cb53758ae5dc0e17fb0b15068eb40f1e4cd58 100644 (file)
@@ -146,7 +146,7 @@ pci1: pcie@f1009000 {
        };
 
        chosen {
-               linux,stdout-path = &serial0;
+               stdout-path = &serial0;
        };
 };
 
index 6560283c5aecd55b48671bffd1e1567e261cc619..9311b86b1bd9ec8c87bab88b59b1ea82cba62dd8 100644 (file)
@@ -607,7 +607,7 @@ EHCI: ehci@2000000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@b0020000";
+               stdout-path = "/plb/opb/serial@b0020000";
                bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
        };
 };
index 43e6f0c8e44908db71119429495946b788595bfb..02bd304c7d3854de6c8a9c1097a9506d2569fa25 100644 (file)
@@ -191,6 +191,6 @@ RT0: router@1180 {
        };
 
        chosen {
-               linux,stdout-path = "/tsi109@c0000000/serial@7808";
+               stdout-path = "/tsi109@c0000000/serial@7808";
        };
 };
index 71d3bb4931dc05cc3e70d801bd32071692357afd..b93bf2d9dd5bea02bb259d76daf696bda2204a3a 100644 (file)
@@ -291,6 +291,6 @@ PCI0: pci@ec000000 {
        };
 
        chosen {
-               linux,stdout-path = &UART0;
+               stdout-path = &UART0;
        };
 };
index 9c94fd737f7c3bf8add1ac4f8c142170cc38e9f6..2e6e3a7b26040d0c1a84ebf4a3820ac5920d2b77 100644 (file)
@@ -442,6 +442,6 @@ xor-accel@400200000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@f0000200";
+               stdout-path = "/plb/opb/serial@f0000200";
        };
 };
index 23e9d9b7e40058027b8920b0fb722b7ee98913bc..f7063198b2dc6eabdfe829fc6b19a5ab422f8df0 100644 (file)
@@ -150,6 +150,6 @@ iss-block {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@40000200";
+               stdout-path = "/plb/opb/serial@40000200";
        };
 };
index 4ff6555c866d54905375e94e84b67ea322757f7d..5533aff25e41a612d705702c74c08031aa544fbc 100644 (file)
@@ -111,6 +111,6 @@ iss-block {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@40000200";
+               stdout-path = "/plb/opb/serial@40000200";
        };
 };
index f913dbe25d35a29dab9391ccbd2e96f203bf8672..02629e119b875193e6137400ebf391c9b899613c 100644 (file)
@@ -505,6 +505,6 @@ xor-accel@400200000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@f0000200";
+               stdout-path = "/plb/opb/serial@f0000200";
        };
 };
index 8c9429033618934fffb68187d67f2bfcf72a124a..d9613b7b945f3e0a9a0c4715766c6ff4a0c47b43 100644 (file)
@@ -222,6 +222,6 @@ EMAC1: ethernet@400a1000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@50001000";
+               stdout-path = "/plb/opb/serial@50001000";
        };
 };
index 5d68236e7c3ce09c6993d2568bba79737c8f42c0..fe6c17c8812a62545fd279cba29906c67090733a 100644 (file)
@@ -339,6 +339,6 @@ cpld@4,0 {
 
 
        chosen {
-               linux,stdout-path = "/soc/cpm/serial@91a00";
+               stdout-path = "/soc/cpm/serial@91a00";
        };
 };
index b5413cb85f13458f31a46019a5b868c12082f192..843f156a49c461a5a6fbcf054c7fdaf3963d16af 100644 (file)
@@ -25,7 +25,7 @@ aliases {
        };
 
        chosen {
-               linux,stdout-path = &console;
+               stdout-path = &console;
        };
 
        cpus {
index 6d2cddf64cfdb2cf998bf6d7afb601d2b3670a53..98282c18d9899da7018471f281df18ccf92b9e2e 100644 (file)
@@ -262,6 +262,6 @@ crypto@30000 {
        };
 
        chosen {
-               linux,stdout-path = "/soc/cpm/serial@11a00";
+               stdout-path = "/soc/cpm/serial@11a00";
        };
 };
index 34c1f48b1a09835b5d73abda38ea5ffad15ce603..4443fac3f576ab4251e4f0ad8baff17fa9dc2645 100644 (file)
@@ -185,6 +185,6 @@ i2c@860 {
        };
 
        chosen {
-               linux,stdout-path = "/soc/cpm/serial@a80";
+               stdout-path = "/soc/cpm/serial@a80";
        };
 };
index 4e93bd961e0f6c2c9638e79e0a7f38bbebb12cdf..5b037f51741df177cfb26ea11dd1a1e9bb093290 100644 (file)
@@ -227,6 +227,6 @@ i2c@860 {
        };
 
        chosen {
-               linux,stdout-path = "/soc/cpm/serial@a80";
+               stdout-path = "/soc/cpm/serial@a80";
        };
 };
index 1ecb341a232a50ca92d012fbd3bc6d7e6b242599..a7eb6d25903d883baa7f057e49e948511ab9f02c 100644 (file)
@@ -179,7 +179,7 @@ i8259: interrupt-controller@20 {
        };
 
        chosen {
-               linux,stdout-path = &serial0;
+               stdout-path = &serial0;
         };
 
 };
index 18e7d79ee4c31b358901a25bd1678671e7d224bc..d10b0411809b47ff6efdf5327e97ccc6780eb1ae 100644 (file)
@@ -309,6 +309,6 @@ GPIO: gpio@ef600800 {
                };
        };
         chosen {
-                linux,stdout-path = "/plb/opb/serial@ef600200";
+                stdout-path = "/plb/opb/serial@ef600200";
         };
 };
index 0c525ff0c257546d33d72407b16c374b0dbe02f9..a477615e346812b14d2ec4fe910522595ba43e4e 100644 (file)
@@ -242,6 +242,6 @@ PIC: interrupt-controller@10c00 {
        };
 
        chosen {
-               linux,stdout-path = "/soc/cpm/serial@11a00";
+               stdout-path = "/soc/cpm/serial@11a00";
        };
 };
index 9684c80e40931927cc5879dfa3935eebd12b70aa..e59829cff556e4b89940a2b918afce40a98ad2d3 100644 (file)
@@ -344,7 +344,7 @@ PCI0: pci@1ec000000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
                bootargs = "console=ttyS0,115200";
        };
 };
index d86a3a4981182b5534f30e029c1b8824f3817792..f3e046fb49e28ea6548c872f0a2b43b90ea7d68c 100644 (file)
@@ -381,7 +381,7 @@ MSI: ppc4xx-msi@400300000 {
 
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600200";
+               stdout-path = "/plb/opb/serial@ef600200";
        };
 
 };
index 088361cf4636b327fc6af7d831f7164756045813..7d15f18e1180bbb043be0199b6d91d7247976d04 100644 (file)
@@ -288,6 +288,6 @@ PCI0: pci@ec000000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
        };
 };
index e41b88a5eaee5dfd694695492cb0cc9a8d3db35d..60d211da9593537e103ee4a99961e7b8d8edec68 100644 (file)
@@ -406,7 +406,7 @@ PCI0: pci@1ec000000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
                bootargs = "console=ttyS0,115200";
        };
 };
index 2a555738517e716f959cdd2f598dea29253d69df..99f6f544dc5f89dcf2360f8e3678a83cae27153c 100644 (file)
@@ -137,6 +137,6 @@ pci0: pci@fe800000 {
        };
 
        chosen {
-               linux,stdout-path = &serial0;
+               stdout-path = &serial0;
        };
 };
index 1657ad0bf8a6a6d30d170706b3338edbce7f5b0e..803f1bff7fa8930e4b84d21a4d869f63f5a90bb3 100644 (file)
@@ -422,6 +422,6 @@ PCIX0: pci@20ec00000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@40000300";
+               stdout-path = "/plb/opb/serial@40000300";
        };
 };
index 391a4e299783d84830002c95535bd9a5c6d9c848..66f1c6312de624efb295458cd89ea45cbf224fdd 100644 (file)
@@ -32,7 +32,7 @@ DDR2_SDRAM: memory@0 {
        } ;
        chosen {
                bootargs = "console=ttyS0 root=/dev/ram";
-               linux,stdout-path = &RS232_Uart_1;
+               stdout-path = &RS232_Uart_1;
        } ;
        cpus {
                #address-cells = <1>;
index 81201d3907e2eec11e83dd0008aff2b5df00b7db..3b736ca26ddc802ccb6ff6086c0cb3b19700e610 100644 (file)
@@ -26,7 +26,7 @@ alias {
        } ;
        chosen {
                bootargs = "console=ttyS0 root=/dev/ram";
-               linux,stdout-path = "/plb@0/serial@83e00000";
+               stdout-path = "/plb@0/serial@83e00000";
        } ;
        cpus {
                #address-cells = <1>;
index 4a9f726ada139b47d4c45edff3b71b04cbd5f024..0872862c9363528e67fa6acc1d51b09971bc42ca 100644 (file)
@@ -241,6 +241,6 @@ PCI0: pci@ec000000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
        };
 };
index ea9053ef48198f26d4eef62d7a71459f9fcd739c..b4f32740870e0c4d511ea574a6b23b2b0159d65a 100644 (file)
@@ -304,6 +304,6 @@ usb@ef601000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
        };
 };
index 17a5babb098d426b54dc5eb01933c738b748b2c6..104b1d6d56951a49d0b6589541f0982660f1a358 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 /dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
 
 /*
  * This is commented-out for now.
@@ -176,6 +177,15 @@ GPIO: gpio@d8000c0 {
                        compatible = "nintendo,hollywood-gpio";
                        reg = <0x0d8000c0 0x40>;
                        gpio-controller;
+                       ngpios = <24>;
+
+                       gpio-line-names =
+                               "POWER", "SHUTDOWN", "FAN", "DC_DC",
+                               "DI_SPIN", "SLOT_LED", "EJECT_BTN", "SLOT_IN",
+                               "SENSOR_BAR", "DO_EJECT", "EEP_CS", "EEP_CLK",
+                               "EEP_MOSI", "EEP_MISO", "AVE_SCL", "AVE_SDA",
+                               "DEBUG0", "DEBUG1", "DEBUG2", "DEBUG3",
+                               "DEBUG4", "DEBUG5", "DEBUG6", "DEBUG7";
 
                        /*
                         * This is commented out while a standard binding
@@ -214,5 +224,16 @@ disk@d806000 {
                        interrupts = <2>;
                };
        };
+
+       gpio-leds {
+               compatible = "gpio-leds";
+
+               /* This is the blue LED in the disk drive slot */
+               drive-slot {
+                       label = "wii:blue:drive_slot";
+                       gpios = <&GPIO 5 GPIO_ACTIVE_HIGH>;
+                       panic-indicator;
+               };
+       };
 };
 
index 646acfbef0dd8c57b91b5483cd8bed606e69bcf2..d5e14421c39adf41ccf427759cee1cd47f740801 100644 (file)
@@ -503,6 +503,6 @@ pcie@0 {
 
        /* Needed for dtbImage boot wrapper compatibility */
        chosen {
-               linux,stdout-path = &serial0;
+               stdout-path = &serial0;
        };
 };
index 30bb4753577a92652d3524ac7632894e207a5c5a..56508785ce1386efe6720988b78581ce900e892a 100644 (file)
@@ -327,6 +327,6 @@ PCI0: pci@ec000000 {
        };
 
        chosen {
-               linux,stdout-path = "/plb/opb/serial@ef600300";
+               stdout-path = "/plb/opb/serial@ef600300";
        };
 };
index f52c31b1f48fa6a43b1bab1ed27c7f06eafadec8..2a0c8b1bf147959bd23b53d844acc19a242dcdbc 100644 (file)
@@ -7,8 +7,6 @@
 
 #include "of.h"
 
-typedef u32 uint32_t;
-typedef u64 uint64_t;
 typedef unsigned long uintptr_t;
 
 typedef __be16 fdt16_t;
index 7330150bfe34aa00bf8cc3ce73e0236beced250b..d9713ad62e3ca47fb99c786aeaf4e95c0a6377db 100644 (file)
@@ -62,6 +62,7 @@ void RunModeException(struct pt_regs *regs);
 void single_step_exception(struct pt_regs *regs);
 void program_check_exception(struct pt_regs *regs);
 void alignment_exception(struct pt_regs *regs);
+void slb_miss_bad_addr(struct pt_regs *regs);
 void StackOverflow(struct pt_regs *regs);
 void nonrecoverable_exception(struct pt_regs *regs);
 void kernel_fp_unavailable_exception(struct pt_regs *regs);
@@ -88,7 +89,18 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
 long sys_swapcontext(struct ucontext __user *old_ctx,
                    struct ucontext __user *new_ctx,
                    int ctx_size, int r6, int r7, int r8, struct pt_regs *regs);
+int sys_debug_setcontext(struct ucontext __user *ctx,
+                        int ndbg, struct sig_dbg_op __user *dbg,
+                        int r6, int r7, int r8,
+                        struct pt_regs *regs);
+int
+ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp);
+unsigned long __init early_init(unsigned long dt_ptr);
+void __init machine_init(u64 dt_ptr);
 #endif
+
+long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
+                     u32 len_high, u32 len_low);
 long sys_switch_endian(void);
 notrace unsigned int __check_irq_replay(void);
 void notrace restore_interrupts(void);
@@ -126,4 +138,7 @@ extern int __ucmpdi2(u64, u64);
 void _mcount(void);
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
 
+void pnv_power9_force_smt4_catch(void);
+void pnv_power9_force_smt4_release(void);
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
index 10daa1d56e0a44f40ea082aea13af9104a686805..c7c63959ba91066f69b1147f59d4df4e20b39b1f 100644 (file)
@@ -35,7 +35,8 @@
 #define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
 #define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
-#ifdef __SUBARCH_HAS_LWSYNC
+/* The sub-arch has lwsync */
+#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
 #    define SMPWMB      LWSYNC
 #else
 #    define SMPWMB      eieio
index 67c5475311ee6e03b29486f8518dc74758263224..4b5423030d4bb7c8abcdc28bfacbdb8585547e2e 100644 (file)
 #define H_PUD_INDEX_SIZE  9
 #define H_PGD_INDEX_SIZE  9
 
+/*
+ * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
+ * Hence also limit max EA bits to 64TB.
+ */
+#define MAX_EA_BITS_PER_CONTEXT                46
+
 #ifndef __ASSEMBLY__
 #define H_PTE_TABLE_SIZE       (sizeof(pte_t) << H_PTE_INDEX_SIZE)
 #define H_PMD_TABLE_SIZE       (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
 #define H_PAGE_COMBO   0x0
 #define H_PTE_FRAG_NR  0
 #define H_PTE_FRAG_SIZE_SHIFT  0
+
+/* memory key bits, only 8 keys supported */
+#define H_PTE_PKEY_BIT0        0
+#define H_PTE_PKEY_BIT1        0
+#define H_PTE_PKEY_BIT2        _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3        _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4        _RPAGE_RSV5
+
 /*
  * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
  */
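
Checking the new 4K-page constant above: MAX_EA_BITS_PER_CONTEXT = 46
means each context spans 2^46 bytes = 64TB, matching the 64TB max TASK
size the comment describes, whereas covering a full 512TB context would
take 49 bits (the value the 64K header below uses).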
index 3bcf269f8f55470097ac56680685321bf13e62ba..cc82745355b3546f8c81637cec22389bc7eac422 100644 (file)
@@ -4,9 +4,15 @@
 
 #define H_PTE_INDEX_SIZE  8
 #define H_PMD_INDEX_SIZE  10
-#define H_PUD_INDEX_SIZE  7
+#define H_PUD_INDEX_SIZE  10
 #define H_PGD_INDEX_SIZE  8
 
+/*
+ * Each context is 512TB size. SLB miss for first context/default context
+ * is handled in the hotpath.
+ */
+#define MAX_EA_BITS_PER_CONTEXT                49
+
 /*
  * 64k aligned address free up few of the lower bits of RPN for us
  * We steal that here. For more deatils look at pte_pfn/pfn_pte()
 #define H_PAGE_BUSY    _RPAGE_RPN44     /* software: PTE & hash are busy */
 #define H_PAGE_HASHPTE _RPAGE_RPN43    /* PTE has associated HPTE */
 
+/* memory key bits. */
+#define H_PTE_PKEY_BIT0        _RPAGE_RSV1
+#define H_PTE_PKEY_BIT1        _RPAGE_RSV2
+#define H_PTE_PKEY_BIT2        _RPAGE_RSV3
+#define H_PTE_PKEY_BIT3        _RPAGE_RSV4
+#define H_PTE_PKEY_BIT4        _RPAGE_RSV5
+
 /*
  * We need to differentiate between explicit huge page and THP huge
  * page, since THP huge page also need to track real subpage details
 
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
-/*
- * we support 16 fragments per PTE page of 64K size.
- */
-#define H_PTE_FRAG_NR  16
 /*
  * We use a 2K PTE page fragment and another 2K for storing
  * real_pte_t hash index
+ * 8 bytes per each pte entry and another 8 bytes for storing
+ * slot details.
  */
-#define H_PTE_FRAG_SIZE_SHIFT  12
-#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+#define H_PTE_FRAG_SIZE_SHIFT  (H_PTE_INDEX_SIZE + 3 + 1)
+#define H_PTE_FRAG_NR  (PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
 
 #ifndef __ASSEMBLY__
 #include <asm/errno.h>
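
Working the new fragment derivation through for hash with 64K pages:
H_PTE_FRAG_SIZE_SHIFT = 8 (H_PTE_INDEX_SIZE) + 3 (8-byte PTEs) + 1 (the
second 2K half holding real_pte_t slot details) = 12, so each fragment
is 4K and H_PTE_FRAG_NR = 64K >> 12 = 16, the same 16 fragments as the
old hard-coded value, now derived from the index size.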
index 935adcd92a81655ed79e9c21fb6e196233ca69b1..cc8cd656ccfe9fb80bc67e15ce3cc45c7e74be16 100644 (file)
@@ -212,7 +212,7 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
 extern void hash__vmemmap_remove_mapping(unsigned long start,
                                     unsigned long page_size);
 
-int hash__create_section_mapping(unsigned long start, unsigned long end);
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid);
 int hash__remove_section_mapping(unsigned long start, unsigned long end);
 
 #endif /* !__ASSEMBLY__ */
index 37671feb2bf60f761140c8765c65cce29600cccd..5094696eecd6d4a010b08f5a4ff2bfb0d76eee02 100644 (file)
@@ -80,8 +80,29 @@ struct spinlock;
 /* Maximum possible number of NPUs in a system. */
 #define NV_MAX_NPUS 8
 
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * upto 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+       u64 low_slices;
+       DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
 typedef struct {
-       mm_context_id_t id;
+       union {
+               /*
+                * We use id as the PIDR content for radix. On hash we can use
+                * more than one id. The extended ids are used when we start
+                * having address above 512TB. We allocate one extended id
+                * for each 512TB. The new id is then used with the 49 bit
+                * EA to build a new VA. We always use ESID_BITS_1T_MASK bits
+                * from EA and new context ids to build the new VAs.
+                */
+               mm_context_id_t id;
+               mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+       };
        u16 user_psize;         /* page size index */
 
        /* Number of bits in the mm_cpumask */
@@ -94,9 +115,18 @@ typedef struct {
        struct npu_context *npu_context;
 
 #ifdef CONFIG_PPC_MM_SLICES
-       u64 low_slices_psize;   /* SLB page size encodings */
+        /* SLB page size encodings*/
+       unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
        unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
        unsigned long slb_addr_limit;
+# ifdef CONFIG_PPC_64K_PAGES
+       struct slice_mask mask_64k;
+# endif
+       struct slice_mask mask_4k;
+# ifdef CONFIG_HUGETLB_PAGE
+       struct slice_mask mask_16m;
+       struct slice_mask mask_16g;
+# endif
 #else
        u16 sllp;               /* SLB page size encoding */
 #endif
@@ -177,5 +207,25 @@ extern void radix_init_pseries(void);
 static inline void radix_init_pseries(void) { };
 #endif
 
+static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
+{
+       int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+       if (likely(index < ARRAY_SIZE(ctx->extended_id)))
+               return ctx->extended_id[index];
+
+       /* should never happen */
+       WARN_ON(1);
+       return 0;
+}
+
+static inline unsigned long get_user_vsid(mm_context_t *ctx,
+                                         unsigned long ea, int ssize)
+{
+       unsigned long context = get_ea_context(ctx, ea);
+
+       return get_vsid(context, ea, ssize);
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
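
A worked example of the extended-id lookup above, assuming the 64K-page
value MAX_EA_BITS_PER_CONTEXT = 49 defined earlier in this diff: any EA
below 512TB shifts down to index 0, which aliases the union's primary
id, while a hypothetical EA of 3 * 2^49 selects extended_id[3]; one
context id per 512TB chunk, exactly as the comment describes.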
index 4746bc68d446d8e95427e67a86c3493a1d1f1668..558a159600adfbfe675a59d635a496e3ccae8742 100644 (file)
@@ -80,8 +80,18 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
        pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
                               pgtable_gfp_flags(mm, GFP_KERNEL));
+       /*
+        * With hugetlb, we don't clear the second half of the page table.
+        * If we share the same slab cache with the pmd or pud level table,
+        * we need to make sure we zero out the full table on alloc.
+        * With 4K we don't store slot in the second half. Hence we don't
+        * need to do this for 4k.
+        */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \
+       ((H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX) ||                  \
+        (H_PGD_INDEX_SIZE == H_PMD_CACHE_INDEX))
        memset(pgd, 0, PGD_TABLE_SIZE);
-
+#endif
        return pgd;
 }
 
index a6b9f1d746002cd3479686603c76322d52676db9..47b5ffc8715d9c02e342c31459f201d66c7de399 100644 (file)
 /* Max physical address bit as per radix table */
 #define _RPAGE_PA_MAX          57
 
-#ifdef CONFIG_PPC_MEM_KEYS
-#ifdef CONFIG_PPC_64K_PAGES
-#define H_PTE_PKEY_BIT0        _RPAGE_RSV1
-#define H_PTE_PKEY_BIT1        _RPAGE_RSV2
-#else /* CONFIG_PPC_64K_PAGES */
-#define H_PTE_PKEY_BIT0        0 /* _RPAGE_RSV1 is not available */
-#define H_PTE_PKEY_BIT1        0 /* _RPAGE_RSV2 is not available */
-#endif /* CONFIG_PPC_64K_PAGES */
-#define H_PTE_PKEY_BIT2        _RPAGE_RSV3
-#define H_PTE_PKEY_BIT3        _RPAGE_RSV4
-#define H_PTE_PKEY_BIT4        _RPAGE_RSV5
-#else /*  CONFIG_PPC_MEM_KEYS */
-#define H_PTE_PKEY_BIT0        0
-#define H_PTE_PKEY_BIT1        0
-#define H_PTE_PKEY_BIT2        0
-#define H_PTE_PKEY_BIT3        0
-#define H_PTE_PKEY_BIT4        0
-#endif /*  CONFIG_PPC_MEM_KEYS */
-
 /*
  * Max physical address bit we will use for now.
  *
index a61aa9cd63ec9fefa8065ca662aae4d1db486de0..ca366ec863103ad812169aa8e373a292ae900094 100644 (file)
@@ -9,5 +9,10 @@
 #define RADIX_PMD_INDEX_SIZE  9  /* 1G huge page */
 #define RADIX_PUD_INDEX_SIZE    9
 #define RADIX_PGD_INDEX_SIZE  13
+/*
+ * One fragment per per page
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR      (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
 
 #endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
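
Sanity-checking the 4K radix case: with RADIX_PTE_INDEX_SIZE = 9 (the
4K value, defined above this hunk; 512 PTEs of 8 bytes each), the shift
is 9 + 3 = 12, so a fragment is exactly PAGE_SIZE and
RADIX_PTE_FRAG_NR = 1, hence the "one fragment per page" comment.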
index c7e71ba295554949cbda2f9fb9b1c4d03551b689..830082496876d6b49d8a6c1db8808e3e79b00d9a 100644 (file)
 #define RADIX_PUD_INDEX_SIZE    9
 #define RADIX_PGD_INDEX_SIZE  13
 
+/*
+ * We use a 256 byte PTE page fragment in radix
+ * 8 bytes per each PTE entry.
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR      (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
 #endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
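
For radix with 64K pages the same formula gives
RADIX_PTE_FRAG_SIZE_SHIFT = 5 (RADIX_PTE_INDEX_SIZE on 64K, defined
above this hunk) + 3 = 8, i.e. 256-byte fragments and
RADIX_PTE_FRAG_NR = 64K >> 8 = 256; this is the "16 to 256"
fragment-count bump called out in the shortlog.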
index 365010f665708495243cce6a607f5286c24716f9..705193e7192fb8a531974125d1f19f36d337037d 100644 (file)
@@ -313,7 +313,7 @@ static inline unsigned long radix__get_tree_size(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int radix__create_section_mapping(unsigned long start, unsigned long end);
+int radix__create_section_mapping(unsigned long start, unsigned long end, int nid);
 int radix__remove_section_mapping(unsigned long start, unsigned long end);
 #endif /* CONFIG_MEMORY_HOTPLUG */
 #endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
new file mode 100644 (file)
index 0000000..db0deda
--- /dev/null
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
+#define _ASM_POWERPC_BOOK3S_64_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT                28
+#define SLICE_LOW_TOP          (0x100000000ul)
+#define SLICE_NUM_LOW          (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr)      ((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT       40
+#define SLICE_NUM_HIGH         (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr)     ((addr) >> SLICE_HIGH_SHIFT)
+
+#else /* CONFIG_PPC_MM_SLICES */
+
+#define get_slice_psize(mm, addr)      ((mm)->context.user_psize)
+#define slice_set_user_psize(mm, psize)                \
+do {                                           \
+       (mm)->context.user_psize = (psize);     \
+       (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
+} while (0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
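
Plugging numbers into the new header: SLICE_LOW_SHIFT = 28 gives 256MB
low slices and SLICE_LOW_TOP = 4G yields SLICE_NUM_LOW = 16, while
SLICE_HIGH_SHIFT = 40 tracks everything above in 1TB slices, so
GET_HIGH_SLICE_INDEX(1TB) evaluates to 1. This matches the "16 low
slices, 1TB high slices" comment in the mmu.h hunk above.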
index b77f0364df94059982c709403d9d5ba3affd0b9c..11843e37d9cf92622bf5b62c30c4637afcf88370 100644 (file)
@@ -99,7 +99,6 @@ static inline void invalidate_dcache_range(unsigned long start,
 #ifdef CONFIG_PPC64
 extern void flush_dcache_range(unsigned long start, unsigned long stop);
 extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
 #endif
 
 #define copy_to_user_page(vma, page, vaddr, dst, src, len) \
index 2e2bacbdf6ed486f1aad88f3a7f3c73c2fd898df..931dda8be87c01de1c640f3e030b5f57d5239bb2 100644 (file)
@@ -131,41 +131,48 @@ static inline void cpu_feature_keys_init(void) { }
 
 /* CPU kernel features */
 
-/* Retain the 32b definitions all use bottom half of word */
+/* Definitions for features that we have on both 32-bit and 64-bit chips */
 #define CPU_FTR_COHERENT_ICACHE                ASM_CONST(0x00000001)
-#define CPU_FTR_L2CR                   ASM_CONST(0x00000002)
-#define CPU_FTR_SPEC7450               ASM_CONST(0x00000004)
-#define CPU_FTR_ALTIVEC                        ASM_CONST(0x00000008)
-#define CPU_FTR_TAU                    ASM_CONST(0x00000010)
-#define CPU_FTR_CAN_DOZE               ASM_CONST(0x00000020)
-#define CPU_FTR_USE_TB                 ASM_CONST(0x00000040)
-#define CPU_FTR_L2CSR                  ASM_CONST(0x00000080)
-#define CPU_FTR_601                    ASM_CONST(0x00000100)
-#define CPU_FTR_DBELL                  ASM_CONST(0x00000200)
-#define CPU_FTR_CAN_NAP                        ASM_CONST(0x00000400)
-#define CPU_FTR_L3CR                   ASM_CONST(0x00000800)
-#define CPU_FTR_L3_DISABLE_NAP         ASM_CONST(0x00001000)
-#define CPU_FTR_NAP_DISABLE_L2_PR      ASM_CONST(0x00002000)
-#define CPU_FTR_DUAL_PLL_750FX         ASM_CONST(0x00004000)
-#define CPU_FTR_NO_DPM                 ASM_CONST(0x00008000)
-#define CPU_FTR_476_DD2                        ASM_CONST(0x00010000)
-#define CPU_FTR_NEED_COHERENT          ASM_CONST(0x00020000)
-#define CPU_FTR_NO_BTIC                        ASM_CONST(0x00040000)
-#define CPU_FTR_DEBUG_LVL_EXC          ASM_CONST(0x00080000)
-#define CPU_FTR_NODSISRALIGN           ASM_CONST(0x00100000)
-#define CPU_FTR_PPC_LE                 ASM_CONST(0x00200000)
-#define CPU_FTR_REAL_LE                        ASM_CONST(0x00400000)
-#define CPU_FTR_FPU_UNAVAILABLE                ASM_CONST(0x00800000)
-#define CPU_FTR_UNIFIED_ID_CACHE       ASM_CONST(0x01000000)
-#define CPU_FTR_SPE                    ASM_CONST(0x02000000)
-#define CPU_FTR_NEED_PAIRED_STWCX      ASM_CONST(0x04000000)
-#define CPU_FTR_LWSYNC                 ASM_CONST(0x08000000)
-#define CPU_FTR_NOEXECUTE              ASM_CONST(0x10000000)
-#define CPU_FTR_INDEXED_DCR            ASM_CONST(0x20000000)
-#define CPU_FTR_EMB_HV                 ASM_CONST(0x40000000)
+#define CPU_FTR_ALTIVEC                        ASM_CONST(0x00000002)
+#define CPU_FTR_DBELL                  ASM_CONST(0x00000004)
+#define CPU_FTR_CAN_NAP                        ASM_CONST(0x00000008)
+#define CPU_FTR_DEBUG_LVL_EXC          ASM_CONST(0x00000010)
+#define CPU_FTR_NODSISRALIGN           ASM_CONST(0x00000020)
+#define CPU_FTR_FPU_UNAVAILABLE                ASM_CONST(0x00000040)
+#define CPU_FTR_LWSYNC                 ASM_CONST(0x00000080)
+#define CPU_FTR_NOEXECUTE              ASM_CONST(0x00000100)
+#define CPU_FTR_EMB_HV                 ASM_CONST(0x00000200)
+
+/* Definitions for features that only exist on 32-bit chips */
+#ifdef CONFIG_PPC32
+#define CPU_FTR_601                    ASM_CONST(0x00001000)
+#define CPU_FTR_L2CR                   ASM_CONST(0x00002000)
+#define CPU_FTR_SPEC7450               ASM_CONST(0x00004000)
+#define CPU_FTR_TAU                    ASM_CONST(0x00008000)
+#define CPU_FTR_CAN_DOZE               ASM_CONST(0x00010000)
+#define CPU_FTR_USE_RTC                        ASM_CONST(0x00020000)
+#define CPU_FTR_L3CR                   ASM_CONST(0x00040000)
+#define CPU_FTR_L3_DISABLE_NAP         ASM_CONST(0x00080000)
+#define CPU_FTR_NAP_DISABLE_L2_PR      ASM_CONST(0x00100000)
+#define CPU_FTR_DUAL_PLL_750FX         ASM_CONST(0x00200000)
+#define CPU_FTR_NO_DPM                 ASM_CONST(0x00400000)
+#define CPU_FTR_476_DD2                        ASM_CONST(0x00800000)
+#define CPU_FTR_NEED_COHERENT          ASM_CONST(0x01000000)
+#define CPU_FTR_NO_BTIC                        ASM_CONST(0x02000000)
+#define CPU_FTR_PPC_LE                 ASM_CONST(0x04000000)
+#define CPU_FTR_UNIFIED_ID_CACHE       ASM_CONST(0x08000000)
+#define CPU_FTR_SPE                    ASM_CONST(0x10000000)
+#define CPU_FTR_NEED_PAIRED_STWCX      ASM_CONST(0x20000000)
+#define CPU_FTR_INDEXED_DCR            ASM_CONST(0x40000000)
+
+#else  /* CONFIG_PPC32 */
+/* Define these to 0 for the sake of tests in common code */
+#define CPU_FTR_601                    (0)
+#define CPU_FTR_PPC_LE                 (0)
+#endif
 
 /*
- * Add the 64-bit processor unique features in the top half of the word;
+ * Definitions for the 64-bit processor unique features;
  * on 32-bit, make the names available but defined to be 0.
  */
 #ifdef __powerpc64__
@@ -174,38 +181,40 @@ static inline void cpu_feature_keys_init(void) { }
 #define LONG_ASM_CONST(x)              0
 #endif
 
-#define CPU_FTR_HVMODE                 LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_ARCH_201               LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_ARCH_206               LONG_ASM_CONST(0x0000000400000000)
-#define CPU_FTR_ARCH_207S              LONG_ASM_CONST(0x0000000800000000)
-#define CPU_FTR_ARCH_300               LONG_ASM_CONST(0x0000001000000000)
-#define CPU_FTR_MMCRA                  LONG_ASM_CONST(0x0000002000000000)
-#define CPU_FTR_CTRL                   LONG_ASM_CONST(0x0000004000000000)
-#define CPU_FTR_SMT                    LONG_ASM_CONST(0x0000008000000000)
-#define CPU_FTR_PAUSE_ZERO             LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_PURR                   LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_CELL_TB_BUG            LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_SPURR                  LONG_ASM_CONST(0x0000080000000000)
-#define CPU_FTR_DSCR                   LONG_ASM_CONST(0x0000100000000000)
-#define CPU_FTR_VSX                    LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_SAO                    LONG_ASM_CONST(0x0000400000000000)
-#define CPU_FTR_CP_USE_DCBTZ           LONG_ASM_CONST(0x0000800000000000)
-#define CPU_FTR_UNALIGNED_LD_STD       LONG_ASM_CONST(0x0001000000000000)
-#define CPU_FTR_ASYM_SMT               LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_STCX_CHECKS_ADDRESS    LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_POPCNTB                        LONG_ASM_CONST(0x0008000000000000)
-#define CPU_FTR_POPCNTD                        LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_PKEY                   LONG_ASM_CONST(0x0020000000000000)
-#define CPU_FTR_VMX_COPY               LONG_ASM_CONST(0x0040000000000000)
-#define CPU_FTR_TM                     LONG_ASM_CONST(0x0080000000000000)
-#define CPU_FTR_CFAR                   LONG_ASM_CONST(0x0100000000000000)
-#define        CPU_FTR_HAS_PPR                 LONG_ASM_CONST(0x0200000000000000)
-#define CPU_FTR_DAWR                   LONG_ASM_CONST(0x0400000000000000)
-#define CPU_FTR_DABRX                  LONG_ASM_CONST(0x0800000000000000)
-#define CPU_FTR_PMAO_BUG               LONG_ASM_CONST(0x1000000000000000)
-#define CPU_FTR_P9_TLBIE_BUG           LONG_ASM_CONST(0x2000000000000000)
-#define CPU_FTR_POWER9_DD1             LONG_ASM_CONST(0x4000000000000000)
-#define CPU_FTR_POWER9_DD2_1           LONG_ASM_CONST(0x8000000000000000)
+#define CPU_FTR_REAL_LE                        LONG_ASM_CONST(0x0000000000001000)
+#define CPU_FTR_HVMODE                 LONG_ASM_CONST(0x0000000000002000)
+#define CPU_FTR_ARCH_206               LONG_ASM_CONST(0x0000000000008000)
+#define CPU_FTR_ARCH_207S              LONG_ASM_CONST(0x0000000000010000)
+#define CPU_FTR_ARCH_300               LONG_ASM_CONST(0x0000000000020000)
+#define CPU_FTR_MMCRA                  LONG_ASM_CONST(0x0000000000040000)
+#define CPU_FTR_CTRL                   LONG_ASM_CONST(0x0000000000080000)
+#define CPU_FTR_SMT                    LONG_ASM_CONST(0x0000000000100000)
+#define CPU_FTR_PAUSE_ZERO             LONG_ASM_CONST(0x0000000000200000)
+#define CPU_FTR_PURR                   LONG_ASM_CONST(0x0000000000400000)
+#define CPU_FTR_CELL_TB_BUG            LONG_ASM_CONST(0x0000000000800000)
+#define CPU_FTR_SPURR                  LONG_ASM_CONST(0x0000000001000000)
+#define CPU_FTR_DSCR                   LONG_ASM_CONST(0x0000000002000000)
+#define CPU_FTR_VSX                    LONG_ASM_CONST(0x0000000004000000)
+#define CPU_FTR_SAO                    LONG_ASM_CONST(0x0000000008000000)
+#define CPU_FTR_CP_USE_DCBTZ           LONG_ASM_CONST(0x0000000010000000)
+#define CPU_FTR_UNALIGNED_LD_STD       LONG_ASM_CONST(0x0000000020000000)
+#define CPU_FTR_ASYM_SMT               LONG_ASM_CONST(0x0000000040000000)
+#define CPU_FTR_STCX_CHECKS_ADDRESS    LONG_ASM_CONST(0x0000000080000000)
+#define CPU_FTR_POPCNTB                        LONG_ASM_CONST(0x0000000100000000)
+#define CPU_FTR_POPCNTD                        LONG_ASM_CONST(0x0000000200000000)
+#define CPU_FTR_PKEY                   LONG_ASM_CONST(0x0000000400000000)
+#define CPU_FTR_VMX_COPY               LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_TM                     LONG_ASM_CONST(0x0000001000000000)
+#define CPU_FTR_CFAR                   LONG_ASM_CONST(0x0000002000000000)
+#define        CPU_FTR_HAS_PPR                 LONG_ASM_CONST(0x0000004000000000)
+#define CPU_FTR_DAWR                   LONG_ASM_CONST(0x0000008000000000)
+#define CPU_FTR_DABRX                  LONG_ASM_CONST(0x0000010000000000)
+#define CPU_FTR_PMAO_BUG               LONG_ASM_CONST(0x0000020000000000)
+#define CPU_FTR_POWER9_DD1             LONG_ASM_CONST(0x0000040000000000)
+#define CPU_FTR_POWER9_DD2_1           LONG_ASM_CONST(0x0000080000000000)
+#define CPU_FTR_P9_TM_HV_ASSIST                LONG_ASM_CONST(0x0000100000000000)
+#define CPU_FTR_P9_TM_XER_SO_BUG       LONG_ASM_CONST(0x0000200000000000)
+#define CPU_FTR_P9_TLBIE_BUG           LONG_ASM_CONST(0x0000400000000000)
 
 #ifndef __ASSEMBLY__
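
The renumbering above moves the 32-bit-only feature bits under
CONFIG_PPC32 and repacks the 64-bit features densely from bit 12
upward. Callers are unaffected because features are only ever tested
symbolically, e.g. (a hedged sketch; the callee name is hypothetical,
but cpu_has_feature() is the kernel's existing helper):

    if (cpu_has_feature(CPU_FTR_ARCH_300)) {
            /* POWER9 / ISA v3.0 specific path */
            setup_arch_v30_features();      /* hypothetical callee */
    }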
 
@@ -286,21 +295,19 @@ static inline void cpu_feature_keys_init(void) { }
 #endif
 
 #define CPU_FTRS_PPC601        (CPU_FTR_COMMON | CPU_FTR_601 | \
-       CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
-#define CPU_FTRS_603   (CPU_FTR_COMMON | \
-           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+       CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_USE_RTC)
+#define CPU_FTRS_603   (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
-#define CPU_FTRS_604   (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | CPU_FTR_PPC_LE)
+#define CPU_FTRS_604   (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
 #define CPU_FTRS_740_NOTAU     (CPU_FTR_COMMON | \
-           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_740   (CPU_FTR_COMMON | \
-           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
            CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
            CPU_FTR_PPC_LE)
 #define CPU_FTRS_750   (CPU_FTR_COMMON | \
-           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
            CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
            CPU_FTR_PPC_LE)
 #define CPU_FTRS_750CL (CPU_FTRS_750)
@@ -309,125 +316,114 @@ static inline void cpu_feature_keys_init(void) { }
 #define CPU_FTRS_750FX (CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX)
 #define CPU_FTRS_750GX (CPU_FTRS_750FX)
 #define CPU_FTRS_7400_NOTAU    (CPU_FTR_COMMON | \
-           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
            CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7400  (CPU_FTR_COMMON | \
-           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
            CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7450_20       (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+           CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
            CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7450_21       (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
            CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
            CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7450_23       (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+           CPU_FTR_NEED_PAIRED_STWCX | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
            CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455_1        (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+           CPU_FTR_NEED_PAIRED_STWCX | \
            CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
            CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455_20       (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+           CPU_FTR_NEED_PAIRED_STWCX | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
            CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
            CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
 #define CPU_FTRS_7455  (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
            CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447_10       (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
            CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
            CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447  (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
            CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7447A (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
            CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
 #define CPU_FTRS_7448  (CPU_FTR_COMMON | \
-           CPU_FTR_USE_TB | \
            CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
            CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
-#define CPU_FTRS_82XX  (CPU_FTR_COMMON | \
-           CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
+#define CPU_FTRS_82XX  (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE)
 #define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
-           CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP)
+           CPU_FTR_MAYBE_CAN_NAP)
 #define CPU_FTRS_E300  (CPU_FTR_MAYBE_CAN_DOZE | \
-           CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+           CPU_FTR_MAYBE_CAN_NAP | \
            CPU_FTR_COMMON)
 #define CPU_FTRS_E300C2        (CPU_FTR_MAYBE_CAN_DOZE | \
-           CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
+           CPU_FTR_MAYBE_CAN_NAP | \
            CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
-#define CPU_FTRS_CLASSIC32     (CPU_FTR_COMMON | CPU_FTR_USE_TB)
-#define CPU_FTRS_8XX   (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_40X   (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_44X   (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_CLASSIC32     (CPU_FTR_COMMON)
+#define CPU_FTRS_8XX   (CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_40X   (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_44X   (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_440x6 (CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
            CPU_FTR_INDEXED_DCR)
 #define CPU_FTRS_47X   (CPU_FTRS_440x6)
-#define CPU_FTRS_E200  (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
+#define CPU_FTRS_E200  (CPU_FTR_SPE_COMP | \
            CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
            CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
            CPU_FTR_DEBUG_LVL_EXC)
-#define CPU_FTRS_E500  (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500  (CPU_FTR_MAYBE_CAN_DOZE | \
            CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
            CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500_2        (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500_2        (CPU_FTR_MAYBE_CAN_DOZE | \
            CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
            CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC        (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-           CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E500MC        (CPU_FTR_NODSISRALIGN | \
+           CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
            CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
 /*
  * e5500/e6500 erratum A-006958 is a timebase bug that can use the
  * same workaround as CPU_FTR_CELL_TB_BUG.
  */
-#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-           CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E5500 (CPU_FTR_NODSISRALIGN | \
+           CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
            CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
            CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG)
-#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
-           CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E6500 (CPU_FTR_NODSISRALIGN | \
+           CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
            CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
            CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT)
 #define CPU_FTRS_GENERIC_32    (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
 
 /* 64-bit CPUs */
-#define CPU_FTRS_POWER4        (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PPC970        (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
-           CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
-           CPU_FTR_STCX_CHECKS_ADDRESS)
-#define CPU_FTRS_PPC970        (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
-           CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \
            CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
            CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \
            CPU_FTR_HVMODE | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER5        (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER5        (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
            CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
            CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
            CPU_FTR_COHERENT_ICACHE | \
@@ -435,7 +431,7 @@ static inline void cpu_feature_keys_init(void) { }
            CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
            CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \
            CPU_FTR_DABRX)
-#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
            CPU_FTR_COHERENT_ICACHE | \
@@ -444,7 +440,7 @@ static inline void cpu_feature_keys_init(void) { }
            CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
            CPU_FTR_CFAR | CPU_FTR_HVMODE | \
            CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
-#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
            CPU_FTR_COHERENT_ICACHE | \
@@ -456,7 +452,7 @@ static inline void cpu_feature_keys_init(void) { }
            CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
 #define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
-#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
            CPU_FTR_MMCRA | CPU_FTR_SMT | \
            CPU_FTR_COHERENT_ICACHE | \
@@ -464,33 +460,45 @@ static inline void cpu_feature_keys_init(void) { }
            CPU_FTR_DSCR | CPU_FTR_SAO  | \
            CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
            CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
-           CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-           CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \
-           CPU_FTR_PKEY | CPU_FTR_P9_TLBIE_BUG)
+           CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+           CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
+           CPU_FTR_P9_TLBIE_BUG)
 #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \
                             (~CPU_FTR_SAO))
 #define CPU_FTRS_POWER9_DD2_0 CPU_FTRS_POWER9
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1)
-#define CPU_FTRS_CELL  (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+                              CPU_FTR_P9_TM_HV_ASSIST | \
+                              CPU_FTR_P9_TM_XER_SO_BUG)
+#define CPU_FTRS_CELL  (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
            CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
            CPU_FTR_PAUSE_ZERO  | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
            CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX)
-#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \
            CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
            CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX)
-#define CPU_FTRS_COMPATIBLE    (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
+#define CPU_FTRS_COMPATIBLE    (CPU_FTR_PPCAS_ARCH_V2)
 
 #ifdef __powerpc64__
 #ifdef CONFIG_PPC_BOOK3E
 #define CPU_FTRS_POSSIBLE      (CPU_FTRS_E6500 | CPU_FTRS_E5500)
 #else
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define CPU_FTRS_POSSIBLE      \
-           (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
+           (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
+            CPU_FTRS_POWER8_DD1 | CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | \
+            CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+            CPU_FTRS_POWER9_DD2_2)
+#else
+#define CPU_FTRS_POSSIBLE      \
+           (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
             CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
             CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
-            CPU_FTRS_PA6T | CPU_FTR_VSX | CPU_FTRS_POWER9 | \
-            CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1)
+            CPU_FTRS_PA6T | CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | \
+            CPU_FTRS_POWER9 | CPU_FTRS_POWER9_DD1 | CPU_FTRS_POWER9_DD2_1 | \
+            CPU_FTRS_POWER9_DD2_2)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
 #endif
 #else
 enum {
@@ -537,12 +545,19 @@ enum {
 #ifdef CONFIG_PPC_BOOK3E
 #define CPU_FTRS_ALWAYS                (CPU_FTRS_E6500 & CPU_FTRS_E5500)
 #else
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define CPU_FTRS_ALWAYS \
+           (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & CPU_FTRS_POWER7 & \
+            CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & CPU_FTRS_POWER8_DD1 & \
+            CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1)
+#else
 #define CPU_FTRS_ALWAYS                \
-           (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
+           (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
             CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
             CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
             CPU_FTRS_POWER8_DD1 & ~CPU_FTR_HVMODE & CPU_FTRS_POSSIBLE & \
-            CPU_FTRS_POWER9)
+            CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD1 & CPU_FTRS_POWER9_DD2_1)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
 #endif
 #else
 enum {
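
For reference, the POSSIBLE and ALWAYS masks above drive compile-time folding
of feature tests: a bit set in CPU_FTRS_ALWAYS holds on every CPU this kernel
can be built for, and a bit missing from CPU_FTRS_POSSIBLE can never hold. A
minimal sketch of the test they enable (the helper name is illustrative; the
real kernel routes this through cpu_has_feature()):

	static inline bool demo_cpu_has_feature(unsigned long feature)
	{
		if (CPU_FTRS_ALWAYS & feature)		/* folds to true */
			return true;
		if (!(CPU_FTRS_POSSIBLE & feature))	/* folds to false */
			return false;
		/* only genuinely variable features reach the runtime check */
		return !!(cur_cpu_spec->cpu_features & feature);
	}
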
index fc97404de0a32a243d20dcb7e8ccea858864a33f..ce5da214ffe5b9571b1e10ec6fb757dba2845170 100644 (file)
@@ -47,6 +47,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 
 void set_breakpoint(struct arch_hw_breakpoint *brk);
 void __set_breakpoint(struct arch_hw_breakpoint *brk);
+bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
                         unsigned long error_code, int brkpt);
index fd37cc101f4f2f6fcba5fe1b5aecd7885d1da074..c2266ca61853c4df4c0d356abafc2a135d13fe45 100644 (file)
@@ -256,6 +256,12 @@ static inline void eeh_serialize_unlock(unsigned long flags)
        raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 }
 
+static inline bool eeh_state_active(int state)
+{
+       return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
+               == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+}
+
 typedef void *(*eeh_traverse_func)(void *data, void *flag);
 void eeh_set_pe_aux_size(int size);
 int eeh_phb_pe_create(struct pci_controller *phb);
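
The new eeh_state_active() helper consolidates open-coded checks that a PE has
both MMIO and DMA enabled. A hedged usage sketch (eeh_ops->get_state() is the
existing state query; the error handling shown is illustrative):

	int state = eeh_ops->get_state(pe, NULL);

	if (!eeh_state_active(state))
		return -EIO;	/* PE not fully recovered yet */
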
index 1e551a2d6f8257f3fc78a73152ef65a3ebe24e52..9884e872686f382f2256836afc79bf474d01d99e 100644 (file)
@@ -34,7 +34,8 @@ struct eeh_event {
 int eeh_event_init(void);
 int eeh_send_failure_event(struct eeh_pe *pe);
 void eeh_remove_event(struct eeh_pe *pe, bool force);
-void eeh_handle_event(struct eeh_pe *pe);
+void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_special_event(void);
 
 #endif /* __KERNEL__ */
 #endif /* ASM_POWERPC_EEH_EVENT_H */
index 90863245df53b6b691fe020da386795bb4d115f2..d3a7e36f1402033adb59756185c01a4aaf1e6f25 100644 (file)
@@ -466,17 +466,17 @@ static inline unsigned long epapr_hypercall(unsigned long *in,
                            unsigned long *out,
                            unsigned long nr)
 {
-       unsigned long register r0 asm("r0");
-       unsigned long register r3 asm("r3") = in[0];
-       unsigned long register r4 asm("r4") = in[1];
-       unsigned long register r5 asm("r5") = in[2];
-       unsigned long register r6 asm("r6") = in[3];
-       unsigned long register r7 asm("r7") = in[4];
-       unsigned long register r8 asm("r8") = in[5];
-       unsigned long register r9 asm("r9") = in[6];
-       unsigned long register r10 asm("r10") = in[7];
-       unsigned long register r11 asm("r11") = nr;
-       unsigned long register r12 asm("r12");
+       register unsigned long r0 asm("r0");
+       register unsigned long r3 asm("r3") = in[0];
+       register unsigned long r4 asm("r4") = in[1];
+       register unsigned long r5 asm("r5") = in[2];
+       register unsigned long r6 asm("r6") = in[3];
+       register unsigned long r7 asm("r7") = in[4];
+       register unsigned long r8 asm("r8") = in[5];
+       register unsigned long r9 asm("r9") = in[6];
+       register unsigned long r10 asm("r10") = in[7];
+       register unsigned long r11 asm("r11") = nr;
+       register unsigned long r12 asm("r12");
 
        asm volatile("bl        epapr_hypercall_start"
                     : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
index 6f6751d3eba9bbff3b758cf37c67246273c7f4cb..78540c074d70ba69394461af1439d6096283844a 100644 (file)
@@ -89,17 +89,17 @@ pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
 
 void flush_dcache_icache_hugepage(struct page *page);
 
-#if defined(CONFIG_PPC_MM_SLICES)
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
                           unsigned long len);
-#else
+
 static inline int is_hugepage_only_range(struct mm_struct *mm,
                                         unsigned long addr,
                                         unsigned long len)
 {
+       if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled())
+               return slice_is_hugepage_only_range(mm, addr, len);
        return 0;
 }
-#endif
 
 void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
                            pte_t pte);
index eca3f9c689070632492b18e9cc9d55d075024dfe..2e2dddab5d65be44ff5b3b081a26b6a672df79e5 100644 (file)
@@ -88,6 +88,7 @@
 #define H_P8           -61
 #define H_P9           -62
 #define H_TOO_BIG      -64
+#define H_UNSUPPORTED  -67
 #define H_OVERLAP      -68
 #define H_INTERRUPT    -69
 #define H_BAD_DATA     -70
 #define H_CPU_CHAR_L1D_FLUSH_ORI30     (1ull << 61) // IBM bit 2
 #define H_CPU_CHAR_L1D_FLUSH_TRIG2     (1ull << 60) // IBM bit 3
 #define H_CPU_CHAR_L1D_THREAD_PRIV     (1ull << 59) // IBM bit 4
+#define H_CPU_CHAR_BRANCH_HINTS_HONORED        (1ull << 58) // IBM bit 5
+#define H_CPU_CHAR_THREAD_RECONFIG_CTRL        (1ull << 57) // IBM bit 6
+#define H_CPU_CHAR_COUNT_CACHE_DISABLED        (1ull << 56) // IBM bit 7
 
 #define H_CPU_BEHAV_FAVOUR_SECURITY    (1ull << 63) // IBM bit 0
 #define H_CPU_BEHAV_L1D_FLUSH_PR       (1ull << 62) // IBM bit 1
index ac6432d9be4694010a9414c1ddf7e836cec9cfd6..8e7b09703ca45dbe2592441e5520176024baee24 100644 (file)
@@ -66,6 +66,7 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
                                                unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
 void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void arch_unregister_hw_breakpoint(struct perf_event *bp);
 void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
 
@@ -79,9 +80,11 @@ static inline void hw_breakpoint_disable(void)
        brk.address = 0;
        brk.type = 0;
        brk.len = 0;
-       __set_breakpoint(&brk);
+       if (ppc_breakpoint_available())
+               __set_breakpoint(&brk);
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
+int hw_breakpoint_handler(struct die_args *args);
 
 #else  /* CONFIG_HAVE_HW_BREAKPOINT */
 static inline void hw_breakpoint_disable(void) { }
index 422f99cf992487a5f7660ff96defdb94ee581c38..af074923d598983b32c878152d7e37e5867d6662 100644 (file)
@@ -33,8 +33,6 @@ extern struct pci_dev *isa_bridge_pcidev;
 #include <asm/mmu.h>
 #include <asm/ppc_asm.h>
 
-#include <asm-generic/iomap.h>
-
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #endif
@@ -663,6 +661,8 @@ static inline void name at                                  \
 #define writel_relaxed(v, addr)        writel(v, addr)
 #define writeq_relaxed(v, addr)        writeq(v, addr)
 
+#include <asm-generic/iomap.h>
+
 #ifdef CONFIG_PPC32
 #define mmiowb()
 #else
index e8e3a0a04eb079b179d3a67c47641a0ad95b07da..ee39ce56b2a20e3e812c70fba56d188c109fce85 100644 (file)
@@ -66,6 +66,7 @@ extern void irq_ctx_init(void);
 extern void call_do_softirq(struct thread_info *tp);
 extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
 extern void do_IRQ(struct pt_regs *regs);
+extern void __init init_IRQ(void);
 extern void __do_irq(struct pt_regs *regs);
 
 int irq_choose_cpu(const struct cpumask *mask);
index c6d3078bd8c3b442114326c10345b7484f5a0228..b8b0be8f1a07eec6530fe6a96f88c6dc61dce8d6 100644 (file)
@@ -6,5 +6,6 @@ static inline bool arch_irq_work_has_interrupt(void)
 {
        return true;
 }
+extern void arch_irq_work_raise(void);
 
 #endif /* _ASM_POWERPC_IRQ_WORK_H */
index 09a802bb702faf95b3e5f2875546acf085f2a392..a790d5cf6ea37da3bbb99e879cc59753757f1921 100644 (file)
 
 /* book3s_hv */
 
+#define BOOK3S_INTERRUPT_HV_SOFTPATCH  0x1500
+
 /*
  * Special trap used to indicate to host that this is a
  * passthrough interrupt that could not be handled
index 376ae803b69c60a5eb93c15c64f9c2c6597a256a..4c02a7378d067e6dd5afc12b7336f90353879abc 100644 (file)
@@ -241,6 +241,10 @@ extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
                        unsigned long mask);
 extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
 
+extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu);
+extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu);
+extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu);
+
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
 extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
index 998f7b7aaa9e5c1e905d5b202d9e2b091fc037b4..c424e44f4c0010e4e6f12f36fac1a792575948be 100644 (file)
@@ -472,6 +472,49 @@ static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
                        set_bit_le(i, map);
 }
 
+static inline u64 sanitize_msr(u64 msr)
+{
+       msr &= ~MSR_HV;
+       msr |= MSR_ME;
+       return msr;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.cr  = vcpu->arch.cr_tm;
+       vcpu->arch.xer = vcpu->arch.xer_tm;
+       vcpu->arch.lr  = vcpu->arch.lr_tm;
+       vcpu->arch.ctr = vcpu->arch.ctr_tm;
+       vcpu->arch.amr = vcpu->arch.amr_tm;
+       vcpu->arch.ppr = vcpu->arch.ppr_tm;
+       vcpu->arch.dscr = vcpu->arch.dscr_tm;
+       vcpu->arch.tar = vcpu->arch.tar_tm;
+       memcpy(vcpu->arch.gpr, vcpu->arch.gpr_tm,
+              sizeof(vcpu->arch.gpr));
+       vcpu->arch.fp  = vcpu->arch.fp_tm;
+       vcpu->arch.vr  = vcpu->arch.vr_tm;
+       vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.cr_tm  = vcpu->arch.cr;
+       vcpu->arch.xer_tm = vcpu->arch.xer;
+       vcpu->arch.lr_tm  = vcpu->arch.lr;
+       vcpu->arch.ctr_tm = vcpu->arch.ctr;
+       vcpu->arch.amr_tm = vcpu->arch.amr;
+       vcpu->arch.ppr_tm = vcpu->arch.ppr;
+       vcpu->arch.dscr_tm = vcpu->arch.dscr;
+       vcpu->arch.tar_tm = vcpu->arch.tar;
+       memcpy(vcpu->arch.gpr_tm, vcpu->arch.gpr,
+              sizeof(vcpu->arch.gpr));
+       vcpu->arch.fp_tm  = vcpu->arch.fp;
+       vcpu->arch.vr_tm  = vcpu->arch.vr;
+       vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
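
A sketch of how these helpers combine in transactional-memory emulation:
sanitize_msr() guarantees a guest MSR never has HV set or ME clear, and the
checkpoint copies shuttle register state when a transaction is simulated. The
function below is illustrative, not part of this patch:

	static void demo_tm_rollback(struct kvm_vcpu *vcpu)
	{
		/* the transaction "failed": roll back to checkpointed state */
		copy_from_checkpoint(vcpu);
		vcpu->arch.shregs.msr = sanitize_msr(vcpu->arch.shregs.msr);
	}
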
index ab386af2904fdb4bc08713c641027866a320a055..d978fdf698af2ad5e89a4243e7bf2efe3e4e15bb 100644 (file)
@@ -119,6 +119,7 @@ struct kvmppc_host_state {
        u8 host_ipi;
        u8 ptid;                /* thread number within subcore when split */
        u8 tid;                 /* thread number within whole core */
+       u8 fake_suspend;
        struct kvm_vcpu *kvm_vcpu;
        struct kvmppc_vcore *kvm_vcore;
        void __iomem *xics_phys;
index 1f53b562726fd9130880b1297284d9204990507b..deb54293398cb9510da43e4555fcabda18925f8d 100644 (file)
@@ -610,6 +610,7 @@ struct kvm_vcpu_arch {
        u64 tfhar;
        u64 texasr;
        u64 tfiar;
+       u64 orig_texasr;
 
        u32 cr_tm;
        u64 xer_tm;
index 7765a800ddaea08bd660ca1a00f21a228461a4a1..b7d066b037da9a5cf60f6f2b2080865c22c21a86 100644 (file)
@@ -436,15 +436,15 @@ struct openpic;
 extern void kvm_cma_reserve(void) __init;
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
-       paca[cpu].kvm_hstate.xics_phys = (void __iomem *)addr;
+       paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr;
 }
 
 static inline void kvmppc_set_xive_tima(int cpu,
                                        unsigned long phys_addr,
                                        void __iomem *virt_addr)
 {
-       paca[cpu].kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
-       paca[cpu].kvm_hstate.xive_tima_virt = virt_addr;
+       paca_ptrs[cpu]->kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
+       paca_ptrs[cpu]->kvm_hstate.xive_tima_virt = virt_addr;
 }
 
 static inline u32 kvmppc_get_xics_latch(void)
@@ -458,7 +458,7 @@ static inline u32 kvmppc_get_xics_latch(void)
 
 static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
 {
-       paca[cpu].kvm_hstate.host_ipi = host_ipi;
+       paca_ptrs[cpu]->kvm_hstate.host_ipi = host_ipi;
 }
 
 static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
index d0a2a2f9956471362e13093272955f42c0e4121b..7c23ce8a5a4c4fd405577e54a3b537fb14852ed7 100644 (file)
 #include <linux/threads.h>
 #include <asm/types.h>
 #include <asm/mmu.h>
+#include <asm/firmware.h>
 
 /*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#define NR_LPPACAS     1
-
-/*
- * The Hypervisor barfs if the lppaca crosses a page boundary.  A 1k
- * alignment is sufficient to prevent this
+ * The lppaca is the "virtual processor area" registered with the hypervisor
+ * via H_REGISTER_VPA etc.
+ *
+ * According to PAPR, the structure is 640 bytes long, must be L1 cache line
+ * aligned, and must not cross a 4kB boundary. Its size field must be at
+ * least 640 bytes (but may be more).
+ *
+ * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than
+ * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep
+ * this structure as the canonical 640-byte size.
  */
 struct lppaca {
        /* cacheline 1 contains read-only data */
@@ -97,13 +100,11 @@ struct lppaca {
 
        __be32  page_ins;               /* CMO Hint - # page ins by OS */
        u8      reserved11[148];
-       volatile __be64 dtl_idx;                /* Dispatch Trace Log head index */
+       volatile __be64 dtl_idx;        /* Dispatch Trace Log head index */
        u8      reserved12[96];
-} __attribute__((__aligned__(0x400)));
-
-extern struct lppaca lppaca[];
+} ____cacheline_aligned;
 
-#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr)
+#define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
 
 /*
  * We are using a non architected field to determine if a partition is
@@ -114,6 +115,8 @@ extern struct lppaca lppaca[];
 
 static inline bool lppaca_shared_proc(struct lppaca *l)
 {
+       if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+               return false;
        return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
 }
 
index 2f806e329648dca4799e51262b95d6097c2e62cd..4f547752ae79595086c9ad55a44281ea68b52505 100644 (file)
 #define M_APG2         0x00000040
 #define M_APG3         0x00000060
 
+#ifdef CONFIG_PPC_MM_SLICES
+#include <asm/nohash/32/slice.h>
+#define SLICE_ARRAY_SIZE       (1 << (32 - SLICE_LOW_SHIFT - 1))
+#endif
+
 #ifndef __ASSEMBLY__
+struct slice_mask {
+       u64 low_slices;
+       DECLARE_BITMAP(high_slices, 0);
+};
+
 typedef struct {
        unsigned int id;
        unsigned int active;
        unsigned long vdso_base;
+#ifdef CONFIG_PPC_MM_SLICES
+       u16 user_psize;         /* page size index */
+       unsigned char low_slices_psize[SLICE_ARRAY_SIZE];
+       unsigned char high_slices_psize[0];
+       unsigned long slb_addr_limit;
+       struct slice_mask mask_base_psize; /* 4k or 16k */
+# ifdef CONFIG_HUGETLB_PAGE
+       struct slice_mask mask_512k;
+       struct slice_mask mask_8m;
+# endif
+#endif
 } mm_context_t;
 
 #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
index bb38312cff28ceba0ece038a4523dd22ce0910fa..61d15ce92278dcb434482639b5fb5ecbc17de353 100644 (file)
 /* MMU feature bit sets for various CPUs */
 #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2  \
        MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
-#define MMU_FTRS_POWER4                MMU_FTRS_DEFAULT_HPTE_ARCH_V2
-#define MMU_FTRS_PPC970                MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
-#define MMU_FTRS_POWER5                MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER         MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970                MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA
+#define MMU_FTRS_POWER5                MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
 #define MMU_FTRS_POWER6                MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
 #define MMU_FTRS_POWER7                MMU_FTRS_POWER6
 #define MMU_FTRS_POWER8                MMU_FTRS_POWER6
index 3a15b6db950175a5d717249aa1355439f3065da0..1835ca1505d6bb0c048bb3f14aa64db92728f09f 100644 (file)
@@ -60,12 +60,51 @@ extern int hash__alloc_context_id(void);
 extern void hash__reserve_context_id(int id);
 extern void __destroy_context(int context_id);
 static inline void mmu_context_init(void) { }
+
+static inline int alloc_extended_context(struct mm_struct *mm,
+                                        unsigned long ea)
+{
+       int context_id;
+
+       int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+       context_id = hash__alloc_context_id();
+       if (context_id < 0)
+               return context_id;
+
+       VM_WARN_ON(mm->context.extended_id[index]);
+       mm->context.extended_id[index] = context_id;
+       return context_id;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+       int context_id;
+
+       context_id = get_ea_context(&mm->context, ea);
+       if (!context_id)
+               return true;
+       return false;
+}
+
 #else
 extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
                               struct task_struct *tsk);
 extern unsigned long __init_new_context(void);
 extern void __destroy_context(unsigned long context_id);
 extern void mmu_context_init(void);
+static inline int alloc_extended_context(struct mm_struct *mm,
+                                        unsigned long ea)
+{
+       /* non-book3s_64 platforms should never call this */
+       WARN_ON(1);
+       return -ENOMEM;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+       return false;
+}
 #endif
 
 #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
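
How a fault path for an address beyond the first 512TB range might use the
pair above; a hedged sketch, the surrounding caller is illustrative:

	if (need_extra_context(mm, ea)) {
		/* first touch of this region: assign it a context id */
		if (alloc_extended_context(mm, ea) < 0)
			return -EFAULT;
	}
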
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
new file mode 100644 (file)
index 0000000..777d62e
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_SLICE_H
+#define _ASM_POWERPC_NOHASH_32_SLICE_H
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#define SLICE_LOW_SHIFT                26      /* 64 slices */
+#define SLICE_LOW_TOP          (0x100000000ull)
+#define SLICE_NUM_LOW          (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr)      ((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT       0
+#define SLICE_NUM_HIGH         0ul
+#define GET_HIGH_SLICE_INDEX(addr)     (addr & 0)
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h
new file mode 100644 (file)
index 0000000..ad0d6e3
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H
+#define _ASM_POWERPC_NOHASH_64_SLICE_H
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define get_slice_psize(mm, addr)      MMU_PAGE_64K
+#else /* CONFIG_PPC_64K_PAGES */
+#define get_slice_psize(mm, addr)      MMU_PAGE_4K
+#endif /* !CONFIG_PPC_64K_PAGES */
+#define slice_set_user_psize(mm, psize)        do { BUG(); } while (0)
+
+#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
index 94bd1bf2c8732cbd6119bab581f1cffcfb7316b8..d886a5b7ff21063dd0664e25742ad79ec56d9fcc 100644 (file)
 #define OPAL_NPU_SPA_SETUP                     159
 #define OPAL_NPU_SPA_CLEAR_CACHE               160
 #define OPAL_NPU_TL_SET                                161
-#define OPAL_LAST                              161
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR           164
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR           165
+#define OPAL_LAST                              165
 
 /* Device tree flags */
 
index 12e70fb58700b04bc28c13d0455a139a030f25ad..7159e1a6a61aa7afdea367cc5c36ef2dddef2c9b 100644 (file)
@@ -204,6 +204,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
                uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
@@ -323,7 +325,7 @@ struct rtc_time;
 extern unsigned long opal_get_boot_time(void);
 extern void opal_nvram_init(void);
 extern void opal_flash_update_init(void);
-extern void opal_flash_term_callback(void);
+extern void opal_flash_update_print_message(void);
 extern int opal_elog_init(void);
 extern void opal_platform_dump_init(void);
 extern void opal_sys_param_init(void);
index b62c31037cadefe742326c441b49ba5a0204f480..4185f1c9612501b51ffdafc2c8f125d1fa6ee1f3 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/accounting.h>
 #include <asm/hmi.h>
 #include <asm/cpuidle.h>
+#include <asm/atomic.h>
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -46,7 +47,10 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
 #define get_paca()     local_paca
 #endif
 
+#ifdef CONFIG_PPC_PSERIES
 #define get_lppaca()   (get_paca()->lppaca_ptr)
+#endif
+
 #define get_slb_shadow()       (get_paca()->slb_shadow_ptr)
 
 struct task_struct;
@@ -58,7 +62,7 @@ struct task_struct;
  * processor.
  */
 struct paca_struct {
-#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_PSERIES
        /*
         * Because hw_cpu_id, unlike other paca fields, is accessed
         * routinely from other CPUs (from the IRQ code), we stick to
@@ -67,7 +71,8 @@ struct paca_struct {
         */
 
        struct lppaca *lppaca_ptr;      /* Pointer to LpPaca for PLIC */
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
+
        /*
         * MAGIC: the spinlock functions in arch/powerpc/lib/locks.c 
         * load lock_token and paca_index with a single lwz
@@ -141,7 +146,7 @@ struct paca_struct {
 #ifdef CONFIG_PPC_BOOK3S
        mm_context_id_t mm_ctx_id;
 #ifdef CONFIG_PPC_MM_SLICES
-       u64 mm_ctx_low_slices_psize;
+       unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
        unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
        unsigned long mm_ctx_slb_addr_limit;
 #else
@@ -160,10 +165,14 @@ struct paca_struct {
        u64 saved_msr;                  /* MSR saved here by enter_rtas */
        u16 trap_save;                  /* Used when bad stack is encountered */
        u8 irq_soft_mask;               /* mask for irq soft masking */
+       u8 soft_enabled;                /* irq soft-enable flag */
        u8 irq_happened;                /* irq happened while soft-disabled */
        u8 io_sync;                     /* writel() needs spin_unlock sync */
        u8 irq_work_pending;            /* IRQ_WORK interrupt while soft-disable */
        u8 nap_state_lost;              /* NV GPR values lost in power7_idle */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       u8 pmcregs_in_use;              /* pseries puts this in lppaca */
+#endif
        u64 sprg_vdso;                  /* Saved user-visible sprg */
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        u64 tm_scratch;                 /* TM scratch area for reclaim */
@@ -177,6 +186,8 @@ struct paca_struct {
        u8 thread_mask;
        /* Mask to denote subcore sibling threads */
        u8 subcore_sibling_mask;
+       /* Flag to request this thread not to stop */
+       atomic_t dont_stop;
        /*
         * Pointer to an array which contains pointer
         * to the sibling threads' paca.
@@ -241,18 +252,20 @@ struct paca_struct {
        void *rfi_flush_fallback_area;
        u64 l1d_flush_size;
 #endif
-};
+} ____cacheline_aligned;
 
 extern void copy_mm_to_paca(struct mm_struct *mm);
-extern struct paca_struct *paca;
+extern struct paca_struct **paca_ptrs;
 extern void initialise_paca(struct paca_struct *new_paca, int cpu);
 extern void setup_paca(struct paca_struct *new_paca);
-extern void allocate_pacas(void);
+extern void allocate_paca_ptrs(void);
+extern void allocate_paca(int cpu);
 extern void free_unused_pacas(void);
 
 #else /* CONFIG_PPC64 */
 
-static inline void allocate_pacas(void) { };
+static inline void allocate_paca_ptrs(void) { };
+static inline void allocate_paca(int cpu) { };
 static inline void free_unused_pacas(void) { };
 
 #endif /* CONFIG_PPC64 */
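
Turning the flat paca array into an array of pointers lets each entry be
allocated separately, e.g. on the owning CPU's home NUMA node. A sketch of the
idea (alloc_on_node() is a hypothetical allocator; the real logic is more
careful about boot-time limits):

	void demo_allocate_paca(int cpu)
	{
		/* backing storage on the CPU's own node, not always node 0 */
		paca_ptrs[cpu] = alloc_on_node(sizeof(struct paca_struct),
					       cpu_to_node(cpu));
		initialise_paca(paca_ptrs[cpu], cpu);
	}
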
index 8da5d4c1cab2b6d52a297117c6e6b27221d941c6..dec9ce5ba8afa5fe46153b006c77a98da787127c 100644 (file)
@@ -126,7 +126,15 @@ extern long long virt_phys_offset;
 
 #ifdef CONFIG_FLATMEM
 #define ARCH_PFN_OFFSET                ((unsigned long)(MEMORY_START >> PAGE_SHIFT))
-#define pfn_valid(pfn)         ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
+#ifndef __ASSEMBLY__
+extern unsigned long max_mapnr;
+static inline bool pfn_valid(unsigned long pfn)
+{
+       unsigned long min_pfn = ARCH_PFN_OFFSET;
+
+       return pfn >= min_pfn && pfn < max_mapnr;
+}
+#endif
 #endif
 
 #define virt_to_pfn(kaddr)     (__pa(kaddr) >> PAGE_SHIFT)
@@ -344,5 +352,6 @@ typedef struct page *pgtable_t;
 
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
+#include <asm/slice.h>
 
 #endif /* _ASM_POWERPC_PAGE_H */
index 56234c6fcd61a295186eebf6bf98f11be24227b6..af04acdb873fcc41bd3e45611de2f5b72119cc81 100644 (file)
@@ -86,65 +86,6 @@ extern u64 ppc64_pft_size;
 
 #endif /* __ASSEMBLY__ */
 
-#ifdef CONFIG_PPC_MM_SLICES
-
-#define SLICE_LOW_SHIFT                28
-#define SLICE_HIGH_SHIFT       40
-
-#define SLICE_LOW_TOP          (0x100000000ul)
-#define SLICE_NUM_LOW          (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define SLICE_NUM_HIGH         (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
-
-#define GET_LOW_SLICE_INDEX(addr)      ((addr) >> SLICE_LOW_SHIFT)
-#define GET_HIGH_SLICE_INDEX(addr)     ((addr) >> SLICE_HIGH_SHIFT)
-
-#ifndef __ASSEMBLY__
-struct mm_struct;
-
-extern unsigned long slice_get_unmapped_area(unsigned long addr,
-                                            unsigned long len,
-                                            unsigned long flags,
-                                            unsigned int psize,
-                                            int topdown);
-
-extern unsigned int get_slice_psize(struct mm_struct *mm,
-                                   unsigned long addr);
-
-extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
-extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
-                                 unsigned long len, unsigned int psize);
-
-#endif /* __ASSEMBLY__ */
-#else
-#define slice_init()
-#ifdef CONFIG_PPC_BOOK3S_64
-#define get_slice_psize(mm, addr)      ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize)                \
-do {                                           \
-       (mm)->context.user_psize = (psize);     \
-       (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-#else /* !CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr)      MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr)      MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize)        do { BUG(); } while(0)
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-#define slice_set_range_psize(mm, start, len, psize)   \
-       slice_set_user_psize((mm), (psize))
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
-#endif /* !CONFIG_HUGETLB_PAGE */
-
 #define VM_DATA_DEFAULT_FLAGS \
        (is_32bit_task() ? \
         VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64)
index 723bf48e7494b5e59af463fbc411ac301b364aac..67a8a9585d50f878aba7b664569b3e65b0df07ee 100644 (file)
@@ -53,6 +53,8 @@ struct power_pmu {
                               [PERF_COUNT_HW_CACHE_OP_MAX]
                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
 
+       int             n_blacklist_ev;
+       int             *blacklist_ev;
        /* BHRB entries in the PMU */
        int             bhrb_nr;
 };
index 55eddf50d1498020ba7f17216c689ab89b84f3c7..96c1a46acbd0620ef2d9487b9f8908b36af2b352 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H
 #define _ASM_POWERPC_PLPAR_WRAPPERS_H
 
+#ifdef CONFIG_PPC_PSERIES
+
 #include <linux/string.h>
 #include <linux/irqflags.h>
 
@@ -9,14 +11,6 @@
 #include <asm/paca.h>
 #include <asm/page.h>
 
-/* Get state of physical CPU from query_cpu_stopped */
-int smp_query_cpu_stopped(unsigned int pcpu);
-#define QCSS_STOPPED 0
-#define QCSS_STOPPING 1
-#define QCSS_NOT_STOPPED 2
-#define QCSS_HARDWARE_ERROR -1
-#define QCSS_HARDWARE_BUSY -2
-
 static inline long poll_pending(void)
 {
        return plpar_hcall_norets(H_POLL_PENDING);
@@ -311,17 +305,17 @@ static inline long enable_little_endian_exceptions(void)
        return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0);
 }
 
-static inline long plapr_set_ciabr(unsigned long ciabr)
+static inline long plpar_set_ciabr(unsigned long ciabr)
 {
        return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0);
 }
 
-static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
+static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
 {
        return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
 }
 
-static inline long plapr_signal_sys_reset(long cpu)
+static inline long plpar_signal_sys_reset(long cpu)
 {
        return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);
 }
@@ -340,4 +334,12 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
        return rc;
 }
 
+#else /* !CONFIG_PPC_PSERIES */
+
+static inline long plpar_set_ciabr(unsigned long ciabr)
+{
+       return 0;
+}
+#endif /* CONFIG_PPC_PSERIES */
+
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
index 5a9ede4962cbee1d590d7dfb7d31e6f9fa4204d9..7ac3586c38abd5d15d7826b9b2f3ac7f2ce3cbb5 100644 (file)
@@ -31,10 +31,21 @@ void ppc_enable_pmcs(void);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #include <asm/lppaca.h>
+#include <asm/firmware.h>
 
 static inline void ppc_set_pmu_inuse(int inuse)
 {
-       get_lppaca()->pmcregs_in_use = inuse;
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+       if (firmware_has_feature(FW_FEATURE_LPAR)) {
+#ifdef CONFIG_PPC_PSERIES
+               get_lppaca()->pmcregs_in_use = inuse;
+#endif
+       } else {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+               get_paca()->pmcregs_in_use = inuse;
+#endif
+       }
+#endif
 }
 
 extern void power4_enable_pmcs(void);
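
The net effect of the ifdef maze above, summarised:

	/*
	 *  pseries guest (FW_FEATURE_LPAR):    get_lppaca()->pmcregs_in_use
	 *  bare-metal host, HV KVM possible:   get_paca()->pmcregs_in_use
	 *  neither config enabled:             no-op
	 */
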
index 3e5cf251ad9ad7da490ce0da39261a6f035614d6..d2d8c28db336a5c51f474fab906ab2ee7be7b118 100644 (file)
@@ -29,6 +29,12 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
                           u64 desc);
 
+extern int pnv_pci_enable_tunnel(struct pci_dev *dev, uint64_t *asnind);
+extern int pnv_pci_disable_tunnel(struct pci_dev *dev);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+                                 int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+                                     u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
                           unsigned int virq);
index dc5f6a5d45756234f6ce1a395345827d5e70d8ea..d1c2d2e658cf4d5b1d3c3718a377efa345067128 100644 (file)
@@ -40,6 +40,7 @@ static inline int pnv_npu2_handle_fault(struct npu_context *context,
 }
 
 static inline void pnv_tm_init(void) { }
+static inline void pnv_power9_force_smt4(void) { }
 #endif
 
 #endif /* _ASM_POWERNV_H */
index f1083bcf449c5a0f63e30ad226ee678293bf60fd..18883b8a6dace2b273fd2e0430177998286867d3 100644 (file)
 #define PPC_INST_MSGSYNC               0x7c0006ec
 #define PPC_INST_MSGSNDP               0x7c00011c
 #define PPC_INST_MSGCLRP               0x7c00015c
+#define PPC_INST_MTMSRD                        0x7c000164
 #define PPC_INST_MTTMR                 0x7c0003dc
 #define PPC_INST_NOP                   0x60000000
 #define PPC_INST_PASTE                 0x7c20070d
 #define PPC_INST_POPCNTB_MASK          0xfc0007fe
 #define PPC_INST_POPCNTD               0x7c0003f4
 #define PPC_INST_POPCNTW               0x7c0002f4
+#define PPC_INST_RFEBB                 0x4c000124
 #define PPC_INST_RFCI                  0x4c000066
 #define PPC_INST_RFDI                  0x4c00004e
+#define PPC_INST_RFID                  0x4c000024
 #define PPC_INST_RFMCI                 0x4c00004c
 #define PPC_INST_MFSPR                 0x7c0002a6
 #define PPC_INST_MFSPR_DSCR            0x7c1102a6
 #define PPC_INST_TLBSRX_DOT            0x7c0006a5
 #define PPC_INST_VPMSUMW               0x10000488
 #define PPC_INST_VPMSUMD               0x100004c8
+#define PPC_INST_VPERMXOR              0x1000002d
 #define PPC_INST_XXLOR                 0xf0000490
 #define PPC_INST_XXSWAPD               0xf0000250
 #define PPC_INST_XVCPSGNDP             0xf0000780
 #define PPC_INST_TRECHKPT              0x7c0007dd
 #define PPC_INST_TRECLAIM              0x7c00075d
 #define PPC_INST_TABORT                        0x7c00071d
+#define PPC_INST_TSR                   0x7c0005dd
 
 #define PPC_INST_NAP                   0x4c000364
 #define PPC_INST_SLEEP                 0x4c0003a4
 #define XVCPSGNDP(t, a, b)     stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
                                               VSX_XX3((t), (a), (b))))
 
+#define VPERMXOR(vrt, vra, vrb, vrc)                           \
+       stringify_in_c(.long (PPC_INST_VPERMXOR |               \
+                             ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+                             ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))
+
 #define PPC_NAP                        stringify_in_c(.long PPC_INST_NAP)
 #define PPC_SLEEP              stringify_in_c(.long PPC_INST_SLEEP)
 #define PPC_WINKLE             stringify_in_c(.long PPC_INST_WINKLE)
index ae94b3626b6cd8790f03d60d1f58cbcc5db1b4d2..13f7f4c0e1eae8f57abf5ef075c7e8b06f06b3ae 100644 (file)
@@ -439,14 +439,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
 
 /* The following stops all load and store data streams associated with stream
 * ID (i.e. streams created explicitly).  The embedded and server mnemonics for
- * dcbt are different so we use machine "power4" here explicitly.
+ * dcbt are different so this must only be used for server.
  */
-#define DCBT_STOP_ALL_STREAM_IDS(scratch)      \
-.machine push ;                                        \
-.machine "power4" ;                            \
-       lis     scratch,0x60000000@h;           \
-       dcbt    0,scratch,0b01010;              \
-.machine pop
+#define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch)       \
+       lis     scratch,0x60000000@h;                   \
+       dcbt    0,scratch,0b01010
 
 /*
  * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
index 01299cdc980676a405ab0303b469b318576037a9..c4b36a494a63655528d2305e0393241b8743cb86 100644 (file)
@@ -109,6 +109,13 @@ void release_thread(struct task_struct *);
 #define TASK_SIZE_64TB  (0x0000400000000000UL)
 #define TASK_SIZE_128TB (0x0000800000000000UL)
 #define TASK_SIZE_512TB (0x0002000000000000UL)
+#define TASK_SIZE_1PB   (0x0004000000000000UL)
+#define TASK_SIZE_2PB   (0x0008000000000000UL)
+/*
+ * With 52 bits in the address we can support
+ * up to 4PB (2^52 bytes) of range.
+ */
+#define TASK_SIZE_4PB   (0x0010000000000000UL)
 
 /*
  * For now 512TB is only supported with book3s and 64K linux page size.
@@ -117,11 +124,17 @@ void release_thread(struct task_struct *);
 /*
  * Max value currently used:
  */
-#define TASK_SIZE_USER64               TASK_SIZE_512TB
+#define TASK_SIZE_USER64               TASK_SIZE_4PB
 #define DEFAULT_MAP_WINDOW_USER64      TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE              TASK_SIZE_512TB
 #else
 #define TASK_SIZE_USER64               TASK_SIZE_64TB
 #define DEFAULT_MAP_WINDOW_USER64      TASK_SIZE_64TB
+/*
+ * We don't need to allocate extended context ids for 4K page size, because
+ * we limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE              TASK_SIZE_64TB
 #endif
 
 /*
@@ -505,6 +518,7 @@ extern int powersave_nap;   /* set if nap mode can be used in idle loop */
 extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
 extern void power7_idle_type(unsigned long type);
 extern unsigned long power9_idle_stop(unsigned long psscr_val);
+extern unsigned long power9_offline_stop(unsigned long psscr_val);
 extern void power9_idle_type(unsigned long stop_psscr_val,
                              unsigned long stop_psscr_mask);
 
index e6c7eadf6bceb7092e2615fc62b9d83195624ddf..cb0f272ce12355ef0ad4c3ed64c3f36fc230d954 100644 (file)
 #define PSSCR_SD               0x00400000 /* Status Disable */
 #define PSSCR_PLS      0xf000000000000000 /* Power-saving Level Status */
 #define PSSCR_GUEST_VIS        0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_FAKE_SUSPEND     0x00000400 /* Fake-suspend bit (P9 DD2.2) */
+#define PSSCR_FAKE_SUSPEND_LG  10         /* Fake-suspend bit position */
 
 /* Floating Point Status and Control Register (FPSCR) Fields */
 #define FPSCR_FX       0x80000000      /* FPU exception summary */
 #define SPRN_TFIAR     0x81    /* Transaction Failure Inst Addr   */
 #define SPRN_TEXASR    0x82    /* Transaction EXception & Summary */
 #define SPRN_TEXASRU   0x83    /* ''      ''      ''    Upper 32  */
+#define   TEXASR_ABORT __MASK(63-31) /* terminated by tabort or treclaim */
+#define   TEXASR_SUSP  __MASK(63-32) /* tx failed in suspended state */
+#define   TEXASR_HV    __MASK(63-34) /* MSR[HV] when failure occurred */
+#define   TEXASR_PR    __MASK(63-35) /* MSR[PR] when failure occurred */
 #define   TEXASR_FS    __MASK(63-36) /* TEXASR Failure Summary */
+#define   TEXASR_EXACT __MASK(63-37) /* TFIAR value is exact */
 #define SPRN_TFHAR     0x80    /* Transaction Failure Handler Addr */
 #define SPRN_TIDR      144     /* Thread ID register */
 #define SPRN_CTRLF     0x088
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
new file mode 100644 (file)
index 0000000..fa4d2e1
--- /dev/null
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Security related feature bit definitions.
+ *
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SECURITY_FEATURES_H
+#define _ASM_POWERPC_SECURITY_FEATURES_H
+
+
+extern unsigned long powerpc_security_features;
+extern bool rfi_flush;
+
+static inline void security_ftr_set(unsigned long feature)
+{
+       powerpc_security_features |= feature;
+}
+
+static inline void security_ftr_clear(unsigned long feature)
+{
+       powerpc_security_features &= ~feature;
+}
+
+static inline bool security_ftr_enabled(unsigned long feature)
+{
+       return !!(powerpc_security_features & feature);
+}
+
+
+// Features indicating support for Spectre/Meltdown mitigations
+
+// The L1-D cache can be flushed with ori r30,r30,0
+#define SEC_FTR_L1D_FLUSH_ORI30                0x0000000000000001ull
+
+// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2)
+#define SEC_FTR_L1D_FLUSH_TRIG2                0x0000000000000002ull
+
+// ori r31,r31,0 acts as a speculation barrier
+#define SEC_FTR_SPEC_BAR_ORI31         0x0000000000000004ull
+
+// Speculation past bctr is disabled
+#define SEC_FTR_BCCTRL_SERIALISED      0x0000000000000008ull
+
+// Entries in L1-D are private to a SMT thread
+#define SEC_FTR_L1D_THREAD_PRIV                0x0000000000000010ull
+
+// Indirect branch prediction cache disabled
+#define SEC_FTR_COUNT_CACHE_DISABLED   0x0000000000000020ull
+
+
+// Features indicating need for Spectre/Meltdown mitigations
+
+// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest)
+#define SEC_FTR_L1D_FLUSH_HV           0x0000000000000040ull
+
+// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace)
+#define SEC_FTR_L1D_FLUSH_PR           0x0000000000000080ull
+
+// A speculation barrier should be used for bounds checks (Spectre variant 1)
+#define SEC_FTR_BNDS_CHK_SPEC_BAR      0x0000000000000100ull
+
+// Firmware configuration indicates user favours security over performance
+#define SEC_FTR_FAVOUR_SECURITY                0x0000000000000200ull
+
+
+// Features enabled by default
+#define SEC_FTR_DEFAULT \
+       (SEC_FTR_L1D_FLUSH_HV | \
+        SEC_FTR_L1D_FLUSH_PR | \
+        SEC_FTR_BNDS_CHK_SPEC_BAR | \
+        SEC_FTR_FAVOUR_SECURITY)
+
+#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
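
A hedged sketch of how platform setup code might seed these bits from the
hypervisor's H_GET_CPU_CHARACTERISTICS reply (the H_CPU_CHAR_* and
H_CPU_BEHAV_* flags appear earlier in this series; the function name is
illustrative):

	static void demo_init_security_ftrs(struct h_cpu_char_result *r)
	{
		powerpc_security_features = SEC_FTR_DEFAULT;

		if (r->character & H_CPU_CHAR_BCCTRL_SERIALISED)
			security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);

		/* behaviour flags say what the OS should do; clear if not */
		if (!(r->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
			security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
	}
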
index 469b7fdc9be41cd9ab0ceb7f50c2b633c19f01a4..27fa52ed6d00deac4ed4579c8276c9eb79bc86c3 100644 (file)
@@ -23,6 +23,7 @@ extern void reloc_got2(unsigned long);
 #define PTRRELOC(x)    ((typeof(x)) add_reloc_offset((unsigned long)(x)))
 
 void check_for_initrd(void);
+void mem_topology_setup(void);
 void initmem_init(void);
 void setup_panic(void);
 #define ARCH_PANIC_TIMEOUT 180
@@ -49,7 +50,7 @@ enum l1d_flush_type {
        L1D_FLUSH_MTTRIG        = 0x8,
 };
 
-void __init setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_rfi_flush(enum l1d_flush_type, bool enable);
 void do_rfi_flush_fixups(enum l1d_flush_type types);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
new file mode 100644 (file)
index 0000000..e40406c
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SLICE_H
+#define _ASM_POWERPC_SLICE_H
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/slice.h>
+#elif defined(CONFIG_PPC64)
+#include <asm/nohash/64/slice.h>
+#elif defined(CONFIG_PPC_MMU_NOHASH)
+#include <asm/nohash/32/slice.h>
+#endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+#ifndef __ASSEMBLY__
+
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+                                     unsigned long flags, unsigned int psize,
+                                     int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+                          unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#endif /* _ASM_POWERPC_SLICE_H */
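
A typical call into the consolidated slice API, as an mmap path might issue it
(the arguments shown are illustrative):

	/* find space for a mapping that must live in 64K-page slices */
	addr = slice_get_unmapped_area(addr, len, flags,
				       MMU_PAGE_64K, 1 /* topdown */);
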
index fac963e10d3957a86ac1549ae5abee2f5623fd53..cfecfee1194b9b72b56501a1cf0c9772138bb72c 100644 (file)
@@ -31,6 +31,7 @@
 
 extern int boot_cpuid;
 extern int spinning_secondaries;
+extern u32 *cpu_to_phys_id;
 
 extern void cpu_die(void);
 extern int cpu_to_chip_id(int cpu);
@@ -170,12 +171,12 @@ static inline const struct cpumask *cpu_sibling_mask(int cpu)
 #ifdef CONFIG_PPC64
 static inline int get_hard_smp_processor_id(int cpu)
 {
-       return paca[cpu].hw_cpu_id;
+       return paca_ptrs[cpu]->hw_cpu_id;
 }
 
 static inline void set_hard_smp_processor_id(int cpu, int phys)
 {
-       paca[cpu].hw_cpu_id = phys;
+       paca_ptrs[cpu]->hw_cpu_id = phys;
 }
 #else
 /* 32-bit */
index a7916ee6dfb6945a6aba6d85077362161e78fcbc..bc66712bdc3c0520e441cbc9a04b3ef73c6b5ae0 100644 (file)
@@ -17,7 +17,7 @@
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end);
+extern int create_section_mapping(unsigned long start, unsigned long end, int nid);
 extern int remove_section_mapping(unsigned long start, unsigned long end);
 
 #ifdef CONFIG_PPC_BOOK3S_64
index b9ebc3085fb7932e632527df1f309e76260912d4..72dc4ddc2972a06539e41bf9bd51fb7c407b8049 100644 (file)
@@ -56,6 +56,8 @@
 #define vcpu_is_preempted vcpu_is_preempted
 static inline bool vcpu_is_preempted(int cpu)
 {
+       if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+               return false;
        return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
 }
 #endif
index c3ca42cdc9f5dc8f047c94df4d61a081eac3e4d9..be8c9fa239834389f2ec1d1e1f9f4b97f9171f70 100644 (file)
@@ -35,7 +35,6 @@ static inline void disable_kernel_fp(void)
        msr_check_and_clear(MSR_FP);
 }
 #else
-static inline void __giveup_fpu(struct task_struct *t) { }
 static inline void save_fpu(struct task_struct *t) { }
 static inline void flush_fp_to_thread(struct task_struct *t) { }
 #endif
index 63e7f5a1f1055480af62d6e6fcf6b9a10c453d9f..6ec546090ba1b186625d0effb5e1f4af64422d4f 100644 (file)
@@ -6,10 +6,6 @@
 #include <linux/stringify.h>
 #include <asm/feature-fixups.h>
 
-#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
-#define __SUBARCH_HAS_LWSYNC
-#endif
-
 #ifndef __ASSEMBLY__
 extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
 extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
index 4a12c00f8de3ec1aa90069d01a9b1de3f3174d00..5964145db03d1be6c4079b4c55bb6c92468d0a63 100644 (file)
@@ -70,6 +70,7 @@ static inline struct thread_info *current_thread_info(void)
        return (struct thread_info *)val;
 }
 
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 #endif /* __ASSEMBLY__ */
 
 /*
index b240666b7bc1e9e11fc183183cbe37ed56de51e2..db546c034905a3365fdfe4d58f6621d7c32cf10c 100644 (file)
@@ -31,6 +31,7 @@ extern void to_tm(int tim, struct rtc_time * tm);
 extern void tick_broadcast_ipi_handler(void);
 
 extern void generic_calibrate_decr(void);
+extern void hdec_interrupt(struct pt_regs *regs);
 
 /* Some sane defaults: 125 MHz timebase, 1GHz processor */
 extern unsigned long ppc_proc_freq;
@@ -46,7 +47,7 @@ struct div_result {
 /* Accessor functions for the timebase (RTC on 601) registers. */
 /* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
 #ifdef CONFIG_6xx
-#define __USE_RTC()    (!cpu_has_feature(CPU_FTR_USE_TB))
+#define __USE_RTC()    (cpu_has_feature(CPU_FTR_USE_RTC))
 #else
 #define __USE_RTC()    0
 #endif
@@ -204,6 +205,7 @@ struct cpu_usage {
 DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
 
 extern void secondary_cpu_time_init(void);
+extern void __init time_init(void);
 
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
index 51bfeb8777f065d843c0b0c1eb1cf910d7bbaaf3..a62ee663b2c834b22519ec1f88b6e557e8ac4f76 100644 (file)
 
 #else
 
-#define __access_ok(addr, size, segment)       \
-       (((addr) <= (segment).seg) &&           \
-        (((size) == 0) || (((size) - 1) <= ((segment).seg - (addr)))))
+static inline int __access_ok(unsigned long addr, unsigned long size,
+                       mm_segment_t seg)
+{
+       if (addr > seg.seg)
+               return 0;
+       return (size == 0 || size - 1 <= seg.seg - addr);
+}
 
 #endif
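
[Editor's note: note the shape of the converted check: by testing size - 1 <= seg.seg - addr, with size == 0 handled first, it never computes addr + size, which could wrap for large sizes. The same overflow-safe idiom in isolation (illustrative helper, not kernel code):

	/* Does [addr, addr + size) fit entirely below limit, without
	 * ever computing addr + size (which could overflow)? */
	static inline int range_below(unsigned long addr, unsigned long size,
				      unsigned long limit)
	{
		if (addr > limit)
			return 0;
		return size == 0 || size - 1 <= limit - addr;
	}
]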
 
index 2358f97d62ecb07f7a24bfef0b8ec08dd038f8c4..2b4c40b255e4d385ccb087dd88936e85bbc17d55 100644 (file)
@@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32)          += vdso32/
 obj-$(CONFIG_PPC_WATCHDOG)     += watchdog.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_ppc970.o cpu_setup_pa6t.o
-obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64)    += cpu_setup_power.o security.o
 obj-$(CONFIG_PPC_BOOK3S_64)    += mce.o mce_power.o
 obj-$(CONFIG_PPC_BOOK3E_64)    += exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)            += vdso64/
index ea5eb91b836e4609138adfa854277359228180b6..6bee65f3cfd34bf896ef770f805ec62d417ae9db 100644 (file)
@@ -221,12 +221,17 @@ int main(void)
        OFFSET(PACA_EXMC, paca_struct, exmc);
        OFFSET(PACA_EXSLB, paca_struct, exslb);
        OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_PSERIES
        OFFSET(PACALPPACAPTR, paca_struct, lppaca_ptr);
+#endif
        OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
        OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
        OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
        OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
        OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
+#endif
        OFFSET(LPPACA_DTLIDX, lppaca, dtl_idx);
        OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
        OFFSET(PACA_DTL_RIDX, paca_struct, dtl_ridx);
@@ -568,6 +573,7 @@ int main(void)
        OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar);
        OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar);
        OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr);
+       OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr);
        OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm);
        OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr);
        OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr);
@@ -650,6 +656,7 @@ int main(void)
        HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
        HSTATE_FIELD(HSTATE_PTID, ptid);
        HSTATE_FIELD(HSTATE_TID, tid);
+       HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
        HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
        HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
        HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
@@ -759,6 +766,7 @@ int main(void)
        OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
        OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas);
        OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
+       OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
 #define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
        STOP_SPR(STOP_PID, pid);
        STOP_SPR(STOP_LDBAR, ldbar);
index c5e5a94d9892a5472c646bf03eeb264db91cd747..a9f3970693e1b6b18a509beb0be537694971b382 100644 (file)
@@ -226,7 +226,7 @@ BEGIN_FTR_SECTION
        beq     1f
 END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
        lwz     r6,CPU_SPEC_FEATURES(r4)
-       andi.   r0,r6,CPU_FTR_L3_DISABLE_NAP
+       andis.  r0,r6,CPU_FTR_L3_DISABLE_NAP@h
        beq     1f
        li      r7,CPU_FTR_CAN_NAP
        andc    r6,r6,r7
index 462aed9bcf512fa38856aad905ba9003b2b6c8f5..8d142e5d84cd037babf06bf2b90823d024493a41 100644 (file)
@@ -162,7 +162,7 @@ _GLOBAL(__setup_cpu_e5500)
         * the feature on the primary core, avoid doing it on the
         * secondary core.
         */
-       andis.  r6, r3, CPU_FTR_EMB_HV@h
+       andi.   r6, r3, CPU_FTR_EMB_HV
        beq     2f
        rlwinm  r3, r3, 0, ~CPU_FTR_EMB_HV
        stw     r3, CPU_SPEC_FEATURES(r4)
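
[Editor's note: these two hunks fix mirror-image bugs. andi. ANDs a 16-bit immediate against the low halfword of the register, while andis. shifts the immediate up 16 bits first, so the instruction must match where the feature bit actually sits; the @h operator extracts the upper 16 bits of the constant for andis.. In C terms (illustrative values, not the real feature constants):

	/* A 32-bit feature word tested one halfword at a time: */
	unsigned int features = 0x00400000;		/* bit in the high half */
	unsigned int lo = features & 0xffff;		/* what andi. sees: 0 */
	unsigned int hi = (features >> 16) & 0xffff;	/* what andis. sees: 0x0040 */
]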
index c40a9fc1e5d1270e4bbe75507819e0aa6ed74abe..c8fc9691f8c7e02b26264e701f21fc5d64a56df2 100644 (file)
@@ -133,36 +133,6 @@ extern void __restore_cpu_e6500(void);
 
 static struct cpu_spec __initdata cpu_specs[] = {
 #ifdef CONFIG_PPC_BOOK3S_64
-       {       /* Power4 */
-               .pvr_mask               = 0xffff0000,
-               .pvr_value              = 0x00350000,
-               .cpu_name               = "POWER4 (gp)",
-               .cpu_features           = CPU_FTRS_POWER4,
-               .cpu_user_features      = COMMON_USER_POWER4,
-               .mmu_features           = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
-               .icache_bsize           = 128,
-               .dcache_bsize           = 128,
-               .num_pmcs               = 8,
-               .pmc_type               = PPC_PMC_IBM,
-               .oprofile_cpu_type      = "ppc64/power4",
-               .oprofile_type          = PPC_OPROFILE_POWER4,
-               .platform               = "power4",
-       },
-       {       /* Power4+ */
-               .pvr_mask               = 0xffff0000,
-               .pvr_value              = 0x00380000,
-               .cpu_name               = "POWER4+ (gq)",
-               .cpu_features           = CPU_FTRS_POWER4,
-               .cpu_user_features      = COMMON_USER_POWER4,
-               .mmu_features           = MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA,
-               .icache_bsize           = 128,
-               .dcache_bsize           = 128,
-               .num_pmcs               = 8,
-               .pmc_type               = PPC_PMC_IBM,
-               .oprofile_cpu_type      = "ppc64/power4",
-               .oprofile_type          = PPC_OPROFILE_POWER4,
-               .platform               = "power4",
-       },
        {       /* PPC970 */
                .pvr_mask               = 0xffff0000,
                .pvr_value              = 0x00390000,
@@ -553,11 +523,30 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .machine_check_early    = __machine_check_early_realmode_p9,
                .platform               = "power9",
        },
-       {       /* Power9 DD 2.1 or later (see DD2.0 above) */
+       {       /* Power9 DD 2.1 */
+               .pvr_mask               = 0xffffefff,
+               .pvr_value              = 0x004e0201,
+               .cpu_name               = "POWER9 (raw)",
+               .cpu_features           = CPU_FTRS_POWER9_DD2_1,
+               .cpu_user_features      = COMMON_USER_POWER9,
+               .cpu_user_features2     = COMMON_USER2_POWER9,
+               .mmu_features           = MMU_FTRS_POWER9,
+               .icache_bsize           = 128,
+               .dcache_bsize           = 128,
+               .num_pmcs               = 6,
+               .pmc_type               = PPC_PMC_IBM,
+               .oprofile_cpu_type      = "ppc64/power9",
+               .oprofile_type          = PPC_OPROFILE_INVALID,
+               .cpu_setup              = __setup_cpu_power9,
+               .cpu_restore            = __restore_cpu_power9,
+               .machine_check_early    = __machine_check_early_realmode_p9,
+               .platform               = "power9",
+       },
+       {       /* Power9 DD2.2 or later */
                .pvr_mask               = 0xffff0000,
                .pvr_value              = 0x004e0000,
                .cpu_name               = "POWER9 (raw)",
-               .cpu_features           = CPU_FTRS_POWER9_DD2_1,
+               .cpu_features           = CPU_FTRS_POWER9_DD2_2,
                .cpu_user_features      = COMMON_USER_POWER9,
                .cpu_user_features2     = COMMON_USER2_POWER9,
                .mmu_features           = MMU_FTRS_POWER9,
@@ -609,15 +598,15 @@ static struct cpu_spec __initdata cpu_specs[] = {
        {       /* default match */
                .pvr_mask               = 0x00000000,
                .pvr_value              = 0x00000000,
-               .cpu_name               = "POWER4 (compatible)",
+               .cpu_name               = "POWER5 (compatible)",
                .cpu_features           = CPU_FTRS_COMPATIBLE,
                .cpu_user_features      = COMMON_USER_PPC64,
-               .mmu_features           = MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
+               .mmu_features           = MMU_FTRS_POWER,
                .icache_bsize           = 128,
                .dcache_bsize           = 128,
                .num_pmcs               = 6,
                .pmc_type               = PPC_PMC_IBM,
-               .platform               = "power4",
+               .platform               = "power5",
        }
 #endif /* CONFIG_PPC_BOOK3S_64 */
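
[Editor's note: entry order in cpu_specs[] matters because matching is first-hit on a masked PVR compare: the new DD2.1 entry's 0xffffefff mask (which ignores one revision bit) has to precede the broad 0xffff0000 entry, and the mask-zero default entry catches everything left over. A simplified sketch of the lookup, condensed from the real identify_cpu():

	static struct cpu_spec *match_cpu(unsigned int pvr,
					  struct cpu_spec *specs, int n)
	{
		int i;

		for (i = 0; i < n; i++)
			if ((pvr & specs[i].pvr_mask) == specs[i].pvr_value)
				return &specs[i];
		return NULL;	/* never reached while a mask-0 default exists */
	}
]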
 
index 00b215125d3ea38beda6ac8f23d95e17f23ba87a..17c8b99680f21490f2c20cf8d135403bc9b854cb 100644 (file)
@@ -238,7 +238,7 @@ static void __maybe_unused crash_kexec_wait_realmode(int cpu)
                if (i == cpu)
                        continue;
 
-               while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+               while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
                        barrier();
                        if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
                                break;
index 8ca5d5b74618371904ec7af9c436050ea35cdf0e..e88fbb1fdb8fe8d2d2e5750ba379b24f8baab717 100644 (file)
@@ -54,8 +54,7 @@ struct dt_cpu_feature {
 };
 
 #define CPU_FTRS_BASE \
-          (CPU_FTR_USE_TB | \
-           CPU_FTR_LWSYNC | \
+          (CPU_FTR_LWSYNC | \
            CPU_FTR_FPU_UNAVAILABLE |\
            CPU_FTR_NODSISRALIGN |\
            CPU_FTR_NOEXECUTE |\
@@ -84,6 +83,7 @@ static int hv_mode;
 
 static struct {
        u64     lpcr;
+       u64     lpcr_clear;
        u64     hfscr;
        u64     fscr;
 } system_registers;
@@ -92,6 +92,8 @@ static void (*init_pmu_registers)(void);
 
 static void __restore_cpu_cpufeatures(void)
 {
+       u64 lpcr;
+
        /*
         * LPCR is restored by the power on engine already. It can be changed
         * after early init e.g., by radix enable, and we have no unified API
@@ -104,8 +106,10 @@ static void __restore_cpu_cpufeatures(void)
         * The best we can do to accommodate secondary boot and idle restore
         * for now is "or" LPCR with existing.
         */
-
-       mtspr(SPRN_LPCR, system_registers.lpcr | mfspr(SPRN_LPCR));
+       lpcr = mfspr(SPRN_LPCR);
+       lpcr |= system_registers.lpcr;
+       lpcr &= ~system_registers.lpcr_clear;
+       mtspr(SPRN_LPCR, lpcr);
        if (hv_mode) {
                mtspr(SPRN_LPID, 0);
                mtspr(SPRN_HFSCR, system_registers.hfscr);
@@ -325,8 +329,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
 {
        u64 lpcr;
 
+       system_registers.lpcr_clear |= (LPCR_ISL | LPCR_UPRT | LPCR_HR);
        lpcr = mfspr(SPRN_LPCR);
-       lpcr &= ~LPCR_ISL;
+       lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
        mtspr(SPRN_LPCR, lpcr);
 
        cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
@@ -590,6 +595,8 @@ static struct dt_cpu_feature_match __initdata
        {"virtual-page-class-key-protection", feat_enable, 0},
        {"transactional-memory", feat_enable_tm, CPU_FTR_TM},
        {"transactional-memory-v3", feat_enable_tm, 0},
+       {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
+       {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
        {"idle-nap", feat_enable_idle_nap, 0},
        {"alignment-interrupt-dsisr", feat_enable_align_dsisr, 0},
        {"idle-stop", feat_enable_idle_stop, 0},
@@ -707,11 +714,28 @@ static __init void cpufeatures_cpu_quirks(void)
         */
        if ((version & 0xffffff00) == 0x004e0100)
                cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1;
+       else if ((version & 0xffffefff) == 0x004e0200)
+               ; /* DD2.0 has no feature flag */
        else if ((version & 0xffffefff) == 0x004e0201)
                cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+       else if ((version & 0xffffefff) == 0x004e0202) {
+               cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+               cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+               cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+       } else /* DD2.1 and up have DD2_1 */
+               cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
 
-       if ((version & 0xffff0000) == 0x004e0000)
+       if ((version & 0xffff0000) == 0x004e0000) {
+               cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
                cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
+       }
+
+       /*
+        * PKEY was not in the initial base or feature node
+        * specification, but it should become optional in the next
+        * cpu feature version sequence.
+        */
+       cur_cpu_spec->cpu_features |= CPU_FTR_PKEY;
 }
 
 static void __init cpufeatures_setup_finished(void)
index 2b9df0040d6b507c1c64de47d9c7101c91f9a2f7..bc640e4c5ca5a67e36952e666110fc61cf04a4db 100644 (file)
@@ -394,9 +394,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
        /* Check PHB state */
        ret = eeh_ops->get_state(phb_pe, NULL);
        if ((ret < 0) ||
-           (ret == EEH_STATE_NOT_SUPPORT) ||
-           (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
-           (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+           (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
                ret = 0;
                goto out;
        }
@@ -433,7 +431,6 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
 int eeh_dev_check_failure(struct eeh_dev *edev)
 {
        int ret;
-       int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
        unsigned long flags;
        struct device_node *dn;
        struct pci_dev *dev;
@@ -525,8 +522,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
         * state, PE is in good state.
         */
        if ((ret < 0) ||
-           (ret == EEH_STATE_NOT_SUPPORT) ||
-           ((ret & active_flags) == active_flags)) {
+           (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
                eeh_stats.false_positives++;
                pe->false_positives++;
                rc = 0;
@@ -546,8 +542,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 
                /* Frozen parent PE ? */
                ret = eeh_ops->get_state(parent_pe, NULL);
-               if (ret > 0 &&
-                   (ret & active_flags) != active_flags)
+               if (ret > 0 && !eeh_state_active(ret))
                        pe = parent_pe;
 
                /* Next parent level */
@@ -888,7 +883,6 @@ static void *eeh_set_dev_freset(void *data, void *flag)
  */
 int eeh_pe_reset_full(struct eeh_pe *pe)
 {
-       int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
        int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
        int type = EEH_RESET_HOT;
        unsigned int freset = 0;
@@ -919,7 +913,7 @@ int eeh_pe_reset_full(struct eeh_pe *pe)
 
                /* Wait until the PE is in a functioning state */
                state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-               if ((state & active_flags) == active_flags)
+               if (eeh_state_active(state))
                        break;
 
                if (state < 0) {
@@ -1352,16 +1346,15 @@ static int eeh_pe_change_owner(struct eeh_pe *pe)
        struct eeh_dev *edev, *tmp;
        struct pci_dev *pdev;
        struct pci_device_id *id;
-       int flags, ret;
+       int ret;
 
        /* Check PE state */
-       flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
        ret = eeh_ops->get_state(pe, NULL);
        if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
                return 0;
 
        /* Unfrozen PE, nothing to do */
-       if ((ret & flags) == flags)
+       if (eeh_state_active(ret))
                return 0;
 
        /* Frozen PE, check if it needs PE level reset */
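
[Editor's note: the repeated open-coded flag test is replaced by eeh_state_active(). Its definition lives in eeh.h rather than in these hunks, but it presumably reads like the sketch below, i.e. both the MMIO and DMA paths must be active for the PE to count as healthy:

	static inline bool eeh_state_active(int state)
	{
		return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
			== (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	}
]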
index d4cc266188092a197c3290b3e89fd201a6eb3eb1..201943d54a6ece9a022023e01e7453521e030fb3 100644 (file)
@@ -84,8 +84,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
  * @addr: mmio (PIO) phys address or i/o port number
  *
  * Given an mmio phys address, or a port number, find a pci device
- * that implements this address.  Be sure to pci_dev_put the device
- * when finished.  I/O port numbers are assumed to be offset
+ * that implements this address.  I/O port numbers are assumed to be offset
  * from zero (that is, they do *not* have pci_io_addr added in).
  * It is safe to call this function within an interrupt.
  */
index 0c0b66fc5bfb32f6c61e8fa454fbfe1a467db89b..b8a329f048141c28940c49018ba67332c01096b6 100644 (file)
@@ -207,18 +207,18 @@ static void *eeh_report_error(void *data, void *userdata)
 
        if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                return NULL;
+
+       device_lock(&dev->dev);
        dev->error_state = pci_channel_io_frozen;
 
        driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
 
        eeh_disable_irq(dev);
 
        if (!driver->err_handler ||
-           !driver->err_handler->error_detected) {
-               eeh_pcid_put(dev);
-               return NULL;
-       }
+           !driver->err_handler->error_detected)
+               goto out;
 
        rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
@@ -227,8 +227,12 @@ static void *eeh_report_error(void *data, void *userdata)
        if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
        edev->in_error = true;
-       eeh_pcid_put(dev);
        pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+
+out:
+       eeh_pcid_put(dev);
+out_no_dev:
+       device_unlock(&dev->dev);
        return NULL;
 }
 
@@ -251,15 +255,14 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
        if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                return NULL;
 
+       device_lock(&dev->dev);
        driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
 
        if (!driver->err_handler ||
            !driver->err_handler->mmio_enabled ||
-           (edev->mode & EEH_DEV_NO_HANDLER)) {
-               eeh_pcid_put(dev);
-               return NULL;
-       }
+           (edev->mode & EEH_DEV_NO_HANDLER))
+               goto out;
 
        rc = driver->err_handler->mmio_enabled(dev);
 
@@ -267,7 +270,10 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
        if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
        if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
+out:
        eeh_pcid_put(dev);
+out_no_dev:
+       device_unlock(&dev->dev);
        return NULL;
 }
 
@@ -290,20 +296,20 @@ static void *eeh_report_reset(void *data, void *userdata)
 
        if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                return NULL;
+
+       device_lock(&dev->dev);
        dev->error_state = pci_channel_io_normal;
 
        driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
 
        eeh_enable_irq(dev);
 
        if (!driver->err_handler ||
            !driver->err_handler->slot_reset ||
            (edev->mode & EEH_DEV_NO_HANDLER) ||
-           (!edev->in_error)) {
-               eeh_pcid_put(dev);
-               return NULL;
-       }
+           (!edev->in_error))
+               goto out;
 
        rc = driver->err_handler->slot_reset(dev);
        if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -311,7 +317,10 @@ static void *eeh_report_reset(void *data, void *userdata)
        if (*res == PCI_ERS_RESULT_DISCONNECT &&
             rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 
+out:
        eeh_pcid_put(dev);
+out_no_dev:
+       device_unlock(&dev->dev);
        return NULL;
 }
 
@@ -362,10 +371,12 @@ static void *eeh_report_resume(void *data, void *userdata)
 
        if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                return NULL;
+
+       device_lock(&dev->dev);
        dev->error_state = pci_channel_io_normal;
 
        driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
 
        was_in_error = edev->in_error;
        edev->in_error = false;
@@ -375,18 +386,20 @@ static void *eeh_report_resume(void *data, void *userdata)
            !driver->err_handler->resume ||
            (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
                edev->mode &= ~EEH_DEV_NO_HANDLER;
-               eeh_pcid_put(dev);
-               return NULL;
+               goto out;
        }
 
        driver->err_handler->resume(dev);
 
-       eeh_pcid_put(dev);
        pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED);
+out:
+       eeh_pcid_put(dev);
 #ifdef CONFIG_PCI_IOV
        if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
                eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
 #endif
+out_no_dev:
+       device_unlock(&dev->dev);
        return NULL;
 }
 
@@ -406,23 +419,26 @@ static void *eeh_report_failure(void *data, void *userdata)
 
        if (!dev || eeh_dev_removed(edev) || eeh_pe_passed(edev->pe))
                return NULL;
+
+       device_lock(&dev->dev);
        dev->error_state = pci_channel_io_perm_failure;
 
        driver = eeh_pcid_get(dev);
-       if (!driver) return NULL;
+       if (!driver) goto out_no_dev;
 
        eeh_disable_irq(dev);
 
        if (!driver->err_handler ||
-           !driver->err_handler->error_detected) {
-               eeh_pcid_put(dev);
-               return NULL;
-       }
+           !driver->err_handler->error_detected)
+               goto out;
 
        driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
 
-       eeh_pcid_put(dev);
        pci_uevent_ers(dev, PCI_ERS_RESULT_DISCONNECT);
+out:
+       eeh_pcid_put(dev);
+out_no_dev:
+       device_unlock(&dev->dev);
        return NULL;
 }
 
@@ -619,17 +635,19 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
 
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
  * @pe: EEH PE
  * @bus: PCI bus corresponding to the isolated slot
 + * @rmv_data: Optional, list to record removed devices
 + * @driver_eeh_aware: Does the device's driver provide EEH support?
  *
  * This routine must be called to do reset on the indicated PE.
  * During the reset, udev might be invoked because those affected
  * PCI devices will be removed and then added.
  */
 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
-                               struct eeh_rmv_data *rmv_data)
+                           struct eeh_rmv_data *rmv_data,
+                           bool driver_eeh_aware)
 {
-       struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
        time64_t tstamp;
        int cnt, rc;
        struct eeh_dev *edev;
@@ -645,16 +663,12 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
         * into pci_hp_add_devices().
         */
        eeh_pe_state_mark(pe, EEH_PE_KEEP);
-       if (bus) {
-               if (pe->type & EEH_PE_VF) {
-                       eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-               } else {
-                       pci_lock_rescan_remove();
-                       pci_hp_remove_devices(bus);
-                       pci_unlock_rescan_remove();
-               }
-       } else if (frozen_bus) {
+       if (driver_eeh_aware || (pe->type & EEH_PE_VF)) {
                eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+       } else {
+               pci_lock_rescan_remove();
+               pci_hp_remove_devices(bus);
+               pci_unlock_rescan_remove();
        }
 
        /*
@@ -689,8 +703,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
         * the device up before the scripts have taken it down,
         * potentially weird things happen.
         */
-       if (bus) {
-               pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+       if (!driver_eeh_aware || rmv_data->removed) {
+               pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+                       (driver_eeh_aware ? "partial" : "complete"));
                ssleep(5);
 
                /*
@@ -703,19 +718,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
                if (pe->type & EEH_PE_VF) {
                        eeh_add_virt_device(edev, NULL);
                } else {
-                       eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+                       if (!driver_eeh_aware)
+                               eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
                        pci_hp_add_devices(bus);
                }
-       } else if (frozen_bus && rmv_data->removed) {
-               pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
-               ssleep(5);
-
-               edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
-               eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-               if (pe->type & EEH_PE_VF)
-                       eeh_add_virt_device(edev, NULL);
-               else
-                       pci_hp_add_devices(frozen_bus);
        }
        eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
@@ -733,28 +739,42 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
 
 /**
  * eeh_handle_normal_event - Handle EEH events on a specific PE
- * @pe: EEH PE
+ * @pe: EEH PE - which should not be used after we return, as it may
+ * have been invalidated.
  *
  * Attempts to recover the given PE.  If recovery fails or the PE has failed
  * too many times, remove the PE.
  *
- * Returns true if @pe should no longer be used, else false.
 + * When the PHB detects address or data parity errors on a particular
 + * PCI slot, the associated PE will be frozen. Besides, DMAs to wild
 + * addresses (which usually happen due to bugs in device drivers or in
 + * PCI adapter firmware) can cause EEH errors. #SERR, #PERR or other
 + * misc PCI-related errors can also trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
  */
-static bool eeh_handle_normal_event(struct eeh_pe *pe)
+void eeh_handle_normal_event(struct eeh_pe *pe)
 {
-       struct pci_bus *frozen_bus;
+       struct pci_bus *bus;
        struct eeh_dev *edev, *tmp;
        int rc = 0;
        enum pci_ers_result result = PCI_ERS_RESULT_NONE;
        struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
 
-       frozen_bus = eeh_pe_bus_get(pe);
-       if (!frozen_bus) {
+       bus = eeh_pe_bus_get(pe);
+       if (!bus) {
                pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
                        __func__, pe->phb->global_number, pe->addr);
-               return false;
+               return;
        }
 
+       eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
        eeh_pe_update_time_stamp(pe);
        pe->freeze_count++;
        if (pe->freeze_count > eeh_max_freezes) {
@@ -806,7 +826,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
         */
        if (result == PCI_ERS_RESULT_NONE) {
                pr_info("EEH: Reset with hotplug activity\n");
-               rc = eeh_reset_device(pe, frozen_bus, NULL);
+               rc = eeh_reset_device(pe, bus, NULL, false);
                if (rc) {
                        pr_warn("%s: Unable to reset, err=%d\n",
                                __func__, rc);
@@ -858,7 +878,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
        /* If any device called out for a reset, then reset the slot */
        if (result == PCI_ERS_RESULT_NEED_RESET) {
                pr_info("EEH: Reset without hotplug activity\n");
-               rc = eeh_reset_device(pe, NULL, &rmv_data);
+               rc = eeh_reset_device(pe, bus, &rmv_data, true);
                if (rc) {
                        pr_warn("%s: Cannot reset, err=%d\n",
                                __func__, rc);
@@ -891,7 +911,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
        pr_info("EEH: Notify device driver to resume\n");
        eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
-       return false;
+       goto final;
 
 hard_fail:
        /*
@@ -916,23 +936,21 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
         * all removed devices correctly to avoid accessing
         * their PCI config any more.
         */
-       if (frozen_bus) {
-               if (pe->type & EEH_PE_VF) {
-                       eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
-                       eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-               } else {
-                       eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
-                       eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
-                       pci_lock_rescan_remove();
-                       pci_hp_remove_devices(frozen_bus);
-                       pci_unlock_rescan_remove();
+       if (pe->type & EEH_PE_VF) {
+               eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+               eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+       } else {
+               eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+               eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
 
-                       /* The passed PE should no longer be used */
-                       return true;
-               }
+               pci_lock_rescan_remove();
+               pci_hp_remove_devices(bus);
+               pci_unlock_rescan_remove();
+               /* The passed PE should no longer be used */
+               return;
        }
-       return false;
+final:
+       eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 }
 
 /**
@@ -942,7 +960,7 @@ static bool eeh_handle_normal_event(struct eeh_pe *pe)
  * specific PE.  Iterates through possible failures and handles them as
  * necessary.
  */
-static void eeh_handle_special_event(void)
+void eeh_handle_special_event(void)
 {
        struct eeh_pe *pe, *phb_pe;
        struct pci_bus *bus;
@@ -1005,15 +1023,7 @@ static void eeh_handle_special_event(void)
                 */
                if (rc == EEH_NEXT_ERR_FROZEN_PE ||
                    rc == EEH_NEXT_ERR_FENCED_PHB) {
-                       /*
-                        * eeh_handle_normal_event() can make the PE stale if it
-                        * determines that the PE cannot possibly be recovered.
-                        * Don't modify the PE state if that's the case.
-                        */
-                       if (eeh_handle_normal_event(pe))
-                               continue;
-
-                       eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+                       eeh_handle_normal_event(pe);
                } else {
                        pci_lock_rescan_remove();
                        list_for_each_entry(hose, &hose_list, list_node) {
@@ -1049,28 +1059,3 @@ static void eeh_handle_special_event(void)
                        break;
        } while (rc != EEH_NEXT_ERR_NONE);
 }
-
-/**
- * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE
- *
- * While PHB detects address or data parity errors on particular PCI
- * slot, the associated PE will be frozen. Besides, DMA's occurring
- * to wild addresses (which usually happen due to bugs in device
- * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
- * #PERR or other misc PCI-related errors also can trigger EEH errors.
- *
- * Recovery process consists of unplugging the device driver (which
- * generated hotplug events to userspace), then issuing a PCI #RST to
- * the device, then reconfiguring the PCI config space for all bridges
- * & devices under this slot, and then finally restarting the device
- * drivers (which cause a second set of hotplug events to go out to
- * userspace).
- */
-void eeh_handle_event(struct eeh_pe *pe)
-{
-       if (pe)
-               eeh_handle_normal_event(pe);
-       else
-               eeh_handle_special_event();
-}
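
[Editor's note: all of the eeh_report_*() changes above apply one pattern: take the device lock before touching error_state or calling into the driver's err_handler, so the driver cannot be unbound while its callback runs, and unwind through labels so eeh_pcid_put() and device_unlock() always pair up. Condensed to its skeleton (error-specific details elided):

	device_lock(&dev->dev);
	driver = eeh_pcid_get(dev);	/* pin the bound driver */
	if (!driver)
		goto out_no_dev;
	if (driver->err_handler && driver->err_handler->error_detected)
		driver->err_handler->error_detected(dev, pci_channel_io_frozen);
	eeh_pcid_put(dev);		/* drop the driver reference */
 out_no_dev:
	device_unlock(&dev->dev);
]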
index accbf8b5fd46f4a701e69ce4292a8dc03cf0dc0d..61c9356bf9c97bcd61cffb2855ff109b0ebe9dbb 100644 (file)
@@ -73,7 +73,6 @@ static int eeh_event_handler(void * dummy)
                /* We might have event without binding PE */
                pe = event->pe;
                if (pe) {
-                       eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
                        if (pe->type & EEH_PE_PHB)
                                pr_info("EEH: Detected error on PHB#%x\n",
                                         pe->phb->global_number);
@@ -81,10 +80,9 @@ static int eeh_event_handler(void * dummy)
                                pr_info("EEH: Detected PCI bus error on "
                                        "PHB#%x-PE#%x\n",
                                        pe->phb->global_number, pe->addr);
-                       eeh_handle_event(pe);
-                       eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+                       eeh_handle_normal_event(pe);
                } else {
-                       eeh_handle_event(NULL);
+                       eeh_handle_special_event();
                }
 
                kfree(event);
index 2cb5109a7ea3d3d4959fa0d9360248218b56d688..51695608c68b3cd59141a084d632f4291265b398 100644 (file)
@@ -545,7 +545,7 @@ _GLOBAL(_switch)
 /* Cancel all explicit user streams as they will have no use after context
  * switch and will stop the HW from creating streams itself
  */
-       DCBT_STOP_ALL_STREAM_IDS(r6)
+       DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
 #endif
 
        addi    r6,r4,-THREAD   /* Convert THREAD to 'current' */
index 1ecfd8ffb0986aea0a3904eb2f2c348412021180..ae6a849db60b1ae8440abcc776b8a5b59e57a641 100644 (file)
@@ -139,6 +139,21 @@ EXC_COMMON_BEGIN(system_reset_idle_common)
        b       pnv_powersave_wakeup
 #endif
 
+/*
+ * Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
+ * the right thing. We do not want to reconcile because that goes
+ * through irq tracing which we don't want in NMI.
+ *
+ * Save PACAIRQHAPPENED because some code will do a hard disable
+ * (e.g., xmon). So we want to restore this back to where it was
+ * when we return. DAR is unused in the stack, so save it there.
+ */
+#define ADD_RECONCILE_NMI                                              \
+       li      r10,IRQS_ALL_DISABLED;                                  \
+       stb     r10,PACAIRQSOFTMASK(r13);                               \
+       lbz     r10,PACAIRQHAPPENED(r13);                               \
+       std     r10,_DAR(r1)
+
 EXC_COMMON_BEGIN(system_reset_common)
        /*
         * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able
@@ -157,16 +172,56 @@ EXC_COMMON_BEGIN(system_reset_common)
        subi    r1,r1,INT_FRAME_SIZE
        EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
                        system_reset, system_reset_exception,
-                       ADD_NVGPRS;ADD_RECONCILE)
+                       ADD_NVGPRS;ADD_RECONCILE_NMI)
+
+       /* This (and MCE) can be simplified with mtmsrd L=1 */
+       /* Clear MSR_RI before setting SRR0 and SRR1. */
+       li      r0,MSR_RI
+       mfmsr   r9
+       andc    r9,r9,r0
+       mtmsrd  r9,1
 
        /*
-        * The stack is no longer in use, decrement in_nmi.
+        * MSR_RI is clear, now we can decrement paca->in_nmi.
         */
        lhz     r10,PACA_IN_NMI(r13)
        subi    r10,r10,1
        sth     r10,PACA_IN_NMI(r13)
 
-       b       ret_from_except
+       /*
+        * Restore soft mask settings.
+        */
+       ld      r10,_DAR(r1)
+       stb     r10,PACAIRQHAPPENED(r13)
+       ld      r10,SOFTE(r1)
+       stb     r10,PACAIRQSOFTMASK(r13)
+
+       /*
+        * Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP.
+        * Should share common bits...
+        */
+
+       /* Move original SRR0 and SRR1 into the respective regs */
+       ld      r9,_MSR(r1)
+       mtspr   SPRN_SRR1,r9
+       ld      r3,_NIP(r1)
+       mtspr   SPRN_SRR0,r3
+       ld      r9,_CTR(r1)
+       mtctr   r9
+       ld      r9,_XER(r1)
+       mtxer   r9
+       ld      r9,_LINK(r1)
+       mtlr    r9
+       REST_GPR(0, r1)
+       REST_8GPRS(2, r1)
+       REST_GPR(10, r1)
+       ld      r11,_CCR(r1)
+       mtcr    r11
+       REST_GPR(11, r1)
+       REST_2GPRS(12, r1)
+       /* restore original r1. */
+       ld      r1,GPR1(r1)
+       RFI_TO_USER_OR_KERNEL
 
 #ifdef CONFIG_PPC_PSERIES
 /*
@@ -621,7 +676,10 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        lwz     r9,PACA_EXSLB+EX_CCR(r13)       /* get saved CR */
        mtlr    r10
 
-       beq-    8f              /* if bad address, make full stack frame */
+       /*
+        * Large address, check whether we have to allocate new contexts.
+        */
+       beq-    8f
 
        bne-    cr5,2f          /* if unrecoverable exception, oops */
 
@@ -629,14 +687,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
 
        bne     cr4,1f          /* returning to kernel */
 
-.machine       push
-.machine       "power4"
        mtcrf   0x80,r9
        mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
        mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
        mtcrf   0x02,r9         /* I/D indication is in cr6 */
        mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
-.machine       pop
 
        RESTORE_CTR(r9, PACA_EXSLB)
        RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -649,14 +704,11 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        RFI_TO_USER
        b       .       /* prevent speculative execution */
 1:
-.machine       push
-.machine       "power4"
        mtcrf   0x80,r9
        mtcrf   0x08,r9         /* MSR[PR] indication is in cr4 */
        mtcrf   0x04,r9         /* MSR[RI] indication is in cr5 */
        mtcrf   0x02,r9         /* I/D indication is in cr6 */
        mtcrf   0x01,r9         /* slb_allocate uses cr0 and cr7 */
-.machine       pop
 
        RESTORE_CTR(r9, PACA_EXSLB)
        RESTORE_PPR_PACA(PACA_EXSLB, r9)
@@ -685,7 +737,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
        mr      r3,r12
        mfspr   r11,SPRN_SRR0
        mfspr   r12,SPRN_SRR1
-       LOAD_HANDLER(r10,bad_addr_slb)
+       LOAD_HANDLER(r10, large_addr_slb)
        mtspr   SPRN_SRR0,r10
        ld      r10,PACAKMSR(r13)
        mtspr   SPRN_SRR1,r10
@@ -700,7 +752,7 @@ EXC_COMMON_BEGIN(unrecov_slb)
        bl      unrecoverable_exception
        b       1b
 
-EXC_COMMON_BEGIN(bad_addr_slb)
+EXC_COMMON_BEGIN(large_addr_slb)
        EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
        RECONCILE_IRQ_STATE(r10, r11)
        ld      r3, PACA_EXSLB+EX_DAR(r13)
@@ -710,7 +762,7 @@ EXC_COMMON_BEGIN(bad_addr_slb)
        std     r10, _TRAP(r1)
 2:     bl      save_nvgprs
        addi    r3, r1, STACK_FRAME_OVERHEAD
-       bl      slb_miss_bad_addr
+       bl      slb_miss_large_addr
        b       ret_from_except
 
 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
@@ -1273,7 +1325,7 @@ EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
        bne+    denorm_assist
 #endif
 
-       KVMTEST_PR(0x1500)
+       KVMTEST_HV(0x1500)
        EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
 EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
 
@@ -1285,7 +1337,7 @@ EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
 EXC_VIRT_NONE(0x5500, 0x100)
 #endif
 
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1500)
+TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
 
 #ifdef CONFIG_PPC_DENORMALISATION
 TRAMP_REAL_BEGIN(denorm_assist)
@@ -1466,7 +1518,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
        ld      r11,PACA_L1D_FLUSH_SIZE(r13)
        srdi    r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
        mtctr   r11
-       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+       DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
        /* order ld/st prior to dcbt stop all streams with flushing */
        sync
@@ -1506,7 +1558,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
        ld      r11,PACA_L1D_FLUSH_SIZE(r13)
        srdi    r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
        mtctr   r11
-       DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+       DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
 
        /* order ld/st prior to dcbt stop all streams with flushing */
        sync
index a61151a6ea5e82f8edc5828d923d80b07863182a..6eca15f25c730bf6b02413a08024837184c8c296 100644 (file)
@@ -392,19 +392,20 @@ generic_secondary_common_init:
         * physical cpu id in r24, we need to search the pacas to find
         * which logical id maps to our physical one.
         */
-       LOAD_REG_ADDR(r13, paca)        /* Load paca pointer             */
-       ld      r13,0(r13)              /* Get base vaddr of paca array  */
 #ifndef CONFIG_SMP
-       addi    r13,r13,PACA_SIZE       /* know r13 if used accidentally */
        b       kexec_wait              /* wait for next kernel if !SMP  */
 #else
+       LOAD_REG_ADDR(r8, paca_ptrs)    /* Load paca_ptrs pointer        */
+       ld      r8,0(r8)                /* Get base vaddr of array       */
        LOAD_REG_ADDR(r7, nr_cpu_ids)   /* Load nr_cpu_ids address       */
        lwz     r7,0(r7)                /* also the max paca allocated   */
        li      r5,0                    /* logical cpu id                */
-1:     lhz     r6,PACAHWCPUID(r13)     /* Load HW procid from paca      */
+1:
+       sldi    r9,r5,3                 /* get paca_ptrs[] index from cpu id */
+       ldx     r13,r9,r8               /* r13 = paca_ptrs[cpu id]       */
+       lhz     r6,PACAHWCPUID(r13)     /* Load HW procid from paca      */
        cmpw    r6,r24                  /* Compare to our id             */
        beq     2f
-       addi    r13,r13,PACA_SIZE       /* Loop to next PACA on miss     */
        addi    r5,r5,1
        cmpw    r5,r7                   /* Check if more pacas exist     */
        blt     1b
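
[Editor's note: the replacement loop is a hand-rolled pointer-array index: shift the logical CPU id left by 3 (each paca_ptrs[] entry is an 8-byte pointer) and load through the base register, rather than stepping a flat array by PACA_SIZE. In C terms:

	/* what the sldi/ldx pair computes each iteration: */
	struct paca_struct *p = paca_ptrs[cpu];
]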
@@ -756,10 +757,10 @@ _GLOBAL(pmac_secondary_start)
        mtmsrd  r3                      /* RI on */
 
        /* Set up a paca value for this processor. */
-       LOAD_REG_ADDR(r4,paca)          /* Load paca pointer            */
-       ld      r4,0(r4)                /* Get base vaddr of paca array */
-       mulli   r13,r24,PACA_SIZE       /* Calculate vaddr of right paca */
-       add     r13,r13,r4              /* for this processor.          */
+       LOAD_REG_ADDR(r4,paca_ptrs)     /* Load paca pointer            */
+       ld      r4,0(r4)                /* Get base vaddr of paca_ptrs array */
+       sldi    r5,r24,3                /* get paca_ptrs[] index from cpu id */
+       ldx     r13,r5,r4               /* r13 = paca_ptrs[cpu id]       */
        SET_PACA(r13)                   /* Save vaddr of paca in an SPRG*/
 
        /* Mark interrupts soft and hard disabled (they might be enabled
index 53b9c1dfd7d978dddf909d3699e06713d83c025a..4c1012b80d3bb5d5ec96e5de6408cd9cc887dd4b 100644 (file)
@@ -33,6 +33,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/processor.h>
 #include <asm/sstep.h>
+#include <asm/debug.h>
 #include <linux/uaccess.h>
 
 /*
@@ -171,6 +172,8 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
         * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
         * 'symbolsize' should satisfy the check below.
         */
+       if (!ppc_breakpoint_available())
+               return -ENODEV;
        length_max = 8; /* DABR */
        if (cpu_has_feature(CPU_FTR_DAWR)) {
                length_max = 512 ; /* 64 doublewords */
index 01e1c1997893368b6ba52034aeeb0bd513ede70c..79d005445c6c996fa9a4cff42f3d52d557f91177 100644 (file)
@@ -325,12 +325,6 @@ enter_winkle:
  * r3 - PSSCR value corresponding to the requested stop state.
  */
 power_enter_stop:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-       /* Tell KVM we're entering idle */
-       li      r4,KVM_HWTHREAD_IN_IDLE
-       /* DO THIS IN REAL MODE!  See comment above. */
-       stb     r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
 /*
  * Check if we are executing the lite variant with ESL=EC=0
  */
@@ -339,6 +333,7 @@ power_enter_stop:
        bne      .Lhandle_esl_ec_set
        PPC_STOP
        li      r3,0  /* Since we didn't lose state, return 0 */
+       std     r3, PACA_REQ_PSSCR(r13)
 
        /*
         * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
@@ -427,13 +422,49 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);          \
 /*
  * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
  * r3 contains desired PSSCR register value.
+ *
+ * The offline (CPU unplug) case must also notify KVM that the CPU
+ * is idle.
  */
+_GLOBAL(power9_offline_stop)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       /*
+        * Tell KVM we're entering idle.
+        * This does not have to be done in real mode because the P9 MMU
+        * is independent per-thread. Some steppings share radix/hash mode
+        * between threads, but in that case KVM has a barrier sync in real
+        * mode before and after switching between radix and hash.
+        */
+       li      r4,KVM_HWTHREAD_IN_IDLE
+       stb     r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
+       /* fall through */
+
 _GLOBAL(power9_idle_stop)
        std     r3, PACA_REQ_PSSCR(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+BEGIN_FTR_SECTION
+       sync
+       lwz     r5, PACA_DONT_STOP(r13)
+       cmpwi   r5, 0
+       bne     1f
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+#endif
        mtspr   SPRN_PSSCR,r3
        LOAD_REG_ADDR(r4,power_enter_stop)
        b       pnv_powersave_common
        /* No return */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+1:
+       /*
+        * We get here when TM / thread reconfiguration bug workaround
+        * code wants to get the CPU into SMT4 mode, and therefore
+        * we are being asked not to stop.
+        */
+       li      r3, 0
+       std     r3, PACA_REQ_PSSCR(r13)
+       blr             /* return 0 for wakeup cause / SRR1 value */
+#endif
 
 /*
  * On waking up from stop 0,1,2 with ESL=1 on POWER9 DD1,
@@ -520,6 +551,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
        mr      r3,r12
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+       lbz     r0,HSTATE_HWTHREAD_STATE(r13)
+       cmpwi   r0,KVM_HWTHREAD_IN_KERNEL
+       beq     1f
        li      r0,KVM_HWTHREAD_IN_KERNEL
        stb     r0,HSTATE_HWTHREAD_STATE(r13)
        /* Order setting hwthread_state vs. testing hwthread_req */
@@ -584,6 +618,8 @@ FTR_SECTION_ELSE_NESTED(71)
        mfspr   r5, SPRN_PSSCR
        rldicl  r5,r5,4,60
 ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_POWER9_DD1, 71)
+       li      r0, 0           /* clear requested_psscr to say we're awake */
+       std     r0, PACA_REQ_PSSCR(r13)
        cmpd    cr4,r5,r4
        bge     cr4,pnv_wakeup_tb_loss /* returns to caller */
 
@@ -834,6 +870,8 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_PTCR,r4
        ld      r4,_RPR(r1)
        mtspr   SPRN_RPR,r4
+       ld      r4,_AMOR(r1)
+       mtspr   SPRN_AMOR,r4
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
        ld      r4,_TSCR(r1)
index aab456ed2a0061ceada12f33dd791cf56afc43fc..5ac84efc6ede8f893d701c0f03d2e735bf27ddf9 100644 (file)
@@ -45,12 +45,32 @@ u64 ioread64(void __iomem *addr)
 {
        return readq(addr);
 }
+u64 ioread64_lo_hi(void __iomem *addr)
+{
+       return readq(addr);
+}
+u64 ioread64_hi_lo(void __iomem *addr)
+{
+       return readq(addr);
+}
 u64 ioread64be(void __iomem *addr)
 {
        return readq_be(addr);
 }
+u64 ioread64be_lo_hi(void __iomem *addr)
+{
+       return readq_be(addr);
+}
+u64 ioread64be_hi_lo(void __iomem *addr)
+{
+       return readq_be(addr);
+}
 EXPORT_SYMBOL(ioread64);
+EXPORT_SYMBOL(ioread64_lo_hi);
+EXPORT_SYMBOL(ioread64_hi_lo);
 EXPORT_SYMBOL(ioread64be);
+EXPORT_SYMBOL(ioread64be_lo_hi);
+EXPORT_SYMBOL(ioread64be_hi_lo);
 #endif /* __powerpc64__ */
 
 void iowrite8(u8 val, void __iomem *addr)
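
[Editor's note: on 64-bit powerpc a readq/writeq is one atomic access, so every _lo_hi and _hi_lo variant can simply alias the plain implementation as above; the suffix only matters on platforms that must split a 64-bit MMIO access into two 32-bit halves. The lo_hi flavor on such a platform looks like the generic fallback shape below (a sketch of the non-atomic pattern, not powerpc code):

	static u64 lo_hi_readq_sketch(const volatile void __iomem *addr)
	{
		u32 low  = readl(addr);		/* low word first ... */
		u32 high = readl(addr + 4);	/* ... then the high word */

		return ((u64)high << 32) | low;
	}
]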
@@ -83,12 +103,32 @@ void iowrite64(u64 val, void __iomem *addr)
 {
        writeq(val, addr);
 }
+void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+       writeq(val, addr);
+}
+void iowrite64_hi_lo(u64 val, void __iomem *addr)
+{
+       writeq(val, addr);
+}
 void iowrite64be(u64 val, void __iomem *addr)
 {
        writeq_be(val, addr);
 }
+void iowrite64be_lo_hi(u64 val, void __iomem *addr)
+{
+       writeq_be(val, addr);
+}
+void iowrite64be_hi_lo(u64 val, void __iomem *addr)
+{
+       writeq_be(val, addr);
+}
 EXPORT_SYMBOL(iowrite64);
+EXPORT_SYMBOL(iowrite64_lo_hi);
+EXPORT_SYMBOL(iowrite64_hi_lo);
 EXPORT_SYMBOL(iowrite64be);
+EXPORT_SYMBOL(iowrite64be_lo_hi);
+EXPORT_SYMBOL(iowrite64be_hi_lo);
 #endif /* __powerpc64__ */
 
 /*
index ca5d5a081e75b9a92e5c4a11de2a422d43ed95a3..e4c5bf33970bf8d0c025f5c20a968ec5a0f80436 100644 (file)
@@ -455,29 +455,33 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
        }
 
        kretprobe_assert(ri, orig_ret_address, trampoline_address);
-       regs->nip = orig_ret_address;
+
        /*
-        * Make LR point to the orig_ret_address.
-        * When the 'nop' inside the kretprobe_trampoline
-        * is optimized, we can do a 'blr' after executing the
-        * detour buffer code.
+        * We get here through one of two paths:
+        * 1. by taking a trap -> kprobe_handler() -> here
+        * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here
+        *
+        * When going back through (1), we need regs->nip to be setup properly
+        * as it is used to determine the return address from the trap.
+        * For (2), since nip is not honoured with optprobes, we instead setup
+        * the link register properly so that the subsequent 'blr' in
+        * kretprobe_trampoline jumps back to the right instruction.
+        *
+        * For nip, we should set the address to the previous instruction since
+        * we end up emulating it in kprobe_handler(), which increments the nip
+        * again.
         */
+       regs->nip = orig_ret_address - 4;
        regs->link = orig_ret_address;
 
-       reset_current_kprobe();
        kretprobe_hash_unlock(current, &flags);
-       preempt_enable_no_resched();
 
        hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
                hlist_del(&ri->hlist);
                kfree(ri);
        }
-       /*
-        * By returning a non-zero value, we are telling
-        * kprobe_handler() that we don't want the post_handler
-        * to run (and have re-enabled preemption)
-        */
-       return 1;
+
+       return 0;
 }
 NOKPROBE_SYMBOL(trampoline_probe_handler);
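
[Editor's note: for reference, the trampoline handler above runs for any registered kretprobe; a minimal consumer looks like the sketch below. The probed symbol and printout are made up for illustration; on powerpc the function return value lands in GPR3.

	#include <linux/kprobes.h>

	static int my_ret_handler(struct kretprobe_instance *ri,
				  struct pt_regs *regs)
	{
		pr_info("probed function returned %lu\n", regs->gpr[3]);
		return 0;
	}

	static struct kretprobe my_kretprobe = {
		.handler	= my_ret_handler,
		.kp.symbol_name	= "do_fork",	/* hypothetical target */
		.maxactive	= 20,
	};

	/* register_kretprobe(&my_kretprobe) on module load,
	 * unregister_kretprobe(&my_kretprobe) on unload */
]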
 
index 49d34d7271e789e8c261cd610458617f28eedaea..1044bf15d5eda6c06f7a739bf5f521a99142521d 100644 (file)
@@ -168,24 +168,25 @@ static void kexec_prepare_cpus_wait(int wait_state)
         * are correctly onlined.  If somehow we start a CPU on boot with RTAS
         * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
         * time, the boot CPU will timeout.  If it does eventually execute
-        * stuff, the secondary will start up (paca[].cpu_start was written) and
-        * get into a peculiar state.  If the platform supports
-        * smp_ops->take_timebase(), the secondary CPU will probably be spinning
-        * in there.  If not (i.e. pseries), the secondary will continue on and
-        * try to online itself/idle/etc. If it survives that, we need to find
-        * these possible-but-not-online-but-should-be CPUs and chaperone them
-        * into kexec_smp_wait().
+        * stuff, the secondary will start up (paca_ptrs[cpu]->cpu_start was
+        * written) and get into a peculiar state.
+        * If the platform supports smp_ops->take_timebase(), the secondary CPU
+        * will probably be spinning in there.  If not (i.e. pseries), the
+        * secondary will continue on and try to online itself/idle/etc. If it
+        * survives that, we need to find these
+        * possible-but-not-online-but-should-be CPUs and chaperone them into
+        * kexec_smp_wait().
         */
        for_each_online_cpu(i) {
                if (i == my_cpu)
                        continue;
 
-               while (paca[i].kexec_state < wait_state) {
+               while (paca_ptrs[i]->kexec_state < wait_state) {
                        barrier();
                        if (i != notified) {
                                printk(KERN_INFO "kexec: waiting for cpu %d "
                                       "(physical %d) to enter %i state\n",
-                                      i, paca[i].hw_cpu_id, wait_state);
+                                      i, paca_ptrs[i]->hw_cpu_id, wait_state);
                                notified = i;
                        }
                }
@@ -322,18 +323,24 @@ void default_machine_kexec(struct kimage *image)
        kexec_stack.thread_info.cpu = current_thread_info()->cpu;
 
        /* We need a static PACA, too; copy this CPU's PACA over and switch to
-        * it.  Also poison per_cpu_offset to catch anyone using non-static
-        * data.
+        * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+        * non-static data.
         */
        memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
        kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
-       paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
-               kexec_paca.paca_index;
+#ifdef CONFIG_PPC_PSERIES
+       kexec_paca.lppaca_ptr = NULL;
+#endif
+       paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
        setup_paca(&kexec_paca);
 
-       /* XXX: If anyone does 'dynamic lppacas' this will also need to be
-        * switched to a static version!
+       /*
+        * The lppaca should be unregistered at this point so the HV won't
+        * touch it. In the case of a crash, none of the lppacas are
+        * unregistered so there is not much we can do about it here.
         */
+
        /*
         * On Book3S, the copy must happen with the MMU off if we are either
         * using Radix page tables or we are not in an LPAR since we can
index e4395f937d63a3d8381854c7bfb89334217e2e16..45e0b7d5f20080e8291d229d89f9a38f1e724f46 100644 (file)
@@ -43,7 +43,7 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
 
        /* We don't support crash kernels yet. */
        if (image->type == KEXEC_TYPE_CRASH)
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
 
        for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
                fops = kexec_file_loaders[i];
index 3280953a82cf63c4372735762a09804fd9c21677..fa267e94090ac29080b6af8ad32906acd1a3787f 100644 (file)
@@ -144,44 +144,6 @@ _GLOBAL_TOC(flush_dcache_range)
        blr
 EXPORT_SYMBOL(flush_dcache_range)
 
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory 
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- *    flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
-       ld      r10,PPC64_CACHES@toc(r2)
-       lwz     r7,DCACHEL1BLOCKSIZE(r10)       /* Get dcache block size */
-       addi    r5,r7,-1
-       andc    r6,r3,r5                /* round low to line bdy */
-       subf    r8,r6,r4                /* compute length */
-       add     r8,r8,r5                /* ensure we get enough */
-       lwz     r9,DCACHEL1LOGBLOCKSIZE(r10)    /* Get log-2 of dcache block size */
-       srw.    r8,r8,r9                /* compute line count */
-       beqlr                           /* nothing to do? */
-       mfmsr   r5                      /* Disable MMU Data Relocation */
-       ori     r0,r5,MSR_DR
-       xori    r0,r0,MSR_DR
-       sync
-       mtmsr   r0
-       sync
-       isync
-       mtctr   r8
-0:     dcbst   0,r6
-       add     r6,r6,r7
-       bdnz    0b
-       sync
-       isync
-       mtmsr   r5                      /* Re-enable MMU Data Relocation */
-       sync
-       isync
-       blr
-
 _GLOBAL(flush_inval_dcache_range)
        ld      r10,PPC64_CACHES@toc(r2)
        lwz     r7,DCACHEL1BLOCKSIZE(r10)       /* Get dcache block size */
index 496d6393bd41824b565c66b731e1b4147c7e3923..ba681dac7b467be703dee435260e9e2e7a72ef19 100644 (file)
@@ -207,8 +207,7 @@ int nvram_write_os_partition(struct nvram_os_partition *part,
 
        tmp_index = part->index;
 
-       rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info),
-                               &tmp_index);
+       rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index);
        if (rc <= 0) {
                pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
                return rc;
@@ -244,9 +243,7 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff,
        tmp_index = part->index;
 
        if (part->os_partition) {
-               rc = ppc_md.nvram_read((char *)&info,
-                                       sizeof(struct err_log_info),
-                                       &tmp_index);
+               rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index);
                if (rc <= 0) {
                        pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
                        return rc;
@@ -1173,7 +1170,7 @@ int __init nvram_scan_partitions(void)
                               "detected: 0-length partition\n");
                        goto out;
                }
-               tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+               tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL);
                err = -ENOMEM;
                if (!tmp_part) {
                        printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
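
The three hunks above are the usual sizeof cleanup: write sizeof(var) or sizeof(*ptr) instead of naming the type, so the size expression cannot silently go stale if the variable's declaration changes. A toy illustration (the struct definition here is a stand-in, not the kernel's):

    #include <stdlib.h>

    struct err_log_info { int error_type, seq_num; };

    int main(void)
    {
        struct err_log_info *info;

        /* Tracks info's real type; sizeof(struct err_log_info)
         * would need a second edit if the type were changed. */
        info = malloc(sizeof(*info));
        free(info);
        return 0;
    }
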
index 95ffedf148856dd8444330a805a16c287e6399c9..0ee3e6d50f2885d519462af046e19560ca4ace8e 100644 (file)
 
 #include "setup.h"
 
-#ifdef CONFIG_PPC_BOOK3S
+#ifndef CONFIG_SMP
+#define boot_cpuid 0
+#endif
+
+static void *__init alloc_paca_data(unsigned long size, unsigned long align,
+                               unsigned long limit, int cpu)
+{
+       unsigned long pa;
+       int nid;
+
+       /*
+        * boot_cpuid paca is allocated very early before cpu_to_node is up.
+        * Set bottom-up mode, because the boot CPU should be on node-0,
+        * which will put its paca in the right place.
+        */
+       if (cpu == boot_cpuid) {
+               nid = -1;
+               memblock_set_bottom_up(true);
+       } else {
+               nid = early_cpu_to_node(cpu);
+       }
+
+       pa = memblock_alloc_base_nid(size, align, limit, nid, MEMBLOCK_NONE);
+       if (!pa) {
+               pa = memblock_alloc_base(size, align, limit);
+               if (!pa)
+                       panic("cannot allocate paca data");
+       }
+
+       if (cpu == boot_cpuid)
+               memblock_set_bottom_up(false);
+
+       return __va(pa);
+}
+
+#ifdef CONFIG_PPC_PSERIES
 
 /*
- * The structure which the hypervisor knows about - this structure
- * should not cross a page boundary.  The vpa_init/register_vpa call
- * is now known to fail if the lppaca structure crosses a page
- * boundary.  The lppaca is also used on POWER5 pSeries boxes.
- * The lppaca is 640 bytes long, and cannot readily
- * change since the hypervisor knows its layout, so a 1kB alignment
- * will suffice to ensure that it doesn't cross a page boundary.
+ * See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must be 1kB in size, L1 cache line aligned, and
+ * must not cross a 4kB boundary. A 1kB size and 1kB alignment satisfy
+ * these requirements.
  */
-struct lppaca lppaca[] = {
-       [0 ... (NR_LPPACAS-1)] = {
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+       BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+       *lppaca = (struct lppaca) {
                .desc = cpu_to_be32(0xd397d781),        /* "LpPa" */
-               .size = cpu_to_be16(sizeof(struct lppaca)),
+               .size = cpu_to_be16(0x400),
                .fpregs_in_use = 1,
                .slb_count = cpu_to_be16(64),
                .vmxregs_in_use = 0,
-               .page_ins = 0,
-       },
+               .page_ins = 0, };
 };
 
-static struct lppaca *extra_lppacas;
-static long __initdata lppaca_size;
-
-static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
-{
-       if (nr_cpus <= NR_LPPACAS)
-               return;
-
-       lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
-                                (nr_cpus - NR_LPPACAS));
-       extra_lppacas = __va(memblock_alloc_base(lppaca_size,
-                                                PAGE_SIZE, limit));
-}
-
-static struct lppaca * __init new_lppaca(int cpu)
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
 {
        struct lppaca *lp;
+       size_t size = 0x400;
 
-       if (cpu < NR_LPPACAS)
-               return &lppaca[cpu];
+       BUILD_BUG_ON(size < sizeof(struct lppaca));
+
+       if (early_cpu_has_feature(CPU_FTR_HVMODE))
+               return NULL;
 
-       lp = extra_lppacas + (cpu - NR_LPPACAS);
-       *lp = lppaca[0];
+       lp = alloc_paca_data(size, 0x400, limit, cpu);
+       init_lppaca(lp);
 
        return lp;
 }
-
-static void __init free_lppacas(void)
-{
-       long new_size = 0, nr;
-
-       if (!lppaca_size)
-               return;
-       nr = num_possible_cpus() - NR_LPPACAS;
-       if (nr > 0)
-               new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
-       if (new_size >= lppaca_size)
-               return;
-
-       memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
-       lppaca_size = new_size;
-}
-
-#else
-
-static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
-static inline void free_lppacas(void) { }
-
 #endif /* CONFIG_PPC_PSERIES */
 
 #ifdef CONFIG_PPC_BOOK3S_64
 
 /*
- * 3 persistent SLBs are registered here.  The buffer will be zero
+ * 3 persistent SLBs are allocated here.  The buffer will be zero
  * initially, hence will all be invalid until we actually write them.
  *
  * If you make the number of persistent SLB entries dynamic, please also
  * update PR KVM to flush and restore them accordingly.
  */
-static struct slb_shadow * __initdata slb_shadow;
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit)
-{
-       int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
-
-       if (early_radix_enabled())
-               return;
-
-       slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
-       memset(slb_shadow, 0, size);
-}
-
-static struct slb_shadow * __init init_slb_shadow(int cpu)
+static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
 {
        struct slb_shadow *s;
 
-       if (early_radix_enabled())
-               return NULL;
-
-       s = &slb_shadow[cpu];
+       if (cpu != boot_cpuid) {
+               /*
+                * The boot CPU comes through here before early_radix_enabled
+                * is parsed (e.g., for disable_radix), so always allocate;
+                * this is fixed up in free_unused_pacas().
+                */
+               if (early_radix_enabled())
+                       return NULL;
+       }
 
-       /*
-        * When we come through here to initialise boot_paca, the slb_shadow
-        * buffers are not allocated yet. That's OK, we'll get one later in
-        * boot, but make sure we don't corrupt memory at 0.
-        */
-       if (!slb_shadow)
-               return NULL;
+       s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
+       memset(s, 0, sizeof(*s));
 
        s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
        s->buffer_length = cpu_to_be32(sizeof(*s));
@@ -137,10 +126,6 @@ static struct slb_shadow * __init init_slb_shadow(int cpu)
        return s;
 }
 
-#else /* !CONFIG_PPC_BOOK3S_64 */
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 /* The Paca is an array with one entry per processor.  Each contains an
@@ -152,14 +137,15 @@ static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
  * processors.  The processor VPD array needs one entry per physical
  * processor (not thread).
  */
-struct paca_struct *paca;
-EXPORT_SYMBOL(paca);
+struct paca_struct **paca_ptrs __read_mostly;
+EXPORT_SYMBOL(paca_ptrs);
 
 void __init initialise_paca(struct paca_struct *new_paca, int cpu)
 {
-#ifdef CONFIG_PPC_BOOK3S
-       new_paca->lppaca_ptr = new_lppaca(cpu);
-#else
+#ifdef CONFIG_PPC_PSERIES
+       new_paca->lppaca_ptr = NULL;
+#endif
+#ifdef CONFIG_PPC_BOOK3E
        new_paca->kernel_pgd = swapper_pg_dir;
 #endif
        new_paca->lock_token = 0x8000;
@@ -173,7 +159,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
        new_paca->__current = &init_task;
        new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
 #ifdef CONFIG_PPC_BOOK3S_64
-       new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
+       new_paca->slb_shadow_ptr = NULL;
 #endif
 
 #ifdef CONFIG_PPC_BOOK3E
@@ -203,12 +189,25 @@ void setup_paca(struct paca_struct *new_paca)
 
 }
 
-static int __initdata paca_size;
+static int __initdata paca_nr_cpu_ids;
+static int __initdata paca_ptrs_size;
+static int __initdata paca_struct_size;
+
+void __init allocate_paca_ptrs(void)
+{
+       paca_nr_cpu_ids = nr_cpu_ids;
+
+       paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+       paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0));
+       memset(paca_ptrs, 0x88, paca_ptrs_size);
+}
 
-void __init allocate_pacas(void)
+void __init allocate_paca(int cpu)
 {
        u64 limit;
-       int cpu;
+       struct paca_struct *paca;
+
+       BUG_ON(cpu >= paca_nr_cpu_ids);
 
 #ifdef CONFIG_PPC_BOOK3S_64
        /*
@@ -220,40 +219,44 @@ void __init allocate_pacas(void)
        limit = ppc64_rma_size;
 #endif
 
-       paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
-
-       paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
-       memset(paca, 0, paca_size);
-
-       printk(KERN_DEBUG "Allocated %u bytes for %u pacas at %p\n",
-               paca_size, nr_cpu_ids, paca);
-
-       allocate_lppacas(nr_cpu_ids, limit);
-
-       allocate_slb_shadows(nr_cpu_ids, limit);
+       paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
+                               limit, cpu);
+       paca_ptrs[cpu] = paca;
+       memset(paca, 0, sizeof(struct paca_struct));
 
-       /* Can't use for_each_*_cpu, as they aren't functional yet */
-       for (cpu = 0; cpu < nr_cpu_ids; cpu++)
-               initialise_paca(&paca[cpu], cpu);
+       initialise_paca(paca, cpu);
+#ifdef CONFIG_PPC_PSERIES
+       paca->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+       paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+#endif
+       paca_struct_size += sizeof(struct paca_struct);
 }
 
 void __init free_unused_pacas(void)
 {
-       int new_size;
-
-       new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+       int new_ptrs_size;
 
-       if (new_size >= paca_size)
-               return;
+       new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+       if (new_ptrs_size < paca_ptrs_size)
+               memblock_free(__pa(paca_ptrs) + new_ptrs_size,
+                                       paca_ptrs_size - new_ptrs_size);
 
-       memblock_free(__pa(paca) + new_size, paca_size - new_size);
+       paca_nr_cpu_ids = nr_cpu_ids;
+       paca_ptrs_size = new_ptrs_size;
 
-       printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
-               paca_size - new_size);
-
-       paca_size = new_size;
+#ifdef CONFIG_PPC_BOOK3S_64
+       if (early_radix_enabled()) {
+               /* Ugly fixup, see new_slb_shadow() */
+               memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+                               sizeof(struct slb_shadow));
+               paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
+       }
+#endif
 
-       free_lppacas();
+       printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
+                       paca_ptrs_size + paca_struct_size, nr_cpu_ids);
 }
 
 void copy_mm_to_paca(struct mm_struct *mm)
@@ -265,7 +268,8 @@ void copy_mm_to_paca(struct mm_struct *mm)
 #ifdef CONFIG_PPC_MM_SLICES
        VM_BUG_ON(!mm->context.slb_addr_limit);
        get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
-       get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+       memcpy(&get_paca()->mm_ctx_low_slices_psize,
+              &context->low_slices_psize, sizeof(context->low_slices_psize));
        memcpy(&get_paca()->mm_ctx_high_slices_psize,
               &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
 #else /* CONFIG_PPC_MM_SLICES */
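
Taken together, this file's hunks replace the single flat paca array with paca_ptrs[], an array of per-CPU pointers, so each paca_struct (and its lppaca and SLB shadow) can be allocated on the owning CPU's NUMA node; free_unused_pacas() then only has to trim the pointer array once nr_cpu_ids is final. A compressed userspace sketch of that shape, with malloc standing in for the memblock allocators and the node id accepted but unused:

    #include <stdlib.h>
    #include <string.h>

    struct paca_like { int cpu; /* ... per-cpu fields ... */ };

    static struct paca_like **paca_ptrs;

    /* Stand-in for memblock_alloc_base_nid(): a real version would
     * try node nid first and fall back to any node. */
    static void *alloc_on_node(size_t size, int nid)
    {
        (void)nid;
        return malloc(size);
    }

    static int allocate_pacas(int nr_cpus)
    {
        int cpu;

        paca_ptrs = calloc(nr_cpus, sizeof(*paca_ptrs));
        if (!paca_ptrs)
            return -1;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
            paca_ptrs[cpu] = alloc_on_node(sizeof(**paca_ptrs), cpu);
            if (!paca_ptrs[cpu])
                return -1;
            memset(paca_ptrs[cpu], 0, sizeof(**paca_ptrs));
            paca_ptrs[cpu]->cpu = cpu;
        }
        return 0;
    }

    int main(void)
    {
        return allocate_pacas(4);
    }
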
index 1738c4127b3207506e434dea167f755eea66ff7f..1237f13fed51862cbb5d7d2117c686cc06b2852b 100644 (file)
@@ -173,7 +173,7 @@ void __msr_check_and_clear(unsigned long bits)
 EXPORT_SYMBOL(__msr_check_and_clear);
 
 #ifdef CONFIG_PPC_FPU
-void __giveup_fpu(struct task_struct *tsk)
+static void __giveup_fpu(struct task_struct *tsk)
 {
        unsigned long msr;
 
@@ -556,7 +556,7 @@ void restore_math(struct pt_regs *regs)
        regs->msr = msr;
 }
 
-void save_all(struct task_struct *tsk)
+static void save_all(struct task_struct *tsk)
 {
        unsigned long usermsr;
 
@@ -718,7 +718,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
 {
        thread->hw_brk.address = 0;
        thread->hw_brk.type = 0;
-       set_breakpoint(&thread->hw_brk);
+       if (ppc_breakpoint_available())
+               set_breakpoint(&thread->hw_brk);
 }
 #endif /* !CONFIG_HAVE_HW_BREAKPOINT */
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -815,9 +816,14 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
        memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
 
        if (cpu_has_feature(CPU_FTR_DAWR))
+               // Power8 or later
                set_dawr(brk);
-       else
+       else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+               // Power7 or earlier
                set_dabr(brk);
+       else
+               // Shouldn't happen due to higher level checks
+               WARN_ON_ONCE(1);
 }
 
 void set_breakpoint(struct arch_hw_breakpoint *brk)
@@ -827,6 +833,18 @@ void set_breakpoint(struct arch_hw_breakpoint *brk)
        preempt_enable();
 }
 
+/* Check if we have DAWR or DABR hardware */
+bool ppc_breakpoint_available(void)
+{
+       if (cpu_has_feature(CPU_FTR_DAWR))
+               return true; /* POWER8 DAWR */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S))
+               return false; /* POWER9 with DAWR disabled */
+       /* DABR: Everything but POWER8 and POWER9 */
+       return true;
+}
+EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
+
 #ifdef CONFIG_PPC64
 DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
 #endif
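
As a quick reference, the feature tests in ppc_breakpoint_available() above reduce to this decision table (matching the comments added to __set_breakpoint()):

    CPU_FTR_DAWR   CPU_FTR_ARCH_207S   breakpoint hardware
    set            (any)               yes: DAWR present (POWER8)
    clear          set                 no:  POWER9 with the DAWR disabled
    clear          clear               yes: DABR (everything before POWER8)
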
index 4dffef947b8ab57e47c8019c3e8f6f7b4ad04299..9dbed488aba1a12b78b997990f6282f9ce16fcb6 100644 (file)
@@ -291,11 +291,11 @@ static inline void identical_pvr_fixup(unsigned long node)
 
 static void __init check_cpu_feature_properties(unsigned long node)
 {
-       unsigned long i;
+       int i;
        struct feature_property *fp = feature_properties;
        const __be32 *prop;
 
-       for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
+       for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) {
                prop = of_get_flat_dt_prop(node, fp->name, NULL);
                if (prop && be32_to_cpup(prop) >= fp->min_value) {
                        cur_cpu_spec->cpu_features |= fp->cpu_feature;
@@ -365,7 +365,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
        DBG("boot cpu: logical %d physical %d\n", found,
            be32_to_cpu(intserv[found_thread]));
        boot_cpuid = found;
-       set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
 
        /*
         * PAPR defines "logical" PVR values for cpus that
@@ -403,7 +402,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
                cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
        else if (!dt_cpu_ftrs_in_use())
                cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+       allocate_paca(boot_cpuid);
 #endif
+       set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
 
        return 0;
 }
@@ -744,7 +745,7 @@ void __init early_init_devtree(void *params)
         * FIXME .. and the initrd too? */
        move_device_tree();
 
-       allocate_pacas();
+       allocate_paca_ptrs();
 
        DBG("Scanning CPUs ...\n");
 
@@ -874,5 +875,15 @@ EXPORT_SYMBOL(cpu_to_chip_id);
 
 bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
 {
+#ifdef CONFIG_SMP
+       /*
+        * Early firmware scanning must use this rather than
+        * get_hard_smp_processor_id because we don't have pacas allocated
+        * until memory topology is discovered.
+        */
+       if (cpu_to_phys_id != NULL)
+               return (int)phys_id == cpu_to_phys_id[cpu];
+#endif
+
        return (int)phys_id == get_hard_smp_processor_id(cpu);
 }
index acf4b2e0530cb671df1e80d56927e8650f0c2f84..f9d6befb55a6ede366e9584ac5a7ffb81e6ef1f6 100644 (file)
@@ -171,7 +171,7 @@ static unsigned long __initdata prom_tce_alloc_start;
 static unsigned long __initdata prom_tce_alloc_end;
 #endif
 
-static bool __initdata prom_radix_disable;
+static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
 
 struct platform_support {
        bool hash_mmu;
@@ -641,9 +641,19 @@ static void __init early_cmdline_parse(void)
 
        opt = strstr(prom_cmd_line, "disable_radix");
        if (opt) {
-               prom_debug("Radix disabled from cmdline\n");
-               prom_radix_disable = true;
+               opt += 13;
+               if (*opt && *opt == '=') {
+                       bool val;
+
+                       if (kstrtobool(++opt, &val))
+                               prom_radix_disable = false;
+                       else
+                               prom_radix_disable = val;
+               } else
+                       prom_radix_disable = true;
        }
+       if (prom_radix_disable)
+               prom_debug("Radix disabled from cmdline\n");
 }
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
@@ -1110,7 +1120,8 @@ static void __init prom_check_platform_support(void)
                }
        }
 
-       if (supported.radix_mmu && supported.radix_gtse) {
+       if (supported.radix_mmu && supported.radix_gtse &&
+           IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
                /* Radix preferred - but we require GTSE for now */
                prom_debug("Asking for radix with GTSE\n");
                ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
@@ -1809,16 +1820,8 @@ static void __init prom_initialize_tce_table(void)
                 * size to 4 MB.  This is enough to map 2GB of PCI DMA space.
                 * By doing this, we avoid the pitfalls of trying to DMA to
                 * MMIO space and the DMA alias hole.
-                *
-                * On POWER4, firmware sets the TCE region by assuming
-                * each TCE table is 8MB. Using this memory for anything
-                * else will impact performance, so we always allocate 8MB.
-                * Anton
                 */
-               if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p))
-                       minsize = 8UL << 20;
-               else
-                       minsize = 4UL << 20;
+               minsize = 4UL << 20;
 
                /* Align to the greater of the align or size */
                align = max(minalign, minsize);
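
early_cmdline_parse() above now accepts both a bare "disable_radix" and "disable_radix=<bool>" via kstrtobool, with the default taken from CONFIG_PPC_RADIX_MMU_DEFAULT. A rough userspace equivalent of the parse (the truthy-character test is a simplification of kstrtobool's rules, and a failed parse falls back to false just as the kernel code does):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    static bool parse_disable_radix(const char *cmdline, bool dflt)
    {
        const char *opt = strstr(cmdline, "disable_radix");
        bool disable = dflt;

        if (opt) {
            opt += strlen("disable_radix");
            if (*opt == '=') {
                char c = opt[1];

                /* kstrtobool also understands 0/n/N and on/off. */
                disable = (c == '1' || c == 'y' || c == 'Y');
            } else {
                disable = true;        /* bare "disable_radix" */
            }
        }
        return disable;
    }

    int main(void)
    {
        printf("%d\n", parse_disable_radix("root=/dev/sda disable_radix=no", false));
        printf("%d\n", parse_disable_radix("quiet disable_radix", false));
        return 0;
    }
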
index 12640f7e726b29ce5ac9d256c589ce758a2521e8..acb6b9226352bc405c9fac7c57a173bbf4621ca5 100644 (file)
@@ -19,7 +19,7 @@
 WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
 _end enter_prom memcpy memset reloc_offset __secondary_hold
 __secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
+strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
 reloc_got2 kernstart_addr memstart_addr linux_banner _stext
 __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
 
index ca72d7391d404f9acb4dee60b800a2b1edd2f03c..d23cf632edf065b7c23b175a00afa1ace9f53b44 100644 (file)
@@ -41,6 +41,7 @@
 #include <asm/switch_to.h>
 #include <asm/tm.h>
 #include <asm/asm-prototypes.h>
+#include <asm/debug.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -2378,6 +2379,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
        struct perf_event_attr attr;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #ifndef CONFIG_PPC_ADV_DEBUG_REGS
+       bool set_bp = true;
        struct arch_hw_breakpoint hw_brk;
 #endif
 
@@ -2411,9 +2413,10 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
        hw_brk.address = data & (~HW_BRK_TYPE_DABR);
        hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
        hw_brk.len = 8;
+       set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        bp = thread->ptrace_bps[0];
-       if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
+       if (!set_bp) {
                if (bp) {
                        unregister_hw_breakpoint(bp);
                        thread->ptrace_bps[0] = NULL;
@@ -2450,6 +2453,9 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
                return PTR_ERR(bp);
        }
 
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+       if (set_bp && (!ppc_breakpoint_available()))
+               return -ENODEV;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
        task->thread.hw_brk = hw_brk;
 #else /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -2904,6 +2910,9 @@ static long ppc_set_hwdebug(struct task_struct *child,
        if (child->thread.hw_brk.address)
                return -ENOSPC;
 
+       if (!ppc_breakpoint_available())
+               return -ENODEV;
+
        child->thread.hw_brk = brk;
 
        return 1;
@@ -3052,7 +3061,10 @@ long arch_ptrace(struct task_struct *child, long request,
 #endif
 #else /* !CONFIG_PPC_ADV_DEBUG_REGS */
                dbginfo.num_instruction_bps = 0;
-               dbginfo.num_data_bps = 1;
+               if (ppc_breakpoint_available())
+                       dbginfo.num_data_bps = 1;
+               else
+                       dbginfo.num_data_bps = 0;
                dbginfo.num_condition_regs = 0;
 #ifdef CONFIG_PPC64
                dbginfo.data_bp_alignment = 8;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
new file mode 100644 (file)
index 0000000..bab5a27
--- /dev/null
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Security related flags and so on.
+//
+// Copyright 2018, Michael Ellerman, IBM Corporation.
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/seq_buf.h>
+
+#include <asm/security_features.h>
+
+
+unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       bool thread_priv;
+
+       thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
+
+       if (rfi_flush || thread_priv) {
+               struct seq_buf s;
+               seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+               seq_buf_printf(&s, "Mitigation: ");
+
+               if (rfi_flush)
+                       seq_buf_printf(&s, "RFI Flush");
+
+               if (rfi_flush && thread_priv)
+                       seq_buf_printf(&s, ", ");
+
+               if (thread_priv)
+                       seq_buf_printf(&s, "L1D private per thread");
+
+               seq_buf_printf(&s, "\n");
+
+               return s.len;
+       }
+
+       if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+           !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+               return sprintf(buf, "Not affected\n");
+
+       return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR))
+               return sprintf(buf, "Not affected\n");
+
+       return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       bool bcs, ccd, ori;
+       struct seq_buf s;
+
+       seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+       bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+       ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+       ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31);
+
+       if (bcs || ccd) {
+               seq_buf_printf(&s, "Mitigation: ");
+
+               if (bcs)
+                       seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+
+               if (bcs && ccd)
+                       seq_buf_printf(&s, ", ");
+
+               if (ccd)
+                       seq_buf_printf(&s, "Indirect branch cache disabled");
+       } else
+               seq_buf_printf(&s, "Vulnerable");
+
+       if (ori)
+               seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+
+       seq_buf_printf(&s, "\n");
+
+       return s.len;
+}
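
The new security.c builds each sysfs string incrementally with seq_buf rather than a single sprintf, so the mitigation list can grow piece by piece. A self-contained userspace analogue of cpu_show_spectre_v2(); the three-field seq_buf and sb_puts() here are stand-ins, not the kernel's seq_buf API:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    struct seq_buf { char *buf; size_t size, len; };

    /* Append without ever overflowing the buffer. */
    static void sb_puts(struct seq_buf *s, const char *str)
    {
        size_t room = s->size - s->len;
        size_t n = strlen(str);

        if (n > room)
            n = room;
        memcpy(s->buf + s->len, str, n);
        s->len += n;
    }

    static size_t show_spectre_v2(char *buf, size_t size,
                                  bool bcs, bool ccd, bool ori)
    {
        struct seq_buf s = { buf, size - 1, 0 };

        if (bcs || ccd) {
            sb_puts(&s, "Mitigation: ");
            if (bcs)
                sb_puts(&s, "Indirect branch serialisation (kernel only)");
            if (bcs && ccd)
                sb_puts(&s, ", ");
            if (ccd)
                sb_puts(&s, "Indirect branch cache disabled");
        } else {
            sb_puts(&s, "Vulnerable");
        }
        if (ori)
            sb_puts(&s, ", ori31 speculation barrier enabled");
        sb_puts(&s, "\n");
        buf[s.len] = '\0';
        return s.len;
    }

    int main(void)
    {
        char buf[128];

        show_spectre_v2(buf, sizeof(buf), true, true, true);
        fputs(buf, stdout);
        return 0;
    }
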
index d73ec518ef8057e202c013b3dd4b98894ec7ea0b..0af5c11b9e784f21b3cd799bee34edc5d48a7d9c 100644 (file)
@@ -437,6 +437,8 @@ static void __init cpu_init_thread_core_maps(int tpc)
 }
 
 
+u32 *cpu_to_phys_id = NULL;
+
 /**
  * setup_cpu_maps - initialize the following cpu maps:
  *                  cpu_possible_mask
@@ -463,6 +465,10 @@ void __init smp_setup_cpu_maps(void)
 
        DBG("smp_setup_cpu_maps()\n");
 
+       cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
+                                                       __alignof__(u32)));
+       memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
+
        for_each_node_by_type(dn, "cpu") {
                const __be32 *intserv;
                __be32 cpu_be;
@@ -480,6 +486,7 @@ void __init smp_setup_cpu_maps(void)
                        intserv = of_get_property(dn, "reg", &len);
                        if (!intserv) {
                                cpu_be = cpu_to_be32(cpu);
+                               /* XXX: what is this? uninitialized?? */
                                intserv = &cpu_be;      /* assume logical == phys */
                                len = 4;
                        }
@@ -499,8 +506,8 @@ void __init smp_setup_cpu_maps(void)
                                                "enable-method", "spin-table");
 
                        set_cpu_present(cpu, avail);
-                       set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
                        set_cpu_possible(cpu, true);
+                       cpu_to_phys_id[cpu] = be32_to_cpu(intserv[j]);
                        cpu++;
                }
 
@@ -835,6 +842,23 @@ static __init void print_system_info(void)
        pr_info("-----------------------------------------------------\n");
 }
 
+#ifdef CONFIG_SMP
+static void smp_setup_pacas(void)
+{
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               if (cpu == smp_processor_id())
+                       continue;
+               allocate_paca(cpu);
+               set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
+       }
+
+       memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
+       cpu_to_phys_id = NULL;
+}
+#endif
+
 /*
  * Called into from start_kernel this initializes memblock, which is used
  * to manage page allocation until mem_init is called.
@@ -888,8 +912,8 @@ void __init setup_arch(char **cmdline_p)
        /* Check the SMT related command line arguments (ppc64). */
        check_smt_enabled();
 
-       /* On BookE, setup per-core TLB data structures. */
-       setup_tlb_core_data();
+       /* Parse memory topology */
+       mem_topology_setup();
 
        /*
         * Release secondary cpus out of their spinloops at 0x60 now that
@@ -899,6 +923,11 @@ void __init setup_arch(char **cmdline_p)
         * so smp_release_cpus() does nothing for them.
         */
 #ifdef CONFIG_SMP
+       smp_setup_pacas();
+
+       /* On BookE, setup per-core TLB data structures. */
+       setup_tlb_core_data();
+
        smp_release_cpus();
 #endif
 
@@ -919,6 +948,8 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_PPC64
        if (!radix_enabled())
                init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#elif defined(CONFIG_PPC_8xx)
+       init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
 #else
 #error "context.addr_limit not initialized."
 #endif
index 3fc11e30308fc2700658ba0bebf6142140a8705b..d144df54ad4087c441f02cb214bc22b081bfe81a 100644 (file)
@@ -45,14 +45,11 @@ void emergency_stack_init(void);
 static inline void emergency_stack_init(void) { };
 #endif
 
-#ifdef CONFIG_PPC64
-void record_spr_defaults(void);
-#else
-static inline void record_spr_defaults(void) { };
-#endif
-
 #ifdef CONFIG_PPC64
 u64 ppc64_bolted_size(void);
+
+/* Default SPR values from firmware/kexec */
+extern unsigned long spr_default_dscr;
 #endif
 
 /*
index 51ebc01fff52549e00bf565b8718cda72d74bc31..74457485574b7db3ce7fa942e425c4a9138e03a7 100644 (file)
@@ -39,6 +39,7 @@
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
 #include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
 
 #define DBG(fmt...)
 
@@ -121,7 +122,7 @@ notrace void __init machine_init(u64 dt_ptr)
 }
 
 /* Checks "l2cr=xxxx" command-line option */
-int __init ppc_setup_l2cr(char *str)
+static int __init ppc_setup_l2cr(char *str)
 {
        if (cpu_has_feature(CPU_FTR_L2CR)) {
                unsigned long val = simple_strtoul(str, NULL, 0);
@@ -134,7 +135,7 @@ int __init ppc_setup_l2cr(char *str)
 __setup("l2cr=", ppc_setup_l2cr);
 
 /* Checks "l3cr=xxxx" command-line option */
-int __init ppc_setup_l3cr(char *str)
+static int __init ppc_setup_l3cr(char *str)
 {
        if (cpu_has_feature(CPU_FTR_L3CR)) {
                unsigned long val = simple_strtoul(str, NULL, 0);
@@ -180,7 +181,7 @@ EXPORT_SYMBOL(nvram_sync);
 
 #endif /* CONFIG_NVRAM */
 
-int __init ppc_init(void)
+static int __init ppc_init(void)
 {
        /* clear the progress line */
        if (ppc_md.progress)
@@ -192,7 +193,6 @@ int __init ppc_init(void)
        }
        return 0;
 }
-
 arch_initcall(ppc_init);
 
 void __init irqstack_early_init(void)
index c388cc3357fa0e9f236277ac6f18a782a62c6bcd..66f2b6299c40bf69a4a1e5c7a31eefc61443a04c 100644 (file)
@@ -110,7 +110,7 @@ void __init setup_tlb_core_data(void)
                if (cpu_first_thread_sibling(boot_cpuid) == first)
                        first = boot_cpuid;
 
-               paca[cpu].tcd_ptr = &paca[first].tcd;
+               paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
 
                /*
                 * If we have threads, we need either tlbsrx.
@@ -254,6 +254,14 @@ static void cpu_ready_for_interrupts(void)
        get_paca()->kernel_msr = MSR_KERNEL;
 }
 
+unsigned long spr_default_dscr = 0;
+
+void __init record_spr_defaults(void)
+{
+       if (early_cpu_has_feature(CPU_FTR_DSCR))
+               spr_default_dscr = mfspr(SPRN_DSCR);
+}
+
 /*
  * Early initialization entry point. This is called by head.S
  * with MMU translation disabled. We rely on the "feature" of
@@ -304,7 +312,11 @@ void __init early_setup(unsigned long dt_ptr)
        early_init_devtree(__va(dt_ptr));
 
        /* Now we know the logical id of our boot cpu, setup the paca. */
-       setup_paca(&paca[boot_cpuid]);
+       if (boot_cpuid != 0) {
+               /* Poison paca_ptrs[0] again if it's not the boot cpu */
+               memset(&paca_ptrs[0], 0x88, sizeof(paca_ptrs[0]));
+       }
+       setup_paca(paca_ptrs[boot_cpuid]);
        fixup_boot_paca();
 
        /*
@@ -599,6 +611,21 @@ __init u64 ppc64_bolted_size(void)
 #endif
 }
 
+static void *__init alloc_stack(unsigned long limit, int cpu)
+{
+       unsigned long pa;
+
+       pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
+                                       early_cpu_to_node(cpu), MEMBLOCK_NONE);
+       if (!pa) {
+               pa = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
+               if (!pa)
+                       panic("cannot allocate stacks");
+       }
+
+       return __va(pa);
+}
+
 void __init irqstack_early_init(void)
 {
        u64 limit = ppc64_bolted_size();
@@ -610,12 +637,8 @@ void __init irqstack_early_init(void)
         * accessed in realmode.
         */
        for_each_possible_cpu(i) {
-               softirq_ctx[i] = (struct thread_info *)
-                       __va(memblock_alloc_base(THREAD_SIZE,
-                                           THREAD_SIZE, limit));
-               hardirq_ctx[i] = (struct thread_info *)
-                       __va(memblock_alloc_base(THREAD_SIZE,
-                                           THREAD_SIZE, limit));
+               softirq_ctx[i] = alloc_stack(limit, i);
+               hardirq_ctx[i] = alloc_stack(limit, i);
        }
 }
 
@@ -623,20 +646,21 @@ void __init irqstack_early_init(void)
 void __init exc_lvl_early_init(void)
 {
        unsigned int i;
-       unsigned long sp;
 
        for_each_possible_cpu(i) {
-               sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
-               critirq_ctx[i] = (struct thread_info *)__va(sp);
-               paca[i].crit_kstack = __va(sp + THREAD_SIZE);
+               void *sp;
+
+               sp = alloc_stack(ULONG_MAX, i);
+               critirq_ctx[i] = sp;
+               paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE;
 
-               sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
-               dbgirq_ctx[i] = (struct thread_info *)__va(sp);
-               paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
+               sp = alloc_stack(ULONG_MAX, i);
+               dbgirq_ctx[i] = sp;
+               paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE;
 
-               sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
-               mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
-               paca[i].mc_kstack = __va(sp + THREAD_SIZE);
+               sp = alloc_stack(ULONG_MAX, i);
+               mcheckirq_ctx[i] = sp;
+               paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE;
        }
 
        if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
@@ -690,23 +714,24 @@ void __init emergency_stack_init(void)
 
        for_each_possible_cpu(i) {
                struct thread_info *ti;
-               ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+
+               ti = alloc_stack(limit, i);
                memset(ti, 0, THREAD_SIZE);
                emerg_stack_init_thread_info(ti, i);
-               paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
+               paca_ptrs[i]->emergency_sp = (void *)ti + THREAD_SIZE;
 
 #ifdef CONFIG_PPC_BOOK3S_64
                /* emergency stack for NMI exception handling. */
-               ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+               ti = alloc_stack(limit, i);
                memset(ti, 0, THREAD_SIZE);
                emerg_stack_init_thread_info(ti, i);
-               paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
+               paca_ptrs[i]->nmi_emergency_sp = (void *)ti + THREAD_SIZE;
 
                /* emergency stack for machine check exception handling. */
-               ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
+               ti = alloc_stack(limit, i);
                memset(ti, 0, THREAD_SIZE);
                emerg_stack_init_thread_info(ti, i);
-               paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
+               paca_ptrs[i]->mc_emergency_sp = (void *)ti + THREAD_SIZE;
 #endif
        }
 }
@@ -762,7 +787,7 @@ void __init setup_per_cpu_areas(void)
        delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
        for_each_possible_cpu(cpu) {
                 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
-               paca[cpu].data_offset = __per_cpu_offset[cpu];
+               paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
        }
 }
 #endif
@@ -846,9 +871,6 @@ static void do_nothing(void *unused)
 
 void rfi_flush_enable(bool enable)
 {
-       if (rfi_flush == enable)
-               return;
-
        if (enable) {
                do_rfi_flush_fixups(enabled_flush_types);
                on_each_cpu(do_nothing, NULL, 1);
@@ -863,6 +885,10 @@ static void init_fallback_flush(void)
        u64 l1d_size, limit;
        int cpu;
 
+       /* Only allocate the fallback flush area once (at boot time). */
+       if (l1d_flush_fallback_area)
+               return;
+
        l1d_size = ppc64_caches.l1d.size;
        limit = min(ppc64_bolted_size(), ppc64_rma_size);
 
@@ -875,23 +901,24 @@ static void init_fallback_flush(void)
        memset(l1d_flush_fallback_area, 0, l1d_size * 2);
 
        for_each_possible_cpu(cpu) {
-               paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
-               paca[cpu].l1d_flush_size = l1d_size;
+               struct paca_struct *paca = paca_ptrs[cpu];
+               paca->rfi_flush_fallback_area = l1d_flush_fallback_area;
+               paca->l1d_flush_size = l1d_size;
        }
 }
 
-void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
 {
        if (types & L1D_FLUSH_FALLBACK) {
-               pr_info("rfi-flush: Using fallback displacement flush\n");
+               pr_info("rfi-flush: fallback displacement flush available\n");
                init_fallback_flush();
        }
 
        if (types & L1D_FLUSH_ORI)
-               pr_info("rfi-flush: Using ori type flush\n");
+               pr_info("rfi-flush: ori type flush available\n");
 
        if (types & L1D_FLUSH_MTTRIG)
-               pr_info("rfi-flush: Using mttrig type flush\n");
+               pr_info("rfi-flush: mttrig type flush available\n");
 
        enabled_flush_types = types;
 
@@ -902,13 +929,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
 #ifdef CONFIG_DEBUG_FS
 static int rfi_flush_set(void *data, u64 val)
 {
+       bool enable;
+
        if (val == 1)
-               rfi_flush_enable(true);
+               enable = true;
        else if (val == 0)
-               rfi_flush_enable(false);
+               enable = false;
        else
                return -EINVAL;
 
+       /* Only do anything if we're changing state */
+       if (enable != rfi_flush)
+               rfi_flush_enable(enable);
+
        return 0;
 }
 
@@ -927,12 +960,4 @@ static __init int rfi_flush_debugfs_init(void)
 }
 device_initcall(rfi_flush_debugfs_init);
 #endif
-
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       if (rfi_flush)
-               return sprintf(buf, "Mitigation: RFI Flush\n");
-
-       return sprintf(buf, "Vulnerable\n");
-}
 #endif /* CONFIG_PPC_BOOK3S_64 */
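
alloc_stack() above and alloc_paca_data() earlier in this series share one allocation shape: try the CPU's own node first, fall back to any memory below the limit, and only panic when both fail. Stripped to a skeleton (both allocators are fakes standing in for the memblock calls; a negative nid models "node not known yet"):

    #include <stdio.h>
    #include <stdlib.h>

    /* Fake node-local allocator: pretend unknown nodes have no pool. */
    static void *alloc_nid(size_t size, int nid)
    {
        return nid >= 0 ? malloc(size) : NULL;
    }

    static void *alloc_local_or_fallback(size_t size, int nid)
    {
        void *p = alloc_nid(size, nid);

        if (!p) {
            /* Node-local pool empty or nid unknown: take any memory. */
            p = malloc(size);
            if (!p) {
                fprintf(stderr, "cannot allocate stacks\n");
                exit(1);
            }
        }
        return p;
    }

    int main(void)
    {
        free(alloc_local_or_fallback(16384, -1));
        return 0;
    }
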
index 7c59d88b9d8636cdd6f94d4137cd2db8632dfd4b..a6467f843acffd4f7eb084f86426023add0dc903 100644 (file)
@@ -49,6 +49,11 @@ extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
 
 #else /* CONFIG_PPC64 */
 
+extern long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+                    struct pt_regs *regs);
+extern long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
+                      struct pt_regs *regs);
+
 static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
                                     struct task_struct *tsk)
 {
index a46de0035214dc94772960245861a7bb43f16b8c..492f03451877e99678ed7a625769dcf6a283f0b2 100644 (file)
@@ -1045,7 +1045,7 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
                     struct ucontext __user *new_ctx,
                     int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
 {
-       unsigned char tmp;
+       unsigned char tmp __maybe_unused;
        int ctx_has_vsx_region = 0;
 
 #ifdef CONFIG_PPC64
@@ -1231,7 +1231,7 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
 {
        struct sig_dbg_op op;
        int i;
-       unsigned char tmp;
+       unsigned char tmp __maybe_unused;
        unsigned long new_msr = regs->msr;
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
        unsigned long new_dbcr0 = current->thread.debug.dbcr0;
index bbe7634b3a43538cbbcd11e03cc515aba99b0deb..e16ec7b3b427ea2d2ce2ca0a1229b16df51dc0e1 100644 (file)
@@ -123,8 +123,8 @@ int smp_generic_kick_cpu(int nr)
         * cpu_start field to become non-zero After we set cpu_start,
         * the processor will continue on to secondary_start
         */
-       if (!paca[nr].cpu_start) {
-               paca[nr].cpu_start = 1;
+       if (!paca_ptrs[nr]->cpu_start) {
+               paca_ptrs[nr]->cpu_start = 1;
                smp_mb();
                return 0;
        }
@@ -565,19 +565,28 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
 }
 #endif
 
+#ifdef CONFIG_NMI_IPI
+static void stop_this_cpu(struct pt_regs *regs)
+#else
 static void stop_this_cpu(void *dummy)
+#endif
 {
        /* Remove this CPU */
        set_cpu_online(smp_processor_id(), false);
 
-       local_irq_disable();
+       hard_irq_disable();
+       spin_begin();
        while (1)
-               ;
+               spin_cpu_relax();
 }
 
 void smp_send_stop(void)
 {
+#ifdef CONFIG_NMI_IPI
+       smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000);
+#else
        smp_call_function(stop_this_cpu, NULL, 0);
+#endif
 }
 
 struct thread_info *current_set[NR_CPUS];
@@ -657,7 +666,7 @@ void smp_prepare_boot_cpu(void)
 {
        BUG_ON(smp_processor_id() != boot_cpuid);
 #ifdef CONFIG_PPC64
-       paca[boot_cpuid].__current = current;
+       paca_ptrs[boot_cpuid]->__current = current;
 #endif
        set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
        current_set[boot_cpuid] = task_thread_info(current);
@@ -748,8 +757,8 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
        struct thread_info *ti = task_thread_info(idle);
 
 #ifdef CONFIG_PPC64
-       paca[cpu].__current = idle;
-       paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+       paca_ptrs[cpu]->__current = idle;
+       paca_ptrs[cpu]->kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
 #endif
        ti->cpu = cpu;
        secondary_ti = current_set[cpu] = ti;
index 04d0bbd7a1dd03e13e47e4c5e10a647672955ea3..755dc98a57ae050e49acea53b0806c4ebcaca8c0 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/firmware.h>
 
 #include "cacheinfo.h"
+#include "setup.h"
 
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
@@ -588,21 +589,18 @@ static DEVICE_ATTR(dscr_default, 0600,
 
 static void sysfs_create_dscr_default(void)
 {
-       int err = 0;
-       if (cpu_has_feature(CPU_FTR_DSCR))
-               err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
-}
+       if (cpu_has_feature(CPU_FTR_DSCR)) {
+               int err = 0;
+               int cpu;
 
-void __init record_spr_defaults(void)
-{
-       int cpu;
+               dscr_default = spr_default_dscr;
+               for_each_possible_cpu(cpu)
+                       paca_ptrs[cpu]->dscr_default = dscr_default;
 
-       if (cpu_has_feature(CPU_FTR_DSCR)) {
-               dscr_default = mfspr(SPRN_DSCR);
-               for (cpu = 0; cpu < nr_cpu_ids; cpu++)
-                       paca[cpu].dscr_default = dscr_default;
+               err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
        }
 }
+
 #endif /* CONFIG_PPC64 */
 
 #ifdef HAS_PPC_PMC_PA6T
index a32823dcd9a4d24444cb5291f17ec79bc7fe23ef..360e71d455ccddb6a5b93cf64450d921d2f6122c 100644 (file)
@@ -266,6 +266,9 @@ void accumulate_stolen_time(void)
 
 static inline u64 calculate_stolen_time(u64 stop_tb)
 {
+       if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+               return 0;
+
        if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
                return scan_dispatch_log(stop_tb);
 
@@ -1234,7 +1237,7 @@ void calibrate_delay(void)
 static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
 {
        ppc_md.get_rtc_time(tm);
-       return rtc_valid_tm(tm);
+       return 0;
 }
 
 static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
index 1e48d157196a6157b014d0ed6046cfde92d40777..a2ef0c0e6c315bf5b833d1d3c1d140d15a178b2e 100644 (file)
@@ -208,6 +208,12 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
        }
        raw_local_irq_restore(flags);
 
+       /*
+        * system_reset_exception handles debugger, crash dump, panic, for 0x100
+        */
+       if (TRAP(regs) == 0x100)
+               return;
+
        crash_fadump(regs, "die oops");
 
        if (kexec_should_crash(current))
@@ -272,8 +278,13 @@ void die(const char *str, struct pt_regs *regs, long err)
 {
        unsigned long flags;
 
-       if (debugger(regs))
-               return;
+       /*
+        * system_reset_exception handles debugger, crash dump, panic, for 0x100
+        */
+       if (TRAP(regs) != 0x100) {
+               if (debugger(regs))
+                       return;
+       }
 
        flags = oops_begin(regs);
        if (__die(str, regs, err))
@@ -460,7 +471,7 @@ static inline int check_io_access(struct pt_regs *regs)
 /* single-step stuff */
 #define single_stepping(regs)  (current->thread.debug.dbcr0 & DBCR0_IC)
 #define clear_single_step(regs)        (current->thread.debug.dbcr0 &= ~DBCR0_IC)
-
+#define clear_br_trace(regs)   do {} while(0)
 #else
 /* On non-4xx, the reason for the machine check or program
    exception is in the MSR. */
@@ -473,6 +484,7 @@ static inline int check_io_access(struct pt_regs *regs)
 
 #define single_stepping(regs)  ((regs)->msr & MSR_SE)
 #define clear_single_step(regs)        ((regs)->msr &= ~MSR_SE)
+#define clear_br_trace(regs)   ((regs)->msr &= ~MSR_BE)
 #endif
 
 #if defined(CONFIG_E500)
@@ -988,6 +1000,7 @@ void single_step_exception(struct pt_regs *regs)
        enum ctx_state prev_state = exception_enter();
 
        clear_single_step(regs);
+       clear_br_trace(regs);
 
        if (kprobe_post_handler(regs))
                return;
@@ -1495,18 +1508,6 @@ void alignment_exception(struct pt_regs *regs)
        exception_exit(prev_state);
 }
 
-void slb_miss_bad_addr(struct pt_regs *regs)
-{
-       enum ctx_state prev_state = exception_enter();
-
-       if (user_mode(regs))
-               _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
-       else
-               bad_page_fault(regs, regs->dar, SIGSEGV);
-
-       exception_exit(prev_state);
-}
-
 void StackOverflow(struct pt_regs *regs)
 {
        printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
index 22b01a3962f06dd30618b72d283546b0d9d7ac09..b44ec104a5a16178662f021b768106bb43cda8dd 100644 (file)
@@ -99,26 +99,28 @@ static struct vdso_patch_def vdso_patches[] = {
                CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
                "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
        },
+#ifdef CONFIG_PPC32
        {
-               CPU_FTR_USE_TB, 0,
+               CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
                "__kernel_gettimeofday", NULL
        },
        {
-               CPU_FTR_USE_TB, 0,
+               CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
                "__kernel_clock_gettime", NULL
        },
        {
-               CPU_FTR_USE_TB, 0,
+               CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
                "__kernel_clock_getres", NULL
        },
        {
-               CPU_FTR_USE_TB, 0,
+               CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
                "__kernel_get_tbfreq", NULL
        },
        {
-               CPU_FTR_USE_TB, 0,
+               CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
                "__kernel_time", NULL
        },
+#endif
 };
 
 /*
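
Each vdso_patches[] entry is a (ftr_mask, ftr_value, symbol, replacement) tuple: at boot, when (cpu_ftrs & ftr_mask) == ftr_value the generic symbol gets patched, and a NULL replacement disables the symbol outright, which is how the CPU_FTR_USE_RTC entries turn off the timebase-based vDSO calls on CPUs that only have the old RTC. A toy version of that dispatch (the feature bits are invented and printf stands in for the real patching):

    #include <stdio.h>

    #define FTR_COHERENT_ICACHE 0x1UL
    #define FTR_USE_RTC         0x2UL

    struct vdso_patch_def {
        unsigned long ftr_mask, ftr_value;
        const char *gen_name, *fix_name;  /* NULL fix_name: disable */
    };

    static const struct vdso_patch_def patches[] = {
        { FTR_COHERENT_ICACHE, FTR_COHERENT_ICACHE,
          "__kernel_sync_dicache", "__kernel_sync_dicache_p5" },
        { FTR_USE_RTC, FTR_USE_RTC, "__kernel_gettimeofday", NULL },
    };

    static void apply_patches(unsigned long cpu_ftrs)
    {
        size_t i;

        for (i = 0; i < sizeof(patches) / sizeof(patches[0]); i++) {
            const struct vdso_patch_def *p = &patches[i];

            if ((cpu_ftrs & p->ftr_mask) != p->ftr_value)
                continue;
            if (p->fix_name)
                printf("patch %s -> %s\n", p->gen_name, p->fix_name);
            else
                printf("disable %s\n", p->gen_name);
        }
    }

    int main(void)
    {
        apply_patches(FTR_COHERENT_ICACHE);
        return 0;
    }
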
index 85ba80de713330e78fa48617ef26fc24658689f6..4b19da8c87aedfac4435c68e18f797c197e834a5 100644 (file)
@@ -74,9 +74,15 @@ kvm-hv-y += \
        book3s_64_mmu_hv.o \
        book3s_64_mmu_radix.o
 
+kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+       book3s_hv_tm.o
+
 kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
        book3s_hv_rm_xics.o book3s_hv_rm_xive.o
 
+kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+       book3s_hv_tm_builtin.o
+
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
        book3s_hv_hmi.o \
@@ -84,6 +90,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
        book3s_hv_rm_mmu.o \
        book3s_hv_ras.o \
        book3s_hv_builtin.o \
+       $(kvm-book3s_64-builtin-tm-objs-y) \
        $(kvm-book3s_64-builtin-xics-objs-y)
 endif
 
index 9cb9448163c4bf7021822d6632fb6c94452187ed..81e2ea882d977bd338445565f2a78257e274abf1 100644 (file)
@@ -49,6 +49,7 @@
 #include <asm/reg.h>
 #include <asm/ppc-opcode.h>
 #include <asm/asm-prototypes.h>
+#include <asm/debug.h>
 #include <asm/disassemble.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
@@ -170,7 +171,7 @@ static bool kvmppc_ipi_thread(int cpu)
 
 #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
        if (cpu >= 0 && cpu < nr_cpu_ids) {
-               if (paca[cpu].kvm_hstate.xics_phys) {
+               if (paca_ptrs[cpu]->kvm_hstate.xics_phys) {
                        xics_wake_cpu(cpu);
                        return true;
                }
@@ -498,7 +499,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
                 * use 640 bytes of the structure though, so we should accept
                 * clients that set a size of 640.
                 */
-               if (len < 640)
+               BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+               if (len < sizeof(struct lppaca))
                        break;
                vpap = &tvcpu->arch.vpa;
                err = 0;
@@ -741,6 +743,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
        case H_SET_MODE_RESOURCE_SET_DAWR:
                if (!kvmppc_power8_compatible(vcpu))
                        return H_P2;
+               if (!ppc_breakpoint_available())
+                       return H_P2;
                if (mflags)
                        return H_UNSUPPORTED_FLAG_START;
                if (value2 & DABRX_HYP)
@@ -1206,6 +1210,19 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        r = RESUME_GUEST;
                }
                break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+               /*
+                * This occurs for various TM-related instructions that
+                * we need to emulate on POWER9 DD2.2.  We have already
+                * handled the cases where the guest was in real-suspend
+                * mode and was transitioning to transactional state.
+                */
+               r = kvmhv_p9_tm_emulation(vcpu);
+               break;
+#endif
+
        case BOOK3S_INTERRUPT_HV_RM_HARD:
                r = RESUME_PASSTHROUGH;
                break;
@@ -1978,7 +1995,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
         * turn off the HFSCR bit, which causes those instructions to trap.
         */
        vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
-       if (!cpu_has_feature(CPU_FTR_TM))
+       if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+               vcpu->arch.hfscr |= HFSCR_TM;
+       else if (!cpu_has_feature(CPU_FTR_TM_COMP))
                vcpu->arch.hfscr &= ~HFSCR_TM;
        if (cpu_has_feature(CPU_FTR_ARCH_300))
                vcpu->arch.hfscr &= ~HFSCR_MSGP;
@@ -2140,7 +2159,7 @@ static int kvmppc_grab_hwthread(int cpu)
        struct paca_struct *tpaca;
        long timeout = 10000;
 
-       tpaca = &paca[cpu];
+       tpaca = paca_ptrs[cpu];
 
        /* Ensure the thread won't go into the kernel if it wakes */
        tpaca->kvm_hstate.kvm_vcpu = NULL;
@@ -2173,7 +2192,7 @@ static void kvmppc_release_hwthread(int cpu)
 {
        struct paca_struct *tpaca;
 
-       tpaca = &paca[cpu];
+       tpaca = paca_ptrs[cpu];
        tpaca->kvm_hstate.hwthread_req = 0;
        tpaca->kvm_hstate.kvm_vcpu = NULL;
        tpaca->kvm_hstate.kvm_vcore = NULL;
@@ -2239,9 +2258,10 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
                vcpu->arch.thread_cpu = cpu;
                cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
        }
-       tpaca = &paca[cpu];
+       tpaca = paca_ptrs[cpu];
        tpaca->kvm_hstate.kvm_vcpu = vcpu;
        tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
+       tpaca->kvm_hstate.fake_suspend = 0;
        /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
        smp_wmb();
        tpaca->kvm_hstate.kvm_vcore = vc;
@@ -2264,7 +2284,7 @@ static void kvmppc_wait_for_nap(int n_threads)
                 * for any threads that still have a non-NULL vcore ptr.
                 */
                for (i = 1; i < n_threads; ++i)
-                       if (paca[cpu + i].kvm_hstate.kvm_vcore)
+                       if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
                                break;
                if (i == n_threads) {
                        HMT_medium();
@@ -2274,7 +2294,7 @@ static void kvmppc_wait_for_nap(int n_threads)
        }
        HMT_medium();
        for (i = 1; i < n_threads; ++i)
-               if (paca[cpu + i].kvm_hstate.kvm_vcore)
+               if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
                        pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
 }
 
@@ -2806,9 +2826,11 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
        }
 
        for (thr = 0; thr < controlled_threads; ++thr) {
-               paca[pcpu + thr].kvm_hstate.tid = thr;
-               paca[pcpu + thr].kvm_hstate.napping = 0;
-               paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+               struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+               paca->kvm_hstate.tid = thr;
+               paca->kvm_hstate.napping = 0;
+               paca->kvm_hstate.kvm_split_mode = sip;
        }
 
        /* Initiate micro-threading (split-core) on POWER8 if required */
@@ -2923,7 +2945,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
        } else if (hpt_on_radix) {
                /* Wait for all threads to have seen final sync */
                for (thr = 1; thr < controlled_threads; ++thr) {
-                       while (paca[pcpu + thr].kvm_hstate.kvm_split_mode) {
+                       struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+                       while (paca->kvm_hstate.kvm_split_mode) {
                                HMT_low();
                                barrier();
                        }
@@ -4388,7 +4412,7 @@ static int kvm_init_subcore_bitmap(void)
                int node = cpu_to_node(first_cpu);
 
                /* Ignore if it is already allocated. */
-               if (paca[first_cpu].sibling_subcore_state)
+               if (paca_ptrs[first_cpu]->sibling_subcore_state)
                        continue;
 
                sibling_subcore_state =
@@ -4403,7 +4427,8 @@ static int kvm_init_subcore_bitmap(void)
                for (j = 0; j < threads_per_core; j++) {
                        int cpu = first_cpu + j;
 
-                       paca[cpu].sibling_subcore_state = sibling_subcore_state;
+                       paca_ptrs[cpu]->sibling_subcore_state =
+                                               sibling_subcore_state;
                }
        }
        return 0;
@@ -4430,7 +4455,7 @@ static int kvmppc_book3s_init_hv(void)
 
        /*
         * We need a way of accessing the XICS interrupt controller,
-        * either directly, via paca[cpu].kvm_hstate.xics_phys, or
+        * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or
         * indirectly, via OPAL.
         */
 #ifdef CONFIG_SMP
index 49a2c7825e045662bf1080d0888a00a0f076da6a..de18299f92b759288c13df1b3479cf6bc3403b08 100644 (file)
@@ -251,7 +251,7 @@ void kvmhv_rm_send_ipi(int cpu)
            return;
 
        /* Else poke the target with an IPI */
-       xics_phys = paca[cpu].kvm_hstate.xics_phys;
+       xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
        if (xics_phys)
                __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
        else
index dc54373c878010b6419f02568ad8626b53c474c8..0e84930332889507ce2461986c080aadd2bb14b2 100644 (file)
@@ -79,8 +79,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        li      r5, 0
        mtspr   SPRN_MMCRA, r5
        isync
-       ld      r3, PACALPPACAPTR(r13)  /* is the host using the PMU? */
-       lbz     r5, LPPACA_PMCINUSE(r3)
+       lbz     r5, PACA_PMCINUSE(r13)  /* is the host using the PMU? */
        cmpwi   r5, 0
        beq     31f                     /* skip if not */
        mfspr   r5, SPRN_MMCR1
index f86a20270e508ef7108acc4fe02e556d40ff2378..bd63fa8a08b5dd3edd0800ac34e483a8501f401c 100644 (file)
@@ -113,8 +113,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        mtspr   SPRN_SPRG_VDSO_WRITE,r3
 
        /* Reload the host's PMU registers */
-       ld      r3, PACALPPACAPTR(r13)  /* is the host using the PMU? */
-       lbz     r4, LPPACA_PMCINUSE(r3)
+       lbz     r4, PACA_PMCINUSE(r13)  /* is the host using the PMU? */
        cmpwi   r4, 0
        beq     23f                     /* skip if not */
 BEGIN_FTR_SECTION
@@ -786,12 +785,18 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
 BEGIN_FTR_SECTION
+       b       91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
        /*
         * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
         */
        bl      kvmppc_restore_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
 #endif
 
        /* Load guest PMU registers */
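
[For readers unfamiliar with the new idiom above: feature sections are patched at boot, and the code between BEGIN_FTR_SECTION and END_FTR_SECTION(mask, value) survives only when (CPU features & mask) == value, being overwritten with nops otherwise. Here the surviving instruction is the "b 91f", so kvmppc_restore_tm is called whenever either feature bit is set. A rough C rendering, for readability only:]

    /* rough C equivalent of the patched assembly, not kernel code */
    if (cpu_has_feature(CPU_FTR_TM) ||
        cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
            kvmppc_restore_tm();    /* otherwise the live "b 91f" skips it */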
@@ -885,8 +890,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        ld      r6, VCPU_DAWRX(r4)
        ld      r7, VCPU_CIABR(r4)
        ld      r8, VCPU_TAR(r4)
+       /*
+        * Handle broken DAWR case by not writing it. This means we
+        * can still store the DAWR register for migration.
+        */
+BEGIN_FTR_SECTION
        mtspr   SPRN_DAWR, r5
        mtspr   SPRN_DAWRX, r6
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
        mtspr   SPRN_CIABR, r7
        mtspr   SPRN_TAR, r8
        ld      r5, VCPU_IC(r4)
@@ -914,11 +925,14 @@ BEGIN_FTR_SECTION
        mtspr   SPRN_ACOP, r6
        mtspr   SPRN_CSIGR, r7
        mtspr   SPRN_TACR, r8
+       nop
 FTR_SECTION_ELSE
        /* POWER9-only registers */
        ld      r5, VCPU_TID(r4)
        ld      r6, VCPU_PSSCR(r4)
+       lbz     r8, HSTATE_FAKE_SUSPEND(r13)
+       oris    r6, r6, PSSCR_EC@h      /* This makes stop trap to the hypervisor */
+       rldimi  r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
        ld      r7, VCPU_HFSCR(r4)
        mtspr   SPRN_TIDR, r5
        mtspr   SPRN_PSSCR, r6
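
[The rldimi above splices the thread's fake-suspend flag into the PSSCR image before it is written. On POWER9 DD2.2 the guest cannot be left in true suspend state, so the hypervisor sets this PSSCR bit instead: the guest observes MSR[TS] = S while its TM instructions trap to the softpatch handler. An equivalent C sketch of the bit insertion:]

    /* C sketch of the rldimi: replace the fake-suspend bit in PSSCR */
    psscr &= ~PSSCR_FAKE_SUSPEND;
    psscr |= (u64)fake_suspend << PSSCR_FAKE_SUSPEND_LG;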
@@ -1370,6 +1384,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        std     r3, VCPU_CTR(r9)
        std     r4, VCPU_XER(r9)
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+       /* For softpatch interrupt, go off and do TM instruction emulation */
+       cmpwi   r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+       beq     kvmppc_tm_emul
+#endif
+
        /* If this is a page table miss then see if it's theirs or ours */
        cmpwi   r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
        beq     kvmppc_hdsi
@@ -1747,12 +1767,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
        bl      kvmppc_save_fp
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
 BEGIN_FTR_SECTION
+       b       91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
        /*
         * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
         */
        bl      kvmppc_save_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
 #endif
 
        /* Increment yield count if they have a VPA */
@@ -1852,6 +1878,10 @@ BEGIN_FTR_SECTION
        ld      r6, STACK_SLOT_DAWR(r1)
        ld      r7, STACK_SLOT_DAWRX(r1)
        mtspr   SPRN_CIABR, r5
+       /*
+        * If the DAWR doesn't work, it's OK to write these here as
+        * the values should always be zero.
+        */
        mtspr   SPRN_DAWR, r6
        mtspr   SPRN_DAWRX, r7
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
@@ -2055,6 +2085,42 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
        mtlr    r0
        blr
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2.  This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+kvmppc_tm_emul:
+       /* Save instruction image in HEIR */
+       mfspr   r3, SPRN_HEIR
+       stw     r3, VCPU_HEIR(r9)
+
+       /*
+        * The cases we want to handle here are those where the guest
+        * is in real suspend mode and is trying to transition to
+        * transactional mode.
+        */
+       lbz     r0, HSTATE_FAKE_SUSPEND(r13)
+       cmpwi   r0, 0           /* keep exiting guest if in fake suspend */
+       bne     guest_exit_cont
+       rldicl  r3, r11, 64 - MSR_TS_S_LG, 62
+       cmpwi   r3, 1           /* or if not in suspend state */
+       bne     guest_exit_cont
+
+       /* Call C code to do the emulation */
+       mr      r3, r9
+       bl      kvmhv_p9_tm_emulation_early
+       nop
+       ld      r9, HSTATE_KVM_VCPU(r13)
+       li      r12, BOOK3S_INTERRUPT_HV_SOFTPATCH
+       cmpwi   r3, 0
+       beq     guest_exit_cont         /* continue exiting if not handled */
+       ld      r10, VCPU_PC(r9)
+       ld      r11, VCPU_MSR(r9)
+       b       fast_interrupt_c_return /* go back to guest if handled */
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
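
[kvmppc_tm_emul gives the softpatch interrupt two tiers on these CPUs: the guards above send only the "real suspend, not fake suspend" case to kvmhv_p9_tm_emulation_early() (added below in book3s_hv_tm_builtin.c), which runs in real mode and can return straight to the guest; everything else completes the exit and is emulated in virtual mode by kvmhv_p9_tm_emulation() (book3s_hv_tm.c). Condensed flow, with hypothetical helper names:]

    /* condensed C sketch of the dispatch above; helpers are hypothetical */
    if (!hstate_fake_suspend(vcpu) && msr_ts_is_suspend(msr) &&
        kvmhv_p9_tm_emulation_early(vcpu))
            reenter_guest(vcpu);           /* fast_interrupt_c_return */
    else
            continue_guest_exit(vcpu);     /* kvmhv_p9_tm_emulation() runs later */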
 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
  * If it is an HPTE not found fault that is due to the guest accessing
@@ -2507,8 +2573,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        li      r3,0
        blr
 
+2:
+BEGIN_FTR_SECTION
+       /* POWER9 with disabled DAWR */
+       li      r3, H_HARDWARE
+       blr
+END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
        /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
-2:     rlwimi  r5, r4, 5, DAWRX_DR | DAWRX_DW
+       rlwimi  r5, r4, 5, DAWRX_DR | DAWRX_DW
        rlwimi  r5, r4, 2, DAWRX_WT
        clrrdi  r4, r4, 3
        std     r4, VCPU_DAWR(r3)
@@ -2588,13 +2660,19 @@ _GLOBAL(kvmppc_h_cede)          /* r3 = vcpu pointer, r11 = msr, r13 = paca */
        bl      kvmppc_save_fp
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
 BEGIN_FTR_SECTION
+       b       91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
        /*
         * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
         */
        ld      r9, HSTATE_KVM_VCPU(r13)
        bl      kvmppc_save_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
 #endif
 
        /*
@@ -2701,12 +2779,18 @@ kvm_end_cede:
 #endif
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Branch around the call if both CPU_FTR_TM and
+ * CPU_FTR_P9_TM_HV_ASSIST are off.
+ */
 BEGIN_FTR_SECTION
+       b       91f
+END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
        /*
         * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
         */
        bl      kvmppc_restore_tm
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+91:
 #endif
 
        /* load up FP state */
@@ -3033,6 +3117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 kvmppc_save_tm:
        mflr    r0
        std     r0, PPC_LR_STKOFF(r1)
+       stdu    r1, -PPC_MIN_STKFRM(r1)
 
        /* Turn on TM. */
        mfmsr   r8
@@ -3047,6 +3132,24 @@ kvmppc_save_tm:
        std     r1, HSTATE_HOST_R1(r13)
        li      r3, TM_CAUSE_KVM_RESCHED
 
+BEGIN_FTR_SECTION
+       lbz     r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
+       cmpwi   r0, 0
+       beq     3f
+       rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
+       beq     4f
+BEGIN_FTR_SECTION_NESTED(96)
+       bl      pnv_power9_force_smt4_catch
+END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+       nop
+       b       6f
+3:
+       /* Emulation of the treclaim instruction needs TEXASR before treclaim */
+       mfspr   r6, SPRN_TEXASR
+       std     r6, VCPU_ORIG_TEXASR(r9)
+6:
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
        /* Clear the MSR RI since r1, r13 are all going to be foobar. */
        li      r5, 0
        mtmsrd  r5, 1
@@ -3058,6 +3161,43 @@ kvmppc_save_tm:
        SET_SCRATCH0(r13)
        GET_PACA(r13)
        std     r9, PACATMSCRATCH(r13)
+
+       /* If doing TM emulation on POWER9 DD2.2, check for fake suspend mode */
+BEGIN_FTR_SECTION
+       lbz     r9, HSTATE_FAKE_SUSPEND(r13)
+       cmpwi   r9, 0
+       beq     2f
+       /*
+        * We were in fake suspend, so we are not going to save the
+        * register state as the guest checkpointed state (since
+        * we already have it), therefore we can now use any volatile GPR.
+        */
+       /* Reload stack pointer and TOC. */
+       ld      r1, HSTATE_HOST_R1(r13)
+       ld      r2, PACATOC(r13)
+       /* Set MSR RI now we have r1 and r13 back. */
+       li      r5, MSR_RI
+       mtmsrd  r5, 1
+       HMT_MEDIUM
+       ld      r6, HSTATE_DSCR(r13)
+       mtspr   SPRN_DSCR, r6
+BEGIN_FTR_SECTION_NESTED(96)
+       bl      pnv_power9_force_smt4_release
+END_FTR_SECTION_NESTED(CPU_FTR_P9_TM_XER_SO_BUG, CPU_FTR_P9_TM_XER_SO_BUG, 96)
+       nop
+
+4:
+       mfspr   r3, SPRN_PSSCR
+       /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
+       li      r0, PSSCR_FAKE_SUSPEND
+       andc    r3, r3, r0
+       mtspr   SPRN_PSSCR, r3
+       ld      r9, HSTATE_KVM_VCPU(r13)
+       /* Don't save TEXASR, use value from last exit in real suspend state */
+       b       11f
+2:
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
        ld      r9, HSTATE_KVM_VCPU(r13)
 
        /* Get a few more GPRs free. */
@@ -3128,13 +3268,15 @@ kvmppc_save_tm:
         * change these outside of a transaction, so they must always be
         * context switched.
         */
+       mfspr   r7, SPRN_TEXASR
+       std     r7, VCPU_TEXASR(r9)
+11:
        mfspr   r5, SPRN_TFHAR
        mfspr   r6, SPRN_TFIAR
-       mfspr   r7, SPRN_TEXASR
        std     r5, VCPU_TFHAR(r9)
        std     r6, VCPU_TFIAR(r9)
-       std     r7, VCPU_TEXASR(r9)
 
+       addi    r1, r1, PPC_MIN_STKFRM
        ld      r0, PPC_LR_STKOFF(r1)
        mtlr    r0
        blr
@@ -3169,6 +3311,8 @@ kvmppc_restore_tm:
        mtspr   SPRN_TFIAR, r6
        mtspr   SPRN_TEXASR, r7
 
+       li      r0, 0
+       stb     r0, HSTATE_FAKE_SUSPEND(r13)
        ld      r5, VCPU_MSR(r4)
        rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
        beqlr           /* TM not active in guest */
@@ -3182,6 +3326,15 @@ kvmppc_restore_tm:
        oris    r7, r7, (TEXASR_FS)@h
        mtspr   SPRN_TEXASR, r7
 
+       /*
+        * If we are doing TM emulation for the guest on a POWER9 DD2.2,
+        * then we don't actually do a trechkpt -- we either set up
+        * fake-suspend mode, or emulate a TM rollback.
+        */
+BEGIN_FTR_SECTION
+       b       .Ldo_tm_fake_load
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+
        /*
         * We need to load up the checkpointed state for the guest.
         * We need to do this early as it will blow away any GPRs, VSRs and
@@ -3254,10 +3407,24 @@ kvmppc_restore_tm:
        /* Set the MSR RI since we have our registers back. */
        li      r5, MSR_RI
        mtmsrd  r5, 1
-
+9:
        ld      r0, PPC_LR_STKOFF(r1)
        mtlr    r0
        blr
+
+.Ldo_tm_fake_load:
+       cmpwi   r5, 1           /* check for suspended state */
+       bgt     10f
+       stb     r5, HSTATE_FAKE_SUSPEND(r13)
+       b       9b              /* and return */
+10:    stdu    r1, -PPC_MIN_STKFRM(r1)
+       /* guest is in transactional state, so simulate rollback */
+       mr      r3, r4
+       bl      kvmhv_emulate_tm_rollback
+       nop
+       ld      r4, HSTATE_KVM_VCPU(r13) /* our vcpu pointer has been trashed */
+       addi    r1, r1, PPC_MIN_STKFRM
+       b       9b
 #endif
 
 /*
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
new file mode 100644 (file)
index 0000000..bf710ad
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
+{
+       u64 texasr, tfiar;
+       u64 msr = vcpu->arch.shregs.msr;
+
+       tfiar = vcpu->arch.pc & ~0x3ull;
+       texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT;
+       if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr))
+               texasr |= TEXASR_SUSP;
+       if (msr & MSR_PR) {
+               texasr |= TEXASR_PR;
+               tfiar |= 1;
+       }
+       vcpu->arch.tfiar = tfiar;
+       /* Preserve ROT and TL fields of existing TEXASR */
+       vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | texasr;
+}
+
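[emulate_tx_failure() assembles TEXASR the way a real transaction failure would: the failure cause occupies the top byte (hence the << 56), the abort/failure-summary/exact bits are ORed in, and the 0x3ffffff mask keeps the low fields (ROT and the transaction level, per the comment) from the previous value. Two hypothetical decoders for the same layout, purely for illustration:]

    /* hypothetical helpers mirroring the layout built above */
    static inline u8 texasr_failure_cause(u64 texasr)
    {
            return texasr >> 56;            /* cause code: top byte */
    }

    static inline bool texasr_failure_recorded(u64 texasr)
    {
            return texasr & TEXASR_FS;      /* failure summary bit */
    }
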
+/*
+ * This gets called on a softpatch interrupt on POWER9 DD2.2 processors.
+ * We expect to find a TM-related instruction to be emulated.  The
+ * instruction image is in vcpu->arch.emul_inst.  If the guest was in
+ * TM suspended or transactional state, the checkpointed state has been
+ * reclaimed and is in the vcpu struct.  The CPU is in virtual mode in
+ * host context.
+ */
+int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
+{
+       u32 instr = vcpu->arch.emul_inst;
+       u64 msr = vcpu->arch.shregs.msr;
+       u64 newmsr, bescr;
+       int ra, rs;
+
+       switch (instr & 0xfc0007ff) {
+       case PPC_INST_RFID:
+               /* XXX do we need to check for PR=0 here? */
+               newmsr = vcpu->arch.shregs.srr1;
+               /* should only get here for Sx -> T1 transition */
+               WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+                              MSR_TM_TRANSACTIONAL(newmsr) &&
+                              (newmsr & MSR_TM)));
+               newmsr = sanitize_msr(newmsr);
+               vcpu->arch.shregs.msr = newmsr;
+               vcpu->arch.cfar = vcpu->arch.pc - 4;
+               vcpu->arch.pc = vcpu->arch.shregs.srr0;
+               return RESUME_GUEST;
+
+       case PPC_INST_RFEBB:
+               if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+                       /* generate an illegal instruction interrupt */
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                       return RESUME_GUEST;
+               }
+               /* check EBB facility is available */
+               if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
+                       /* generate an illegal instruction interrupt */
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                       return RESUME_GUEST;
+               }
+               if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
+                       /* generate a facility unavailable interrupt */
+                       vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+                               ((u64)FSCR_EBB_LG << 56);
+                       kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+                       return RESUME_GUEST;
+               }
+               bescr = vcpu->arch.bescr;
+               /* expect to see a S->T transition requested */
+               WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+                              ((bescr >> 30) & 3) == 2));
+               bescr &= ~BESCR_GE;
+               if (instr & (1 << 11))
+                       bescr |= BESCR_GE;
+               vcpu->arch.bescr = bescr;
+               msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+               vcpu->arch.shregs.msr = msr;
+               vcpu->arch.cfar = vcpu->arch.pc - 4;
+               vcpu->arch.pc = vcpu->arch.ebbrr;
+               return RESUME_GUEST;
+
+       case PPC_INST_MTMSRD:
+               /* XXX do we need to check for PR=0 here? */
+               rs = (instr >> 21) & 0x1f;
+               newmsr = kvmppc_get_gpr(vcpu, rs);
+               /* check this is a Sx -> T1 transition */
+               WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+                              MSR_TM_TRANSACTIONAL(newmsr) &&
+                              (newmsr & MSR_TM)));
+               /* mtmsrd doesn't change LE */
+               newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+               newmsr = sanitize_msr(newmsr);
+               vcpu->arch.shregs.msr = newmsr;
+               return RESUME_GUEST;
+
+       case PPC_INST_TSR:
+               /* check for PR=1 and arch 2.06 bit set in PCR */
+               if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+                       /* generate an illegal instruction interrupt */
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                       return RESUME_GUEST;
+               }
+               /* check for TM disabled in the HFSCR or MSR */
+               if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+                       /* generate an illegal instruction interrupt */
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                       return RESUME_GUEST;
+               }
+               if (!(msr & MSR_TM)) {
+                       /* generate a facility unavailable interrupt */
+                       vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+                               ((u64)FSCR_TM_LG << 56);
+                       kvmppc_book3s_queue_irqprio(vcpu,
+                                               BOOK3S_INTERRUPT_FAC_UNAVAIL);
+                       return RESUME_GUEST;
+               }
+               /* Set CR0 to indicate previous transactional state */
+               vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+                       (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+               /* L=1 => tresume, L=0 => tsuspend */
+               if (instr & (1 << 21)) {
+                       if (MSR_TM_SUSPENDED(msr))
+                               msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+               } else {
+                       if (MSR_TM_TRANSACTIONAL(msr))
+                               msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
+               }
+               vcpu->arch.shregs.msr = msr;
+               return RESUME_GUEST;
+
+       case PPC_INST_TRECLAIM:
+               /* check for TM disabled in the HFSCR or MSR */
+               if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+                       /* generate an illegal instruction interrupt */
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                       return RESUME_GUEST;
+               }
+               if (!(msr & MSR_TM)) {
+                       /* generate a facility unavailable interrupt */
+                       vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+                               ((u64)FSCR_TM_LG << 56);
+                       kvmppc_book3s_queue_irqprio(vcpu,
+                                               BOOK3S_INTERRUPT_FAC_UNAVAIL);
+                       return RESUME_GUEST;
+               }
+               /* If no transaction active, generate TM bad thing */
+               if (!MSR_TM_ACTIVE(msr)) {
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+                       return RESUME_GUEST;
+               }
+               /* If failure was not previously recorded, recompute TEXASR */
+               if (!(vcpu->arch.orig_texasr & TEXASR_FS)) {
+                       ra = (instr >> 16) & 0x1f;
+                       if (ra)
+                               ra = kvmppc_get_gpr(vcpu, ra) & 0xff;
+                       emulate_tx_failure(vcpu, ra);
+               }
+
+               copy_from_checkpoint(vcpu);
+
+               /* Set CR0 to indicate previous transactional state */
+               vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+                       (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+               vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+               return RESUME_GUEST;
+
+       case PPC_INST_TRECHKPT:
+               /* XXX do we need to check for PR=0 here? */
+               /* check for TM disabled in the HFSCR or MSR */
+               if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+                       /* generate an illegal instruction interrupt */
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+                       return RESUME_GUEST;
+               }
+               if (!(msr & MSR_TM)) {
+                       /* generate a facility unavailable interrupt */
+                       vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
+                               ((u64)FSCR_TM_LG << 56);
+                       kvmppc_book3s_queue_irqprio(vcpu,
+                                               BOOK3S_INTERRUPT_FAC_UNAVAIL);
+                       return RESUME_GUEST;
+               }
+               /* If transaction active or TEXASR[FS] = 0, bad thing */
+               if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) {
+                       kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+                       return RESUME_GUEST;
+               }
+
+               copy_to_checkpoint(vcpu);
+
+               /* Set CR0 to indicate previous transactional state */
+               vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) |
+                       (((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 28);
+               vcpu->arch.shregs.msr = msr | MSR_TS_S;
+               return RESUME_GUEST;
+       }
+
+       /* What should we do here? We didn't recognize the instruction */
+       WARN_ON_ONCE(1);
+       return RESUME_GUEST;
+}
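
[Several cases above queue a facility-unavailable interrupt with the same three lines: the top byte of the guest's FSCR is its interrupt-cause (IC) field, so it is cleared and rewritten with the relevant *_LG value before queuing. The idiom gathered into a hypothetical helper, not part of this patch:]

    static inline void kvmppc_set_fscr_ic(struct kvm_vcpu *vcpu, u64 cause_lg)
    {
            /* IC lives in FSCR bits 56..63; stash the cause, then queue */
            vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
                              (cause_lg << 56);
            kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
    }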
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
new file mode 100644 (file)
index 0000000..d98ccfd
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * This handles the cases where the guest is in real suspend mode
+ * and we want to get back to the guest without dooming the transaction.
+ * The caller has checked that the guest is in real-suspend mode
+ * (MSR[TS] = S and the fake-suspend flag is not set).
+ */
+int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+{
+       u32 instr = vcpu->arch.emul_inst;
+       u64 newmsr, msr, bescr;
+       int rs;
+
+       switch (instr & 0xfc0007ff) {
+       case PPC_INST_RFID:
+               /* XXX do we need to check for PR=0 here? */
+               newmsr = vcpu->arch.shregs.srr1;
+               /* should only get here for Sx -> T1 transition */
+               if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+                       return 0;
+               newmsr = sanitize_msr(newmsr);
+               vcpu->arch.shregs.msr = newmsr;
+               vcpu->arch.cfar = vcpu->arch.pc - 4;
+               vcpu->arch.pc = vcpu->arch.shregs.srr0;
+               return 1;
+
+       case PPC_INST_RFEBB:
+               /* check for PR=1 and arch 2.06 bit set in PCR */
+               msr = vcpu->arch.shregs.msr;
+               if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+                       return 0;
+               /* check EBB facility is available */
+               if (!(vcpu->arch.hfscr & HFSCR_EBB) ||
+                   ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB)))
+                       return 0;
+               bescr = mfspr(SPRN_BESCR);
+               /* expect to see a S->T transition requested */
+               if (((bescr >> 30) & 3) != 2)
+                       return 0;
+               bescr &= ~BESCR_GE;
+               if (instr & (1 << 11))
+                       bescr |= BESCR_GE;
+               mtspr(SPRN_BESCR, bescr);
+               msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+               vcpu->arch.shregs.msr = msr;
+               vcpu->arch.cfar = vcpu->arch.pc - 4;
+               vcpu->arch.pc = mfspr(SPRN_EBBRR);
+               return 1;
+
+       case PPC_INST_MTMSRD:
+               /* XXX do we need to check for PR=0 here? */
+               rs = (instr >> 21) & 0x1f;
+               newmsr = kvmppc_get_gpr(vcpu, rs);
+               msr = vcpu->arch.shregs.msr;
+               /* check this is a Sx -> T1 transition */
+               if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+                       return 0;
+               /* mtmsrd doesn't change LE */
+               newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+               newmsr = sanitize_msr(newmsr);
+               vcpu->arch.shregs.msr = newmsr;
+               return 1;
+
+       case PPC_INST_TSR:
+               /* we know the MSR has the TS field = S (0b01) here */
+               msr = vcpu->arch.shregs.msr;
+               /* check for PR=1 and arch 2.06 bit set in PCR */
+               if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+                       return 0;
+               /* check for TM disabled in the HFSCR or MSR */
+               if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM))
+                       return 0;
+               /* L=1 => tresume => set TS to T (0b10) */
+               if (instr & (1 << 21))
+                       vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+               /* Set CR0 to 0b0010 */
+               vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0x20000000;
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * This is called when we are returning to a guest in TM transactional
+ * state.  We roll the guest state back to the checkpointed state.
+ */
+void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.shregs.msr &= ~MSR_TS_MASK;  /* go to N state */
+       vcpu->arch.pc = vcpu->arch.tfhar;
+       copy_from_checkpoint(vcpu);
+       vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | 0xa0000000;
+}
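
[book3s_hv_tm.c above repeatedly mirrors the pre-instruction MSR[TS] into the top nibble of vcpu->arch.cr, which is what the real tsr./treclaim./trechkpt. do; this builtin file writes fixed nibbles instead (0b0010 for a successful tresume, and 0b1010 in the rollback above, the value a guest's tbegin. sees after a transaction failure). The mirrored form as a hypothetical helper:]

    /* hypothetical helper: set CR0 from MSR[TS], as the emulated insns do */
    static inline void kvmppc_set_cr0_ts(struct kvm_vcpu *vcpu, u64 msr)
    {
            u32 ts = (msr & MSR_TS_MASK) >> MSR_TS_S_LG;   /* 1 = S, 2 = T */

            vcpu->arch.cr = (vcpu->arch.cr & 0x0fffffff) | (ts << 28);
    }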
index 4d8b4d6cebff24390c3921857771c34f57014a8f..fa888bfc347e6e6e10055cd7b2e36a6c5c4ecf2b 100644 (file)
@@ -45,12 +45,6 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_PPC_BOOK3S
        /* mtdec lowers the interrupt line when positive. */
        kvmppc_core_dequeue_dec(vcpu);
-
-       /* POWER4+ triggers a dec interrupt if the value is < 0 */
-       if (vcpu->arch.dec & 0x80000000) {
-               kvmppc_core_queue_dec(vcpu);
-               return;
-       }
 #endif
 
 #ifdef CONFIG_BOOKE
index 52c2053739862d2e7c53210458b0fbc9943f510b..4e387647b5af0a9fc351a675d872d96d7269d99b 100644 (file)
@@ -646,10 +646,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = hv_enabled;
                break;
 #endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
        case KVM_CAP_PPC_HTM:
                r = hv_enabled &&
-                   (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
+                   (!!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) ||
+                    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
                break;
+#endif
        default:
                r = 0;
                break;
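
[With this hunk, KVM_CAP_PPC_HTM reports true for HV guests when the host either exposes HTM to userspace or can emulate it via the POWER9 TM HV assist, and the whole case is compiled out without CONFIG_PPC_TRANSACTIONAL_MEM. A minimal userspace probe, assuming an already-open /dev/kvm descriptor:]

    /* userspace sketch: query the capability on an open /dev/kvm fd */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    static int kvm_has_htm(int kvm_fd)
    {
            return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_HTM) > 0;
    }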
index 3c29c9009bbf2c0a830d32a4fdf353ecb8a34b60..653901042ad7d094c18495f3e71d6c2aacf84e0a 100644 (file)
@@ -22,9 +22,11 @@ ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
 extra-$(CONFIG_PPC64)  += crtsavres.o
 endif
 
+obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
+                              memcpy_power7.o
+
 obj64-y        += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-          copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \
-          memcpy_64.o memcmp_64.o pmem.o
+          string_64.o memcpy_64.o memcmp_64.o pmem.o
 
 obj64-$(CONFIG_SMP)    += locks.o
 obj64-$(CONFIG_ALTIVEC)        += vmx-helper.o
index 4bcc9e76fb55658d78bdb803b2806371e0ad3c7d..8d5034f645f3ff599942e44a8740708c70dcf40b 100644 (file)
@@ -21,7 +21,9 @@ _GLOBAL_TOC(copy_page)
 BEGIN_FTR_SECTION
        lis     r5,PAGE_SIZE@h
 FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
        b       copypage_power7
+#endif
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
        ori     r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
index ca5fc8fa7efc9207372da8d36c517a349432fa4f..8fa73b7ab20ee8a3b028e6a7ea13bfa556903a48 100644 (file)
@@ -42,8 +42,6 @@ _GLOBAL(copypage_power7)
        lis     r8,0x8000       /* GO=1 */
        clrldi  r8,r8,32
 
-.machine push
-.machine "power4"
        /* setup read stream 0  */
        dcbt    0,r4,0b01000    /* addr from */
        dcbt    0,r7,0b01010   /* length and depth from */
@@ -52,7 +50,6 @@ _GLOBAL(copypage_power7)
        dcbtst  0,r10,0b01010  /* length and depth to */
        eieio
        dcbt    0,r8,0b01010    /* all streams GO */
-.machine pop
 
 #ifdef CONFIG_ALTIVEC
        mflr    r0
index 08da06e1bd729c3374493cf570ce053fb5137e79..506677395681a9a25d304378b1602c75bb980cbc 100644 (file)
 
        .align  7
 _GLOBAL_TOC(__copy_tofrom_user)
+#ifdef CONFIG_PPC_BOOK3S_64
 BEGIN_FTR_SECTION
        nop
 FTR_SECTION_ELSE
        b       __copy_tofrom_user_power7
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#endif
 _GLOBAL(__copy_tofrom_user_base)
        /* first check for a whole page copy on a page boundary */
        cmpldi  cr1,r5,16
index d416a4a6657858809a9e54d0fea77cb49d8976a3..215e4760c09fff491cdfa80e0258a428aaf91b81 100644 (file)
@@ -312,8 +312,6 @@ err1;       stb     r0,0(r3)
        lis     r8,0x8000       /* GO=1 */
        clrldi  r8,r8,32
 
-.machine push
-.machine "power4"
        /* setup read stream 0 */
        dcbt    0,r6,0b01000   /* addr from */
        dcbt    0,r7,0b01010   /* length and depth from */
@@ -322,7 +320,6 @@ err1;       stb     r0,0(r3)
        dcbtst  0,r10,0b01010  /* length and depth to */
        eieio
        dcbt    0,r8,0b01010    /* all streams GO */
-.machine pop
 
        beq     cr1,.Lunwind_stack_nonvmx_copy
 
index 73697c4e34681b59ec2f1caaf22136ac07dbacf8..35f80ab7cbd83c6fbd7eca53955d4e14839ff837 100644 (file)
@@ -153,7 +153,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
                patch_instruction(dest + 2, instrs[2]);
        }
 
-       printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i);
+       printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
+               (types == L1D_FLUSH_NONE)       ? "no" :
+               (types == L1D_FLUSH_FALLBACK)   ? "fallback displacement" :
+               (types &  L1D_FLUSH_ORI)        ? (types & L1D_FLUSH_MTTRIG)
+                                                       ? "ori+mttrig type"
+                                                       : "ori type" :
+               (types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+                                               : "unknown");
 }
 #endif /* CONFIG_PPC_BOOK3S_64 */
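
[The chained conditional expression maps the L1D flush-type bitmask to a label for the boot log. The same mapping written out as a helper, as a readability sketch only; the kernel keeps the expression form above:]

    static const char *l1d_flush_label(enum l1d_flush_type types)
    {
            if (types == L1D_FLUSH_NONE)
                    return "no";
            if (types == L1D_FLUSH_FALLBACK)
                    return "fallback displacement";
            if (types & L1D_FLUSH_ORI)
                    return (types & L1D_FLUSH_MTTRIG) ? "ori+mttrig type"
                                                      : "ori type";
            if (types & L1D_FLUSH_MTTRIG)
                    return "mttrig type";
            return "unknown";
    }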
 
index f4d6088e2d5390b937f60e35819496dfeab17858..8d8265be1a5941745eedd80edc26c01826eccba9 100644 (file)
@@ -19,9 +19,11 @@ BEGIN_FTR_SECTION
        std     r3,-STACKFRAMESIZE+STK_REG(R31)(r1)     /* save destination pointer for return value */
 #endif
 FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
 #ifndef SELFTEST
        b       memcpy_power7
 #endif
+#endif
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
 #ifdef __LITTLE_ENDIAN__
        /* dumb little-endian memcpy that will get replaced at runtime */
index 193909abd18b3edea2170601081753f70cc39577..df7de9d3da087fc987e712bf40de3c557027c94e 100644 (file)
@@ -259,15 +259,12 @@ _GLOBAL(memcpy_power7)
        lis     r8,0x8000       /* GO=1 */
        clrldi  r8,r8,32
 
-.machine push
-.machine "power4"
        dcbt    0,r6,0b01000
        dcbt    0,r7,0b01010
        dcbtst  0,r9,0b01000
        dcbtst  0,r10,0b01010
        eieio
        dcbt    0,r8,0b01010    /* GO */
-.machine pop
 
        beq     cr1,.Lunwind_stack_nonvmx_copy
 
index 70274b7b4773a67be2bf3794e95a1976471b7b19..34d68f1b1b405c604b4a0d4bca6ddd87397c6f2d 100644 (file)
@@ -280,7 +280,7 @@ static nokprobe_inline int read_mem_aligned(unsigned long *dest,
  * Copy from userspace to a buffer, using the largest possible
  * aligned accesses, up to sizeof(long).
  */
-static int nokprobe_inline copy_mem_in(u8 *dest, unsigned long ea, int nb,
+static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb,
                                       struct pt_regs *regs)
 {
        int err = 0;
@@ -385,7 +385,7 @@ static nokprobe_inline int write_mem_aligned(unsigned long val,
  * Copy from a buffer to userspace, using the largest possible
  * aligned accesses, up to sizeof(long).
  */
-static int nokprobe_inline copy_mem_out(u8 *dest, unsigned long ea, int nb,
+static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb,
                                        struct pt_regs *regs)
 {
        int err = 0;
index 849f50cd62f289d82500ce05a8d97fb7829cee34..cf77d755246db6e4a57a0d41a0dd0ebc7adfb4fa 100644 (file)
@@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
        mtspr(SPRN_M_TW, __pa(pgd) - offset);
 
        /* Update context */
-       mtspr(SPRN_M_CASID, id);
+       mtspr(SPRN_M_CASID, id - 1);
        /* sync */
        mb();
 }
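
[The "id - 1" pairs with the mmu_context_nohash.c hunk later in this diff, where the 8xx now hands out context ids 1..16 instead of 0..15: id 0 is reserved as the "fresh mm, slices not yet initialized" marker tested in init_new_context(), so the value programmed into the hardware register is shifted back into its 0..15 range. In short:]

    /* illustrative: the new id mapping on 8xx */
    unsigned int hw_casid = id - 1;    /* software 1..16 -> hardware 0..15 */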
index 697b70ad11950ab2e7f3cfdc971a9d4e33c6f6d2..7d0945bd3a61b6630b8e1958f36bd769b27eca8d 100644 (file)
@@ -112,7 +112,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
                        return 1;
                psize = get_slice_psize(mm, ea);
                ssize = user_segment_size(ea);
-               vsid = get_vsid(mm->context.id, ea, ssize);
+               vsid = get_user_vsid(&mm->context, ea, ssize);
                vsidkey = SLB_VSID_USER;
                break;
        case VMALLOC_REGION_ID:
index 866446cf2d9abd5ae1b0a5ebc1076e16feca3f4c..c01d627e687ae1952c297f4f7b1225a62fc8956c 100644 (file)
@@ -297,7 +297,12 @@ static bool access_error(bool is_write, bool is_exec,
 
        if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
                return true;
-
+       /*
+        * We should ideally do the vma pkey access check here. But in the
+        * fault path, handle_mm_fault() also does the same check. To avoid
+        * these multiple checks, we skip it here and handle access error due
+        * to pkeys later.
+        */
        return false;
 }
 
@@ -518,25 +523,16 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 
 #ifdef CONFIG_PPC_MEM_KEYS
        /*
-        * if the HPTE is not hashed, hardware will not detect
-        * a key fault. Lets check if we failed because of a
-        * software detected key fault.
+        * We skipped checking for access errors due to keys earlier.
+        * Check for them here using the handle_mm_fault() error return.
         */
        if (unlikely(fault & VM_FAULT_SIGSEGV) &&
-               !arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
-                       is_exec, 0)) {
-               /*
-                * The PGD-PDT...PMD-PTE tree may not have been fully setup.
-                * Hence we cannot walk the tree to locate the PTE, to locate
-                * the key. Hence let's use vma_pkey() to get the key; instead
-                * of get_mm_addr_key().
-                */
+               !arch_vma_access_permitted(vma, is_write, is_exec, 0)) {
+
                int pkey = vma_pkey(vma);
 
-               if (likely(pkey)) {
-                       up_read(&mm->mmap_sem);
-                       return bad_key_fault_exception(regs, address, pkey);
-               }
+               up_read(&mm->mmap_sem);
+               return bad_key_fault_exception(regs, address, pkey);
        }
 #endif /* CONFIG_PPC_MEM_KEYS */
 
index 656933c859256bff1c9f1848830c0616033218aa..1d049c78c82a957bf3346bf7269dc392f8667a99 100644 (file)
@@ -866,18 +866,6 @@ static void native_flush_hash_range(unsigned long number, int local)
        local_irq_restore(flags);
 }
 
-static int native_register_proc_table(unsigned long base, unsigned long page_size,
-                                     unsigned long table_size)
-{
-       unsigned long patb1 = base << 25; /* VSID */
-
-       patb1 |= (page_size << 5);  /* sllp */
-       patb1 |= table_size;
-
-       partition_tb->patb1 = cpu_to_be64(patb1);
-       return 0;
-}
-
 void __init hpte_init_native(void)
 {
        mmu_hash_ops.hpte_invalidate    = native_hpte_invalidate;
@@ -889,7 +877,4 @@ void __init hpte_init_native(void)
        mmu_hash_ops.hpte_clear_all     = native_hpte_clear;
        mmu_hash_ops.flush_hash_range = native_flush_hash_range;
        mmu_hash_ops.hugepage_invalidate   = native_hugepage_invalidate;
-
-       if (cpu_has_feature(CPU_FTR_ARCH_300))
-               register_process_table = native_register_proc_table;
 }
index cf290d415dcd8e9e314c63134c49cbd687e63fd7..0bd3790d35df419d1622f10754abd38be1575963 100644 (file)
@@ -132,9 +132,10 @@ EXPORT_SYMBOL(mmu_hash_ops);
  * is provided by the firmware.
  */
 
-/* Pre-POWER4 CPUs (4k pages only)
+/*
+ * Fallback (4k pages only)
  */
-static struct mmu_psize_def mmu_psize_defaults_old[] = {
+static struct mmu_psize_def mmu_psize_defaults[] = {
        [MMU_PAGE_4K] = {
                .shift  = 12,
                .sllp   = 0,
@@ -554,8 +555,8 @@ static void __init htab_scan_page_sizes(void)
        mmu_psize_set_default_penc();
 
        /* Default to 4K pages only */
-       memcpy(mmu_psize_defs, mmu_psize_defaults_old,
-              sizeof(mmu_psize_defaults_old));
+       memcpy(mmu_psize_defs, mmu_psize_defaults,
+              sizeof(mmu_psize_defaults));
 
        /*
         * Try to find the available page sizes in the device-tree
@@ -781,7 +782,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
        }
 }
 
-int hash__create_section_mapping(unsigned long start, unsigned long end)
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
        int rc = htab_bolt_mapping(start, end, __pa(start),
                                   pgprot_val(PAGE_KERNEL), mmu_linear_psize,
@@ -875,6 +876,12 @@ static void __init htab_initialize(void)
                /* Using a hypervisor which owns the htab */
                htab_address = NULL;
                _SDR1 = 0; 
+               /*
+                * On POWER9, we need to do an H_REGISTER_PROC_TBL hcall
+                * to inform the hypervisor that we wish to use the HPT.
+                */
+               if (cpu_has_feature(CPU_FTR_ARCH_300))
+                       register_process_table(0, 0, 0);
 #ifdef CONFIG_FA_DUMP
                /*
                 * If firmware assisted dump is active firmware preserves
@@ -1110,19 +1117,18 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 #ifdef CONFIG_PPC_MM_SLICES
 static unsigned int get_paca_psize(unsigned long addr)
 {
-       u64 lpsizes;
-       unsigned char *hpsizes;
+       unsigned char *psizes;
        unsigned long index, mask_index;
 
        if (addr < SLICE_LOW_TOP) {
-               lpsizes = get_paca()->mm_ctx_low_slices_psize;
+               psizes = get_paca()->mm_ctx_low_slices_psize;
                index = GET_LOW_SLICE_INDEX(addr);
-               return (lpsizes >> (index * 4)) & 0xF;
+       } else {
+               psizes = get_paca()->mm_ctx_high_slices_psize;
+               index = GET_HIGH_SLICE_INDEX(addr);
        }
-       hpsizes = get_paca()->mm_ctx_high_slices_psize;
-       index = GET_HIGH_SLICE_INDEX(addr);
        mask_index = index & 0x1;
-       return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
+       return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
 }
 
 #else
@@ -1262,7 +1268,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
                }
                psize = get_slice_psize(mm, ea);
                ssize = user_segment_size(ea);
-               vsid = get_vsid(mm->context.id, ea, ssize);
+               vsid = get_user_vsid(&mm->context, ea, ssize);
                break;
        case VMALLOC_REGION_ID:
                vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
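
[Each get_vsid(mm->context.id, ...) call site becomes get_user_vsid(&mm->context, ...) because, with the larger user address space added in this merge, one mm can own several context ids (the extended_id[] array freed in destroy_contexts() below). Roughly, as a sketch, where the shift width and field selection are assumptions rather than the literal implementation:]

    /* sketch: pick the context id covering this chunk of the EA space */
    static inline unsigned long get_user_vsid(mm_context_t *ctx,
                                              unsigned long ea, int ssize)
    {
            unsigned long context = ctx->extended_id[ea >> 49]; /* assumed */

            return get_vsid(context, ea, ssize);
    }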
@@ -1527,7 +1533,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
 
        /* Get VSID */
        ssize = user_segment_size(ea);
-       vsid = get_vsid(mm->context.id, ea, ssize);
+       vsid = get_user_vsid(&mm->context, ea, ssize);
        if (!vsid)
                return;
        /*
index 3a08d211d2ee0d4d214a2fafaaf77739f73ad5a9..f1153f8254e3d79fe34b970df96a54de7088e225 100644 (file)
@@ -122,9 +122,6 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
 #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
 #define HUGEPD_PGD_SHIFT PGDIR_SHIFT
 #define HUGEPD_PUD_SHIFT PUD_SHIFT
-#else
-#define HUGEPD_PGD_SHIFT PUD_SHIFT
-#define HUGEPD_PUD_SHIFT PMD_SHIFT
 #endif
 
 /*
@@ -553,9 +550,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
        struct hstate *hstate = hstate_file(file);
        int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
 
+#ifdef CONFIG_PPC_RADIX_MMU
        if (radix_enabled())
                return radix__hugetlb_get_unmapped_area(file, addr, len,
                                                       pgoff, flags);
+#endif
        return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
 }
 #endif
@@ -563,10 +562,12 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 {
 #ifdef CONFIG_PPC_MM_SLICES
-       unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
        /* With radix we don't use slice, so derive it from vma*/
-       if (!radix_enabled())
+       if (!radix_enabled()) {
+               unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
+
                return 1UL << mmu_psize_to_shift(psize);
+       }
 #endif
        return vma_kernel_pagesize(vma);
 }
@@ -663,15 +664,26 @@ static int __init hugetlbpage_init(void)
 
                shift = mmu_psize_to_shift(psize);
 
-               if (add_huge_page_size(1ULL << shift) < 0)
+#ifdef CONFIG_PPC_BOOK3S_64
+               if (shift > PGDIR_SHIFT)
                        continue;
-
+               else if (shift > PUD_SHIFT)
+                       pdshift = PGDIR_SHIFT;
+               else if (shift > PMD_SHIFT)
+                       pdshift = PUD_SHIFT;
+               else
+                       pdshift = PMD_SHIFT;
+#else
                if (shift < HUGEPD_PUD_SHIFT)
                        pdshift = PMD_SHIFT;
                else if (shift < HUGEPD_PGD_SHIFT)
                        pdshift = PUD_SHIFT;
                else
                        pdshift = PGDIR_SHIFT;
+#endif
+
+               if (add_huge_page_size(1ULL << shift) < 0)
+                       continue;
                /*
                 * If pdshift and shift have the same value, we don't
                 * use the page table cache for hugepd.
index 6419b33ca3099f5d73ffcbb451bbd2f6b8a90401..3e59e5d64b014d11b2d542e6eddc48b6a93e1b3b 100644 (file)
@@ -88,18 +88,13 @@ void MMU_init(void);
 int __map_without_bats;
 int __map_without_ltlbs;
 
-/*
- * This tells the system to allow ioremapping memory marked as reserved.
- */
-int __allow_ioremap_reserved;
-
 /* max amount of low RAM to map in */
 unsigned long __max_low_memory = MAX_LOW_MEM;
 
 /*
  * Check for command-line options that affect what MMU_init will do.
  */
-void __init MMU_setup(void)
+static void __init MMU_setup(void)
 {
        /* Check for nobats option (used in mapin_ram). */
        if (strstr(boot_command_line, "nobats")) {
index fdb424a29f0358743ed2d03907065f75a9bc1e72..51ce091914f9760b85853d7f197bceb5ef73e02a 100644 (file)
 
 #include "mmu_decl.h"
 
-#ifdef CONFIG_PPC_BOOK3S_64
-#if H_PGTABLE_RANGE > USER_VSID_RANGE
-#warning Limited user VSID range means pagetable space is wasted
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
 phys_addr_t memstart_addr = ~0;
 EXPORT_SYMBOL_GPL(memstart_addr);
 phys_addr_t kernstart_addr;
@@ -372,7 +366,7 @@ static int __init parse_disable_radix(char *p)
 {
        bool val;
 
-       if (strlen(p) == 0)
+       if (!p)
                val = true;
        else if (kstrtobool(p, &val))
                return -EINVAL;
index fe8c61149fb8230ea71e2231281beaaf79d2031b..737f8a4632ccc68abfdd61a1900380834b125340 100644 (file)
@@ -82,17 +82,7 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr)
 
 int page_is_ram(unsigned long pfn)
 {
-#ifndef CONFIG_PPC64   /* XXX for now */
-       return pfn < max_pfn;
-#else
-       unsigned long paddr = (pfn << PAGE_SHIFT);
-       struct memblock_region *reg;
-
-       for_each_memblock(memory, reg)
-               if (paddr >= reg->base && paddr < (reg->base + reg->size))
-                       return 1;
-       return 0;
-#endif
+       return memblock_is_memory(__pfn_to_phys(pfn));
 }
 
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -117,7 +107,7 @@ int memory_add_physaddr_to_nid(u64 start)
 }
 #endif
 
-int __weak create_section_mapping(unsigned long start, unsigned long end)
+int __weak create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
        return -ENODEV;
 }
@@ -127,7 +117,7 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
        return -ENODEV;
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
                bool want_memblock)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
@@ -137,7 +127,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
        resize_hpt_for_hotplug(memblock_phys_mem_size());
 
        start = (unsigned long)__va(start);
-       rc = create_section_mapping(start, start + size);
+       rc = create_section_mapping(start, start + size, nid);
        if (rc) {
                pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
                        start, start + size, rc);
@@ -148,7 +138,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
+int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -212,7 +202,7 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
 EXPORT_SYMBOL_GPL(walk_system_ram_range);
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
 {
        max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
        min_low_pfn = MEMORY_START >> PAGE_SHIFT;
@@ -224,7 +214,10 @@ void __init initmem_init(void)
         * memblock_regions
         */
        memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
+}
 
+void __init initmem_init(void)
+{
        /* XXX need to clip this if using highmem? */
        sparse_memory_present_with_active_regions(0);
        sparse_init();
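
[Splitting mem_topology_setup() out of initmem_init(), here in the non-NUMA path and again in numa.c below, makes node topology and the CPU-to-node map available earlier in boot, which is evidently what lets the paca, kernel page-table and per-cpu stack allocations elsewhere in this merge pick the right node. The assumed early-boot ordering, sketched with names from this merge:]

    mem_topology_setup();   /* parse NUMA properties, map present CPUs to nodes */
    /* ... pacas, page tables, per-cpu stacks allocated node-locally ... */
    initmem_init();         /* memblock dump, sparse memory model setup */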
index 3f980baade4c19c27d1f6f42d4d0ee6a1893a4a7..b75194dff64c28a836876facdd1b9962fdaefd7a 100644 (file)
@@ -93,13 +93,6 @@ static int hash__init_new_context(struct mm_struct *mm)
        if (index < 0)
                return index;
 
-       /*
-        * In the case of exec, use the default limit,
-        * otherwise inherit it from the mm we are duplicating.
-        */
-       if (!mm->context.slb_addr_limit)
-               mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-
        /*
         * The old code would re-promote on fork, we don't do that when using
         * slices as it could cause problem promoting slices that have been
@@ -115,7 +108,7 @@ static int hash__init_new_context(struct mm_struct *mm)
         * check against 0 is OK.
         */
        if (mm->context.id == 0)
-               slice_set_user_psize(mm, mmu_virtual_psize);
+               slice_init_new_context_exec(mm);
 
        subpage_prot_init_new_context(mm);
 
@@ -186,6 +179,19 @@ void __destroy_context(int context_id)
 }
 EXPORT_SYMBOL_GPL(__destroy_context);
 
+static void destroy_contexts(mm_context_t *ctx)
+{
+       int index, context_id;
+
+       spin_lock(&mmu_context_lock);
+       for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+               context_id = ctx->extended_id[index];
+               if (context_id)
+                       ida_remove(&mmu_context_ida, context_id);
+       }
+       spin_unlock(&mmu_context_lock);
+}
+
 #ifdef CONFIG_PPC_64K_PAGES
 static void destroy_pagetable_page(struct mm_struct *mm)
 {
@@ -224,7 +230,7 @@ void destroy_context(struct mm_struct *mm)
        else
                subpage_prot_free(mm);
        destroy_pagetable_page(mm);
-       __destroy_context(mm->context.id);
+       destroy_contexts(&mm->context);
        mm->context.id = MMU_NO_CONTEXT;
 }
 
index 4554d65276826e9719e7b69529003f7873483582..be8f5c9d4d088fad3539b1ffeb9ea3fed3dc4ff7 100644 (file)
@@ -331,6 +331,17 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
 {
        pr_hard("initing context for mm @%p\n", mm);
 
+#ifdef CONFIG_PPC_MM_SLICES
+       /*
+        * We have MMU_NO_CONTEXT set to be ~0. Hence check
+        * explicitly against context.id == 0. This ensures that we properly
+        * initialize context slice details for newly allocated mm's (which will
+        * have id == 0) and don't alter context slice inherited via fork (which
+        * will have id != 0).
+        */
+       if (mm->context.id == 0)
+               slice_init_new_context_exec(mm);
+#endif
        mm->context.id = MMU_NO_CONTEXT;
        mm->context.active = 0;
        return 0;
@@ -428,8 +439,8 @@ void __init mmu_context_init(void)
         *      -- BenH
         */
        if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
-               first_context = 0;
-               last_context = 15;
+               first_context = 1;
+               last_context = 16;
                no_selective_tlbil = true;
        } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
                first_context = 1;
index 57fbc554c78589f4e076ccfe647a2bb6ebd9011a..c4c0a09a77752273d1b763663bcea112c343b2e8 100644 (file)
@@ -98,7 +98,6 @@ extern void setbat(int index, unsigned long virt, phys_addr_t phys,
                   unsigned int size, pgprot_t prot);
 
 extern int __map_without_bats;
-extern int __allow_ioremap_reserved;
 extern unsigned int rtas_data, rtas_size;
 
 struct hash_pte;
index edd8d0bc9364f2843688498b221d90f53647390d..57a5029b4521b0ea0f179b259ee3bef4dc0cfde5 100644 (file)
@@ -831,18 +831,13 @@ static void __init find_possible_nodes(void)
        of_node_put(rtas);
 }
 
-void __init initmem_init(void)
+void __init mem_topology_setup(void)
 {
-       int nid, cpu;
-
-       max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
-       max_pfn = max_low_pfn;
+       int cpu;
 
        if (parse_numa_properties())
                setup_nonnuma();
 
-       memblock_dump_all();
-
        /*
         * Modify the set of possible NUMA nodes to reflect information
         * available about the set of online nodes, and the set of nodes
@@ -853,6 +848,23 @@ void __init initmem_init(void)
 
        find_possible_nodes();
 
+       setup_node_to_cpumask_map();
+
+       reset_numa_cpu_lookup_table();
+
+       for_each_present_cpu(cpu)
+               numa_setup_cpu(cpu);
+}
+
+void __init initmem_init(void)
+{
+       int nid;
+
+       max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+       max_pfn = max_low_pfn;
+
+       memblock_dump_all();
+
        for_each_online_node(nid) {
                unsigned long start_pfn, end_pfn;
 
@@ -863,10 +875,6 @@ void __init initmem_init(void)
 
        sparse_init();
 
-       setup_node_to_cpumask_map();
-
-       reset_numa_cpu_lookup_table();
-
        /*
         * We need the numa_cpu_lookup_table to be accurate for all CPUs,
         * even before we online them, so that we can use cpu_to_{node,mem}
@@ -876,8 +884,6 @@ void __init initmem_init(void)
         */
        cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
                                  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
-       for_each_present_cpu(cpu)
-               numa_setup_cpu(cpu);
 }
 
 static int __init early_numa(char *p)
@@ -1105,7 +1111,7 @@ static void setup_cpu_associativity_change_counters(void)
        for_each_possible_cpu(cpu) {
                int i;
                u8 *counts = vphn_cpu_change_counts[cpu];
-               volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+               volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
 
                for (i = 0; i < distance_ref_points_depth; i++)
                        counts[i] = hypervisor_counts[i];
@@ -1131,7 +1137,7 @@ static int update_cpu_associativity_changes_mask(void)
        for_each_possible_cpu(cpu) {
                int i, changed = 0;
                u8 *counts = vphn_cpu_change_counts[cpu];
-               volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+               volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
 
                for (i = 0; i < distance_ref_points_depth; i++) {
                        if (hypervisor_counts[i] != counts[i]) {
index 422e80253a3336380a4a42b3c72783ff447c3077..518518fb7c45afacc44dddc4a906ae46236c4066 100644 (file)
@@ -155,15 +155,15 @@ void mmu_cleanup_all(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int create_section_mapping(unsigned long start, unsigned long end)
+int __meminit create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
        if (radix_enabled())
-               return radix__create_section_mapping(start, end);
+               return radix__create_section_mapping(start, end, nid);
 
-       return hash__create_section_mapping(start, end);
+       return hash__create_section_mapping(start, end, nid);
 }
 
-int remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit remove_section_mapping(unsigned long start, unsigned long end)
 {
        if (radix_enabled())
                return radix__remove_section_mapping(start, end);
index 469808e77e58b271e716d54241a016cb711f0836..199bfda5f0d96dd8b85a567dd4966ee3fca33965 100644 (file)
 #define CREATE_TRACE_POINTS
 #include <trace/events/thp.h>
 
+#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
  * vmemmap is the starting address of the virtual address space where
@@ -320,7 +324,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 
        if (!is_kernel_addr(addr)) {
                ssize = user_segment_size(addr);
-               vsid = get_vsid(mm->context.id, addr, ssize);
+               vsid = get_user_vsid(&mm->context, addr, ssize);
                WARN_ON(vsid == 0);
        } else {
                vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
index 2e10a964e29080149fe60b5d9a2220fdc710bb79..f1891e215e39e77994182d0e2ed6b0fa2c4b7bc5 100644 (file)
@@ -48,20 +48,88 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
        return 0;
 }
 
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+                       unsigned long region_start, unsigned long region_end)
 {
+       unsigned long pa = 0;
        void *pt;
 
-       pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+       if (region_start || region_end) /* has region hint */
+               pa = memblock_alloc_range(size, size, region_start, region_end,
+                                               MEMBLOCK_NONE);
+       else if (nid != -1) /* has node hint */
+               pa = memblock_alloc_base_nid(size, size,
+                                               MEMBLOCK_ALLOC_ANYWHERE,
+                                               nid, MEMBLOCK_NONE);
+
+       if (!pa)
+               pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+       BUG_ON(!pa);
+
+       pt = __va(pa);
        memset(pt, 0, size);
 
        return pt;
 }
 
-int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
                          pgprot_t flags,
-                         unsigned int map_page_size)
+                         unsigned int map_page_size,
+                         int nid,
+                         unsigned long region_start, unsigned long region_end)
 {
+       unsigned long pfn = pa >> PAGE_SHIFT;
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
+
+       pgdp = pgd_offset_k(ea);
+       if (pgd_none(*pgdp)) {
+               pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
+                                               region_start, region_end);
+               pgd_populate(&init_mm, pgdp, pudp);
+       }
+       pudp = pud_offset(pgdp, ea);
+       if (map_page_size == PUD_SIZE) {
+               ptep = (pte_t *)pudp;
+               goto set_the_pte;
+       }
+       if (pud_none(*pudp)) {
+               pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
+                                               region_start, region_end);
+               pud_populate(&init_mm, pudp, pmdp);
+       }
+       pmdp = pmd_offset(pudp, ea);
+       if (map_page_size == PMD_SIZE) {
+               ptep = pmdp_ptep(pmdp);
+               goto set_the_pte;
+       }
+       if (!pmd_present(*pmdp)) {
+               ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+                                               region_start, region_end);
+               pmd_populate_kernel(&init_mm, pmdp, ptep);
+       }
+       ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+       set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+       smp_wmb();
+       return 0;
+}
+
+/*
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
+                         pgprot_t flags,
+                         unsigned int map_page_size,
+                         int nid,
+                         unsigned long region_start, unsigned long region_end)
+{
+       unsigned long pfn = pa >> PAGE_SHIFT;
        pgd_t *pgdp;
        pud_t *pudp;
        pmd_t *pmdp;
@@ -70,61 +138,48 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
         * Make sure task size is correct as per the max addr
         */
        BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
-       if (slab_is_available()) {
-               pgdp = pgd_offset_k(ea);
-               pudp = pud_alloc(&init_mm, pgdp, ea);
-               if (!pudp)
-                       return -ENOMEM;
-               if (map_page_size == PUD_SIZE) {
-                       ptep = (pte_t *)pudp;
-                       goto set_the_pte;
-               }
-               pmdp = pmd_alloc(&init_mm, pudp, ea);
-               if (!pmdp)
-                       return -ENOMEM;
-               if (map_page_size == PMD_SIZE) {
-                       ptep = pmdp_ptep(pmdp);
-                       goto set_the_pte;
-               }
-               ptep = pte_alloc_kernel(pmdp, ea);
-               if (!ptep)
-                       return -ENOMEM;
-       } else {
-               pgdp = pgd_offset_k(ea);
-               if (pgd_none(*pgdp)) {
-                       pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
-                       BUG_ON(pudp == NULL);
-                       pgd_populate(&init_mm, pgdp, pudp);
-               }
-               pudp = pud_offset(pgdp, ea);
-               if (map_page_size == PUD_SIZE) {
-                       ptep = (pte_t *)pudp;
-                       goto set_the_pte;
-               }
-               if (pud_none(*pudp)) {
-                       pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
-                       BUG_ON(pmdp == NULL);
-                       pud_populate(&init_mm, pudp, pmdp);
-               }
-               pmdp = pmd_offset(pudp, ea);
-               if (map_page_size == PMD_SIZE) {
-                       ptep = pmdp_ptep(pmdp);
-                       goto set_the_pte;
-               }
-               if (!pmd_present(*pmdp)) {
-                       ptep = early_alloc_pgtable(PAGE_SIZE);
-                       BUG_ON(ptep == NULL);
-                       pmd_populate_kernel(&init_mm, pmdp, ptep);
-               }
-               ptep = pte_offset_kernel(pmdp, ea);
+
+       if (unlikely(!slab_is_available()))
+               return early_map_kernel_page(ea, pa, flags, map_page_size,
+                                               nid, region_start, region_end);
+
+       /*
+        * The page table allocation functions should be taught to take a
+        * node, so that we can place kernel page tables on the right nodes
+        * after boot.
+        */
+       pgdp = pgd_offset_k(ea);
+       pudp = pud_alloc(&init_mm, pgdp, ea);
+       if (!pudp)
+               return -ENOMEM;
+       if (map_page_size == PUD_SIZE) {
+               ptep = (pte_t *)pudp;
+               goto set_the_pte;
+       }
+       pmdp = pmd_alloc(&init_mm, pudp, ea);
+       if (!pmdp)
+               return -ENOMEM;
+       if (map_page_size == PMD_SIZE) {
+               ptep = pmdp_ptep(pmdp);
+               goto set_the_pte;
        }
+       ptep = pte_alloc_kernel(pmdp, ea);
+       if (!ptep)
+               return -ENOMEM;
 
 set_the_pte:
-       set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+       set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
        smp_wmb();
        return 0;
 }
 
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+                         pgprot_t flags,
+                         unsigned int map_page_size)
+{
+       return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void radix__change_memory_range(unsigned long start, unsigned long end,
                                unsigned long clear)
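radix__map_kernel_page() is split into an early (memblock) path and a slab path, with __map_kernel_page() dispatching on slab_is_available(). Reading the call sites in this diff together, the placement-hint convention appears to be (a summary of the hunks, not code from the patch):

        __map_kernel_page(ea, pa, prot, size, -1,  0,     0);   /* no hint */
        __map_kernel_page(ea, pa, prot, size, nid, 0,     0);   /* node hint */
        __map_kernel_page(ea, pa, prot, size, nid, start, end); /* region hint */

early_alloc_pgtable() then tries the region first, the node second, and falls back to allocating anywhere.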
@@ -211,7 +266,8 @@ static inline void __meminit print_mapping(unsigned long start,
 }
 
 static int __meminit create_physical_mapping(unsigned long start,
-                                            unsigned long end)
+                                            unsigned long end,
+                                            int nid)
 {
        unsigned long vaddr, addr, mapping_size = 0;
        pgprot_t prot;
@@ -267,7 +323,7 @@ static int __meminit create_physical_mapping(unsigned long start,
                else
                        prot = PAGE_KERNEL;
 
-               rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
+               rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
                if (rc)
                        return rc;
        }
@@ -276,7 +332,7 @@ static int __meminit create_physical_mapping(unsigned long start,
        return 0;
 }
 
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
 {
        unsigned long rts_field;
        struct memblock_region *reg;
@@ -286,9 +342,16 @@ static void __init radix_init_pgtable(void)
        /*
         * Create the linear mapping, using standard page size for now
         */
-       for_each_memblock(memory, reg)
+       for_each_memblock(memory, reg) {
+               /*
+                * The memblock allocator is up at this point, so the
+                * page tables will be allocated within the range. No
+                * need for a node (which we don't have yet).
+                */
                WARN_ON(create_physical_mapping(reg->base,
-                                               reg->base + reg->size));
+                                               reg->base + reg->size,
+                                               -1));
+       }
 
        /* Find out how many PID bits are supported */
        if (cpu_has_feature(CPU_FTR_HVMODE)) {
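The linear mapping is still created with nid -1: this early in boot there is no NUMA information yet, and (per the comment above) memblock confines the page-table allocations to the region being mapped anyway. The process-table allocation below likewise passes no hints.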
@@ -317,7 +380,7 @@ static void __init radix_init_pgtable(void)
         * host.
         */
        BUG_ON(PRTB_SIZE_SHIFT > 36);
-       process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+       process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
        /*
         * Fill in the process table.
         */
@@ -575,12 +638,8 @@ void __init radix__early_init_mmu(void)
 #ifdef CONFIG_PCI
        pci_io_base = ISA_IO_BASE;
 #endif
-
-       /*
-        * For now radix also use the same frag size
-        */
-       __pte_frag_nr = H_PTE_FRAG_NR;
-       __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+       __pte_frag_nr = RADIX_PTE_FRAG_NR;
+       __pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
 
        if (!firmware_has_feature(FW_FEATURE_LPAR)) {
                radix_init_native();
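Radix stops borrowing the hash PTE-fragment geometry (H_PTE_FRAG_NR / H_PTE_FRAG_SIZE_SHIFT) and gets dedicated RADIX_PTE_FRAG_* constants, so the two MMUs can size their PTE page fragments independently.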
@@ -695,7 +754,7 @@ struct change_mapping_params {
        unsigned long aligned_end;
 };
 
-static int stop_machine_change_mapping(void *data)
+static int __meminit stop_machine_change_mapping(void *data)
 {
        struct change_mapping_params *params =
                        (struct change_mapping_params *)data;
@@ -705,8 +764,8 @@ static int stop_machine_change_mapping(void *data)
 
        spin_unlock(&init_mm.page_table_lock);
        pte_clear(&init_mm, params->aligned_start, params->pte);
-       create_physical_mapping(params->aligned_start, params->start);
-       create_physical_mapping(params->end, params->aligned_end);
+       create_physical_mapping(params->aligned_start, params->start, -1);
+       create_physical_mapping(params->end, params->aligned_end, -1);
        spin_lock(&init_mm.page_table_lock);
        return 0;
 }
@@ -742,7 +801,7 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
 /*
  * clear the pte and potentially split the mapping helper
  */
-static void split_kernel_mapping(unsigned long addr, unsigned long end,
+static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
                                unsigned long size, pte_t *pte)
 {
        unsigned long mask = ~(size - 1);
@@ -835,7 +894,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
        }
 }
 
-static void remove_pagetable(unsigned long start, unsigned long end)
+static void __meminit remove_pagetable(unsigned long start, unsigned long end)
 {
        unsigned long addr, next;
        pud_t *pud_base;
@@ -863,12 +922,12 @@ static void remove_pagetable(unsigned long start, unsigned long end)
        radix__flush_tlb_kernel_range(start, end);
 }
 
-int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
-       return create_physical_mapping(start, end);
+       return create_physical_mapping(start, end, nid);
 }
 
-int radix__remove_section_mapping(unsigned long start, unsigned long end)
+int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
 {
        remove_pagetable(start, end);
        return 0;
@@ -876,19 +935,30 @@ int radix__remove_section_mapping(unsigned long start, unsigned long end)
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+                                pgprot_t flags, unsigned int map_page_size,
+                                int nid)
+{
+       return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
 int __meminit radix__vmemmap_create_mapping(unsigned long start,
                                      unsigned long page_size,
                                      unsigned long phys)
 {
        /* Create a PTE encoding */
        unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+       int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+       int ret;
+
+       ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
+       BUG_ON(ret);
 
-       BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
        return 0;
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
 {
        remove_pagetable(start, start + page_size);
 }
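radix__vmemmap_create_mapping() now derives the node from the backing physical address via early_pfn_to_nid(), so the page tables for a vmemmap range are allocated on the node whose memory they describe. The __map_kernel_page_nid() wrapper passes the node without a region hint.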
index d35d9ad3c1cd686b3c48d57f51cd1d8bae66acc0..120a49bfb9c645b83d2deb6a635a922f9af9f62a 100644 (file)
@@ -148,7 +148,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
         * mem_init() sets high_memory so only do the check after that.
         */
        if (slab_is_available() && (p < virt_to_phys(high_memory)) &&
-           !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
+           page_is_ram(__phys_to_pfn(p))) {
                printk("__ioremap(): phys addr 0x%llx is RAM lr %ps\n",
                       (unsigned long long)p, __builtin_return_address(0));
                return NULL;
index adf469f312f2044b9b4035a201b63b3da393e6ce..9bf659d5078c85506fa46636bdf8330d7877ba53 100644 (file)
 
 #include "mmu_decl.h"
 
-#ifdef CONFIG_PPC_BOOK3S_64
-#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
-#error TASK_SIZE_USER64 exceeds user VSID range
-#endif
-#endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
 /*
index ba71c5481f42152683ad8a38c714b4c703d88b11..0eafdf01edc7d4bdaf04a7c1021ab19a3b5e808d 100644 (file)
@@ -119,18 +119,15 @@ int pkey_initialize(void)
 #else
        os_reserved = 0;
 #endif
+       initial_allocation_mask = ~0x0;
+       pkey_amr_uamor_mask = ~0x0ul;
+       pkey_iamr_mask = ~0x0ul;
        /*
-        * Bits are in LE format. NOTE: 1, 0 are reserved.
+        * Keys 0 and 1 are reserved.
         * key 0 is the default key, which allows read/write/execute.
         * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
         * programming note.
         */
-       initial_allocation_mask = ~0x0;
-
-       /* register mask is in BE format */
-       pkey_amr_uamor_mask = ~0x0ul;
-       pkey_iamr_mask = ~0x0ul;
-
        for (i = 2; i < (pkeys_total - os_reserved); i++) {
                initial_allocation_mask &= ~(0x1 << i);
                pkey_amr_uamor_mask &= ~(0x3ul << pkeyshift(i));
@@ -308,9 +305,9 @@ void thread_pkey_regs_init(struct thread_struct *thread)
        if (static_branch_likely(&pkey_disabled))
                return;
 
-       write_amr(read_amr() & pkey_amr_uamor_mask);
-       write_iamr(read_iamr() & pkey_iamr_mask);
-       write_uamor(read_uamor() & pkey_amr_uamor_mask);
+       thread->amr = read_amr() & pkey_amr_uamor_mask;
+       thread->iamr = read_iamr() & pkey_iamr_mask;
+       thread->uamor = read_uamor() & pkey_amr_uamor_mask;
 }
 
 static inline bool pkey_allows_readwrite(int pkey)
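thread_pkey_regs_init() now seeds thread->amr/iamr/uamor rather than writing the AMR/IAMR/UAMOR SPRs directly; the saved values are presumably applied by the normal thread-restore path, so initialising a new thread no longer clobbers the registers of whatever is currently running.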
index 13cfe413b40d48cf0bca57fd757dc63bc01487dc..66577cc66dc9feaf533b097728da524e3d230b39 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/cacheflush.h>
 #include <asm/smp.h>
 #include <linux/compiler.h>
+#include <linux/context_tracking.h>
 #include <linux/mm_types.h>
 
 #include <asm/udbg.h>
@@ -340,3 +341,110 @@ void slb_initialize(void)
 
        asm volatile("isync":::"memory");
 }
+
+static void insert_slb_entry(unsigned long vsid, unsigned long ea,
+                            int bpsize, int ssize)
+{
+       unsigned long flags, vsid_data, esid_data;
+       enum slb_index index;
+       int slb_cache_index;
+
+       /*
+        * IRQs are disabled here, so it should be safe to access the PACA.
+        */
+       index = get_paca()->stab_rr;
+
+       /*
+        * Simple round-robin replacement of the SLB, starting at SLB_NUM_BOLTED.
+        */
+       if (index < (mmu_slb_size - 1))
+               index++;
+       else
+               index = SLB_NUM_BOLTED;
+
+       get_paca()->stab_rr = index;
+
+       flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+       vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
+                   ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
+       esid_data = mk_esid_data(ea, ssize, index);
+
+       asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
+                    : "memory");
+
+       /*
+        * Now update slb cache entries
+        */
+       slb_cache_index = get_paca()->slb_cache_ptr;
+       if (slb_cache_index < SLB_CACHE_ENTRIES) {
+               /*
+                * We have space in slb cache for optimized switch_slb().
+                * Top 36 bits from esid_data as per ISA
+                */
+               get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
+               get_paca()->slb_cache_ptr++;
+       } else {
+               /*
+                * Our cache is full and the current cache content no longer
+                * reflects the active SLB contents. Bump the ptr so that
+                * switch_slb() will ignore the cache.
+                */
+               get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+       }
+}
+
+static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
+{
+       struct mm_struct *mm = current->mm;
+       unsigned long vsid;
+       int bpsize;
+
+       /*
+        * We are always above 1TB here, so use the high user segment size.
+        */
+       vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
+       bpsize = get_slice_psize(mm, ea);
+       insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
+}
+
+void slb_miss_large_addr(struct pt_regs *regs)
+{
+       enum ctx_state prev_state = exception_enter();
+       unsigned long ea = regs->dar;
+       int context;
+
+       if (REGION_ID(ea) != USER_REGION_ID)
+               goto slb_bad_addr;
+
+       /*
+        * Are we beyond what the page table layout supports?
+        */
+       if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+               goto slb_bad_addr;
+
+       /* Lower address should have been handled by asm code */
+       if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
+               goto slb_bad_addr;
+
+       /*
+        * Consider this a bad access if we take an SLB miss
+        * on an address above the addr limit.
+        */
+       if (ea >= current->mm->context.slb_addr_limit)
+               goto slb_bad_addr;
+
+       context = get_ea_context(&current->mm->context, ea);
+       if (!context)
+               goto slb_bad_addr;
+
+       handle_multi_context_slb_miss(context, ea);
+       exception_exit(prev_state);
+       return;
+
+slb_bad_addr:
+       if (user_mode(regs))
+               _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+       else
+               bad_page_fault(regs, ea, SIGSEGV);
+       exception_exit(prev_state);
+}
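slb_miss_large_addr() is the new C slow path for SLB misses above the first context. A compact restatement of the boundary test, assembled from this hunk and the slb_low.S comment below (a sketch, not code from the patch):

        /* EAs below the boundary stay on the asm fast path. */
        static inline bool slb_miss_needs_c_handler(unsigned long ea)
        {
                return (ea & ~REGION_MASK) >= (1UL << MAX_EA_BITS_PER_CONTEXT);
        }

Addresses past the boundary are resolved by looking up an extra context id with get_ea_context() and inserting the SLB entry by hand, including the slb_cache bookkeeping that switch_slb() relies on.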
index 2cf5ef3fc50dbfdc7207a1a0399682716ce8cf77..a83fbd2a4a245dadeb9840b5480d85cff5fa6e26 100644 (file)
@@ -75,10 +75,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
  */
 _GLOBAL(slb_allocate)
        /*
-        * check for bad kernel/user address
-        * (ea & ~REGION_MASK) >= PGTABLE_RANGE
+        * Check if the address falls within the range of the first context, or
+        * if we may need to handle multi context. For the first context we
+        * allocate the slb entry via the fast path below. For large address we
+        * branch out to C-code and see if additional contexts have been
+        * allocated.
+        * The test here is:
+        *   (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
         */
-       rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4)
+       rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
        bne-    8f
 
        srdi    r9,r3,60                /* get region */
@@ -200,10 +205,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 5:
        /*
         * Handle lpsizes
-        * r9 is get_paca()->context.low_slices_psize, r11 is index
+        * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
         */
-       ld      r9,PACALOWSLICESPSIZE(r13)
-       mr      r11,r10
+       srdi    r11,r10,1 /* index */
+       addi    r9,r11,PACALOWSLICESPSIZE
+       lbzx    r9,r13,r9               /* r9 is lpsizes[r11] */
+       rldicl  r11,r10,0,63            /* r11 = r10 & 0x1 */
 6:
        sldi    r11,r11,2  /* index * 4 */
        /* Extract the psize and multiply to get an array offset */
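The asm change reflects low_slices_psize becoming a byte array with two 4-bit page-size fields per byte, matching high_slices_psize. A minimal C sketch of the packing the new instructions decode, mirroring get_slice_psize() later in this diff (a sketch, not code from the patch):

        /* Low nibble holds even slice indices, high nibble odd ones. */
        static unsigned int psize_of_slice(const unsigned char *psizes,
                                           unsigned long index)
        {
                return (psizes[index >> 1] >> ((index & 1) * 4)) & 0xf;
        }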
index 23ec2c5e3b782412f8b10717cee352e56cc31217..9cd87d11fe4e70e353521c6ac14eddeb4bc93ce0 100644 (file)
 #include <asm/hugetlb.h>
 
 static DEFINE_SPINLOCK(slice_convert_lock);
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
-       u64 low_slices;
-       DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
 
 #ifdef DEBUG
 int _slice_debug = 1;
 
-static void slice_print_mask(const char *label, struct slice_mask mask)
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
 {
        if (!_slice_debug)
                return;
-       pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
-       pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
+       pr_devel("%s low_slice: %*pbl\n", label,
+                       (int)SLICE_NUM_LOW, &mask->low_slices);
+       pr_devel("%s high_slice: %*pbl\n", label,
+                       (int)SLICE_NUM_HIGH, mask->high_slices);
 }
 
 #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
 
 #else
 
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
 #define slice_dbg(fmt...)
 
 #endif
@@ -73,10 +66,12 @@ static void slice_range_to_mask(unsigned long start, unsigned long len,
        unsigned long end = start + len - 1;
 
        ret->low_slices = 0;
-       bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+       if (SLICE_NUM_HIGH)
+               bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
        if (start < SLICE_LOW_TOP) {
-               unsigned long mend = min(end, (SLICE_LOW_TOP - 1));
+               unsigned long mend = min(end,
+                                        (unsigned long)(SLICE_LOW_TOP - 1));
 
                ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
                        - (1u << GET_LOW_SLICE_INDEX(start));
@@ -113,11 +108,13 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
        unsigned long start = slice << SLICE_HIGH_SHIFT;
        unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
 
+#ifdef CONFIG_PPC64
        /* Hack, so that each address is controlled by exactly one
         * of the high or low area bitmaps, the first high area starts
         * at 4GB, not 0 */
        if (start == 0)
                start = SLICE_LOW_TOP;
+#endif
 
        return !slice_area_is_free(mm, start, end - start);
 }
@@ -128,7 +125,8 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
        unsigned long i;
 
        ret->low_slices = 0;
-       bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+       if (SLICE_NUM_HIGH)
+               bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
 
        for (i = 0; i < SLICE_NUM_LOW; i++)
                if (!slice_low_has_vma(mm, i))
@@ -142,53 +140,75 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
                        __set_bit(i, ret->high_slices);
 }
 
-static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret,
-                               unsigned long high_limit)
+#ifdef CONFIG_PPC_BOOK3S_64
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
 {
-       unsigned char *hpsizes;
-       int index, mask_index;
-       unsigned long i;
-       u64 lpsizes;
-
-       ret->low_slices = 0;
-       bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+#ifdef CONFIG_PPC_64K_PAGES
+       if (psize == MMU_PAGE_64K)
+               return &mm->context.mask_64k;
+#endif
+       if (psize == MMU_PAGE_4K)
+               return &mm->context.mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+       if (psize == MMU_PAGE_16M)
+               return &mm->context.mask_16m;
+       if (psize == MMU_PAGE_16G)
+               return &mm->context.mask_16g;
+#endif
+       BUG();
+}
+#elif defined(CONFIG_PPC_8xx)
+static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+       if (psize == mmu_virtual_psize)
+               return &mm->context.mask_base_psize;
+#ifdef CONFIG_HUGETLB_PAGE
+       if (psize == MMU_PAGE_512K)
+               return &mm->context.mask_512k;
+       if (psize == MMU_PAGE_8M)
+               return &mm->context.mask_8m;
+#endif
+       BUG();
+}
+#else
+#error "Must define the slice masks for page sizes supported by the platform"
+#endif
 
-       lpsizes = mm->context.low_slices_psize;
-       for (i = 0; i < SLICE_NUM_LOW; i++)
-               if (((lpsizes >> (i * 4)) & 0xf) == psize)
-                       ret->low_slices |= 1u << i;
+static bool slice_check_range_fits(struct mm_struct *mm,
+                          const struct slice_mask *available,
+                          unsigned long start, unsigned long len)
+{
+       unsigned long end = start + len - 1;
+       u64 low_slices = 0;
 
-       if (high_limit <= SLICE_LOW_TOP)
-               return;
+       if (start < SLICE_LOW_TOP) {
+               unsigned long mend = min(end,
+                                        (unsigned long)(SLICE_LOW_TOP - 1));
 
-       hpsizes = mm->context.high_slices_psize;
-       for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) {
-               mask_index = i & 0x1;
-               index = i >> 1;
-               if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
-                       __set_bit(i, ret->high_slices);
+               low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+                               - (1u << GET_LOW_SLICE_INDEX(start));
        }
-}
+       if ((low_slices & available->low_slices) != low_slices)
+               return false;
 
-static int slice_check_fit(struct mm_struct *mm,
-                          struct slice_mask mask, struct slice_mask available)
-{
-       DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-       /*
-        * Make sure we just do bit compare only to the max
-        * addr limit and not the full bit map size.
-        */
-       unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
+       if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) {
+               unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
+               unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
+               unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
+               unsigned long i;
 
-       bitmap_and(result, mask.high_slices,
-                  available.high_slices, slice_count);
+               for (i = start_index; i < start_index + count; i++) {
+                       if (!test_bit(i, available->high_slices))
+                               return false;
+               }
+       }
 
-       return (mask.low_slices & available.low_slices) == mask.low_slices &&
-               bitmap_equal(result, mask.high_slices, slice_count);
+       return true;
 }
 
 static void slice_flush_segments(void *parm)
 {
+#ifdef CONFIG_PPC64
        struct mm_struct *mm = parm;
        unsigned long flags;
 
@@ -200,40 +220,64 @@ static void slice_flush_segments(void *parm)
        local_irq_save(flags);
        slb_flush_and_rebolt();
        local_irq_restore(flags);
+#endif
 }
 
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+static void slice_convert(struct mm_struct *mm,
+                               const struct slice_mask *mask, int psize)
 {
        int index, mask_index;
        /* Write the new slice psize bits */
-       unsigned char *hpsizes;
-       u64 lpsizes;
+       unsigned char *hpsizes, *lpsizes;
+       struct slice_mask *psize_mask, *old_mask;
        unsigned long i, flags;
+       int old_psize;
 
        slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
        slice_print_mask(" mask", mask);
 
+       psize_mask = slice_mask_for_size(mm, psize);
+
        /* We need to use a spinlock here to protect against
         * concurrent 64k -> 4k demotion ...
         */
        spin_lock_irqsave(&slice_convert_lock, flags);
 
        lpsizes = mm->context.low_slices_psize;
-       for (i = 0; i < SLICE_NUM_LOW; i++)
-               if (mask.low_slices & (1u << i))
-                       lpsizes = (lpsizes & ~(0xful << (i * 4))) |
-                               (((unsigned long)psize) << (i * 4));
+       for (i = 0; i < SLICE_NUM_LOW; i++) {
+               if (!(mask->low_slices & (1u << i)))
+                       continue;
+
+               mask_index = i & 0x1;
+               index = i >> 1;
 
-       /* Assign the value back */
-       mm->context.low_slices_psize = lpsizes;
+               /* Update the slice_mask */
+               old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
+               old_mask = slice_mask_for_size(mm, old_psize);
+               old_mask->low_slices &= ~(1u << i);
+               psize_mask->low_slices |= 1u << i;
+
+               /* Update the sizes array */
+               lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
+                               (((unsigned long)psize) << (mask_index * 4));
+       }
 
        hpsizes = mm->context.high_slices_psize;
        for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+               if (!test_bit(i, mask->high_slices))
+                       continue;
+
                mask_index = i & 0x1;
                index = i >> 1;
-               if (test_bit(i, mask.high_slices))
-                       hpsizes[index] = (hpsizes[index] &
-                                         ~(0xf << (mask_index * 4))) |
+
+               /* Update the slice_mask */
+               old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
+               old_mask = slice_mask_for_size(mm, old_psize);
+               __clear_bit(i, old_mask->high_slices);
+               __set_bit(i, psize_mask->high_slices);
+
+               /* Update the sizes array */
+               hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
                                (((unsigned long)psize) << (mask_index * 4));
        }
 
@@ -254,26 +298,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
  * 'available' slice_mask.
  */
 static bool slice_scan_available(unsigned long addr,
-                                struct slice_mask available,
-                                int end,
-                                unsigned long *boundary_addr)
+                                const struct slice_mask *available,
+                                int end, unsigned long *boundary_addr)
 {
        unsigned long slice;
        if (addr < SLICE_LOW_TOP) {
                slice = GET_LOW_SLICE_INDEX(addr);
                *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
-               return !!(available.low_slices & (1u << slice));
+               return !!(available->low_slices & (1u << slice));
        } else {
                slice = GET_HIGH_SLICE_INDEX(addr);
                *boundary_addr = (slice + end) ?
                        ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-               return !!test_bit(slice, available.high_slices);
+               return !!test_bit(slice, available->high_slices);
        }
 }
 
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
                                              unsigned long len,
-                                             struct slice_mask available,
+                                             const struct slice_mask *available,
                                              int psize, unsigned long high_limit)
 {
        int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -319,7 +362,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
                                             unsigned long len,
-                                            struct slice_mask available,
+                                            const struct slice_mask *available,
                                             int psize, unsigned long high_limit)
 {
        int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -377,7 +420,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 
 
 static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
-                                    struct slice_mask mask, int psize,
+                                    const struct slice_mask *mask, int psize,
                                     int topdown, unsigned long high_limit)
 {
        if (topdown)
@@ -386,23 +429,33 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
                return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
 }
 
-static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_copy_mask(struct slice_mask *dst,
+                                       const struct slice_mask *src)
 {
-       DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
-       dst->low_slices |= src->low_slices;
-       bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
-       bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+       dst->low_slices = src->low_slices;
+       if (!SLICE_NUM_HIGH)
+               return;
+       bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
 }
 
-static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_or_mask(struct slice_mask *dst,
+                                       const struct slice_mask *src1,
+                                       const struct slice_mask *src2)
 {
-       DECLARE_BITMAP(result, SLICE_NUM_HIGH);
-
-       dst->low_slices &= ~src->low_slices;
+       dst->low_slices = src1->low_slices | src2->low_slices;
+       if (!SLICE_NUM_HIGH)
+               return;
+       bitmap_or(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
+}
 
-       bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
-       bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH);
+static inline void slice_andnot_mask(struct slice_mask *dst,
+                                       const struct slice_mask *src1,
+                                       const struct slice_mask *src2)
+{
+       dst->low_slices = src1->low_slices & ~src2->low_slices;
+       if (!SLICE_NUM_HIGH)
+               return;
+       bitmap_andnot(dst->high_slices, src1->high_slices, src2->high_slices, SLICE_NUM_HIGH);
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
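slice_or_mask() and slice_andnot_mask() become three-operand (dst, src1, src2). That permits in-place use such as slice_or_mask(&potential_mask, &potential_mask, &good_mask) without the old on-stack DECLARE_BITMAP temporary, and both helpers skip the high-slice bitmap ops entirely when SLICE_NUM_HIGH is zero (the 8xx case).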
@@ -415,10 +468,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
                                      unsigned long flags, unsigned int psize,
                                      int topdown)
 {
-       struct slice_mask mask;
        struct slice_mask good_mask;
        struct slice_mask potential_mask;
-       struct slice_mask compat_mask;
+       const struct slice_mask *maskp;
+       const struct slice_mask *compat_maskp = NULL;
        int fixed = (flags & MAP_FIXED);
        int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
        unsigned long page_size = 1UL << pshift;
@@ -442,23 +495,16 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
        }
 
        if (high_limit > mm->context.slb_addr_limit) {
+               /*
+                * Increasing the slb_addr_limit does not require the
+                * slice mask cache to be recalculated, because it should
+                * already be initialised beyond the old address limit.
+                */
                mm->context.slb_addr_limit = high_limit;
+
                on_each_cpu(slice_flush_segments, mm, 1);
        }
 
-       /*
-        * init different masks
-        */
-       mask.low_slices = 0;
-       bitmap_zero(mask.high_slices, SLICE_NUM_HIGH);
-
-       /* silence stupid warning */;
-       potential_mask.low_slices = 0;
-       bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH);
-
-       compat_mask.low_slices = 0;
-       bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH);
-
        /* Sanity checks */
        BUG_ON(mm->task_size == 0);
        BUG_ON(mm->context.slb_addr_limit == 0);
@@ -481,8 +527,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
        /* First make up a "good" mask of slices that have the right size
         * already
         */
-       slice_mask_for_size(mm, psize, &good_mask, high_limit);
-       slice_print_mask(" good_mask", good_mask);
+       maskp = slice_mask_for_size(mm, psize);
 
        /*
         * Here "good" means slices that are already the right page size,
@@ -503,40 +548,47 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
         *      search in good | compat | free, found => convert free.
         */
 
-#ifdef CONFIG_PPC_64K_PAGES
-       /* If we support combo pages, we can allow 64k pages in 4k slices */
-       if (psize == MMU_PAGE_64K) {
-               slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
+       /*
+        * If we support combo pages, we can allow 64k pages in 4k slices.
+        * The mask copies could be avoided in most cases here if we had
+        * a pointer to the good mask for the next code to use.
+        */
+       if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
+               compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
                if (fixed)
-                       slice_or_mask(&good_mask, &compat_mask);
+                       slice_or_mask(&good_mask, maskp, compat_maskp);
+               else
+                       slice_copy_mask(&good_mask, maskp);
+       } else {
+               slice_copy_mask(&good_mask, maskp);
        }
-#endif
+
+       slice_print_mask(" good_mask", &good_mask);
+       if (compat_maskp)
+               slice_print_mask(" compat_mask", compat_maskp);
 
        /* First check hint if it's valid or if we have MAP_FIXED */
        if (addr != 0 || fixed) {
-               /* Build a mask for the requested range */
-               slice_range_to_mask(addr, len, &mask);
-               slice_print_mask(" mask", mask);
-
                /* Check if we fit in the good mask. If we do, we just return,
                 * nothing else to do
                 */
-               if (slice_check_fit(mm, mask, good_mask)) {
+               if (slice_check_range_fits(mm, &good_mask, addr, len)) {
                        slice_dbg(" fits good !\n");
-                       return addr;
+                       newaddr = addr;
+                       goto return_addr;
                }
        } else {
                /* Now let's see if we can find something in the existing
                 * slices for that size
                 */
-               newaddr = slice_find_area(mm, len, good_mask,
+               newaddr = slice_find_area(mm, len, &good_mask,
                                          psize, topdown, high_limit);
                if (newaddr != -ENOMEM) {
                        /* Found within the good mask, we don't have to setup,
                         * we thus return directly
                         */
                        slice_dbg(" found area at 0x%lx\n", newaddr);
-                       return newaddr;
+                       goto return_addr;
                }
        }
        /*
@@ -544,12 +596,15 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
         * empty and thus can be converted
         */
        slice_mask_for_free(mm, &potential_mask, high_limit);
-       slice_or_mask(&potential_mask, &good_mask);
-       slice_print_mask(" potential", potential_mask);
+       slice_or_mask(&potential_mask, &potential_mask, &good_mask);
+       slice_print_mask(" potential", &potential_mask);
 
-       if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
-               slice_dbg(" fits potential !\n");
-               goto convert;
+       if (addr != 0 || fixed) {
+               if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
+                       slice_dbg(" fits potential !\n");
+                       newaddr = addr;
+                       goto convert;
+               }
        }
 
        /* If we have MAP_FIXED and failed the above steps, then error out */
@@ -562,46 +617,64 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
         * anywhere in the good area.
         */
        if (addr) {
-               addr = slice_find_area(mm, len, good_mask,
-                                      psize, topdown, high_limit);
-               if (addr != -ENOMEM) {
-                       slice_dbg(" found area at 0x%lx\n", addr);
-                       return addr;
+               newaddr = slice_find_area(mm, len, &good_mask,
+                                         psize, topdown, high_limit);
+               if (newaddr != -ENOMEM) {
+                       slice_dbg(" found area at 0x%lx\n", newaddr);
+                       goto return_addr;
                }
        }
 
        /* Now let's see if we can find something in the existing slices
         * for that size plus free slices
         */
-       addr = slice_find_area(mm, len, potential_mask,
-                              psize, topdown, high_limit);
+       newaddr = slice_find_area(mm, len, &potential_mask,
+                                 psize, topdown, high_limit);
 
 #ifdef CONFIG_PPC_64K_PAGES
-       if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
+       if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) {
                /* retry the search with 4k-page slices included */
-               slice_or_mask(&potential_mask, &compat_mask);
-               addr = slice_find_area(mm, len, potential_mask,
-                                      psize, topdown, high_limit);
+               slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
+               newaddr = slice_find_area(mm, len, &potential_mask,
+                                         psize, topdown, high_limit);
        }
 #endif
 
-       if (addr == -ENOMEM)
+       if (newaddr == -ENOMEM)
                return -ENOMEM;
 
-       slice_range_to_mask(addr, len, &mask);
-       slice_dbg(" found potential area at 0x%lx\n", addr);
-       slice_print_mask(" mask", mask);
+       slice_range_to_mask(newaddr, len, &potential_mask);
+       slice_dbg(" found potential area at 0x%lx\n", newaddr);
+       slice_print_mask(" mask", &potential_mask);
 
  convert:
-       slice_andnot_mask(&mask, &good_mask);
-       slice_andnot_mask(&mask, &compat_mask);
-       if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) {
-               slice_convert(mm, mask, psize);
+       /*
+        * Try to allocate the context before we do the slice conversion,
+        * so that a context allocation failure is handled gracefully.
+        */
+       if (need_extra_context(mm, newaddr)) {
+               if (alloc_extended_context(mm, newaddr) < 0)
+                       return -ENOMEM;
+       }
+
+       slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+       if (compat_maskp && !fixed)
+               slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+       if (potential_mask.low_slices ||
+               (SLICE_NUM_HIGH &&
+                !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+               slice_convert(mm, &potential_mask, psize);
                if (psize > MMU_PAGE_BASE)
                        on_each_cpu(slice_flush_segments, mm, 1);
        }
-       return addr;
+       return newaddr;
 
+return_addr:
+       if (need_extra_context(mm, newaddr)) {
+               if (alloc_extended_context(mm, newaddr) < 0)
+                       return -ENOMEM;
+       }
+       return newaddr;
 }
 EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
 
@@ -627,94 +700,60 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
 
 unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
-       unsigned char *hpsizes;
+       unsigned char *psizes;
        int index, mask_index;
 
-       /*
-        * Radix doesn't use slice, but can get enabled along with MMU_SLICE
-        */
-       if (radix_enabled()) {
-#ifdef CONFIG_PPC_64K_PAGES
-               return MMU_PAGE_64K;
-#else
-               return MMU_PAGE_4K;
-#endif
-       }
+       VM_BUG_ON(radix_enabled());
+
        if (addr < SLICE_LOW_TOP) {
-               u64 lpsizes;
-               lpsizes = mm->context.low_slices_psize;
+               psizes = mm->context.low_slices_psize;
                index = GET_LOW_SLICE_INDEX(addr);
-               return (lpsizes >> (index * 4)) & 0xf;
+       } else {
+               psizes = mm->context.high_slices_psize;
+               index = GET_HIGH_SLICE_INDEX(addr);
        }
-       hpsizes = mm->context.high_slices_psize;
-       index = GET_HIGH_SLICE_INDEX(addr);
        mask_index = index & 0x1;
-       return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
+       return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
 }
 EXPORT_SYMBOL_GPL(get_slice_psize);
 
-/*
- * This is called by hash_page when it needs to do a lazy conversion of
- * an address space from real 64K pages to combo 4K pages (typically
- * when hitting a non cacheable mapping on a processor or hypervisor
- * that won't allow them for 64K pages).
- *
- * This is also called in init_new_context() to change back the user
- * psize from whatever the parent context had it set to
- * N.B. This may be called before mm->context.id has been set.
- *
- * This function will only change the content of the {low,high)_slice_psize
- * masks, it will not flush SLBs as this shall be handled lazily by the
- * caller.
- */
-void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
+void slice_init_new_context_exec(struct mm_struct *mm)
 {
-       int index, mask_index;
-       unsigned char *hpsizes;
-       unsigned long flags, lpsizes;
-       unsigned int old_psize;
-       int i;
+       unsigned char *hpsizes, *lpsizes;
+       struct slice_mask *mask;
+       unsigned int psize = mmu_virtual_psize;
 
-       slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
+       slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
 
-       VM_BUG_ON(radix_enabled());
-       spin_lock_irqsave(&slice_convert_lock, flags);
-
-       old_psize = mm->context.user_psize;
-       slice_dbg(" old_psize=%d\n", old_psize);
-       if (old_psize == psize)
-               goto bail;
+       /*
+        * In the case of exec, use the default limit. In the
+        * case of fork it is just inherited from the mm being
+        * duplicated.
+        */
+#ifdef CONFIG_PPC64
+       mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+#else
+       mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
 
        mm->context.user_psize = psize;
-       wmb();
 
+       /*
+        * Set all slice psizes to the default.
+        */
        lpsizes = mm->context.low_slices_psize;
-       for (i = 0; i < SLICE_NUM_LOW; i++)
-               if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
-                       lpsizes = (lpsizes & ~(0xful << (i * 4))) |
-                               (((unsigned long)psize) << (i * 4));
-       /* Assign the value back */
-       mm->context.low_slices_psize = lpsizes;
+       memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
 
        hpsizes = mm->context.high_slices_psize;
-       for (i = 0; i < SLICE_NUM_HIGH; i++) {
-               mask_index = i & 0x1;
-               index = i >> 1;
-               if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
-                       hpsizes[index] = (hpsizes[index] &
-                                         ~(0xf << (mask_index * 4))) |
-                               (((unsigned long)psize) << (mask_index * 4));
-       }
-
-
-
-
-       slice_dbg(" lsps=%lx, hsps=%lx\n",
-                 (unsigned long)mm->context.low_slices_psize,
-                 (unsigned long)mm->context.high_slices_psize);
+       memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
 
- bail:
-       spin_unlock_irqrestore(&slice_convert_lock, flags);
+       /*
+        * The slice mask cache starts zeroed; fill in the default size's mask.
+        */
+       mask = slice_mask_for_size(mm, psize);
+       mask->low_slices = ~0UL;
+       if (SLICE_NUM_HIGH)
+               bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
 }
 
 void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
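The memset() fills pack the same 4-bit psize into both nibbles of every byte: the fill value (psize << 4) | psize would be 0x33 for a hypothetical psize of 3, so writing SLICE_NUM_LOW >> 1 bytes initialises all SLICE_NUM_LOW low slices (and likewise for the high slices) in one call.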
@@ -725,7 +764,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
        VM_BUG_ON(radix_enabled());
 
        slice_range_to_mask(start, len, &mask);
-       slice_convert(mm, mask, psize);
+       slice_convert(mm, &mask, psize);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -748,33 +787,27 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
  * for now as we only use slices with hugetlbfs enabled. This should
  * be fixed as the generic code gets fixed.
  */
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
                           unsigned long len)
 {
-       struct slice_mask mask, available;
+       const struct slice_mask *maskp;
        unsigned int psize = mm->context.user_psize;
-       unsigned long high_limit = mm->context.slb_addr_limit;
 
-       if (radix_enabled())
-               return 0;
+       VM_BUG_ON(radix_enabled());
 
-       slice_range_to_mask(addr, len, &mask);
-       slice_mask_for_size(mm, psize, &available, high_limit);
+       maskp = slice_mask_for_size(mm, psize);
 #ifdef CONFIG_PPC_64K_PAGES
        /* We need to account for 4k slices too */
        if (psize == MMU_PAGE_64K) {
-               struct slice_mask compat_mask;
-               slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit);
-               slice_or_mask(&available, &compat_mask);
+               const struct slice_mask *compat_maskp;
+               struct slice_mask available;
+
+               compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+               slice_or_mask(&available, maskp, compat_maskp);
+               return !slice_check_range_fits(mm, &available, addr, len);
        }
 #endif
 
-#if 0 /* too verbose */
-       slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
-                mm, addr, len);
-       slice_print_mask(" mask", mask);
-       slice_print_mask(" available", available);
-#endif
-       return !slice_check_fit(mm, mask, available);
+       return !slice_check_range_fits(mm, maskp, addr, len);
 }
 #endif
index a07f5372a4bf36ce726408891defc380dd9fea59..2fba6170ab3fa9bf472dc23bd01a57ec48f824b0 100644 (file)
@@ -98,7 +98,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set,
        rb |= set << PPC_BITLSHIFT(51);
        rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
-       r = 1;   /* raidx format */
+       r = 1;   /* radix format */
 
        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -112,7 +112,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric)
        rb = PPC_BIT(53); /* IS = 1 */
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
-       r = 1;   /* raidx format */
+       r = 1;   /* radix format */
 
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -128,7 +128,7 @@ static inline void __tlbiel_va(unsigned long va, unsigned long pid,
        rb |= ap << PPC_BITLSHIFT(58);
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
-       r = 1;   /* raidx format */
+       r = 1;   /* radix format */
 
        asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -144,7 +144,7 @@ static inline void __tlbie_va(unsigned long va, unsigned long pid,
        rb |= ap << PPC_BITLSHIFT(58);
        rs = pid << PPC_BITLSHIFT(31);
        prs = 1; /* process scoped */
-       r = 1;   /* raidx format */
+       r = 1;   /* radix format */
 
        asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
@@ -668,7 +668,7 @@ void radix__flush_tlb_all(void)
 
        rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
        prs = 0; /* partition scoped */
-       r = 1;   /* raidx format */
+       r = 1;   /* radix format */
        rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
 
        asm volatile("ptesync": : :"memory");
@@ -706,7 +706,7 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 {
-       unsigned int pid = mm->context.id;
+       unsigned long pid = mm->context.id;
 
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;
@@ -734,7 +734,7 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
                for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
                        if (sib == cpu)
                                continue;
-                       if (paca[sib].kvm_hstate.kvm_vcpu)
+                       if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
                                flush = true;
                }
                if (flush)
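paca[sib] becomes paca_ptrs[sib]->: the flat paca array is replaced by an array of pointers, presumably so each CPU's paca can be allocated separately (and node-locally), which means every site that indexed the array directly has to be converted like this.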
index 9b23f12e863cc14ff324b9c5ffed077c3a1012e8..87d71dd2544102b4d7b22e4c64341620413dd580 100644 (file)
@@ -89,7 +89,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
        /* Build full vaddr */
        if (!is_kernel_addr(addr)) {
                ssize = user_segment_size(addr);
-               vsid = get_vsid(mm->context.id, addr, ssize);
+               vsid = get_user_vsid(&mm->context, addr, ssize);
        } else {
                vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
                ssize = mmu_kernel_ssize;
index 44d67b167e0b18eeb512e3c059817c31fcc0f2cc..2668cc414e4e0153411472020f287da3bfc1abae 100644 (file)
@@ -208,7 +208,7 @@ prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
        /* Create cached_info and set spu_info[spu->number] to point to it.
         * spu->number is a system-wide value, not a per-node value.
         */
-       info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                printk(KERN_ERR "SPU_PROF: "
                       "%s, line %d: create vma_map failed\n",
index c579b16845da453b06517662c031d631a9db9b18..f40e37316dd67b77a2ee91876882701ff446e91f 100644 (file)
@@ -69,8 +69,8 @@ vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
            unsigned int size, unsigned int offset, unsigned int guard_ptr,
            unsigned int guard_val)
 {
-       struct vma_to_fileoffset_map *new =
-               kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
+       struct vma_to_fileoffset_map *new = kzalloc(sizeof(*new), GFP_KERNEL);
+
        if (!new) {
                printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
                       __func__, __LINE__);
index 57ebc655d2accd1b7d7e8b5c009990cd7017ddbd..82986d2acd9ba551c68cbdafc218a4a40eb393ca 100644 (file)
@@ -4,7 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 obj-$(CONFIG_PERF_EVENTS)      += callchain.o perf_regs.o
 
 obj-$(CONFIG_PPC_PERF_CTRS)    += core-book3s.o bhrb.o
-obj64-$(CONFIG_PPC_PERF_CTRS)  += power4-pmu.o ppc970-pmu.o power5-pmu.o \
+obj64-$(CONFIG_PPC_PERF_CTRS)  += ppc970-pmu.o power5-pmu.o \
                                   power5+-pmu.o power6-pmu.o power7-pmu.o \
                                   isa207-common.o power8-pmu.o power9-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
index f8908ea4ea736487a719f52650f6bff380961e62..3f66fcf8ad99ba06a45af3ff7f9f74c254fe3acd 100644
@@ -198,6 +198,10 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
 
        if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
                *addrp = mfspr(SPRN_SDAR);
+
+       if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+               is_kernel_addr(mfspr(SPRN_SDAR)))
+               *addrp = 0;
 }
 
 static bool regs_sihv(struct pt_regs *regs)
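For context on the check added above: perf_paranoid_kernel() comes from the core perf code, not this file. A sketch of its 4.17-era definition (an assumption worth checking against include/linux/perf_event.h):

        /* gated by /proc/sys/kernel/perf_event_paranoid, default 2 */
        extern int sysctl_perf_event_paranoid;

        static inline bool perf_paranoid_kernel(void)
        {
                return sysctl_perf_event_paranoid > 1;
        }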
@@ -457,6 +461,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
                                /* invalid entry */
                                continue;
 
+                       /*
+                        * BHRB rolling buffer could very much contain the kernel
+                        * addresses at this point. Check the privileges before
+                        * exporting it to userspace (avoid exposure of regions
+                        * where we could have speculative execution)
+                        */
+                       if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+                               is_kernel_addr(addr))
+                               continue;
+
                        /* Branches are read most recent first (ie. mfbhrb 0 is
                         * the most recent branch).
                         * There are two types of valid entries:
@@ -1226,6 +1240,7 @@ static void power_pmu_disable(struct pmu *pmu)
                 */
                write_mmcr0(cpuhw, val);
                mb();
+               isync();
 
                /*
                 * Disable instruction sampling if it was enabled
@@ -1234,12 +1249,26 @@ static void power_pmu_disable(struct pmu *pmu)
                        mtspr(SPRN_MMCRA,
                              cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
                        mb();
+                       isync();
                }
 
                cpuhw->disabled = 1;
                cpuhw->n_added = 0;
 
                ebb_switch_out(mmcr0);
+
+#ifdef CONFIG_PPC64
+               /*
+                * These are readable by userspace, may contain kernel
+                * addresses and are not switched by context switch, so clear
+                * them now to avoid leaking anything to userspace in general
+                * including to another process.
+                */
+               if (ppmu->flags & PPMU_ARCH_207S) {
+                       mtspr(SPRN_SDAR, 0);
+                       mtspr(SPRN_SIAR, 0);
+               }
+#endif
        }
 
        local_irq_restore(flags);
@@ -1810,6 +1839,18 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
        return 0;
 }
 
+static bool is_event_blacklisted(u64 ev)
+{
+       int i;
+
+       for (i = 0; i < ppmu->n_blacklist_ev; i++) {
+               if (ppmu->blacklist_ev[i] == ev)
+                       return true;
+       }
+
+       return false;
+}
+
 static int power_pmu_event_init(struct perf_event *event)
 {
        u64 ev;
@@ -1835,15 +1876,24 @@ static int power_pmu_event_init(struct perf_event *event)
                ev = event->attr.config;
                if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
                        return -EOPNOTSUPP;
+
+               if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+                       return -EINVAL;
                ev = ppmu->generic_events[ev];
                break;
        case PERF_TYPE_HW_CACHE:
                err = hw_perf_cache_event(event->attr.config, &ev);
                if (err)
                        return err;
+
+               if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+                       return -EINVAL;
                break;
        case PERF_TYPE_RAW:
                ev = event->attr.config;
+
+               if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+                       return -EINVAL;
                break;
        default:
                return -ENOENT;
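With these checks in place, a blacklisted event fails at perf_event_open() time rather than silently miscounting. A self-contained userspace sketch; the raw code 0x3006c (PM_RUN_CYC_SMT2_MODE) is taken from the event list added later in this merge, and EINVAL is the error chosen in the hunk above:

        #define _GNU_SOURCE
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/syscall.h>
        #include <linux/perf_event.h>

        int main(void)
        {
                struct perf_event_attr attr;

                memset(&attr, 0, sizeof(attr));
                attr.size = sizeof(attr);
                attr.type = PERF_TYPE_RAW;
                attr.config = 0x3006c;  /* PM_RUN_CYC_SMT2_MODE, blacklisted on P9 DD2.1 */

                /* expect -1/EINVAL on an affected chip, a valid fd elsewhere */
                if (syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0) < 0)
                        perror("perf_event_open");
                return 0;
        }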
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
deleted file mode 100644
index ce6072f..0000000
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
- *
- * Copyright 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/perf_event.h>
-#include <linux/string.h>
-#include <asm/reg.h>
-#include <asm/cputable.h>
-
-/*
- * Bits in event code for POWER4
- */
-#define PM_PMC_SH      12      /* PMC number (1-based) for direct events */
-#define PM_PMC_MSK     0xf
-#define PM_UNIT_SH     8       /* TTMMUX number and setting - unit select */
-#define PM_UNIT_MSK    0xf
-#define PM_LOWER_SH    6
-#define PM_LOWER_MSK   1
-#define PM_LOWER_MSKS  0x40
-#define PM_BYTE_SH     4       /* Byte number of event bus to use */
-#define PM_BYTE_MSK    3
-#define PM_PMCSEL_MSK  7
-
-/*
- * Unit code values
- */
-#define PM_FPU         1
-#define PM_ISU1                2
-#define PM_IFU         3
-#define PM_IDU0                4
-#define PM_ISU1_ALT    6
-#define PM_ISU2                7
-#define PM_IFU_ALT     8
-#define PM_LSU0                9
-#define PM_LSU1                0xc
-#define PM_GPS         0xf
-
-/*
- * Bits in MMCR0 for POWER4
- */
-#define MMCR0_PMC1SEL_SH       8
-#define MMCR0_PMC2SEL_SH       1
-#define MMCR_PMCSEL_MSK                0x1f
-
-/*
- * Bits in MMCR1 for POWER4
- */
-#define MMCR1_TTM0SEL_SH       62
-#define MMCR1_TTC0SEL_SH       61
-#define MMCR1_TTM1SEL_SH       59
-#define MMCR1_TTC1SEL_SH       58
-#define MMCR1_TTM2SEL_SH       56
-#define MMCR1_TTC2SEL_SH       55
-#define MMCR1_TTM3SEL_SH       53
-#define MMCR1_TTC3SEL_SH       52
-#define MMCR1_TTMSEL_MSK       3
-#define MMCR1_TD_CP_DBG0SEL_SH 50
-#define MMCR1_TD_CP_DBG1SEL_SH 48
-#define MMCR1_TD_CP_DBG2SEL_SH 46
-#define MMCR1_TD_CP_DBG3SEL_SH 44
-#define MMCR1_DEBUG0SEL_SH     43
-#define MMCR1_DEBUG1SEL_SH     42
-#define MMCR1_DEBUG2SEL_SH     41
-#define MMCR1_DEBUG3SEL_SH     40
-#define MMCR1_PMC1_ADDER_SEL_SH        39
-#define MMCR1_PMC2_ADDER_SEL_SH        38
-#define MMCR1_PMC6_ADDER_SEL_SH        37
-#define MMCR1_PMC5_ADDER_SEL_SH        36
-#define MMCR1_PMC8_ADDER_SEL_SH        35
-#define MMCR1_PMC7_ADDER_SEL_SH        34
-#define MMCR1_PMC3_ADDER_SEL_SH        33
-#define MMCR1_PMC4_ADDER_SEL_SH        32
-#define MMCR1_PMC3SEL_SH       27
-#define MMCR1_PMC4SEL_SH       22
-#define MMCR1_PMC5SEL_SH       17
-#define MMCR1_PMC6SEL_SH       12
-#define MMCR1_PMC7SEL_SH       7
-#define MMCR1_PMC8SEL_SH       2       /* note bit 0 is in MMCRA for GP */
-
-static short mmcr1_adder_bits[8] = {
-       MMCR1_PMC1_ADDER_SEL_SH,
-       MMCR1_PMC2_ADDER_SEL_SH,
-       MMCR1_PMC3_ADDER_SEL_SH,
-       MMCR1_PMC4_ADDER_SEL_SH,
-       MMCR1_PMC5_ADDER_SEL_SH,
-       MMCR1_PMC6_ADDER_SEL_SH,
-       MMCR1_PMC7_ADDER_SEL_SH,
-       MMCR1_PMC8_ADDER_SEL_SH
-};
-
-/*
- * Bits in MMCRA
- */
-#define MMCRA_PMC8SEL0_SH      17      /* PMC8SEL bit 0 for GP */
-
-/*
- * Layout of constraint bits:
- * 6666555555555544444444443333333333222222222211111111110000000000
- * 3210987654321098765432109876543210987654321098765432109876543210
- *        |[  >[  >[   >|||[  >[  ><  ><  ><  ><  ><><><><><><><><>
- *        | UC1 UC2 UC3 ||| PS1 PS2 B0  B1  B2  B3 P1P2P3P4P5P6P7P8
- *       \SMPL         ||\TTC3SEL
- *                     |\TTC_IFU_SEL
- *                     \TTM2SEL0
- *
- * SMPL - SAMPLE_ENABLE constraint
- *     56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
- *
- * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
- *     55: UC1 error 0x0080_0000_0000_0000
- *     54: FPU events needed 0x0040_0000_0000_0000
- *     53: ISU1 events needed 0x0020_0000_0000_0000
- *     52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
- *
- * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
- *     51: UC2 error 0x0008_0000_0000_0000
- *     50: FPU events needed 0x0004_0000_0000_0000
- *     49: IFU events needed 0x0002_0000_0000_0000
- *     48: LSU0 events needed 0x0001_0000_0000_0000
- *
- * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
- *     47: UC3 error 0x8000_0000_0000
- *     46: LSU0 events needed 0x4000_0000_0000
- *     45: IFU events needed 0x2000_0000_0000
- *     44: IDU0|ISU2 events needed 0x1000_0000_0000
- *     43: ISU1 events needed 0x0800_0000_0000
- *
- * TTM2SEL0
- *     42: 0 = IDU0 events needed
- *                1 = ISU2 events needed 0x0400_0000_0000
- *
- * TTC_IFU_SEL
- *     41: 0 = IFU.U events needed
- *                1 = IFU.L events needed 0x0200_0000_0000
- *
- * TTC3SEL
- *     40: 0 = LSU1.U events needed
- *                1 = LSU1.L events needed 0x0100_0000_0000
- *
- * PS1
- *     39: PS1 error 0x0080_0000_0000
- *     36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
- *
- * PS2
- *     35: PS2 error 0x0008_0000_0000
- *     32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
- *
- * B0
- *     28-31: Byte 0 event source 0xf000_0000
- *        1 = FPU
- *        2 = ISU1
- *        3 = IFU
- *        4 = IDU0
- *        7 = ISU2
- *        9 = LSU0
- *        c = LSU1
- *        f = GPS
- *
- * B1, B2, B3
- *     24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
- *
- * P8
- *     15: P8 error 0x8000
- *     14-15: Count of events needing PMC8
- *
- * P1..P7
- *     0-13: Count of events needing PMC1..PMC7
- *
- * Note: this doesn't allow events using IFU.U to be combined with events
- * using IFU.L, though that is feasible (using TTM0 and TTM2).  However
- * there are no listed events for IFU.L (they are debug events not
- * verified for performance monitoring) so this shouldn't cause a
- * problem.
- */
-
-static struct unitinfo {
-       unsigned long   value, mask;
-       int             unit;
-       int             lowerbit;
-} p4_unitinfo[16] = {
-       [PM_FPU]  = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
-       [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
-       [PM_ISU1_ALT] =
-                   { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
-       [PM_IFU]  = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
-       [PM_IFU_ALT] =
-                   { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
-       [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
-       [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
-       [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
-       [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
-       [PM_GPS]  = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
-};
-
-static unsigned char direct_marked_event[8] = {
-       (1<<2) | (1<<3),        /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
-       (1<<3) | (1<<5),        /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
-       (1<<3),                 /* PMC3: PM_MRK_ST_CMPL_INT */
-       (1<<4) | (1<<5),        /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
-       (1<<4) | (1<<5),        /* PMC5: PM_MRK_GRP_TIMEO */
-       (1<<3) | (1<<4) | (1<<5),
-               /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
-       (1<<4) | (1<<5),        /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
-       (1<<4),                 /* PMC8: PM_MRK_LSU_FIN */
-};
-
-/*
- * Returns 1 if event counts things relating to marked instructions
- * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
- */
-static int p4_marked_instr_event(u64 event)
-{
-       int pmc, psel, unit, byte, bit;
-       unsigned int mask;
-
-       pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
-       psel = event & PM_PMCSEL_MSK;
-       if (pmc) {
-               if (direct_marked_event[pmc - 1] & (1 << psel))
-                       return 1;
-               if (psel == 0)          /* add events */
-                       bit = (pmc <= 4)? pmc - 1: 8 - pmc;
-               else if (psel == 6)     /* decode events */
-                       bit = 4;
-               else
-                       return 0;
-       } else
-               bit = psel;
-
-       byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
-       unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
-       mask = 0;
-       switch (unit) {
-       case PM_LSU1:
-               if (event & PM_LOWER_MSKS)
-                       mask = 1 << 28;         /* byte 7 bit 4 */
-               else
-                       mask = 6 << 24;         /* byte 3 bits 1 and 2 */
-               break;
-       case PM_LSU0:
-               /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
-               mask = 0x083dff00;
-       }
-       return (mask >> (byte * 8 + bit)) & 1;
-}
-
-static int p4_get_constraint(u64 event, unsigned long *maskp,
-                            unsigned long *valp)
-{
-       int pmc, byte, unit, lower, sh;
-       unsigned long mask = 0, value = 0;
-       int grp = -1;
-
-       pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
-       if (pmc) {
-               if (pmc > 8)
-                       return -1;
-               sh = (pmc - 1) * 2;
-               mask |= 2 << sh;
-               value |= 1 << sh;
-               grp = ((pmc - 1) >> 1) & 1;
-       }
-       unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
-       byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
-       if (unit) {
-               lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
-
-               /*
-                * Bus events on bytes 0 and 2 can be counted
-                * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
-                */
-               if (!pmc)
-                       grp = byte & 1;
-
-               if (!p4_unitinfo[unit].unit)
-                       return -1;
-               mask  |= p4_unitinfo[unit].mask;
-               value |= p4_unitinfo[unit].value;
-               sh = p4_unitinfo[unit].lowerbit;
-               if (sh > 1)
-                       value |= (unsigned long)lower << sh;
-               else if (lower != sh)
-                       return -1;
-               unit = p4_unitinfo[unit].unit;
-
-               /* Set byte lane select field */
-               mask  |= 0xfULL << (28 - 4 * byte);
-               value |= (unsigned long)unit << (28 - 4 * byte);
-       }
-       if (grp == 0) {
-               /* increment PMC1/2/5/6 field */
-               mask  |= 0x8000000000ull;
-               value |= 0x1000000000ull;
-       } else {
-               /* increment PMC3/4/7/8 field */
-               mask  |= 0x800000000ull;
-               value |= 0x100000000ull;
-       }
-
-       /* Marked instruction events need sample_enable set */
-       if (p4_marked_instr_event(event)) {
-               mask  |= 1ull << 56;
-               value |= 1ull << 56;
-       }
-
-       /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
-       if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
-               mask  |= 1ull << 56;
-
-       *maskp = mask;
-       *valp = value;
-       return 0;
-}
-
-static unsigned int ppc_inst_cmpl[] = {
-       0x1001, 0x4001, 0x6001, 0x7001, 0x8001
-};
-
-static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
-{
-       int i, j, na;
-
-       alt[0] = event;
-       na = 1;
-
-       /* 2 possibilities for PM_GRP_DISP_REJECT */
-       if (event == 0x8003 || event == 0x0224) {
-               alt[1] = event ^ (0x8003 ^ 0x0224);
-               return 2;
-       }
-
-       /* 2 possibilities for PM_ST_MISS_L1 */
-       if (event == 0x0c13 || event == 0x0c23) {
-               alt[1] = event ^ (0x0c13 ^ 0x0c23);
-               return 2;
-       }
-
-       /* several possibilities for PM_INST_CMPL */
-       for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
-               if (event == ppc_inst_cmpl[i]) {
-                       for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
-                               if (j != i)
-                                       alt[na++] = ppc_inst_cmpl[j];
-                       break;
-               }
-       }
-
-       return na;
-}
-
-static int p4_compute_mmcr(u64 event[], int n_ev,
-                          unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
-{
-       unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
-       unsigned int pmc, unit, byte, psel, lower;
-       unsigned int ttm, grp;
-       unsigned int pmc_inuse = 0;
-       unsigned int pmc_grp_use[2];
-       unsigned char busbyte[4];
-       unsigned char unituse[16];
-       unsigned int unitlower = 0;
-       int i;
-
-       if (n_ev > 8)
-               return -1;
-
-       /* First pass to count resource use */
-       pmc_grp_use[0] = pmc_grp_use[1] = 0;
-       memset(busbyte, 0, sizeof(busbyte));
-       memset(unituse, 0, sizeof(unituse));
-       for (i = 0; i < n_ev; ++i) {
-               pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
-               if (pmc) {
-                       if (pmc_inuse & (1 << (pmc - 1)))
-                               return -1;
-                       pmc_inuse |= 1 << (pmc - 1);
-                       /* count 1/2/5/6 vs 3/4/7/8 use */
-                       ++pmc_grp_use[((pmc - 1) >> 1) & 1];
-               }
-               unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
-               byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
-               lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
-               if (unit) {
-                       if (!pmc)
-                               ++pmc_grp_use[byte & 1];
-                       if (unit == 6 || unit == 8)
-                               /* map alt ISU1/IFU codes: 6->2, 8->3 */
-                               unit = (unit >> 1) - 1;
-                       if (busbyte[byte] && busbyte[byte] != unit)
-                               return -1;
-                       busbyte[byte] = unit;
-                       lower <<= unit;
-                       if (unituse[unit] && lower != (unitlower & lower))
-                               return -1;
-                       unituse[unit] = 1;
-                       unitlower |= lower;
-               }
-       }
-       if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
-               return -1;
-
-       /*
-        * Assign resources and set multiplexer selects.
-        *
-        * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
-        * Each TTMx can only select one unit, but since
-        * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
-        * we have some choices.
-        */
-       if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
-               unituse[6] = 1;         /* Move 2 to 6 */
-               unituse[2] = 0;
-       }
-       if (unituse[3] & (unituse[1] | unituse[2])) {
-               unituse[8] = 1;         /* Move 3 to 8 */
-               unituse[3] = 0;
-               unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
-       }
-       /* Check only one unit per TTMx */
-       if (unituse[1] + unituse[2] + unituse[3] > 1 ||
-           unituse[4] + unituse[6] + unituse[7] > 1 ||
-           unituse[8] + unituse[9] > 1 ||
-           (unituse[5] | unituse[10] | unituse[11] |
-            unituse[13] | unituse[14]))
-               return -1;
-
-       /* Set TTMxSEL fields.  Note, units 1-3 => TTM0SEL codes 0-2 */
-       mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
-               << MMCR1_TTM0SEL_SH;
-       mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
-               << MMCR1_TTM1SEL_SH;
-       mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
-
-       /* Set TTCxSEL fields. */
-       if (unitlower & 0xe)
-               mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
-       if (unitlower & 0xf0)
-               mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
-       if (unitlower & 0xf00)
-               mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
-       if (unitlower & 0x7000)
-               mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
-
-       /* Set byte lane select fields. */
-       for (byte = 0; byte < 4; ++byte) {
-               unit = busbyte[byte];
-               if (!unit)
-                       continue;
-               if (unit == 0xf) {
-                       /* special case for GPS */
-                       mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
-               } else {
-                       if (!unituse[unit])
-                               ttm = unit - 1;         /* 2->1, 3->2 */
-                       else
-                               ttm = unit >> 2;
-                       mmcr1 |= (unsigned long)ttm
-                               << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
-               }
-       }
-
-       /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
-       for (i = 0; i < n_ev; ++i) {
-               pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
-               unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
-               byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
-               psel = event[i] & PM_PMCSEL_MSK;
-               if (!pmc) {
-                       /* Bus event or 00xxx direct event (off or cycles) */
-                       if (unit)
-                               psel |= 0x10 | ((byte & 2) << 2);
-                       for (pmc = 0; pmc < 8; ++pmc) {
-                               if (pmc_inuse & (1 << pmc))
-                                       continue;
-                               grp = (pmc >> 1) & 1;
-                               if (unit) {
-                                       if (grp == (byte & 1))
-                                               break;
-                               } else if (pmc_grp_use[grp] < 4) {
-                                       ++pmc_grp_use[grp];
-                                       break;
-                               }
-                       }
-                       pmc_inuse |= 1 << pmc;
-               } else {
-                       /* Direct event */
-                       --pmc;
-                       if (psel == 0 && (byte & 2))
-                               /* add events on higher-numbered bus */
-                               mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
-                       else if (psel == 6 && byte == 3)
-                               /* seem to need to set sample_enable here */
-                               mmcra |= MMCRA_SAMPLE_ENABLE;
-                       psel |= 8;
-               }
-               if (pmc <= 1)
-                       mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
-               else
-                       mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
-               if (pmc == 7)   /* PMC8 */
-                       mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
-               hwc[i] = pmc;
-               if (p4_marked_instr_event(event[i]))
-                       mmcra |= MMCRA_SAMPLE_ENABLE;
-       }
-
-       if (pmc_inuse & 1)
-               mmcr0 |= MMCR0_PMC1CE;
-       if (pmc_inuse & 0xfe)
-               mmcr0 |= MMCR0_PMCjCE;
-
-       mmcra |= 0x2000;        /* mark only one IOP per PPC instruction */
-
-       /* Return MMCRx values */
-       mmcr[0] = mmcr0;
-       mmcr[1] = mmcr1;
-       mmcr[2] = mmcra;
-       return 0;
-}
-
-static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
-{
-       /*
-        * Setting the PMCxSEL field to 0 disables PMC x.
-        * (Note that pmc is 0-based here, not 1-based.)
-        */
-       if (pmc <= 1) {
-               mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
-       } else {
-               mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
-               if (pmc == 7)
-                       mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
-       }
-}
-
-static int p4_generic_events[] = {
-       [PERF_COUNT_HW_CPU_CYCLES]              = 7,
-       [PERF_COUNT_HW_INSTRUCTIONS]            = 0x1001,
-       [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x8c10, /* PM_LD_REF_L1 */
-       [PERF_COUNT_HW_CACHE_MISSES]            = 0x3c10, /* PM_LD_MISS_L1 */
-       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x330,  /* PM_BR_ISSUED */
-       [PERF_COUNT_HW_BRANCH_MISSES]           = 0x331,  /* PM_BR_MPRED_CR */
-};
-
-#define C(x)   PERF_COUNT_HW_CACHE_##x
-
-/*
- * Table of generalized cache-related events.
- * 0 means not supported, -1 means nonsensical, other values
- * are event codes.
- */
-static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
-       [C(L1D)] = {            /*      RESULT_ACCESS   RESULT_MISS */
-               [C(OP_READ)] = {        0x8c10,         0x3c10  },
-               [C(OP_WRITE)] = {       0x7c10,         0xc13   },
-               [C(OP_PREFETCH)] = {    0xc35,          0       },
-       },
-       [C(L1I)] = {            /*      RESULT_ACCESS   RESULT_MISS */
-               [C(OP_READ)] = {        0,              0       },
-               [C(OP_WRITE)] = {       -1,             -1      },
-               [C(OP_PREFETCH)] = {    0,              0       },
-       },
-       [C(LL)] = {             /*      RESULT_ACCESS   RESULT_MISS */
-               [C(OP_READ)] = {        0,              0       },
-               [C(OP_WRITE)] = {       0,              0       },
-               [C(OP_PREFETCH)] = {    0xc34,          0       },
-       },
-       [C(DTLB)] = {           /*      RESULT_ACCESS   RESULT_MISS */
-               [C(OP_READ)] = {        0,              0x904   },
-               [C(OP_WRITE)] = {       -1,             -1      },
-               [C(OP_PREFETCH)] = {    -1,             -1      },
-       },
-       [C(ITLB)] = {           /*      RESULT_ACCESS   RESULT_MISS */
-               [C(OP_READ)] = {        0,              0x900   },
-               [C(OP_WRITE)] = {       -1,             -1      },
-               [C(OP_PREFETCH)] = {    -1,             -1      },
-       },
-       [C(BPU)] = {            /*      RESULT_ACCESS   RESULT_MISS */
-               [C(OP_READ)] = {        0x330,          0x331   },
-               [C(OP_WRITE)] = {       -1,             -1      },
-               [C(OP_PREFETCH)] = {    -1,             -1      },
-       },
-       [C(NODE)] = {           /*      RESULT_ACCESS   RESULT_MISS */
-               [C(OP_READ)] = {        -1,             -1      },
-               [C(OP_WRITE)] = {       -1,             -1      },
-               [C(OP_PREFETCH)] = {    -1,             -1      },
-       },
-};
-
-static struct power_pmu power4_pmu = {
-       .name                   = "POWER4/4+",
-       .n_counter              = 8,
-       .max_alternatives       = 5,
-       .add_fields             = 0x0000001100005555ul,
-       .test_adder             = 0x0011083300000000ul,
-       .compute_mmcr           = p4_compute_mmcr,
-       .get_constraint         = p4_get_constraint,
-       .get_alternatives       = p4_get_alternatives,
-       .disable_pmc            = p4_disable_pmc,
-       .n_generic              = ARRAY_SIZE(p4_generic_events),
-       .generic_events         = p4_generic_events,
-       .cache_events           = &power4_cache_events,
-       .flags                  = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
-};
-
-static int __init init_power4_pmu(void)
-{
-       if (!cur_cpu_spec->oprofile_cpu_type ||
-           strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
-               return -ENODEV;
-
-       return register_power_pmu(&power4_pmu);
-}
-
-early_initcall(init_power4_pmu);
index e99c6bf4d391073e0fdb94c588421b8b821efab5..7de344b7d9cc262472db1c3b3d068ad37e5d556d 100644
@@ -69,3 +69,31 @@ EVENT(PM_BR_CMPL_ALT,                                0x10012)
 EVENT(PM_BR_2PATH,                             0x20036)
 /* Alternate branch events that are not strongly biased */
 EVENT(PM_BR_2PATH_ALT,                         0x40036)
+
+/* Blacklisted events */
+EVENT(PM_MRK_ST_DONE_L2,                       0x10134)
+EVENT(PM_RADIX_PWC_L1_HIT,                     0x1f056)
+EVENT(PM_FLOP_CMPL,                            0x100f4)
+EVENT(PM_MRK_NTF_FIN,                          0x20112)
+EVENT(PM_RADIX_PWC_L2_HIT,                     0x2d024)
+EVENT(PM_IFETCH_THROTTLE,                      0x3405e)
+EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER,            0x3e15c)
+EVENT(PM_RADIX_PWC_L3_HIT,                     0x3f056)
+EVENT(PM_RUN_CYC_SMT2_MODE,                    0x3006c)
+EVENT(PM_TM_TX_PASS_RUN_INST,                  0x4e014)
+EVENT(PM_DISP_HELD_SYNC_HOLD,                  0x4003c)
+EVENT(PM_DTLB_MISS_16G,                                0x1c058)
+EVENT(PM_DERAT_MISS_2M,                                0x1c05a)
+EVENT(PM_DTLB_MISS_2M,                         0x1c05c)
+EVENT(PM_MRK_DTLB_MISS_1G,                     0x1d15c)
+EVENT(PM_DTLB_MISS_4K,                         0x2c056)
+EVENT(PM_DERAT_MISS_1G,                                0x2c05a)
+EVENT(PM_MRK_DERAT_MISS_2M,                    0x2d152)
+EVENT(PM_MRK_DTLB_MISS_4K,                     0x2d156)
+EVENT(PM_MRK_DTLB_MISS_16G,                    0x2d15e)
+EVENT(PM_DTLB_MISS_64K,                                0x3c056)
+EVENT(PM_MRK_DERAT_MISS_1G,                    0x3d152)
+EVENT(PM_MRK_DTLB_MISS_64K,                    0x3d156)
+EVENT(PM_DTLB_MISS_16M,                                0x4c056)
+EVENT(PM_DTLB_MISS_1G,                         0x4c05a)
+EVENT(PM_MRK_DTLB_MISS_16M,                    0x4c15e)
index 24b5b5b7a2064179a6e6685ee3331cf45b6861be..2ca0b33b4efb27fac48b1ba0dc6851afcf312efc 100644
@@ -101,9 +101,45 @@ enum {
 #define POWER9_MMCRA_IFM2              0x0000000080000000UL
 #define POWER9_MMCRA_IFM3              0x00000000C0000000UL
 
+/* Nasty Power9 specific hack */
+#define PVR_POWER9_CUMULUS             0x00002000
+
 /* PowerISA v2.07 format attribute structure */
 extern struct attribute_group isa207_pmu_format_group;
 
+int p9_dd21_bl_ev[] = {
+       PM_MRK_ST_DONE_L2,
+       PM_RADIX_PWC_L1_HIT,
+       PM_FLOP_CMPL,
+       PM_MRK_NTF_FIN,
+       PM_RADIX_PWC_L2_HIT,
+       PM_IFETCH_THROTTLE,
+       PM_MRK_L2_TM_ST_ABORT_SISTER,
+       PM_RADIX_PWC_L3_HIT,
+       PM_RUN_CYC_SMT2_MODE,
+       PM_TM_TX_PASS_RUN_INST,
+       PM_DISP_HELD_SYNC_HOLD,
+};
+
+int p9_dd22_bl_ev[] = {
+       PM_DTLB_MISS_16G,
+       PM_DERAT_MISS_2M,
+       PM_DTLB_MISS_2M,
+       PM_MRK_DTLB_MISS_1G,
+       PM_DTLB_MISS_4K,
+       PM_DERAT_MISS_1G,
+       PM_MRK_DERAT_MISS_2M,
+       PM_MRK_DTLB_MISS_4K,
+       PM_MRK_DTLB_MISS_16G,
+       PM_DTLB_MISS_64K,
+       PM_MRK_DERAT_MISS_1G,
+       PM_MRK_DTLB_MISS_64K,
+       PM_DISP_HELD_SYNC_HOLD,
+       PM_DTLB_MISS_16M,
+       PM_DTLB_MISS_1G,
+       PM_MRK_DTLB_MISS_16M,
+};
+
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power9_event_alternatives[][MAX_ALT] = {
        { PM_INST_DISP,                 PM_INST_DISP_ALT },
@@ -446,12 +482,24 @@ static struct power_pmu power9_pmu = {
 static int __init init_power9_pmu(void)
 {
        int rc = 0;
+       unsigned int pvr = mfspr(SPRN_PVR);
 
        /* Comes from cpu_specs[] */
        if (!cur_cpu_spec->oprofile_cpu_type ||
            strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power9"))
                return -ENODEV;
 
+       /* Blacklist events */
+       if (!(pvr & PVR_POWER9_CUMULUS)) {
+               if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 1)) {
+                       power9_pmu.blacklist_ev = p9_dd21_bl_ev;
+                       power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev);
+               } else if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 2)) {
+                       power9_pmu.blacklist_ev = p9_dd22_bl_ev;
+                       power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev);
+               }
+       }
+
        if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
                /*
                 * Since PM_INST_CMPL may not provide right counts in all
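The DD2.1/DD2.2 tests above decode the low PVR bits. A self-contained sketch of that decoding, assuming the usual asm/reg.h field layout (CFG in bits 8-11, MIN in bits 0-3) and a hypothetical Nimbus PVR value:

        #include <stdio.h>

        /* assumed field layout, mirroring asm/reg.h */
        #define PVR_CFG(pvr)            (((pvr) >> 8) & 0xF)    /* major design level */
        #define PVR_MIN(pvr)            ((pvr) & 0xF)           /* minor design level */
        #define PVR_POWER9_CUMULUS      0x00002000              /* from the hunk above */

        int main(void)
        {
                unsigned int pvr = 0x004e0201;  /* hypothetical POWER9 Nimbus DD2.1 */

                if (!(pvr & PVR_POWER9_CUMULUS))
                        printf("Nimbus DD%u.%u\n", PVR_CFG(pvr), PVR_MIN(pvr));
                return 0;
        }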
index d50417e23add8961d8b96addbfeefd38d384464d..96aaae67892802d9153d3f603c67437d5fe802eb 100644
@@ -223,7 +223,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
 
        dev_dbg(&dev->dev, "PCIE-MSI: Setting up MSI support...\n");
 
-       msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
+       msi = kzalloc(sizeof(*msi), GFP_KERNEL);
        if (!msi) {
                dev_err(&dev->dev, "No memory for MSI structure\n");
                return -ENOMEM;
@@ -241,7 +241,8 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
        if (!msi_irqs)
                return -ENODEV;
 
-       if (ppc4xx_setup_pcieh_hw(dev, res, msi))
+       err = ppc4xx_setup_pcieh_hw(dev, res, msi);
+       if (err)
                goto error_out;
 
        err = ppc4xx_msi_init_allocator(dev, msi);
index 85d9e37f5ccbf76c93c2cf71c1d70677d7a14366..69d9f60d9fe5357a5fd3c18b28833cb5198e1211 100644
@@ -339,7 +339,7 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
                if (IS_ERR_VALUE(offset))
                        continue;
 
-               ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL);
+               ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL);
                if (!ocm_blk) {
                        printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block");
                        rh_free(ocm_reg->rh, offset);
index f51fd35f4618a777fd003fcfa991c03fa4f0e30d..7e966f4cf19af8fa6a9456ce71858fdf6a4c4bb2 100644
@@ -147,7 +147,7 @@ static void qoriq_cpu_kill(unsigned int cpu)
        for (i = 0; i < 500; i++) {
                if (is_cpu_dead(cpu)) {
 #ifdef CONFIG_PPC64
-                       paca[cpu].cpu_start = 0;
+                       paca_ptrs[cpu]->cpu_start = 0;
 #endif
                        return;
                }
@@ -328,7 +328,7 @@ static int smp_85xx_kick_cpu(int nr)
                return ret;
 
 done:
-       paca[nr].cpu_start = 1;
+       paca_ptrs[nr]->cpu_start = 1;
        generic_set_cpu_up(nr);
 
        return ret;
@@ -409,14 +409,14 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
        }
 
        if (disable_threadbit) {
-               while (paca[disable_cpu].kexec_state < KEXEC_STATE_REAL_MODE) {
+               while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) {
                        barrier();
                        now = mftb();
                        if (!notified && now - start > 1000000) {
                                pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n",
                                        __func__, smp_processor_id(),
                                        disable_cpu,
-                                       paca[disable_cpu].kexec_state);
+                                       paca_ptrs[disable_cpu]->kexec_state);
                                notified = true;
                        }
                }
index e1274db53d48a54f4d2614ac9a8bcc4e213ed949..2188d691a40f1456410e0c6b5619d64d28e14afe 100644
@@ -217,13 +217,7 @@ void __noreturn mpc8xx_restart(char *cmd)
 
 static void cpm_cascade(struct irq_desc *desc)
 {
-       struct irq_chip *chip = irq_desc_get_chip(desc);
-       int cascade_irq = cpm_get_irq();
-
-       if (cascade_irq >= 0)
-               generic_handle_irq(cascade_irq);
-
-       chip->irq_eoi(&desc->irq_data);
+       generic_handle_irq(cpm_get_irq());
 }
 
 /* Initialize the internal interrupt controllers.  The number of
index a429d859f15d7ae04635453b0089131a596a56fc..67d3125d06100b24390e7ba70cdc151e330de46f 100644
@@ -61,7 +61,7 @@ choice
        help
          There are two families of 64 bit PowerPC chips supported.
          The most common ones are the desktop and server CPUs
-         (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...)
+         (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...)
 
          The other are the "embedded" processors compliant with the
          "Book 3E" variant of the architecture
@@ -87,7 +87,6 @@ endchoice
 choice
        prompt "CPU selection"
        depends on PPC64
-       default POWER8_CPU if CPU_LITTLE_ENDIAN
        default GENERIC_CPU
        help
          This will create a kernel which is optimised for a particular CPU.
@@ -96,17 +95,18 @@ choice
          If unsure, select Generic.
 
 config GENERIC_CPU
-       bool "Generic"
+       bool "Generic (POWER4 and above)"
        depends on !CPU_LITTLE_ENDIAN
 
+config GENERIC_CPU
+       bool "Generic (POWER8 and above)"
+       depends on CPU_LITTLE_ENDIAN
+       select ARCH_HAS_FAST_MULTIPLIER
+
 config CELL_CPU
        bool "Cell Broadband Engine"
        depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
 
-config POWER4_CPU
-       bool "POWER4"
-       depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
-
 config POWER5_CPU
        bool "POWER5"
        depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
@@ -125,6 +125,11 @@ config POWER8_CPU
        depends on PPC_BOOK3S_64
        select ARCH_HAS_FAST_MULTIPLIER
 
+config POWER9_CPU
+       bool "POWER9"
+       depends on PPC_BOOK3S_64
+       select ARCH_HAS_FAST_MULTIPLIER
+
 config E5500_CPU
        bool "Freescale e5500"
        depends on E500
@@ -326,6 +331,7 @@ config PPC_BOOK3E_MMU
 config PPC_MM_SLICES
        bool
        default y if PPC_BOOK3S_64
+       default y if PPC_8xx && HUGETLB_PAGE
        default n
 
 config PPC_HAVE_PMU_SUPPORT
index 6ea3f248b1558550bcde3df3d8749d7708c44a2c..326d34e2aa0295a5d6831091071d07f61ffc83c1 100644
@@ -342,7 +342,7 @@ static int axon_msi_probe(struct platform_device *device)
 
        pr_devel("axon_msi: setting up dn %pOF\n", dn);
 
-       msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
+       msic = kzalloc(sizeof(*msic), GFP_KERNEL);
        if (!msic) {
                printk(KERN_ERR "axon_msi: couldn't allocate msic for %pOF\n",
                       dn);
index f84d52a2db40a975197b3465b633e20da2835f89..1aeac5761e0ba4ce82ce9bd2390f5d01e34a1729 100644
@@ -83,7 +83,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
        pcpu = get_hard_smp_processor_id(lcpu);
 
        /* Fixup atomic count: it exited inside IRQ handler. */
-       task_thread_info(paca[lcpu].__current)->preempt_count   = 0;
+       task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count     = 0;
 
        /*
         * If the RTAS start-cpu token does not exist then presume the
@@ -126,7 +126,7 @@ static int smp_cell_kick_cpu(int nr)
         * cpu_start field to become non-zero After we set cpu_start,
         * the processor will continue on to secondary_start
         */
-       paca[nr].cpu_start = 1;
+       paca_ptrs[nr]->cpu_start = 1;
 
        return 0;
 }
index d1e61e273e64a8ba4e4eefc768c70698e94dc3f3..1200d0dea512dc6ca0997e9913cb4dc2d6a3e429 100644
@@ -133,7 +133,7 @@ int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
        pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%pOF)\n",
                 np);
 
-       priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
        if (!priv) {
                pr_err("SPIDERPCI-IOWA:"
                       "Can't allocate struct spiderpci_iowa_private");
index b847e94035664dbdd6f5d9c3c4223d27c921f81d..d9de848dae478c20fbace99a95ecdce4baab4ab2 100644
@@ -36,7 +36,7 @@ int spu_alloc_lscsa(struct spu_state *csa)
        struct spu_lscsa *lscsa;
        unsigned char *p;
 
-       lscsa = vzalloc(sizeof(struct spu_lscsa));
+       lscsa = vzalloc(sizeof(*lscsa));
        if (!lscsa)
                return -ENOMEM;
        csa->lscsa = lscsa;
index ade83829d5e8bb60d60616a1bc8de5336dedf7c8..7206f3f573d45f304e688ec243d755c295fe32e3 100644
@@ -132,7 +132,7 @@ static void __flipper_quiesce(void __iomem *io_base)
        out_be32(io_base + FLIPPER_ICR, 0xffffffff);
 }
 
-struct irq_domain * __init flipper_pic_init(struct device_node *np)
+static struct irq_domain * __init flipper_pic_init(struct device_node *np)
 {
        struct device_node *pi;
        struct irq_domain *irq_domain = NULL;
index 7feb325b636b87d97c238572e3fcf1f840eb5a2f..5c7e7ce6dbab99dba7b360500702336275f3f165 100644
@@ -169,7 +169,7 @@ static int ug_getc(void)
 /*
  * Transmits a character.
  */
-void ug_udbg_putc(char ch)
+static void ug_udbg_putc(char ch)
 {
        ug_putc(ch);
 }
index 3fd683e40bc951ed0795e4a0b1b072936edfea1c..8bb46dcbebd845cb54cf2c303fa9c726ff5c09e3 100644
@@ -44,6 +44,7 @@
 #define HW_GPIO_BASE(idx)      (idx * 0x20)
 #define HW_GPIO_OUT(idx)       (HW_GPIO_BASE(idx) + 0)
 #define HW_GPIO_DIR(idx)       (HW_GPIO_BASE(idx) + 4)
+#define HW_GPIO_OWNER          (HW_GPIO_BASE(1) + 0x1c)
 
 #define HW_GPIO_SHUTDOWN       (1<<1)
 #define HW_GPIO_SLOT_LED       (1<<5)
@@ -79,21 +80,9 @@ void __init wii_memory_fixups(void)
        BUG_ON(memblock.memory.cnt != 2);
        BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
 
-       /* trim unaligned tail */
-       memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
-                       (phys_addr_t)ULLONG_MAX);
-
-       /* determine hole, add & reserve them */
+       /* determine hole */
        wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
        wii_hole_size = p[1].base - wii_hole_start;
-       memblock_add(wii_hole_start, wii_hole_size);
-       memblock_reserve(wii_hole_start, wii_hole_size);
-
-       BUG_ON(memblock.memory.cnt != 1);
-       __memblock_dump_all();
-
-       /* allow ioremapping the address space in the hole */
-       __allow_ioremap_reserved = 1;
 }
 
 unsigned long __init wii_mmu_mapin_mem2(unsigned long top)
@@ -176,6 +165,12 @@ static void wii_power_off(void)
        local_irq_disable();
 
        if (hw_gpio) {
+               /*
+                * set the owner of the shutdown pin to ARM, because it is
+                * accessed through the registers for the ARM, below
+                */
+               clrbits32(hw_gpio + HW_GPIO_OWNER, HW_GPIO_SHUTDOWN);
+
                /* make sure that the poweroff GPIO is configured as output */
                setbits32(hw_gpio + HW_GPIO_DIR(1), HW_GPIO_SHUTDOWN);
 
@@ -239,7 +234,7 @@ static int __init wii_device_probe(void)
        if (!machine_is(wii))
                return 0;
 
-       of_platform_bus_probe(NULL, wii_of_bus, NULL);
+       of_platform_populate(NULL, wii_of_bus, NULL, NULL);
        return 0;
 }
 device_initcall(wii_device_probe);
index 3408f315ef48ed238a43f81c82e63a9bcb652e00..fa89f30e7f27c138c0610777f4d86321be60239e 100644
@@ -492,7 +492,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np)
        const u32               *psteps, *prate, *addrp;
        u32                     steps;
 
-       host = kzalloc(sizeof(struct pmac_i2c_host_kw), GFP_KERNEL);
+       host = kzalloc(sizeof(*host), GFP_KERNEL);
        if (host == NULL) {
                printk(KERN_ERR "low_i2c: Can't allocate host for %pOF\n",
                       np);
index df3c93bef228b6fa73ae4a72288f97d16e258c0e..e0462fedcdb8f50e50eb1267dfb76d2683bf5e58 100644
@@ -643,7 +643,7 @@ static int pmf_add_function_prop(struct pmf_device *dev, void *driverdata,
 
        while (length >= 12) {
                /* Allocate a structure */
-               func = kzalloc(sizeof(struct pmf_function), GFP_KERNEL);
+               func = kzalloc(sizeof(*func), GFP_KERNEL);
                if (func == NULL)
                        goto bail;
                kref_init(&func->ref);
@@ -719,7 +719,7 @@ int pmf_register_driver(struct device_node *np,
                return -EBUSY;
        }
 
-       dev = kzalloc(sizeof(struct pmf_device), GFP_KERNEL);
+       dev = kzalloc(sizeof(*dev), GFP_KERNEL);
        if (dev == NULL) {
                DBG("pmf: no memory !\n");
                return -ENOMEM;
index 6c9d5199a7e2a17f2f291572e88311fbbaf56713..703a350a7f4e76615aed3537580d2a6421f8410b 100644
@@ -16,5 +16,4 @@ obj-$(CONFIG_OPAL_PRD)        += opal-prd.o
 obj-$(CONFIG_PERF_EVENTS) += opal-imc.o
 obj-$(CONFIG_PPC_MEMTRACE)     += memtrace.o
 obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o
-obj-$(CONFIG_PPC_FTW)  += nx-ftw.o
 obj-$(CONFIG_OCXL_BASE)        += ocxl.o
index 33c86c1a17204445a231828c0b7467335a823098..ddfc3544d28514aedf4e57008e687940ce94cda3 100644
@@ -1425,11 +1425,8 @@ static int pnv_eeh_get_pe(struct pci_controller *hose,
        dev_pe = dev_pe->parent;
        while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
                int ret;
-               int active_flags = (EEH_STATE_MMIO_ACTIVE |
-                                   EEH_STATE_DMA_ACTIVE);
-
                ret = eeh_ops->get_state(dev_pe, NULL);
-               if (ret <= 0 || (ret & active_flags) == active_flags) {
+               if (ret <= 0 || eeh_state_active(ret)) {
                        dev_pe = dev_pe->parent;
                        continue;
                }
@@ -1463,7 +1460,6 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
        struct eeh_pe *phb_pe, *parent_pe;
        __be64 frozen_pe_no;
        __be16 err_type, severity;
-       int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
        long rc;
        int state, ret = EEH_NEXT_ERR_NONE;
 
@@ -1626,8 +1622,7 @@ static int pnv_eeh_next_error(struct eeh_pe **pe)
 
                                /* Frozen parent PE ? */
                                state = eeh_ops->get_state(parent_pe, NULL);
-                               if (state > 0 &&
-                                   (state & active_flags) != active_flags)
+                               if (state > 0 && !eeh_state_active(state))
                                        *pe = parent_pe;
 
                                /* Next parent level */
index 443d5ca719958e5170374edbcaaf7d2cc52b8104..1f12ab1e6030fed70c8b176225095c34c7ac040e 100644
@@ -24,6 +24,7 @@
 #include <asm/code-patching.h>
 #include <asm/smp.h>
 #include <asm/runlatch.h>
+#include <asm/dbell.h>
 
 #include "powernv.h"
 #include "subcore.h"
@@ -80,7 +81,7 @@ static int pnv_save_sprs_for_deep_states(void)
 
        for_each_possible_cpu(cpu) {
                uint64_t pir = get_hard_smp_processor_id(cpu);
-               uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+               uint64_t hsprg0_val = (uint64_t)paca_ptrs[cpu];
 
                rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
                if (rc != 0)
@@ -173,12 +174,12 @@ static void pnv_alloc_idle_core_states(void)
                for (j = 0; j < threads_per_core; j++) {
                        int cpu = first_cpu + j;
 
-                       paca[cpu].core_idle_state_ptr = core_idle_state;
-                       paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
-                       paca[cpu].thread_mask = 1 << j;
+                       paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
+                       paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
+                       paca_ptrs[cpu]->thread_mask = 1 << j;
                        if (!cpu_has_feature(CPU_FTR_POWER9_DD1))
                                continue;
-                       paca[cpu].thread_sibling_pacas =
+                       paca_ptrs[cpu]->thread_sibling_pacas =
                                kmalloc_node(paca_ptr_array_size,
                                             GFP_KERNEL, node);
                }
@@ -387,6 +388,78 @@ void power9_idle(void)
        power9_idle_type(pnv_default_stop_val, pnv_default_stop_mask);
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * This is used in working around bugs in thread reconfiguration
+ * on POWER9 (at least up to Nimbus DD2.2) relating to transactional
+ * memory and the way that XER[SO] is checkpointed.
+ * This function forces the core into SMT4 in order by asking
+ * all other threads not to stop, and sending a message to any
+ * that are in a stop state.
+ * Must be called with preemption disabled.
+ */
+void pnv_power9_force_smt4_catch(void)
+{
+       int cpu, cpu0, thr;
+       int awake_threads = 1;          /* this thread is awake */
+       int poke_threads = 0;
+       int need_awake = threads_per_core;
+
+       cpu = smp_processor_id();
+       cpu0 = cpu & ~(threads_per_core - 1);
+       for (thr = 0; thr < threads_per_core; ++thr) {
+               if (cpu != cpu0 + thr)
+                       atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
+       }
+       /* order setting dont_stop vs testing requested_psscr */
+       mb();
+       for (thr = 0; thr < threads_per_core; ++thr) {
+               if (!paca_ptrs[cpu0+thr]->requested_psscr)
+                       ++awake_threads;
+               else
+                       poke_threads |= (1 << thr);
+       }
+
+       /* If at least 3 threads are awake, the core is in SMT4 already */
+       if (awake_threads < need_awake) {
+               /* We have to wake some threads; we'll use msgsnd */
+               for (thr = 0; thr < threads_per_core; ++thr) {
+                       if (poke_threads & (1 << thr)) {
+                               ppc_msgsnd_sync();
+                               ppc_msgsnd(PPC_DBELL_MSGTYPE, 0,
+                                          paca_ptrs[cpu0+thr]->hw_cpu_id);
+                       }
+               }
+               /* now spin until at least 3 threads are awake */
+               do {
+                       for (thr = 0; thr < threads_per_core; ++thr) {
+                               if ((poke_threads & (1 << thr)) &&
+                                   !paca_ptrs[cpu0+thr]->requested_psscr) {
+                                       ++awake_threads;
+                                       poke_threads &= ~(1 << thr);
+                               }
+                       }
+               } while (awake_threads < need_awake);
+       }
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_catch);
+
+void pnv_power9_force_smt4_release(void)
+{
+       int cpu, cpu0, thr;
+
+       cpu = smp_processor_id();
+       cpu0 = cpu & ~(threads_per_core - 1);
+
+       /* clear all the dont_stop flags */
+       for (thr = 0; thr < threads_per_core; ++thr) {
+               if (cpu != cpu0 + thr)
+                       atomic_dec(&paca_ptrs[cpu0+thr]->dont_stop);
+       }
+}
+EXPORT_SYMBOL_GPL(pnv_power9_force_smt4_release);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
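+/*
+ * Sketch for context (not in this diff): the expected caller is the
+ * KVM TM workaround path, which brackets the sensitive operation with
+ * the catch/release pair, preemption disabled as required above.
+ *
+ *     preempt_disable();
+ *     pnv_power9_force_smt4_catch();
+ *
+ *     ... work that relies on at least 3 threads being awake ...
+ *
+ *     pnv_power9_force_smt4_release();
+ *     preempt_enable();
+ */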
 #ifdef CONFIG_HOTPLUG_CPU
 static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
 {
@@ -434,7 +507,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
                psscr = mfspr(SPRN_PSSCR);
                psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
                                                pnv_deepest_stop_psscr_val;
-               srr1 = power9_idle_stop(psscr);
+               srr1 = power9_offline_stop(psscr);
 
        } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
                   (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
@@ -749,7 +822,8 @@ static int __init pnv_init_idle_states(void)
                        for (i = 0; i < threads_per_core; i++) {
                                int j = base_cpu + i;
 
-                               paca[j].thread_sibling_pacas[idx] = &paca[cpu];
+                               paca_ptrs[j]->thread_sibling_pacas[idx] =
+                                       paca_ptrs[cpu];
                        }
                }
        }
index 0a253b64ac5fed049f81082183a35ef6e0fdf76b..69a4f9e8bd554f137dd01b930b1b3d87e204fd47 100644
@@ -410,6 +410,11 @@ struct npu_context {
        void *priv;
 };
 
+struct mmio_atsd_reg {
+       struct npu *npu;
+       int reg;
+};
+
 /*
  * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
  * if none are available.
@@ -419,7 +424,7 @@ static int get_mmio_atsd_reg(struct npu *npu)
        int i;
 
        for (i = 0; i < npu->mmio_atsd_count; i++) {
-               if (!test_and_set_bit(i, &npu->mmio_atsd_usage))
+               if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
                        return i;
        }
 
@@ -428,86 +433,90 @@ static int get_mmio_atsd_reg(struct npu *npu)
 
 static void put_mmio_atsd_reg(struct npu *npu, int reg)
 {
-       clear_bit(reg, &npu->mmio_atsd_usage);
+       clear_bit_unlock(reg, &npu->mmio_atsd_usage);
 }
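 
 /*
  * Sketch for context: the _lock/_unlock bitop variants give this pair
  * acquire/release ordering, so MMIO stores to an ATSD register cannot
  * be reordered past the point where the register is marked free again.
  * An illustrative (not in-tree) usage:
  *
  *     int reg;
  *
  *     do {
  *             reg = get_mmio_atsd_reg(npu);   // acquire
  *             cpu_relax();
  *     } while (reg < 0);
  *
  *     ... program npu->mmio_atsd_regs[reg] and wait ...
  *
  *     put_mmio_atsd_reg(npu, reg);            // release
  */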
 
 /* MMIO ATSD register offsets */
 #define XTS_ATSD_AVA  1
 #define XTS_ATSD_STAT 2
 
-static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
-                               unsigned long va)
+static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
+                               unsigned long launch, unsigned long va)
 {
-       int mmio_atsd_reg;
-
-       do {
-               mmio_atsd_reg = get_mmio_atsd_reg(npu);
-               cpu_relax();
-       } while (mmio_atsd_reg < 0);
+       struct npu *npu = mmio_atsd_reg->npu;
+       int reg = mmio_atsd_reg->reg;
 
        __raw_writeq(cpu_to_be64(va),
-               npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA);
+               npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
        eieio();
-       __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]);
-
-       return mmio_atsd_reg;
+       __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]);
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+                               unsigned long pid, bool flush)
 {
+       int i;
        unsigned long launch;
 
-       /* IS set to invalidate matching PID */
-       launch = PPC_BIT(12);
+       for (i = 0; i <= max_npu2_index; i++) {
+               if (mmio_atsd_reg[i].reg < 0)
+                       continue;
+
+               /* IS set to invalidate matching PID */
+               launch = PPC_BIT(12);
 
-       /* PRS set to process-scoped */
-       launch |= PPC_BIT(13);
+               /* PRS set to process-scoped */
+               launch |= PPC_BIT(13);
 
-       /* AP */
-       launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+               /* AP */
+               launch |= (u64)
+                       mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
 
-       /* PID */
-       launch |= pid << PPC_BITLSHIFT(38);
+               /* PID */
+               launch |= pid << PPC_BITLSHIFT(38);
 
-       /* No flush */
-       launch |= !flush << PPC_BITLSHIFT(39);
+               /* No flush */
+               launch |= !flush << PPC_BITLSHIFT(39);
 
-       /* Invalidating the entire process doesn't use a va */
-       return mmio_launch_invalidate(npu, launch, 0);
+               /* Invalidating the entire process doesn't use a va */
+               mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
+       }
 }
 
-static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-                       unsigned long pid, bool flush)
+static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+                       unsigned long va, unsigned long pid, bool flush)
 {
+       int i;
        unsigned long launch;
 
-       /* IS set to invalidate target VA */
-       launch = 0;
+       for (i = 0; i <= max_npu2_index; i++) {
+               if (mmio_atsd_reg[i].reg < 0)
+                       continue;
 
-       /* PRS set to process scoped */
-       launch |= PPC_BIT(13);
+               /* IS set to invalidate target VA */
+               launch = 0;
 
-       /* AP */
-       launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+               /* PRS set to process scoped */
+               launch |= PPC_BIT(13);
 
-       /* PID */
-       launch |= pid << PPC_BITLSHIFT(38);
+               /* AP */
+               launch |= (u64)
+                       mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
 
-       /* No flush */
-       launch |= !flush << PPC_BITLSHIFT(39);
+               /* PID */
+               launch |= pid << PPC_BITLSHIFT(38);
 
-       return mmio_launch_invalidate(npu, launch, va);
+               /* No flush */
+               launch |= !flush << PPC_BITLSHIFT(39);
+
+               mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
+       }
 }
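 
 /*
  * Editorial sketch: the two loops above build the same launch word
  * except for the IS field and the VA; factored out (bit positions
  * exactly as commented above, helper itself not in the kernel):
  *
  *     static unsigned long atsd_launch_word(bool match_pid,
  *                                     unsigned long pid, bool flush)
  *     {
  *             unsigned long launch = match_pid ? PPC_BIT(12) : 0;
  *
  *             launch |= PPC_BIT(13);          // PRS: process scoped
  *             launch |= (u64)mmu_get_ap(mmu_virtual_psize)
  *                             << PPC_BITLSHIFT(17);
  *             launch |= pid << PPC_BITLSHIFT(38);
  *             launch |= !flush << PPC_BITLSHIFT(39);
  *             return launch;
  *     }
  */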
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
-struct mmio_atsd_reg {
-       struct npu *npu;
-       int reg;
-};
-
 static void mmio_invalidate_wait(
-       struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+       struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
 {
        struct npu *npu;
        int i, reg;
@@ -522,16 +531,67 @@ static void mmio_invalidate_wait(
                reg = mmio_atsd_reg[i].reg;
                while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
                        cpu_relax();
+       }
+}
 
-               put_mmio_atsd_reg(npu, reg);
+/*
+ * Acquires all the address translation shootdown (ATSD) registers required to
+ * launch an ATSD on all links this npu_context is active on.
+ */
+static void acquire_atsd_reg(struct npu_context *npu_context,
+                       struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+       int i, j;
+       struct npu *npu;
+       struct pci_dev *npdev;
+       struct pnv_phb *nphb;
 
+       for (i = 0; i <= max_npu2_index; i++) {
+               mmio_atsd_reg[i].reg = -1;
+               for (j = 0; j < NV_MAX_LINKS; j++) {
+                       /*
+                        * There are no ordering requirements with respect to
+                        * the setup of struct npu_context, but for consistent
+                        * behaviour we need to ensure npdev[][] is only read
+                        * once.
+                        */
+                       npdev = READ_ONCE(npu_context->npdev[i][j]);
+                       if (!npdev)
+                               continue;
+
+                       nphb = pci_bus_to_host(npdev->bus)->private_data;
+                       npu = &nphb->npu;
+                       mmio_atsd_reg[i].npu = npu;
+                       mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+                       while (mmio_atsd_reg[i].reg < 0) {
+                               mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+                               cpu_relax();
+                       }
+                       break;
+               }
+       }
+}
+
+/*
+ * Release previously acquired ATSD registers. To avoid deadlocks the registers
+ * must be released in the same order they were acquired above in
+ * acquire_atsd_reg.
+ */
+static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+       int i;
+
+       for (i = 0; i <= max_npu2_index; i++) {
                /*
-                * The GPU requires two flush ATSDs to ensure all entries have
-                * been flushed. We use PID 0 as it will never be used for a
-                * process on the GPU.
+                * We can't rely on npu_context->npdev[][] being the same here
+                * as when acquire_atsd_reg() was called, hence we use the
+                * values stored in mmio_atsd_reg during the acquire phase
+                * rather than re-reading npdev[][].
                 */
-               if (flush)
-                       mmio_invalidate_pid(npu, 0, true);
+               if (mmio_atsd_reg[i].reg < 0)
+                       continue;
+
+               put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
        }
 }
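
The ordering rule in the comment above is the standard guard against circular wait on a finite resource pool. A sketch of the failure mode it rules out (hypothetical scenario for illustration, not code from the patch):

    /*
     * Without a global order, two concurrent invalidations could wedge:
     *
     *   CPU A: holds the last ATSD register of npu0, spins for one on npu1
     *   CPU B: holds the last ATSD register of npu1, spins for one on npu0
     *
     * Because acquire_atsd_reg() always walks i = 0 .. max_npu2_index,
     * every caller contends for npu0's registers first: whoever loses
     * that race holds nothing while spinning, so no cycle can form and
     * the winner always completes and releases.
     */
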
 
@@ -542,10 +602,6 @@ static void mmio_invalidate_wait(
 static void mmio_invalidate(struct npu_context *npu_context, int va,
                        unsigned long address, bool flush)
 {
-       int i, j;
-       struct npu *npu;
-       struct pnv_phb *nphb;
-       struct pci_dev *npdev;
        struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
        unsigned long pid = npu_context->mm->context.id;
 
@@ -561,37 +617,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
         * Loop over all the NPUs this process is active on and launch
         * an invalidate.
         */
-       for (i = 0; i <= max_npu2_index; i++) {
-               mmio_atsd_reg[i].reg = -1;
-               for (j = 0; j < NV_MAX_LINKS; j++) {
-                       npdev = npu_context->npdev[i][j];
-                       if (!npdev)
-                               continue;
-
-                       nphb = pci_bus_to_host(npdev->bus)->private_data;
-                       npu = &nphb->npu;
-                       mmio_atsd_reg[i].npu = npu;
-
-                       if (va)
-                               mmio_atsd_reg[i].reg =
-                                       mmio_invalidate_va(npu, address, pid,
-                                                       flush);
-                       else
-                               mmio_atsd_reg[i].reg =
-                                       mmio_invalidate_pid(npu, pid, flush);
-
-                       /*
-                        * The NPU hardware forwards the shootdown to all GPUs
-                        * so we only have to launch one shootdown per NPU.
-                        */
-                       break;
-               }
+       acquire_atsd_reg(npu_context, mmio_atsd_reg);
+       if (va)
+               mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+       else
+               mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+
+       mmio_invalidate_wait(mmio_atsd_reg);
+       if (flush) {
+               /*
+                * The GPU requires two flush ATSDs to ensure all entries have
+                * been flushed. We use PID 0 as it will never be used for a
+                * process on the GPU.
+                */
+               mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+               mmio_invalidate_wait(mmio_atsd_reg);
+               mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+               mmio_invalidate_wait(mmio_atsd_reg);
        }
-
-       mmio_invalidate_wait(mmio_atsd_reg, flush);
-       if (flush)
-               /* Wait for the flush to complete */
-               mmio_invalidate_wait(mmio_atsd_reg, false);
+       release_atsd_reg(mmio_atsd_reg);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -680,6 +724,11 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                /* No nvlink associated with this GPU device */
                return ERR_PTR(-ENODEV);
 
+       nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
+       if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
+                                                       &nvlink_index)))
+               return ERR_PTR(-ENODEV);
+
        if (!mm || mm->context.id == 0) {
                /*
                 * Kernel thread contexts are not supported and context id 0 is
@@ -707,26 +756,40 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
         */
        npu_context = mm->context.npu_context;
        if (!npu_context) {
+               rc = -ENOMEM;
                npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
-               if (!npu_context)
-                       return ERR_PTR(-ENOMEM);
+               if (npu_context) {
+                       kref_init(&npu_context->kref);
+                       npu_context->mm = mm;
+                       npu_context->mn.ops = &nv_nmmu_notifier_ops;
+                       rc = __mmu_notifier_register(&npu_context->mn, mm);
+               }
+
+               if (rc) {
+                       kfree(npu_context);
+                       opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+                                       PCI_DEVID(gpdev->bus->number,
+                                               gpdev->devfn));
+                       return ERR_PTR(rc);
+               }
 
                mm->context.npu_context = npu_context;
-               npu_context->mm = mm;
-               npu_context->mn.ops = &nv_nmmu_notifier_ops;
-               __mmu_notifier_register(&npu_context->mn, mm);
-               kref_init(&npu_context->kref);
        } else {
-               kref_get(&npu_context->kref);
+               WARN_ON(!kref_get_unless_zero(&npu_context->kref));
        }
 
        npu_context->release_cb = cb;
        npu_context->priv = priv;
-       nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
-       if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
-                                                       &nvlink_index)))
-               return ERR_PTR(-ENODEV);
-       npu_context->npdev[npu->index][nvlink_index] = npdev;
+
+       /*
+        * npdev is a pci_dev pointer set up by the PCI code. We assign it to
+        * npdev[][] to indicate to the mmu notifiers that an invalidation
+        * should also be sent over this nvlink. The notifiers don't use any
+        * other fields in npu_context, so we just need to ensure that when they
+        * dereference npu_context->npdev[][] it is either a valid pointer or
+        * NULL.
+        */
+       WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
 
        if (!nphb->npu.nmmu_flush) {
                /*
@@ -778,7 +841,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
        if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
                                                        &nvlink_index)))
                return;
-       npu_context->npdev[npu->index][nvlink_index] = NULL;
+       WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
        opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
                                PCI_DEVID(gpdev->bus->number, gpdev->devfn));
        kref_put(&npu_context->kref, pnv_npu2_release_context);
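
The WRITE_ONCE()/READ_ONCE() pairing introduced in this file is the usual one-shot publication pattern: the writer stores a fully valid pointer (or NULL) and the notifier side samples it exactly once. A minimal sketch of why the single read matters (names taken from the patch, scenario illustrative):

    /* Writer side: publish or retract the device pointer so the
     * compiler cannot tear or reorder the store. */
    WRITE_ONCE(npu_context->npdev[i][j], npdev);    /* attach */
    WRITE_ONCE(npu_context->npdev[i][j], NULL);     /* detach */

    /* Reader side: one load, then use the local copy throughout.
     * A plain read could legally be refetched by the compiler, so the
     * pointer might be non-NULL at the check but NULL at the use. */
    struct pci_dev *npdev = READ_ONCE(npu_context->npdev[i][j]);
    if (npdev)
            nphb = pci_bus_to_host(npdev->bus)->private_data;
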
index 2fa3ac80cb4e5c002ae17b1382e2d278282363e0..b37015101bf61d976e9d11cecd144c60f3b60f2b 100644 (file)
@@ -303,26 +303,9 @@ static int opal_flash_update(int op)
        return rc;
 }
 
-/* Return CPUs to OPAL before starting FW update */
-static void flash_return_cpu(void *info)
-{
-       int cpu = smp_processor_id();
-
-       if (!cpu_online(cpu))
-               return;
-
-       /* Disable IRQ */
-       hard_irq_disable();
-
-       /* Return the CPU to OPAL */
-       opal_return_cpu();
-}
-
 /* This gets called just before system reboots */
-void opal_flash_term_callback(void)
+void opal_flash_update_print_message(void)
 {
-       struct cpumask mask;
-
        if (update_flash_data.status != FLASH_IMG_READY)
                return;
 
@@ -333,15 +316,6 @@ void opal_flash_term_callback(void)
 
        /* Small delay to help getting the above message out */
        msleep(500);
-
-       /* Return secondary CPUs to firmware */
-       cpumask_copy(&mask, cpu_online_mask);
-       cpumask_clear_cpu(smp_processor_id(), &mask);
-       if (!cpumask_empty(&mask))
-               smp_call_function_many(&mask,
-                                      flash_return_cpu, NULL, false);
-       /* Hard disable interrupts */
-       hard_irq_disable();
 }
 
 /*
@@ -418,12 +392,12 @@ static int alloc_image_buf(char *buffer, size_t count)
        void *addr;
        int size;
 
-       if (count < sizeof(struct image_header_t)) {
+       if (count < sizeof(image_header)) {
                pr_warn("FLASH: Invalid candidate image\n");
                return -EINVAL;
        }
 
-       memcpy(&image_header, (void *)buffer, sizeof(struct image_header_t));
+       memcpy(&image_header, (void *)buffer, sizeof(image_header));
        image_data.size = be32_to_cpu(image_header.size);
        pr_debug("FLASH: Candidate image size = %u\n", image_data.size);
 
index c9e1a4ff295ccea0d9a716b3c6992adbf54b5aba..4efc95b4c7d41b199a316dcdbc7e6bbd175a1626 100644 (file)
@@ -314,7 +314,7 @@ static int opal_handle_hmi_event(struct notifier_block *nb,
                pr_err("HMI: out of memory, Opal message event not handled\n");
                return -ENOMEM;
        }
-       memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(struct OpalHMIEvent));
+       memcpy(&msg_node->hmi_evt, hmi_evt, sizeof(*hmi_evt));
 
        spin_lock_irqsave(&opal_hmi_evt_lock, flags);
        list_add(&msg_node->list, &opal_hmi_evt_list);
index f6f55ab4980e7684a09942a510daf689f79f6d1c..2a14fda5ea26007754b69052efb1736cd054e79e 100644 (file)
@@ -110,11 +110,11 @@ static int imc_get_mem_addr_nest(struct device_node *node,
        if (nr_chips <= 0)
                return -ENODEV;
 
-       base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL);
+       base_addr_arr = kcalloc(nr_chips, sizeof(*base_addr_arr), GFP_KERNEL);
        if (!base_addr_arr)
                return -ENOMEM;
 
-       chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL);
+       chipid_arr = kcalloc(nr_chips, sizeof(*chipid_arr), GFP_KERNEL);
        if (!chipid_arr)
                return -ENOMEM;
 
@@ -125,8 +125,8 @@ static int imc_get_mem_addr_nest(struct device_node *node,
                                                                nr_chips))
                goto error;
 
-       pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info),
-                                                               GFP_KERNEL);
+       pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info),
+                                   GFP_KERNEL);
        if (!pmu_ptr->mem_info)
                goto error;
 
@@ -161,7 +161,7 @@ static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain)
        u32 offset;
 
        /* memory for pmu */
-       pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);
+       pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
        if (!pmu_ptr)
                return -ENOMEM;
 
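
This hunk and several below apply the same cleanup: size an allocation from the pointer being assigned rather than restating the type. A side-by-side of the idiom, using a name from the hunk:

    struct imc_pmu *pmu_ptr;

    /* Fragile: quietly allocates the wrong size if pmu_ptr's type changes. */
    pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL);

    /* Preferred: the size always tracks whatever pmu_ptr points to. */
    pmu_ptr = kzalloc(sizeof(*pmu_ptr), GFP_KERNEL);
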
index 8ddc1accf199d3f6fbc6ce58c880afaa044e8914..dcb42bcb5efa2417a99d74ada4c066bc5f9214b2 100644 (file)
@@ -112,7 +112,7 @@ static int opal_memory_err_event(struct notifier_block *nb,
                       "handled\n");
                return -ENOMEM;
        }
-       memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+       memcpy(&msg_node->msg, msg, sizeof(msg_node->msg));
 
        spin_lock_irqsave(&opal_mem_err_lock, flags);
        list_add(&msg_node->list, &opal_memory_err_list);
index 9db4398ded5de1f6c8e9a672a51008df1fbc85d7..ba2ff06a2c98b9315e99f2758cedd81903208a85 100644 (file)
@@ -59,6 +59,10 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
                if (rc == OPAL_BUSY_EVENT)
                        opal_poll_events(NULL);
        }
+
+       if (rc)
+               return -EIO;
+
        *index += count;
        return count;
 }
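
The early -EIO return completes the standard OPAL retry convention visible in the surrounding context. Condensed, the loop this hunk hardens looks roughly like the following (a sketch; argument names are illustrative):

    s64 rc = OPAL_BUSY;

    while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
            rc = opal_write_nvram(__pa(buf), count, off);
            if (rc == OPAL_BUSY_EVENT)
                    opal_poll_events(NULL);     /* let OPAL make progress */
    }

    /* New here: a terminal error must not fall through and report success. */
    if (rc)
            return -EIO;

    *index += count;
    return count;
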
index 7313b7fc9071f889cd3053ce938de9b2cf313a57..74986b35cf774a2032441b5167e3697ac06a5c4b 100644 (file)
@@ -136,7 +136,7 @@ void __init opal_psr_init(void)
                return;
        }
 
-       psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr),
+       psr_attrs = kcalloc(of_get_child_count(psr), sizeof(*psr_attrs),
                            GFP_KERNEL);
        if (!psr_attrs)
                return;
index 7e5a235ebf767700649d2a57202ec3dd4b3616ff..541c9ea04a32f6fac9094b2d36779978aa1043cb 100644 (file)
@@ -166,13 +166,13 @@ void __init opal_sensor_groups_init(void)
                if (!nr_attrs)
                        continue;
 
-               sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr),
+               sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(*sgs[i].sgattrs),
                                         GFP_KERNEL);
                if (!sgs[i].sgattrs)
                        goto out_sgs_sgattrs;
 
                sgs[i].sg.attrs = kcalloc(nr_attrs + 1,
-                                         sizeof(struct attribute *),
+                                         sizeof(*sgs[i].sg.attrs),
                                          GFP_KERNEL);
 
                if (!sgs[i].sg.attrs) {
index 1b2936ba604087cc33673ebe5a564a5dca93a5e5..3da30c2f26b45875fd38e6449c54cb2010c847d2 100644 (file)
@@ -323,3 +323,5 @@ OPAL_CALL(opal_sensor_group_clear,          OPAL_SENSOR_GROUP_CLEAR);
 OPAL_CALL(opal_npu_spa_setup,                  OPAL_NPU_SPA_SETUP);
 OPAL_CALL(opal_npu_spa_clear_cache,            OPAL_NPU_SPA_CLEAR_CACHE);
 OPAL_CALL(opal_npu_tl_set,                     OPAL_NPU_TL_SET);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,                OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,                OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
index 81c0a943dea9bb581d0f6d663df5c431a5529127..22d5e1110dbb9e8a25d3c1c600b91897250eb642 100644 (file)
@@ -46,7 +46,7 @@ static scom_map_t opal_scom_map(struct device_node *dev, u64 reg, u64 count)
                        __func__, dev);
                return SCOM_MAP_INVALID;
        }
-       m = kmalloc(sizeof(struct opal_scom_map), GFP_KERNEL);
+       m = kmalloc(sizeof(*m), GFP_KERNEL);
        if (!m)
                return NULL;
        m->chip = be32_to_cpup(gcid);
index c15182765ff59a4809ac5777a8bdeb4fae0b89e9..516e23de5a3d3fe0ecd28aba329e22c13db48eab 100644 (file)
@@ -490,9 +490,12 @@ void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
         *    opal to trigger checkstop explicitly for error analysis.
         *    The FSP PRD component would have already got notified
         *    about this error through other channels.
+        * 4. We are running on a newer skiboot that by default does
+        *    not cause a checkstop, and instead drops us back to the
+        *    kernel to extract context and state at the time of the error.
         */
 
-       ppc_md.restart(NULL);
+       panic(msg);
 }
 
 int opal_machine_check(struct pt_regs *regs)
index 94498a04558b1f20bb9ba73808994997ddeee2bf..cee003de63afbdb2800626c44a9cb8f5c27ea063 100644 (file)
 
 #include "pci.h"
 
-struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
-{
-       struct pci_controller *hose = pci_bus_to_host(dev->bus);
-
-       return of_node_get(hose->dn);
-}
-EXPORT_SYMBOL(pnv_pci_get_phb_node);
-
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode)
 {
        struct pci_controller *hose = pci_bus_to_host(dev->bus);
index a6c92c78c9b20b9cc2507cfb5c3e749e05a6e19d..3f9c69d7623a9ad34d99d4d889cfcd516b5b35b0 100644 (file)
@@ -2681,14 +2681,23 @@ static struct pnv_ioda_pe *gpe_table_group_to_npe(
 static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
                int num, struct iommu_table *tbl)
 {
+       struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
+       int num2 = (num == 0) ? 1 : 0;
        long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
 
        if (ret)
                return ret;
 
-       ret = pnv_npu_set_window(gpe_table_group_to_npe(table_group), num, tbl);
-       if (ret)
+       if (table_group->tables[num2])
+               pnv_npu_unset_window(npe, num2);
+
+       ret = pnv_npu_set_window(npe, num, tbl);
+       if (ret) {
                pnv_pci_ioda2_unset_window(table_group, num);
+               if (table_group->tables[num2])
+                       pnv_npu_set_window(npe, num2,
+                                       table_group->tables[num2]);
+       }
 
        return ret;
 }
@@ -2697,12 +2706,24 @@ static long pnv_pci_ioda2_npu_unset_window(
                struct iommu_table_group *table_group,
                int num)
 {
+       struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
+       int num2 = (num == 0) ? 1 : 0;
        long ret = pnv_pci_ioda2_unset_window(table_group, num);
 
        if (ret)
                return ret;
 
-       return pnv_npu_unset_window(gpe_table_group_to_npe(table_group), num);
+       if (!npe->table_group.tables[num])
+               return 0;
+
+       ret = pnv_npu_unset_window(npe, num);
+       if (ret)
+               return ret;
+
+       if (table_group->tables[num2])
+               ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]);
+
+       return ret;
 }
 
 static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
@@ -3843,7 +3864,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
        phb_id = be64_to_cpup(prop64);
        pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);
 
-       phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0);
+       phb = memblock_virt_alloc(sizeof(*phb), 0);
 
        /* Allocate PCI controller */
        phb->hose = hose = pcibios_alloc_controller(np);
index 69d102cbf48f2b386f58cb88223ee99f25da797e..b265ecc0836a9e89d4347faf211f2b9506dd66cf 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/io.h>
 #include <linux/msi.h>
 #include <linux/iommu.h>
+#include <linux/sched/mm.h>
 
 #include <asm/sections.h>
 #include <asm/io.h>
@@ -38,6 +39,7 @@
 #include "pci.h"
 
 static DEFINE_MUTEX(p2p_mutex);
+static DEFINE_MUTEX(tunnel_mutex);
 
 int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
 {
@@ -1092,6 +1094,139 @@ int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc)
 }
 EXPORT_SYMBOL_GPL(pnv_pci_set_p2p);
 
+struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev)
+{
+       struct pci_controller *hose = pci_bus_to_host(dev->bus);
+
+       return of_node_get(hose->dn);
+}
+EXPORT_SYMBOL(pnv_pci_get_phb_node);
+
+int pnv_pci_enable_tunnel(struct pci_dev *dev, u64 *asnind)
+{
+       struct device_node *np;
+       const __be32 *prop;
+       struct pnv_ioda_pe *pe;
+       uint16_t window_id;
+       int rc;
+
+       if (!radix_enabled())
+               return -ENXIO;
+
+       if (!(np = pnv_pci_get_phb_node(dev)))
+               return -ENXIO;
+
+       prop = of_get_property(np, "ibm,phb-indications", NULL);
+       of_node_put(np);
+
+       if (!prop || !prop[1])
+               return -ENXIO;
+
+       *asnind = (u64)be32_to_cpu(prop[1]);
+       pe = pnv_ioda_get_pe(dev);
+       if (!pe)
+               return -ENODEV;
+
+       /* Increase real window size to accept as_notify messages. */
+       window_id = (pe->pe_number << 1) + 1;
+       rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, pe->pe_number,
+                                            window_id, pe->tce_bypass_base,
+                                            (uint64_t)1 << 48);
+       return opal_error_code(rc);
+}
+EXPORT_SYMBOL_GPL(pnv_pci_enable_tunnel);
+
+int pnv_pci_disable_tunnel(struct pci_dev *dev)
+{
+       struct pnv_ioda_pe *pe;
+
+       pe = pnv_ioda_get_pe(dev);
+       if (!pe)
+               return -ENODEV;
+
+       /* Restore default real window size. */
+       pnv_pci_ioda2_set_bypass(pe, true);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_disable_tunnel);
+
+int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
+{
+       __be64 val;
+       struct pci_controller *hose;
+       struct pnv_phb *phb;
+       u64 tunnel_bar;
+       int rc;
+
+       if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR))
+               return -ENXIO;
+       if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
+               return -ENXIO;
+
+       hose = pci_bus_to_host(dev->bus);
+       phb = hose->private_data;
+
+       mutex_lock(&tunnel_mutex);
+       rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val);
+       if (rc != OPAL_SUCCESS) {
+               rc = -EIO;
+               goto out;
+       }
+       tunnel_bar = be64_to_cpu(val);
+       if (enable) {
+               /*
+                * Only one device per PHB can use atomics.
+                * Our policy is first-come, first-served.
+                */
+               if (tunnel_bar) {
+                       if (tunnel_bar != addr)
+                               rc = -EBUSY;
+                       else
+                               rc = 0; /* Setting same address twice is ok */
+                       goto out;
+               }
+       } else {
+               /*
+                * The device that owns atomics and wants to release
+                * them must pass the same address with enable == 0.
+                */
+               if (tunnel_bar != addr) {
+                       rc = -EPERM;
+                       goto out;
+               }
+               addr = 0x0ULL;
+       }
+       rc = opal_pci_set_pbcq_tunnel_bar(phb->opal_id, addr);
+       rc = opal_error_code(rc);
+out:
+       mutex_unlock(&tunnel_mutex);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_set_tunnel_bar);
+
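
Taken together with pnv_pci_enable_tunnel()/pnv_pci_disable_tunnel(), this implements a first-come, first-served ownership handshake per PHB. A hypothetical caller (flow assumed for illustration, not from the patch):

    /* Claim the PBCQ tunnel BAR; -EBUSY means another device owns it. */
    rc = pnv_pci_set_tunnel_bar(dev, addr, 1);
    if (rc)
            return rc;

    /* ... issue tunneled operations / as_notify messages ... */

    /* Release: the owner must pass back the same address with
     * enable == 0, otherwise it gets -EPERM. */
    pnv_pci_set_tunnel_bar(dev, addr, 0);
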
+#ifdef CONFIG_PPC64    /* for thread.tidr */
+int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid, u32 *pid,
+                              u32 *tid)
+{
+       struct mm_struct *mm = NULL;
+
+       if (task == NULL)
+               return -EINVAL;
+
+       mm = get_task_mm(task);
+       if (mm == NULL)
+               return -EINVAL;
+
+       *pid = mm->context.id;
+       mmput(mm);
+
+       *tid = task->thread.tidr;
+       *lpid = mfspr(SPRN_LPID);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnv_pci_get_as_notify_info);
+#endif
+
 void pnv_pci_shutdown(void)
 {
        struct pci_controller *hose;
index 092715b9674bb93309b3793ed4ccd2719407e5db..ef8c9ce53a616910d264f4875b9ec72311a110a8 100644 (file)
 #include <asm/smp.h>
 #include <asm/tm.h>
 #include <asm/setup.h>
+#include <asm/security_features.h>
 
 #include "powernv.h"
 
+
+static bool fw_feature_is(const char *state, const char *name,
+                         struct device_node *fw_features)
+{
+       struct device_node *np;
+       bool rc = false;
+
+       np = of_get_child_by_name(fw_features, name);
+       if (np) {
+               rc = of_property_read_bool(np, state);
+               of_node_put(np);
+       }
+
+       return rc;
+}
+
+static void init_fw_feat_flags(struct device_node *np)
+{
+       if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np))
+               security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+       if (fw_feature_is("enabled", "fw-bcctrl-serialized", np))
+               security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+       if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np))
+               security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+       if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np))
+               security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+       if (fw_feature_is("enabled", "fw-l1d-thread-split", np))
+               security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+       if (fw_feature_is("enabled", "fw-count-cache-disabled", np))
+               security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+       /*
+        * The features below are enabled by default, so we instead look to see
+        * if firmware has *disabled* them, and clear them if so.
+        */
+       if (fw_feature_is("disabled", "speculation-policy-favor-security", np))
+               security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+       if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np))
+               security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+       if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np))
+               security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
+
+       if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np))
+               security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
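
init_fw_feat_flags() folds the device tree state into a global feature mask. The helpers it calls are plain bit operations, roughly as below (a sketch of the expected shape; see asm/security_features.h for the real definitions):

    extern u64 powerpc_security_features;

    static inline void security_ftr_set(u64 feature)
    {
            powerpc_security_features |= feature;
    }

    static inline void security_ftr_clear(u64 feature)
    {
            powerpc_security_features &= ~feature;
    }

    static inline bool security_ftr_enabled(u64 feature)
    {
            return !!(powerpc_security_features & feature);
    }
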
+
 static void pnv_setup_rfi_flush(void)
 {
        struct device_node *np, *fw_features;
        enum l1d_flush_type type;
-       int enable;
+       bool enable;
 
        /* Default to fallback in case fw-features are not available */
        type = L1D_FLUSH_FALLBACK;
-       enable = 1;
 
        np = of_find_node_by_name(NULL, "ibm,opal");
        fw_features = of_get_child_by_name(np, "fw-features");
        of_node_put(np);
 
        if (fw_features) {
-               np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2");
-               if (np && of_property_read_bool(np, "enabled"))
-                       type = L1D_FLUSH_MTTRIG;
+               init_fw_feat_flags(fw_features);
+               of_node_put(fw_features);
 
-               of_node_put(np);
+               if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+                       type = L1D_FLUSH_MTTRIG;
 
-               np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0");
-               if (np && of_property_read_bool(np, "enabled"))
+               if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
                        type = L1D_FLUSH_ORI;
-
-               of_node_put(np);
-
-               /* Enable unless firmware says NOT to */
-               enable = 2;
-               np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0");
-               if (np && of_property_read_bool(np, "disabled"))
-                       enable--;
-
-               of_node_put(np);
-
-               np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1");
-               if (np && of_property_read_bool(np, "disabled"))
-                       enable--;
-
-               np = of_get_child_by_name(fw_features, "speculation-policy-favor-security");
-               if (np && of_property_read_bool(np, "disabled"))
-                       enable = 0;
-
-               of_node_put(np);
-               of_node_put(fw_features);
        }
 
-       setup_rfi_flush(type, enable > 0);
+       enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+                (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)   || \
+                 security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
+
+       setup_rfi_flush(type, enable);
 }
 
 static void __init pnv_setup_arch(void)
@@ -166,17 +201,12 @@ static void pnv_prepare_going_down(void)
         */
        opal_event_shutdown();
 
-       /* Soft disable interrupts */
-       local_irq_disable();
+       /* Print flash update message if one is scheduled. */
+       opal_flash_update_print_message();
 
-       /*
-        * Return secondary CPUs to firwmare if a flash update
-        * is pending otherwise we will get all sort of error
-        * messages about CPU being stuck etc.. This will also
-        * have the side effect of hard disabling interrupts so
-        * past this point, the kernel is effectively dead.
-        */
-       opal_flash_term_callback();
+       smp_send_stop();
+
+       hard_irq_disable();
 }
 
 static void  __noreturn pnv_restart(char *cmd)
@@ -258,7 +288,7 @@ static void pnv_kexec_wait_secondaries_down(void)
                        if (i != notified) {
                                printk(KERN_INFO "kexec: waiting for cpu %d "
                                       "(physical %d) to enter OPAL\n",
-                                      i, paca[i].hw_cpu_id);
+                                      i, paca_ptrs[i]->hw_cpu_id);
                                notified = i;
                        }
 
@@ -270,7 +300,7 @@ static void pnv_kexec_wait_secondaries_down(void)
                        if (timeout-- == 0) {
                                printk(KERN_ERR "kexec: timed out waiting for "
                                       "cpu %d (physical %d) to enter OPAL\n",
-                                      i, paca[i].hw_cpu_id);
+                                      i, paca_ptrs[i]->hw_cpu_id);
                                break;
                        }
                }
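
The paca[i].field to paca_ptrs[i]->field conversions seen here recur across this pull: the flat static array of paca structs becomes an array of per-CPU pointers, so each paca can be allocated independently (for example, on an appropriate node). Schematically, with illustrative declarations:

    /* Before: one flat array; every CPU's paca lives in one block. */
    struct paca_struct paca[NR_CPUS];
    pr_info("cpu %d hw id %d\n", i, paca[i].hw_cpu_id);

    /* After: an array of pointers; each paca is its own allocation. */
    struct paca_struct **paca_ptrs;
    pr_info("cpu %d hw id %d\n", i, paca_ptrs[i]->hw_cpu_id);
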
index 9664c8461f032c1eed34d28660034b5a1adcc873..19af6de6b6f00f602a7630770875d8cdfa2fdc24 100644 (file)
@@ -80,7 +80,7 @@ static int pnv_smp_kick_cpu(int nr)
         * If we already started or OPAL is not supported, we just
         * kick the CPU via the PACA
         */
-       if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
+       if (paca_ptrs[nr]->cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
                goto kick;
 
        /*
index 596ae2e98040d1db175af075887e9280294865cc..45563004feda66a5bf8abc3a0fae7b2251f36cd5 100644 (file)
@@ -280,7 +280,7 @@ void update_subcore_sibling_mask(void)
                int offset = (tid / threads_per_subcore) * threads_per_subcore;
                int mask = sibling_mask_first_cpu << offset;
 
-               paca[cpu].subcore_sibling_mask = mask;
+               paca_ptrs[cpu]->subcore_sibling_mask = mask;
 
        }
 }
index ca22f1eae050d25d4aef64f1db31f0f10b5641d4..4f7276ebdf9ccbd472ca182646295209ad75f45f 100644 (file)
@@ -166,19 +166,20 @@ void vas_window_init_dbgdir(struct vas_window *window)
 
        return;
 
-free_name:
-       kfree(window->dbgname);
-       window->dbgname = NULL;
-
 remove_dir:
        debugfs_remove_recursive(window->dbgdir);
        window->dbgdir = NULL;
+
+free_name:
+       kfree(window->dbgname);
+       window->dbgname = NULL;
 }
 
 void vas_instance_init_dbgdir(struct vas_instance *vinst)
 {
        struct dentry *d;
 
+       vas_init_dbgdir();
        if (!vas_debugfs)
                return;
 
@@ -201,8 +202,18 @@ void vas_instance_init_dbgdir(struct vas_instance *vinst)
        vinst->dbgdir = NULL;
 }
 
+/*
+ * Set up the "root" VAS debugfs dir. Return if we already set it up
+ * (or failed to) in an earlier instance of VAS.
+ */
 void vas_init_dbgdir(void)
 {
+       static bool first_time = true;
+
+       if (!first_time)
+               return;
+
+       first_time = false;
        vas_debugfs = debugfs_create_dir("vas", NULL);
        if (IS_ERR(vas_debugfs))
                vas_debugfs = NULL;
diff --git a/arch/powerpc/platforms/powernv/vas-trace.h b/arch/powerpc/platforms/powernv/vas-trace.h
new file mode 100644 (file)
index 0000000..a449b9f
--- /dev/null
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM   vas
+
+#if !defined(_VAS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+
+#define _VAS_TRACE_H
+#include <linux/tracepoint.h>
+#include <linux/sched.h>
+#include <asm/vas.h>
+
+TRACE_EVENT(   vas_rx_win_open,
+
+               TP_PROTO(struct task_struct *tsk,
+                        int vasid,
+                        int cop,
+                        struct vas_rx_win_attr *rxattr),
+
+               TP_ARGS(tsk, vasid, cop, rxattr),
+
+               TP_STRUCT__entry(
+                       __field(struct task_struct *, tsk)
+                       __field(int, pid)
+                       __field(int, cop)
+                       __field(int, vasid)
+                       __field(struct vas_rx_win_attr *, rxattr)
+                       __field(int, lnotify_lpid)
+                       __field(int, lnotify_pid)
+                       __field(int, lnotify_tid)
+               ),
+
+               TP_fast_assign(
+                       __entry->pid = tsk->pid;
+                       __entry->vasid = vasid;
+                       __entry->cop = cop;
+                       __entry->lnotify_lpid = rxattr->lnotify_lpid;
+                       __entry->lnotify_pid = rxattr->lnotify_pid;
+                       __entry->lnotify_tid = rxattr->lnotify_tid;
+               ),
+
+               TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pid=%d, tid=%d",
+                       __entry->pid, __entry->vasid, __entry->cop,
+                       __entry->lnotify_lpid, __entry->lnotify_pid,
+                       __entry->lnotify_tid)
+);
+
+TRACE_EVENT(   vas_tx_win_open,
+
+               TP_PROTO(struct task_struct *tsk,
+                        int vasid,
+                        int cop,
+                        struct vas_tx_win_attr *txattr),
+
+               TP_ARGS(tsk, vasid, cop, txattr),
+
+               TP_STRUCT__entry(
+                       __field(struct task_struct *, tsk)
+                       __field(int, pid)
+                       __field(int, cop)
+                       __field(int, vasid)
+                       __field(struct vas_tx_win_attr *, txattr)
+                       __field(int, lpid)
+                       __field(int, pidr)
+               ),
+
+               TP_fast_assign(
+                       __entry->pid = tsk->pid;
+                       __entry->vasid = vasid;
+                       __entry->cop = cop;
+                       __entry->lpid = txattr->lpid;
+                       __entry->pidr = txattr->pidr;
+               ),
+
+               TP_printk("pid=%d, vasid=%d, cop=%d, lpid=%d, pidr=%d",
+                       __entry->pid, __entry->vasid, __entry->cop,
+                       __entry->lpid, __entry->pidr)
+);
+
+TRACE_EVENT(   vas_paste_crb,
+
+               TP_PROTO(struct task_struct *tsk,
+                       struct vas_window *win),
+
+               TP_ARGS(tsk, win),
+
+               TP_STRUCT__entry(
+                       __field(struct task_struct *, tsk)
+                       __field(struct vas_window *, win)
+                       __field(int, pid)
+                       __field(int, vasid)
+                       __field(int, winid)
+                       __field(unsigned long, paste_kaddr)
+               ),
+
+               TP_fast_assign(
+                       __entry->pid = tsk->pid;
+                       __entry->vasid = win->vinst->vas_id;
+                       __entry->winid = win->winid;
+                       __entry->paste_kaddr = (unsigned long)win->paste_kaddr;
+               ),
+
+               TP_printk("pid=%d, vasid=%d, winid=%d, paste_kaddr=0x%016lx\n",
+                       __entry->pid, __entry->vasid, __entry->winid,
+                       __entry->paste_kaddr)
+);
+
+#endif /* _VAS_TRACE_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/powerpc/platforms/powernv
+#define TRACE_INCLUDE_FILE vas-trace
+#include <trace/define_trace.h>
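
A tracepoint header like this only declares the events. Exactly one translation unit must expand the definitions by defining CREATE_TRACE_POINTS before the include, which is what the window.c hunk further below does; after that the events are invoked like ordinary functions:

    /* In exactly one .c file (here, window.c): */
    #define CREATE_TRACE_POINTS
    #include "vas-trace.h"

    /* At the instrumented call sites: */
    trace_vas_rx_win_open(current, vasid, cop, rxattr);
    trace_vas_tx_win_open(current, vasid, cop, attr);
    trace_vas_paste_crb(current, txwin);
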
index b7c53a51c31bbe5ba5fa62adbf6d1e97c3c1e8c3..ff9f488123310b0503dcb1b139af6d3ee4afe567 100644 (file)
@@ -21,6 +21,9 @@
 #include "vas.h"
 #include "copy-paste.h"
 
+#define CREATE_TRACE_POINTS
+#include "vas-trace.h"
+
 /*
  * Compute the paste address region for the window @window using the
  * ->paste_base_addr and ->paste_win_id_shift we got from device tree.
@@ -880,6 +883,8 @@ struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
        struct vas_winctx winctx;
        struct vas_instance *vinst;
 
+       trace_vas_rx_win_open(current, vasid, cop, rxattr);
+
        if (!rx_win_args_valid(cop, rxattr))
                return ERR_PTR(-EINVAL);
 
@@ -1008,6 +1013,8 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
        struct vas_winctx winctx;
        struct vas_instance *vinst;
 
+       trace_vas_tx_win_open(current, vasid, cop, attr);
+
        if (!tx_win_args_valid(cop, attr))
                return ERR_PTR(-EINVAL);
 
@@ -1100,6 +1107,8 @@ int vas_paste_crb(struct vas_window *txwin, int offset, bool re)
        void *addr;
        uint64_t val;
 
+       trace_vas_paste_crb(current, txwin);
+
        /*
         * Only NX windows are supported for now and hardware assumes
         * report-enable flag is set for NX windows. Ensure software
index aebbe95c9230bc4b85016a945303307aa96c369e..5a2b24cbbc886df50f875a4674a635c159fef104 100644 (file)
@@ -160,8 +160,6 @@ static int __init vas_init(void)
        int found = 0;
        struct device_node *dn;
 
-       vas_init_dbgdir();
-
        platform_driver_register(&vas_driver);
 
        for_each_compatible_node(dn, NULL, "ibm,vas") {
@@ -169,8 +167,10 @@ static int __init vas_init(void)
                found++;
        }
 
-       if (!found)
+       if (!found) {
+               platform_driver_unregister(&vas_driver);
                return -ENODEV;
+       }
 
        pr_devel("Found %d instances\n", found);
 
index 7f870ec29daf0675e366daff6552096de03adb41..8c7009d001d96aba4766a2f590ea3011347b9181 100644 (file)
@@ -524,8 +524,7 @@ static int dma_sb_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
        int result;
        struct dma_chunk *c;
 
-       c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
-
+       c = kzalloc(sizeof(*c), GFP_ATOMIC);
        if (!c) {
                result = -ENOMEM;
                goto fail_alloc;
@@ -570,8 +569,7 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr,
 
        DBG(KERN_ERR "%s: phy=%#lx, lpar%#lx, len=%#lx\n", __func__,
            phys_addr, ps3_mm_phys_to_lpar(phys_addr), len);
-       c = kzalloc(sizeof(struct dma_chunk), GFP_ATOMIC);
-
+       c = kzalloc(sizeof(*c), GFP_ATOMIC);
        if (!c) {
                result = -ENOMEM;
                goto fail_alloc;
index 652d3e96b812b93834323e1a3a60a1e5bbab3612..6ef77caf7bcf47a7af035d31891707f9dd40a3b2 100644 (file)
@@ -234,7 +234,7 @@ static void pseries_cpu_die(unsigned int cpu)
         * done here.  Change isolate state to Isolate and
         * change allocation-state to Unusable.
         */
-       paca[cpu].cpu_start = 0;
+       paca_ptrs[cpu]->cpu_start = 0;
 }
 
 /*
index eeb13429d68535e50e2d80f05ce8d89ba01fbc4c..3fe12679697549154b9ebe3905ed062da1f40e93 100644 (file)
 
 void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
 {
-       /* Don't risk a hypervisor call if we're crashing */
+       /*
+        * Don't risk a hypervisor call if we're crashing.
+        * XXX: Why? The hypervisor is not crashing. It might be better
+        * to at least attempt to unregister to avoid the hypervisor
+        * stepping on our memory.
+        */
        if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
                int ret;
                int cpu = smp_processor_id();
index 0ee4a469a4ae1df8fb5d3ff871adcfba5e58093a..adb996ed51e13a0ac814b64d929db1d476c284ea 100644 (file)
@@ -99,7 +99,7 @@ void vpa_init(int cpu)
         * reports that.  All SPLPAR support SLB shadow buffer.
         */
        if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
-               addr = __pa(paca[cpu].slb_shadow_ptr);
+               addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr);
                ret = register_slb_shadow(hwcpu, addr);
                if (ret)
                        pr_err("WARNING: SLB shadow buffer registration for "
@@ -111,7 +111,7 @@ void vpa_init(int cpu)
        /*
         * Register dispatch trace log, if one has been allocated.
         */
-       pp = &paca[cpu];
+       pp = paca_ptrs[cpu];
        dtl = pp->dispatch_log;
        if (dtl) {
                pp->dtl_ridx = 0;
@@ -306,14 +306,14 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
 
        want_v = hpte_encode_avpn(vpn, psize, ssize);
 
-       pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
-                want_v, slot, flags, psize);
-
        flags = (newpp & 7) | H_AVPN;
        if (mmu_has_feature(MMU_FTR_KERNEL_RO))
                /* Move pp0 into bit 8 (IBM 55) */
                flags |= (newpp & HPTE_R_PP0) >> 55;
 
+       pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
+                want_v, slot, flags, psize);
+
        lpar_rc = plpar_pte_protect(flags, slot, want_v);
 
        if (lpar_rc == H_NOT_FOUND) {
@@ -726,15 +726,18 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
        return 0;
 }
 
-/* Actually only used for radix, so far */
 static int pseries_lpar_register_process_table(unsigned long base,
                        unsigned long page_size, unsigned long table_size)
 {
        long rc;
-       unsigned long flags = PROC_TABLE_NEW;
+       unsigned long flags = 0;
 
+       if (table_size)
+               flags |= PROC_TABLE_NEW;
        if (radix_enabled())
                flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
+       else
+               flags |= PROC_TABLE_HPT_SLB;
        for (;;) {
                rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
                                        page_size, table_size);
@@ -760,6 +763,7 @@ void __init hpte_init_pseries(void)
        mmu_hash_ops.flush_hash_range    = pSeries_lpar_flush_hash_range;
        mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
        mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+       register_process_table           = pseries_lpar_register_process_table;
 
        if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
                mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
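
With the rework above, pseries_lpar_register_process_table() now serves both MMU modes. The flag combinations handed to H_REGISTER_PROC_TBL reduce to the following (derived from the code in this hunk):

    /*
     * table_size != 0 : flags |= PROC_TABLE_NEW      (registering a table)
     * radix guest     : flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE
     * hash guest      : flags |= PROC_TABLE_HPT_SLB
     *
     * e.g. registering a new radix process table passes
     *      PROC_TABLE_NEW | PROC_TABLE_RADIX | PROC_TABLE_GTSE
     */
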
index 0f7fb7170b03ce5646aa13b9b9b55f5965f09aae..8a8033a249c738f10bfa3bf60e861b2fc8c715d3 100644 (file)
@@ -348,6 +348,9 @@ void post_mobility_fixup(void)
                printk(KERN_ERR "Post-mobility device tree update "
                        "failed: %d\n", rc);
 
+       /* Possibly switch to a new RFI flush type */
+       pseries_setup_rfi_flush();
+
        return;
 }
 
index 1ae1d9f4dbe99935130971cdc390d8f581ad788e..60db2ee511fb4669bc53d84686fddc317bda4148 100644 (file)
@@ -27,6 +27,14 @@ extern int pSeries_machine_check_exception(struct pt_regs *regs);
 
 #ifdef CONFIG_SMP
 extern void smp_init_pseries(void);
+
+/* Get state of physical CPU from query_cpu_stopped */
+int smp_query_cpu_stopped(unsigned int pcpu);
+#define QCSS_STOPPED 0
+#define QCSS_STOPPING 1
+#define QCSS_NOT_STOPPED 2
+#define QCSS_HARDWARE_ERROR -1
+#define QCSS_HARDWARE_BUSY -2
 #else
 static inline void smp_init_pseries(void) { };
 #endif
@@ -100,4 +108,6 @@ static inline unsigned long cmo_get_page_size(void)
 
 int dlpar_workqueue_init(void);
 
+void pseries_setup_rfi_flush(void);
+
 #endif /* _PSERIES_PSERIES_H */
index 1a527625acf78dea27f7bde430e23f02f12f5baf..b55ad4286dc7f81f0c9d2e7d130860757773c3b3 100644 (file)
@@ -68,6 +68,7 @@
 #include <asm/plpar_wrappers.h>
 #include <asm/kexec.h>
 #include <asm/isa-bridge.h>
+#include <asm/security_features.h>
 
 #include "pseries.h"
 
@@ -246,7 +247,7 @@ static int alloc_dispatch_logs(void)
                return 0;
 
        for_each_possible_cpu(cpu) {
-               pp = &paca[cpu];
+               pp = paca_ptrs[cpu];
                dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
                if (!dtl) {
                        pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
@@ -459,36 +460,78 @@ static void __init find_and_init_phbs(void)
        of_pci_check_probe_only();
 }
 
-static void pseries_setup_rfi_flush(void)
+static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
+{
+       /*
+        * The features below are disabled by default, so we instead look to see
+        * if firmware has *enabled* them, and set them if so.
+        */
+       if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31)
+               security_ftr_set(SEC_FTR_SPEC_BAR_ORI31);
+
+       if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED)
+               security_ftr_set(SEC_FTR_BCCTRL_SERIALISED);
+
+       if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30)
+               security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30);
+
+       if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
+               security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2);
+
+       if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV)
+               security_ftr_set(SEC_FTR_L1D_THREAD_PRIV);
+
+       if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED)
+               security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED);
+
+       /*
+        * The features below are enabled by default, so we instead look to see
+        * if firmware has *disabled* them, and clear them if so.
+        */
+       if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
+               security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
+
+       if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
+               security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
+
+       if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+               security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
+}
+
+void pseries_setup_rfi_flush(void)
 {
        struct h_cpu_char_result result;
        enum l1d_flush_type types;
        bool enable;
        long rc;
 
-       /* Enable by default */
-       enable = true;
+       /*
+        * Set features to the defaults assumed by init_cpu_char_feature_flags()
+        * so it can set/clear again any features that might have changed after
+        * migration, and in case the hypercall fails or is not even called.
+        */
+       powerpc_security_features = SEC_FTR_DEFAULT;
 
        rc = plpar_get_cpu_characteristics(&result);
-       if (rc == H_SUCCESS) {
-               types = L1D_FLUSH_NONE;
+       if (rc == H_SUCCESS)
+               init_cpu_char_feature_flags(&result);
 
-               if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2)
-                       types |= L1D_FLUSH_MTTRIG;
-               if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30)
-                       types |= L1D_FLUSH_ORI;
+       /*
+        * We're the guest so this doesn't apply to us, clear it to simplify
+        * handling of it elsewhere.
+        */
+       security_ftr_clear(SEC_FTR_L1D_FLUSH_HV);
 
-               /* Use fallback if nothing set in hcall */
-               if (types == L1D_FLUSH_NONE)
-                       types = L1D_FLUSH_FALLBACK;
+       types = L1D_FLUSH_FALLBACK;
 
-               if ((!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) ||
-                   (!(result.behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)))
-                       enable = false;
-       } else {
-               /* Default to fallback if case hcall is not available */
-               types = L1D_FLUSH_FALLBACK;
-       }
+       if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2))
+               types |= L1D_FLUSH_MTTRIG;
+
+       if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30))
+               types |= L1D_FLUSH_ORI;
+
+       enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
+                security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR);
 
        setup_rfi_flush(types, enable);
 }
@@ -739,7 +782,7 @@ static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
        /* PAPR says we can't set HYP */
        dawrx &= ~DAWRX_HYP;
 
-       return  plapr_set_watchpoint0(dawr, dawrx);
+       return  plpar_set_watchpoint0(dawr, dawrx);
 }
 
 #define CMO_CHARACTERISTICS_TOKEN 44
index 2e184829e5d496d7ce1ecf0808c3a6185b04395b..3df46123cce34961e26345a8db9e650135feea02 100644 (file)
@@ -110,7 +110,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
        }
 
        /* Fixup atomic count: it exited inside IRQ handler. */
-       task_thread_info(paca[lcpu].__current)->preempt_count   = 0;
+       task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count     = 0;
 #ifdef CONFIG_HOTPLUG_CPU
        if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
                goto out;
@@ -165,7 +165,7 @@ static int smp_pSeries_kick_cpu(int nr)
         * cpu_start field to become non-zero After we set cpu_start,
         * the processor will continue on to secondary_start
         */
-       paca[nr].cpu_start = 1;
+       paca_ptrs[nr]->cpu_start = 1;
 #ifdef CONFIG_HOTPLUG_CPU
        set_preferred_offline_state(nr, CPU_STATE_ONLINE);
 
@@ -215,7 +215,7 @@ static int pseries_cause_nmi_ipi(int cpu)
                hwcpu = get_hard_smp_processor_id(cpu);
        }
 
-       if (plapr_signal_sys_reset(hwcpu) == H_SUCCESS)
+       if (plpar_signal_sys_reset(hwcpu) == H_SUCCESS)
                return 1;
 
        return 0;
index 73067805300a26338b0868292cff95c8b78f5ce1..1d4e0ef658d38f4ba7deafa877d1250b82e1459c 100644 (file)
@@ -626,7 +626,7 @@ static inline u32 mpic_physmask(u32 cpumask)
        int i;
        u32 mask = 0;
 
-       for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
+       for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1)
                mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
        return mask;
 }
index 1459f4e8b698e440d524e5454460bc3f9206baa6..37bfbc54aacb438e4fe111968977bfd320014c2e 100644 (file)
@@ -164,7 +164,7 @@ void icp_native_cause_ipi_rm(int cpu)
         * Just like the cause_ipi functions, it is required to
         * include a full barrier before causing the IPI.
         */
-       xics_phys = paca[cpu].kvm_hstate.xics_phys;
+       xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
        mb();
        __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
 }
index 40c06110821c36221010fa221326950759c9c5c1..3459015092fa62e3f5a4c61cb7d37a812e391cf8 100644 (file)
@@ -246,7 +246,7 @@ notrace void xmon_xive_do_dump(int cpu)
                u64 val = xive_esb_read(&xc->ipi_data, XIVE_ESB_GET);
                xmon_printf("  IPI state: %x:%c%c\n", xc->hw_ipi,
                        val & XIVE_ESB_VAL_P ? 'P' : 'p',
-                       val & XIVE_ESB_VAL_P ? 'Q' : 'q');
+                       val & XIVE_ESB_VAL_Q ? 'Q' : 'q');
        }
 #endif
 }
index 82e1a3ee6e0fc0e8bf53ea22e8dd986ab2de508b..a0842f1ff72ca931cb576834d9fe8e5e42c9748e 100644 (file)
@@ -41,6 +41,7 @@
 #include <asm/pgtable.h>
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
+#include <asm/plpar_wrappers.h>
 #include <asm/cputable.h>
 #include <asm/rtas.h>
 #include <asm/sstep.h>
 #include <asm/paca.h>
 #endif
 
-#if defined(CONFIG_PPC_SPLPAR)
-#include <asm/plpar_wrappers.h>
-#else
-static inline long plapr_set_ciabr(unsigned long ciabr) {return 0; };
-#endif
-
 #include "nonstdio.h"
 #include "dis-asm.h"
 
@@ -328,7 +323,7 @@ static void write_ciabr(unsigned long ciabr)
                mtspr(SPRN_CIABR, ciabr);
                return;
        }
-       plapr_set_ciabr(ciabr);
+       plpar_set_ciabr(ciabr);
 }
 
 /**
@@ -1273,6 +1268,16 @@ static long check_bp_loc(unsigned long addr)
        return 1;
 }
 
+/* Force enable xmon if not already enabled */
+static inline void force_enable_xmon(void)
+{
+       /* Enable xmon hooks if needed */
+       if (!xmon_on) {
+               printf("xmon: Enabling debugger hooks\n");
+               xmon_on = 1;
+       }
+}
+
 static char *breakpoint_help_string =
     "Breakpoint command usage:\n"
     "b                show breakpoints\n"
@@ -1297,6 +1302,10 @@ bpt_cmds(void)
        static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
        int mode;
        case 'd':       /* bd - hardware data breakpoint */
+               if (!ppc_breakpoint_available()) {
+                       printf("Hardware data breakpoint not supported on this cpu\n");
+                       break;
+               }
                mode = 7;
                cmd = inchar();
                if (cmd == 'r')
@@ -1315,6 +1324,8 @@ bpt_cmds(void)
                        dabr.address &= ~HW_BRK_TYPE_DABR;
                        dabr.enabled = mode | BP_DABR;
                }
+
+               force_enable_xmon();
                break;
 
        case 'i':       /* bi - hardware instr breakpoint */
@@ -1335,6 +1346,7 @@ bpt_cmds(void)
                if (bp != NULL) {
                        bp->enabled |= BP_CIABR;
                        iabr = bp;
+                       force_enable_xmon();
                }
                break;
 #endif
@@ -1399,8 +1411,10 @@ bpt_cmds(void)
                if (!check_bp_loc(a))
                        break;
                bp = new_breakpoint(a);
-               if (bp != NULL)
+               if (bp != NULL) {
                        bp->enabled |= BP_TRAP;
+                       force_enable_xmon();
+               }
                break;
        }
 }
@@ -2327,7 +2341,7 @@ static void dump_one_paca(int cpu)
        catch_memory_errors = 1;
        sync();
 
-       p = &paca[cpu];
+       p = paca_ptrs[cpu];
 
        printf("paca for cpu 0x%x @ %px:\n", cpu, p);
 
@@ -3649,11 +3663,35 @@ device_initcall(setup_xmon_sysrq);
 #endif /* CONFIG_MAGIC_SYSRQ */
 
 #ifdef CONFIG_DEBUG_FS
+static void clear_all_bpt(void)
+{
+       int i;
+
+       /* clear/unpatch all breakpoints */
+       remove_bpts();
+       remove_cpu_bpts();
+
+       /* Disable all breakpoints */
+       for (i = 0; i < NBPTS; ++i)
+               bpts[i].enabled = 0;
+
+       /* Clear any data or iabr breakpoints */
+       if (iabr || dabr.enabled) {
+               iabr = NULL;
+               dabr.enabled = 0;
+       }
+
+       printf("xmon: All breakpoints cleared\n");
+}
+
 static int xmon_dbgfs_set(void *data, u64 val)
 {
        xmon_on = !!val;
        xmon_init(xmon_on);
 
+       /* make sure all breakpoints removed when disabling */
+       if (!xmon_on)
+               clear_all_bpt();
        return 0;
 }
 
index 15db69d8ba697deb1426eff545c872c8bfc6e62d..ca623e6446e4cfa49ed3574e893a673dc232b148 100644 (file)
@@ -53,13 +53,13 @@ static void adb_iop_poll(void);
 static int adb_iop_reset_bus(void);
 
 struct adb_driver adb_iop_driver = {
-       "ISM IOP",
-       adb_iop_probe,
-       adb_iop_init,
-       adb_iop_send_request,
-       adb_iop_autopoll,
-       adb_iop_poll,
-       adb_iop_reset_bus
+       .name         = "ISM IOP",
+       .probe        = adb_iop_probe,
+       .init         = adb_iop_init,
+       .send_request = adb_iop_send_request,
+       .autopoll     = adb_iop_autopoll,
+       .poll         = adb_iop_poll,
+       .reset_bus    = adb_iop_reset_bus
 };
 
 static void adb_iop_end_req(struct adb_request *req, int state)
index 1de81d922d8ace11c08ee367f5af525aed134dcd..c8e078b911c743e048fa9588c63a042aa49ffd0c 100644 (file)
@@ -201,3 +201,4 @@ anslcd_exit(void)
 
 module_init(anslcd_init);
 module_exit(anslcd_exit);
+MODULE_LICENSE("GPL v2");
index 9a6223add30e6dfd3f2e2ec2efa42335a7bb1a77..eb3adfb7f88d36684b7fd4fe6a564a63f772d435 100644 (file)
@@ -70,14 +70,13 @@ static void macio_adb_poll(void);
 static int macio_adb_reset_bus(void);
 
 struct adb_driver macio_adb_driver = {
-       "MACIO",
-       macio_probe,
-       macio_init,
-       macio_send_request,
-       /*macio_write,*/
-       macio_adb_autopoll,
-       macio_adb_poll,
-       macio_adb_reset_bus
+       .name         = "MACIO",
+       .probe        = macio_probe,
+       .init         = macio_init,
+       .send_request = macio_send_request,
+       .autopoll     = macio_adb_autopoll,
+       .poll         = macio_adb_poll,
+       .reset_bus    = macio_adb_reset_bus,
 };
 
 int macio_probe(void)
index 910b5b6f96b1439a5843b8b08aa81b86e6ca5e9b..1f29d2413c7400a9783b0e944f259dc62dfe399a 100644 (file)
@@ -154,8 +154,8 @@ static void rackmeter_do_pause(struct rackmeter *rm, int pause)
                DBDMA_DO_STOP(rm->dma_regs);
                return;
        }
-       memset(rdma->buf1, 0, ARRAY_SIZE(rdma->buf1));
-       memset(rdma->buf2, 0, ARRAY_SIZE(rdma->buf2));
+       memset(rdma->buf1, 0, sizeof(rdma->buf1));
+       memset(rdma->buf2, 0, sizeof(rdma->buf2));
 
        rm->dma_buf_v->mark = 0;
 
@@ -397,7 +397,7 @@ static int rackmeter_probe(struct macio_dev* mdev,
        }
 
        /* Create and initialize our instance data */
-       rm = kzalloc(sizeof(struct rackmeter), GFP_KERNEL);
+       rm = kzalloc(sizeof(*rm), GFP_KERNEL);
        if (rm == NULL) {
                printk(KERN_ERR "rackmeter: failed to allocate memory !\n");
                rc = -ENOMEM;
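
The memset change above fixes a classic size bug: ARRAY_SIZE() yields the element count while memset() takes a byte count, so for any element wider than one byte the old call cleared only part of the buffer. A standalone illustration, assuming a u32 sample buffer:

#include <stdint.h>
#include <string.h>

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

static uint32_t buf[16];

static void clear_buf(void)
{
        memset(buf, 0, ARRAY_SIZE(buf)); /* wrong: clears only 16 bytes */
        memset(buf, 0, sizeof(buf));     /* right: clears all 64 bytes  */
}
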
index 4ba06a1695ea16fc2e02987f23213578f68dc890..cf6f7d52d6beef08a7ea9c15eff259f3578a17ed 100644 (file)
@@ -91,13 +91,13 @@ static void macii_poll(void);
 static int macii_reset_bus(void);
 
 struct adb_driver via_macii_driver = {
-       "Mac II",
-       macii_probe,
-       macii_init,
-       macii_send_request,
-       macii_autopoll,
-       macii_poll,
-       macii_reset_bus
+       .name         = "Mac II",
+       .probe        = macii_probe,
+       .init         = macii_init,
+       .send_request = macii_send_request,
+       .autopoll     = macii_autopoll,
+       .poll         = macii_poll,
+       .reset_bus    = macii_reset_bus,
 };
 
 static enum macii_state {
index 94c0f3f7df699c1c58b83b2573e7e2d2c4cfa7ed..433dbeddfcf9e28b31616af40e41912986a3b7c6 100644 (file)
@@ -198,14 +198,14 @@ static const struct file_operations pmu_battery_proc_fops;
 static const struct file_operations pmu_options_proc_fops;
 
 #ifdef CONFIG_ADB
-struct adb_driver via_pmu_driver = {
-       "PMU",
-       pmu_probe,
-       pmu_init,
-       pmu_send_request,
-       pmu_adb_autopoll,
-       pmu_poll_adb,
-       pmu_adb_reset_bus
+const struct adb_driver via_pmu_driver = {
+       .name         = "PMU",
+       .probe        = pmu_probe,
+       .init         = pmu_init,
+       .send_request = pmu_send_request,
+       .autopoll     = pmu_adb_autopoll,
+       .poll         = pmu_poll_adb,
+       .reset_bus    = pmu_adb_reset_bus,
 };
 #endif /* CONFIG_ADB */
 
index 7d9c4baf8c114a4cac3207efaeec4d3849901b61..d545ed45e482a573ff28694093790132781f86a0 100644 (file)
@@ -120,13 +120,13 @@ static void pmu_enable_backlight(int on);
 static void pmu_set_brightness(int level);
 
 struct adb_driver via_pmu_driver = {
-       "68K PMU",
-       pmu_probe,
-       pmu_init,
-       pmu_send_request,
-       pmu_autopoll,
-       pmu_poll,
-       pmu_reset_bus
+       .name         = "68K PMU",
+       .probe        = pmu_probe,
+       .init         = pmu_init,
+       .send_request = pmu_send_request,
+       .autopoll     = pmu_autopoll,
+       .poll         = pmu_poll,
+       .reset_bus    = pmu_reset_bus,
 };
 
 /*
index 4f015da78f283952ad506ae2ff2bf39297ef3dff..a4c9c8297a6d825db6321032f7125aa01c9ca751 100644 (file)
@@ -369,6 +369,9 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_PSL_TFC_An_AE (1ull << (63-30)) /* Restart PSL with address error */
 #define CXL_PSL_TFC_An_R  (1ull << (63-31)) /* Restart PSL transaction */
 
+/****** CXL_PSL_DEBUG *****************************************************/
+#define CXL_PSL_DEBUG_CDC  (1ull << (63-27)) /* Coherent Data cache support */
+
 /****** CXL_XSL9_IERAT_ERAT - CAIA 2 **********************************/
 #define CXL_XSL9_IERAT_MLPID    (1ull << (63-0))  /* Match LPID */
 #define CXL_XSL9_IERAT_MPID     (1ull << (63-1))  /* Match PID */
@@ -669,6 +672,7 @@ struct cxl_native {
        irq_hw_number_t err_hwirq;
        unsigned int err_virq;
        u64 ps_off;
+       bool no_data_cache; /* set if no data cache on the card */
        const struct cxl_service_layer_ops *sl_ops;
 };
 
@@ -1065,7 +1069,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
                          u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
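
A note on the (63-n) shifts in these register definitions: the Power ISA numbers bits from the most-significant end, so architected bit n of a 64-bit register is the C shift (63 - n). The kernel's PPC_BIT() macro captures the same convention; for example, the CDC definition above is IBM bit 27:

#include <stdint.h>

#define PPC_BIT(n)      (1ull << (63 - (n)))

static const uint64_t cdc = PPC_BIT(27); /* == 0x0000001000000000ull */
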
index 30ccba436b3b1cada1dd838632c4b8167769319f..0bc7c31cf7395734a1053f7cc10840c970b3eda9 100644 (file)
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
        if (rc)
                return rc;
 
-       rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+       rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
        if (rc)
                return rc;
        if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
@@ -208,49 +208,74 @@ int cxllib_get_PE_attributes(struct task_struct *task,
 }
 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
 
-int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+static int get_vma_info(struct mm_struct *mm, u64 addr,
+                       u64 *vma_start, u64 *vma_end,
+                       unsigned long *page_size)
 {
-       int rc;
-       u64 dar;
        struct vm_area_struct *vma = NULL;
-       unsigned long page_size;
-
-       if (mm == NULL)
-               return -EFAULT;
+       int rc = 0;
 
        down_read(&mm->mmap_sem);
 
        vma = find_vma(mm, addr);
        if (!vma) {
-               pr_err("Can't find vma for addr %016llx\n", addr);
                rc = -EFAULT;
                goto out;
        }
-       /* get the size of the pages allocated */
-       page_size = vma_kernel_pagesize(vma);
-
-       for (dar = (addr & ~(page_size - 1)); dar < (addr + size); dar += page_size) {
-               if (dar < vma->vm_start || dar >= vma->vm_end) {
-                       vma = find_vma(mm, addr);
-                       if (!vma) {
-                               pr_err("Can't find vma for addr %016llx\n", addr);
-                               rc = -EFAULT;
-                               goto out;
-                       }
-                       /* get the size of the pages allocated */
-                       page_size = vma_kernel_pagesize(vma);
+       *page_size = vma_kernel_pagesize(vma);
+       *vma_start = vma->vm_start;
+       *vma_end = vma->vm_end;
+out:
+       up_read(&mm->mmap_sem);
+       return rc;
+}
+
+int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
+{
+       int rc;
+       u64 dar, vma_start, vma_end;
+       unsigned long page_size;
+
+       if (mm == NULL)
+               return -EFAULT;
+
+       /*
+        * The buffer we have to process can extend over several pages
+        * and may also cover several VMAs.
+        * We iterate over all the pages. The page size could vary
+        * between VMAs.
+        */
+       rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
+       if (rc)
+               return rc;
+
+       for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
+            dar += page_size) {
+               if (dar < vma_start || dar >= vma_end) {
+                       /*
+                        * We don't hold the mm->mmap_sem semaphore
+                        * while iterating, since the semaphore is
+                        * required by one of the lower-level page
+                        * fault processing functions and it could
+                        * create a deadlock.
+                        *
+                        * This means the VMAs can be altered between two
+                        * loop iterations and we could theoretically
+                        * miss a page (however unlikely). But that's
+                        * not really a problem, as the driver will
+                        * retry access, get another page fault on the
+                        * missing page and call us again.
+                        */
+                       rc = get_vma_info(mm, dar, &vma_start, &vma_end,
+                                       &page_size);
+                       if (rc)
+                               return rc;
                }
 
                rc = cxl_handle_mm_fault(mm, flags, dar);
-               if (rc) {
-                       pr_err("cxl_handle_mm_fault failed %d", rc);
-                       rc = -EFAULT;
-                       goto out;
-               }
+               if (rc)
+                       return -EFAULT;
        }
-       rc = 0;
-out:
-       up_read(&mm->mmap_sem);
-       return rc;
+       return 0;
 }
 EXPORT_SYMBOL_GPL(cxllib_handle_fault);
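
The rework above is a drop-and-revalidate pattern: the VMA bounds are snapshotted under mmap_sem, the lock is released before the fault is handled (since the fault path may itself need it), and the snapshot is refreshed whenever the walk leaves it. A standalone sketch of the same shape, with generic, hypothetical names:

#include <pthread.h>
#include <stdint.h>

struct range { uint64_t start, end; };

static pthread_rwlock_t lock = PTHREAD_RWLOCK_INITIALIZER;

/* Stand-in for find_vma() + vma_kernel_pagesize() under mmap_sem. */
static struct range lookup_range(uint64_t addr)
{
        uint64_t base = addr & ~0xfffULL;
        return (struct range){ base, base + 0x1000 };
}

/* Stand-in for cxl_handle_mm_fault(); may take 'lock' internally. */
static void process(uint64_t addr) { (void)addr; }

static void walk(uint64_t addr, uint64_t size, uint64_t step)
{
        pthread_rwlock_rdlock(&lock);
        struct range r = lookup_range(addr); /* snapshot under the lock */
        pthread_rwlock_unlock(&lock);

        for (uint64_t cur = addr; cur < addr + size; cur += step) {
                if (cur < r.start || cur >= r.end) {
                        /* Re-validate without holding the lock across
                         * process(), which could otherwise deadlock. */
                        pthread_rwlock_rdlock(&lock);
                        r = lookup_range(cur);
                        pthread_rwlock_unlock(&lock);
                }
                process(cur);
        }
}
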
index 1b3d7c65ea3fe2a7f22307630302e29cd69f7b66..98f867fcef24a7badeca48c278b05052d72d43b0 100644 (file)
@@ -353,8 +353,17 @@ int cxl_data_cache_flush(struct cxl *adapter)
        u64 reg;
        unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
 
-       pr_devel("Flushing data cache\n");
+       /*
+        * Do a data cache flush only if the data cache is available.
+        * The PSL9D has no data cache, so a flush operation there would
+        * time out.
+        */
+       if (adapter->native->no_data_cache) {
+               pr_devel("No PSL data cache. Ignoring cache flush req.\n");
+               return 0;
+       }
 
+       pr_devel("Flushing data cache\n");
        reg = cxl_p1_read(adapter, CXL_PSL_Control);
        reg |= CXL_PSL_Control_Fr;
        cxl_p1_write(adapter, CXL_PSL_Control, reg);
index 758842f65a1b372ea8aecf48fe8a58d738dba0d7..83f1d08058fc234dc9b141b543084ece35c53b0f 100644 (file)
@@ -407,21 +407,59 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
        return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64 *capiind, u64 *asnind,
+                              u64 *nbwind)
+{
+       static u64 nbw, asn, capi = 0;
+       struct device_node *np;
+       const __be32 *prop;
+
+       mutex_lock(&indications_mutex);
+       if (!capi) {
+               if (!(np = pnv_pci_get_phb_node(dev))) {
+                       mutex_unlock(&indications_mutex);
+                       return -ENODEV;
+               }
+
+               prop = of_get_property(np, "ibm,phb-indications", NULL);
+               if (!prop) {
+                       nbw = 0x0300UL; /* legacy values */
+                       asn = 0x0400UL;
+                       capi = 0x0200UL;
+               } else {
+                       nbw = (u64)be32_to_cpu(prop[2]);
+                       asn = (u64)be32_to_cpu(prop[1]);
+                       capi = (u64)be32_to_cpu(prop[0]);
+               }
+               of_node_put(np);
+       }
+       *capiind = capi;
+       *asnind = asn;
+       *nbwind = nbw;
+       mutex_unlock(&indications_mutex);
+       return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
        u64 xsl_dsnctl;
+       u64 capiind, asnind, nbwind;
 
        /*
         * CAPI Identifier bits [0:7]
         * bit 61:60 MSI bits --> 0
         * bit 59 TVT selector --> 0
         */
+       if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+               return -ENODEV;
 
        /*
         * Tell XSL where to route data to.
         * The field chipid should match the PHB CAPI_CMPM register
         */
-       xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+       xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
        xsl_dsnctl |= (capp_unit_id << (63-15));
 
        /* nMMU_ID Defaults to: b'000001001' */
@@ -435,14 +473,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
                 * nbwind=0x03, bits [57:58], must include capi indicator.
                 * Not supported on P9 DD1.
                 */
-               xsl_dsnctl |= ((u64)0x03 << (63-47));
+               xsl_dsnctl |= (nbwind << (63-55));
 
                /*
                 * Upper 16b address bits of ASB_Notify messages sent to the
                 * system. Need to match the PHB’s ASN Compare/Mask Register.
                 * Not supported on P9 DD1.
                 */
-               xsl_dsnctl |= ((u64)0x04 << (63-55));
+               xsl_dsnctl |= asnind;
        }
 
        *reg = xsl_dsnctl;
@@ -456,13 +494,14 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
        u64 chipid;
        u32 phb_index;
        u64 capp_unit_id;
+       u64 psl_debug;
        int rc;
 
        rc = cxl_calc_capp_routing(dev, &chipid, &phb_index, &capp_unit_id);
        if (rc)
                return rc;
 
-       rc = cxl_get_xsl9_dsnctl(capp_unit_id, &xsl_dsnctl);
+       rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &xsl_dsnctl);
        if (rc)
                return rc;
 
@@ -503,8 +542,22 @@ static int init_implementation_adapter_regs_psl9(struct cxl *adapter,
        if (cxl_is_power9_dd1()) {
                /* Disabling deadlock counter CAR */
                cxl_p1_write(adapter, CXL_PSL9_GP_CT, 0x0020000000000001ULL);
-       } else
-               cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x4000000000000000ULL);
+               /* Enable NORST */
+               cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0x8000000000000000ULL);
+       } else {
+               /* Enable NORST and DD2 features */
+               cxl_p1_write(adapter, CXL_PSL9_DEBUG, 0xC000000000000000ULL);
+       }
+
+       /*
+        * Check if the PSL has a data cache. The adapter data cache needs
+        * to be flushed just before the adapter is removed.
+        */
+       psl_debug = cxl_p1_read(adapter, CXL_PSL9_DEBUG);
+       if (psl_debug & CXL_PSL_DEBUG_CDC) {
+               dev_dbg(&dev->dev, "No data-cache present\n");
+               adapter->native->no_data_cache = true;
+       }
 
        return 0;
 }
@@ -568,12 +621,6 @@ static int init_implementation_adapter_regs_xsl(struct cxl *adapter, struct pci_
 /* For the PSL this is a multiple for 0 < n <= 7: */
 #define PSL_2048_250MHZ_CYCLES 1
 
-static void write_timebase_ctrl_psl9(struct cxl *adapter)
-{
-       cxl_p1_write(adapter, CXL_PSL9_TB_CTLSTAT,
-                    TBSYNC_CNT(2 * PSL_2048_250MHZ_CYCLES));
-}
-
 static void write_timebase_ctrl_psl8(struct cxl *adapter)
 {
        cxl_p1_write(adapter, CXL_PSL_TB_CTLSTAT,
@@ -612,9 +659,6 @@ static u64 timebase_read_xsl(struct cxl *adapter)
 
 static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
 {
-       u64 psl_tb;
-       int delta;
-       unsigned int retry = 0;
        struct device_node *np;
 
        adapter->psl_timebase_synced = false;
@@ -635,26 +679,13 @@ static void cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
         * Setup PSL Timebase Control and Status register
         * with the recommended Timebase Sync Count value
         */
-       adapter->native->sl_ops->write_timebase_ctrl(adapter);
+       if (adapter->native->sl_ops->write_timebase_ctrl)
+               adapter->native->sl_ops->write_timebase_ctrl(adapter);
 
        /* Enable PSL Timebase */
        cxl_p1_write(adapter, CXL_PSL_Control, 0x0000000000000000);
        cxl_p1_write(adapter, CXL_PSL_Control, CXL_PSL_Control_tb);
 
-       /* Wait until CORE TB and PSL TB difference <= 16usecs */
-       do {
-               msleep(1);
-               if (retry++ > 5) {
-                       dev_info(&dev->dev, "PSL timebase can't synchronize\n");
-                       return;
-               }
-               psl_tb = adapter->native->sl_ops->timebase_read(adapter);
-               delta = mftb() - psl_tb;
-               if (delta < 0)
-                       delta = -delta;
-       } while (tb_to_ns(delta) > 16000);
-
-       adapter->psl_timebase_synced = true;
        return;
 }
 
@@ -1449,10 +1480,8 @@ int cxl_pci_reset(struct cxl *adapter)
 
        /*
         * The adapter is about to be reset, so ignore errors.
-        * Not supported on P9 DD1
         */
-       if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
-               cxl_data_cache_flush(adapter);
+       cxl_data_cache_flush(adapter);
 
        /* pcie_warm_reset requests a fundamental pci reset which includes a
         * PERST assert/deassert.  PERST triggers a loading of the image
@@ -1801,7 +1830,6 @@ static const struct cxl_service_layer_ops psl9_ops = {
        .psl_irq_dump_registers = cxl_native_irq_dump_regs_psl9,
        .err_irq_dump_registers = cxl_native_err_irq_dump_regs_psl9,
        .debugfs_stop_trace = cxl_stop_trace_psl9,
-       .write_timebase_ctrl = write_timebase_ctrl_psl9,
        .timebase_read = timebase_read_psl9,
        .capi_mode = OPAL_PHB_CAPI_MODE_CAPI,
        .needs_reset_before_disable = true,
@@ -1936,10 +1964,8 @@ static void cxl_pci_remove_adapter(struct cxl *adapter)
 
        /*
         * Flush adapter datacache as it's about to be removed.
-        * Not supported on P9 DD1.
         */
-       if ((cxl_is_power8()) || (!(cxl_is_power9_dd1())))
-               cxl_data_cache_flush(adapter);
+       cxl_data_cache_flush(adapter);
 
        cxl_deconfigure_adapter(adapter);
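
Deleting write_timebase_ctrl_psl9() only works because the timebase setup path earlier in this file now treats the hook as optional. Guarding an optional ops-struct callback is a common kernel pattern; a minimal sketch with hypothetical types:

struct adapter;

struct sl_ops {
        /* May be NULL when the hardware default is acceptable (PSL9). */
        void (*write_timebase_ctrl)(struct adapter *);
};

struct adapter { const struct sl_ops *ops; };

static void setup_timebase(struct adapter *a)
{
        if (a->ops->write_timebase_ctrl)
                a->ops->write_timebase_ctrl(a);
}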
 
index a8b6d6a635e962b057325d9d5d6af96382474eba..95285b7f636ff3f854876e5743658f91d88c3e3d 100644 (file)
@@ -62,7 +62,19 @@ static ssize_t psl_timebase_synced_show(struct device *device,
                                        char *buf)
 {
        struct cxl *adapter = to_cxl_adapter(device);
+       u64 psl_tb, delta;
 
+       /* Recompute the status only in native mode */
+       if (cpu_has_feature(CPU_FTR_HVMODE)) {
+               psl_tb = adapter->native->sl_ops->timebase_read(adapter);
+               delta = abs(mftb() - psl_tb);
+
+               /* CORE TB and PSL TB difference <= 16usecs ? */
+       adapter->psl_timebase_synced = tb_to_ns(delta) < 16000;
+               pr_devel("PSL timebase %s - delta: 0x%016llx\n",
+                        (tb_to_ns(delta) < 16000) ? "synchronized" :
+                        "not synchronized", tb_to_ns(delta));
+       }
        return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced);
 }
 
index 23da3046f160d6527805bc22484fc004fe0a8703..d44100687dfea9411b609a3526775797b1fdb3a6 100644 (file)
@@ -919,8 +919,8 @@ static void pnv_php_unregister_one(struct device_node *dn)
                return;
 
        php_slot->state = PNV_PHP_STATE_OFFLINE;
-       pnv_php_put_slot(php_slot);
        pci_hp_deregister(&php_slot->slot);
+       pnv_php_put_slot(php_slot);
 }
 
 static void pnv_php_unregister(struct device_node *dn)
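
This two-line swap is a use-after-free fix: dropping the last reference before pci_hp_deregister() could free the slot while deregistration still touches it. The general rule, sketched with a hypothetical refcounted object:

#include <stdatomic.h>
#include <stdlib.h>

struct obj { atomic_int refcnt; };

static void put_object(struct obj *o)
{
        if (atomic_fetch_sub(&o->refcnt, 1) == 1)
                free(o); /* the last reference frees the object */
}

static void deregister(struct obj *o) { (void)o; /* still uses *o */ }

static void teardown(struct obj *o)
{
        deregister(o);  /* finish every user of the object first */
        put_object(o);  /* only then drop our reference */
}
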
index 0257aee7ab4bd466019a89a7d9e3f4874c2b3420..ca59883c836450995933b09aa85eaa3cf5a53926 100644 (file)
@@ -318,6 +318,9 @@ static inline bool memblock_bottom_up(void)
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
                                        phys_addr_t start, phys_addr_t end,
                                        ulong flags);
+phys_addr_t memblock_alloc_base_nid(phys_addr_t size,
+                                       phys_addr_t align, phys_addr_t max_addr,
+                                       int nid, ulong flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
                                phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
index a366cc3144795b66bfb1bec4911eb3eb6f162a05..ea8505204fdfc6dd17cab030f9ead1af419455ef 100644 (file)
@@ -106,6 +106,10 @@ extern const struct raid6_calls raid6_avx512x1;
 extern const struct raid6_calls raid6_avx512x2;
 extern const struct raid6_calls raid6_avx512x4;
 extern const struct raid6_calls raid6_s390vx8;
+extern const struct raid6_calls raid6_vpermxor1;
+extern const struct raid6_calls raid6_vpermxor2;
+extern const struct raid6_calls raid6_vpermxor4;
+extern const struct raid6_calls raid6_vpermxor8;
 
 struct raid6_recov_calls {
        void (*data2)(int, size_t, int, int, void **);
index f01b1cb04f91949a58359bf4503cfbf5849e3db8..3de0d8921286095fe1f6a437364ccff1fdbb8b61 100644 (file)
@@ -4,3 +4,4 @@ int*.c
 tables.c
 neon?.c
 s390vx?.c
+vpermxor*.c
index 44d6b46df051be9d56faaf72a48803d9e2bd9136..2f8b61dfd9b0e0fec458cc1bdb9ce1852b58cf1d 100644 (file)
@@ -5,7 +5,8 @@ raid6_pq-y      += algos.o recov.o tables.o int1.o int2.o int4.o \
                   int8.o int16.o int32.o
 
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
-raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
+                              vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
 
@@ -90,6 +91,30 @@ $(obj)/altivec8.c:   UNROLL := 8
 $(obj)/altivec8.c:   $(src)/altivec.uc $(src)/unroll.awk FORCE
        $(call if_changed,unroll)
 
+CFLAGS_vpermxor1.o += $(altivec_flags)
+targets += vpermxor1.c
+$(obj)/vpermxor1.c: UNROLL := 1
+$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+       $(call if_changed,unroll)
+
+CFLAGS_vpermxor2.o += $(altivec_flags)
+targets += vpermxor2.c
+$(obj)/vpermxor2.c: UNROLL := 2
+$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+       $(call if_changed,unroll)
+
+CFLAGS_vpermxor4.o += $(altivec_flags)
+targets += vpermxor4.c
+$(obj)/vpermxor4.c: UNROLL := 4
+$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+       $(call if_changed,unroll)
+
+CFLAGS_vpermxor8.o += $(altivec_flags)
+targets += vpermxor8.c
+$(obj)/vpermxor8.c: UNROLL := 8
+$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE
+       $(call if_changed,unroll)
+
 CFLAGS_neon1.o += $(NEON_FLAGS)
 targets += neon1.c
 $(obj)/neon1.c:   UNROLL := 1
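
These rules run vpermxor.uc through unroll.awk once per unroll factor. In the .uc source, $# expands to the factor N and any line containing $$ is emitted N times with $$ replaced by 0..N-1; for example, with -vN=2:

/* Input line in vpermxor.uc: */
unative_t wp$$, wq$$, wd$$;

/* Output in the generated vpermxor2.c: */
unative_t wp0, wq0, wd0;
unative_t wp1, wq1, wd1;
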
index c65aa80d67ed475ba8a07123d8083ef6df91a662..5065b1e7e32759535942fed1e149079e61c1dcda 100644 (file)
@@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = {
        &raid6_altivec2,
        &raid6_altivec4,
        &raid6_altivec8,
+       &raid6_vpermxor1,
+       &raid6_vpermxor2,
+       &raid6_vpermxor4,
+       &raid6_vpermxor8,
 #endif
 #if defined(CONFIG_S390)
        &raid6_s390vx8,
index 682aae8a1fef2d78ba289bb63cdf697b84cb1648..d20ed0d114111cc55706d45739263d48be505f32 100644 (file)
 
 #include <linux/raid/pq.h>
 
+#ifdef CONFIG_ALTIVEC
+
 #include <altivec.h>
 #ifdef __KERNEL__
 # include <asm/cputable.h>
 # include <asm/switch_to.h>
+#endif /* __KERNEL__ */
 
 /*
  * This is the C data type to use.  We use a vector of
index fabc477b1417083bee06d90c8df234dd0e219abe..5d73f5cb4d8a78f0887cc6cfbf10a29ec5d5f51c 100644 (file)
@@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes)
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
         HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
-                         gcc -c -x c - >&/dev/null && \
-                         rm ./-.o && echo yes)
+                         gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
         ifeq ($(HAS_ALTIVEC),yes)
-                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
+                CFLAGS += -I../../../arch/powerpc/include
+                CFLAGS += -DCONFIG_ALTIVEC
+                OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
+                        vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
         endif
 endif
 
@@ -95,6 +97,18 @@ altivec4.c: altivec.uc ../unroll.awk
 altivec8.c: altivec.uc ../unroll.awk
        $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
 
+vpermxor1.c: vpermxor.uc ../unroll.awk
+       $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@
+
+vpermxor2.c: vpermxor.uc ../unroll.awk
+       $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@
+
+vpermxor4.c: vpermxor.uc ../unroll.awk
+       $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@
+
+vpermxor8.c: vpermxor.uc ../unroll.awk
+       $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@
+
 int1.c: int.uc ../unroll.awk
        $(AWK) ../unroll.awk -vN=1 < int.uc > $@
 
@@ -117,7 +131,7 @@ tables.c: mktables
        ./mktables > tables.c
 
 clean:
-       rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
+       rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test
 
 spotless: clean
        rm -f *~
diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc
new file mode 100644 (file)
index 0000000..10475dc
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2017, Matt Brown, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * vpermxor$#.c
+ *
+ * Based on H. Peter Anvin's paper - The mathematics of RAID-6
+ *
+ * $#-way unrolled VPERMXOR implementation of the RAID-6 Q syndrome
+ * This file is postprocessed using unroll.awk
+ *
+ * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q
+ * syndrome calculations.
+ * This can be run on systems which have both Altivec and the vpermxor
+ * instruction.
+ *
+ * This instruction was introduced in POWER8 - ISA v2.07.
+ */
+
+#include <linux/raid/pq.h>
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+#include <asm/cputable.h>
+#include <asm/ppc-opcode.h>
+#include <asm/switch_to.h>
+#endif
+
+typedef vector unsigned char unative_t;
+#define NSIZE sizeof(unative_t)
+
+static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14,
+                                           0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08,
+                                           0x06, 0x04, 0x02, 0x00};
+static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d,
+                                            0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80,
+                                            0x60, 0x40, 0x20, 0x00};
+
+static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes,
+                                                       void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+       unative_t wp$$, wq$$, wd$$;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       for (d = 0; d < bytes; d += NSIZE*$#) {
+               wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+
+               for (z = z0-1; z>=0; z--) {
+                       wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+                       /* P syndrome */
+                       wp$$ = vec_xor(wp$$, wd$$);
+
+                       /* Q syndrome */
+                       asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$));
+                       wq$$ = vec_xor(wq$$, wd$$);
+               }
+               *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+               *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+       }
+}
+
+static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       preempt_disable();
+       enable_kernel_altivec();
+
+       raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs);
+
+       disable_kernel_altivec();
+       preempt_enable();
+}
+
+int raid6_have_altivec_vpermxor(void);
+#if $# == 1
+int raid6_have_altivec_vpermxor(void)
+{
+       /* Check if arch has both altivec and the vpermxor instructions */
+# ifdef __KERNEL__
+       return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) &&
+               cpu_has_feature(CPU_FTR_ARCH_207S));
+# else
+       return 1;
+# endif
+}
+#endif
+
+const struct raid6_calls raid6_vpermxor$# = {
+       raid6_vpermxor$#_gen_syndrome,
+       NULL,
+       raid6_have_altivec_vpermxor,
+       "vpermxor$#",
+       0
+};
+#endif
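
The gf_low/gf_high tables above implement multiplication by 2 in GF(2^8) with the RAID-6 polynomial 0x11d, split by nibble: multiplication by a constant is linear over XOR, so 2*b = T_hi[b >> 4] ^ T_lo[b & 0xf], and vpermxor performs sixteen such lookup-and-XOR steps in a single instruction (the tables are stored in reversed element order to match the instruction's indexing). A plain-C sketch of the same computation:

#include <stdint.h>

/* 2*x in GF(2^8) for a low nibble x = 0..15: no reduction needed. */
static const uint8_t mul2_lo[16] = {
        0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
        0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
};

/* 2*(x << 4) in GF(2^8): reduced by 0x1d when bit 7 overflows. */
static const uint8_t mul2_hi[16] = {
        0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
        0x1d, 0x3d, 0x5d, 0x7d, 0x9d, 0xbd, 0xdd, 0xfd,
};

static uint8_t gf2_mul2(uint8_t b)
{
        return mul2_hi[b >> 4] ^ mul2_lo[b & 0xf];
}
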
index 9b04568ad42a67600af97468dbf51e04e875e473..5108356ad8aaddc9d24d9a0edf716553d75507b5 100644 (file)
@@ -1163,7 +1163,7 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
                                        flags);
 }
 
-static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
+phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
                                        phys_addr_t align, phys_addr_t max_addr,
                                        int nid, ulong flags)
 {
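
Exporting memblock_alloc_base_nid() lets the powerpc NUMA-aware early setup (pacas, kernel page tables, per-cpu stacks) ask for node-local memory. A hedged, kernel-context sketch of such a caller: unlike memblock_alloc_base(), which panics, the _nid variant returns 0 on failure, allowing a fallback.

/* Hypothetical caller: prefer node-local memory, fall back to anywhere. */
phys_addr_t pa;

pa = memblock_alloc_base_nid(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE,
                             nid, MEMBLOCK_NONE);
if (!pa)
        pa = memblock_alloc_base(size, PAGE_SIZE, MEMBLOCK_ALLOC_ANYWHERE);
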
index 04dc1e6ef2ce7325a2b40368850fc7ce1e8eacbf..9161679b1e1adf6814c11b39acc822bf4d8dea82 100644 (file)
@@ -1,5 +1,7 @@
 gettimeofday
 context_switch
+fork
+exec_target
 mmap_bench
 futex_bench
 null_syscall
index a35058e3766c2408955f045df4ca39786f34f33f..b4d7432a0ecd1b4af5fa5fe2071276172a684dc5 100644 (file)
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
+TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
+TEST_GEN_FILES := exec_target
 
 CFLAGS += -O2
 
@@ -10,3 +11,7 @@ $(TEST_GEN_PROGS): ../harness.c
 $(OUTPUT)/context_switch: ../utils.c
 $(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
 $(OUTPUT)/context_switch: LDLIBS += -lpthread
+
+$(OUTPUT)/fork: LDLIBS += -lpthread
+
+$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
new file mode 100644 (file)
index 0000000..3c9c144
--- /dev/null
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Part of fork context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+void _exit(int);
+void _start(void)
+{
+       _exit(0);
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c
new file mode 100644 (file)
index 0000000..d312e63
--- /dev/null
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static unsigned int timeout = 30;
+
+static void set_cpu(int cpu)
+{
+       cpu_set_t cpuset;
+
+       if (cpu == -1)
+               return;
+
+       CPU_ZERO(&cpuset);
+       CPU_SET(cpu, &cpuset);
+
+       if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+               perror("sched_setaffinity");
+               exit(1);
+       }
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, int cpu)
+{
+       int pid;
+
+       pid = fork();
+       if (pid == -1) {
+               perror("fork");
+               exit(1);
+       }
+
+       if (pid)
+               return;
+
+       set_cpu(cpu);
+
+       fn(arg);
+
+       exit(0);
+}
+
+static int cpu;
+static int do_fork = 0;
+static int do_vfork = 0;
+static int do_exec = 0;
+static char *exec_file;
+static int exec_target = 0;
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void run_exec(void)
+{
+       char *const argv[] = { "./exec_target", NULL };
+
+       if (execve("./exec_target", argv, NULL) == -1) {
+               perror("execve");
+               exit(1);
+       }
+}
+
+static void bench_fork(void)
+{
+       while (1) {
+               pid_t pid = fork();
+               if (pid == -1) {
+                       perror("fork");
+                       exit(1);
+               }
+               if (pid == 0) {
+                       if (do_exec)
+                               run_exec();
+                       _exit(0);
+               }
+               pid = waitpid(pid, NULL, 0);
+               if (pid == -1) {
+                       perror("waitpid");
+                       exit(1);
+               }
+               iterations++;
+       }
+}
+
+static void bench_vfork(void)
+{
+       while (1) {
+               pid_t pid = vfork();
+               if (pid == -1) {
+                       perror("fork");
+                       exit(1);
+               }
+               if (pid == 0) {
+                       if (do_exec)
+                               run_exec();
+                       _exit(0);
+               }
+               pid = waitpid(pid, NULL, 0);
+               if (pid == -1) {
+                       perror("waitpid");
+                       exit(1);
+               }
+               iterations++;
+       }
+}
+
+static void *null_fn(void *arg)
+{
+       pthread_exit(NULL);
+}
+
+static void bench_thread(void)
+{
+       pthread_t tid;
+       cpu_set_t cpuset;
+       pthread_attr_t attr;
+       int rc;
+
+       rc = pthread_attr_init(&attr);
+       if (rc) {
+               errno = rc;
+               perror("pthread_attr_init");
+               exit(1);
+       }
+
+       if (cpu != -1) {
+               CPU_ZERO(&cpuset);
+               CPU_SET(cpu, &cpuset);
+
+               rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+               if (rc) {
+                       errno = rc;
+                       perror("pthread_attr_setaffinity_np");
+                       exit(1);
+               }
+       }
+
+       while (1) {
+               rc = pthread_create(&tid, &attr, null_fn, NULL);
+               if (rc) {
+                       errno = rc;
+                       perror("pthread_create");
+                       exit(1);
+               }
+               rc = pthread_join(tid, NULL);
+               if (rc) {
+                       errno = rc;
+                       perror("pthread_join");
+                       exit(1);
+               }
+               iterations++;
+       }
+}
+
+static void sigalrm_handler(int junk)
+{
+       unsigned long i = iterations;
+
+       printf("%ld\n", i - iterations_prev);
+       iterations_prev = i;
+
+       if (--timeout == 0)
+               kill(0, SIGUSR1);
+
+       alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+       exit(0);
+}
+
+static void *bench_proc(void *arg)
+{
+       signal(SIGALRM, sigalrm_handler);
+       alarm(1);
+
+       if (do_fork)
+               bench_fork();
+       else if (do_vfork)
+               bench_vfork();
+       else
+               bench_thread();
+
+       return NULL;
+}
+
+static struct option options[] = {
+       { "fork", no_argument, &do_fork, 1 },
+       { "vfork", no_argument, &do_vfork, 1 },
+       { "exec", no_argument, &do_exec, 1 },
+       { "timeout", required_argument, 0, 's' },
+       { "exec-target", no_argument, &exec_target, 1 },
+       { NULL },
+};
+
+static void usage(void)
+{
+       fprintf(stderr, "Usage: fork <options> CPU\n\n");
+       fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n");
+       fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n");
+       fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n");
+       fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+       fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n");
+}
+
+int main(int argc, char *argv[])
+{
+       signed char c;
+
+       while (1) {
+               int option_index = 0;
+
+               c = getopt_long(argc, argv, "", options, &option_index);
+
+               if (c == -1)
+                       break;
+
+               switch (c) {
+               case 0:
+                       if (options[option_index].flag != 0)
+                               break;
+
+                       usage();
+                       exit(1);
+                       break;
+
+               case 's':
+                       timeout = atoi(optarg);
+                       break;
+
+               default:
+                       usage();
+                       exit(1);
+               }
+       }
+
+       if (do_fork && do_vfork) {
+               usage();
+               exit(1);
+       }
+       if (do_exec && !do_fork && !do_vfork) {
+               usage();
+               exit(1);
+       }
+
+       if (do_exec) {
+               char *dirname = strdup(argv[0]);
+               int i;
+               i = strlen(dirname) - 1;
+               while (i) {
+                       if (dirname[i] == '/') {
+                               dirname[i] = '\0';
+                               if (chdir(dirname) == -1) {
+                                       perror("chdir");
+                                       exit(1);
+                               }
+                               break;
+                       }
+                       i--;
+               }
+       }
+
+       if (exec_target) {
+               exit(0);
+       }
+
+       if ((argc - optind) != 1) {
+               cpu = -1;
+       } else {
+               cpu = atoi(argv[optind++]);
+       }
+
+       if (do_exec)
+               exec_file = argv[0];
+
+       set_cpu(cpu);
+
+       printf("Using ");
+       if (do_fork)
+               printf("fork");
+       else if (do_vfork)
+               printf("vfork");
+       else
+               printf("clone");
+
+       if (do_exec)
+               printf(" + exec");
+
+       printf(" on cpu %d\n", cpu);
+
+       /* Create a new process group so we can signal everyone for exit */
+       setpgid(getpid(), getpid());
+
+       signal(SIGUSR1, sigusr1_handler);
+
+       start_process_on(bench_proc, NULL, cpu);
+
+       while (1)
+               sleep(3600);
+
+       return 0;
+}
index ac4a52e19e590cd6b8019d0d57ec1b9bcf78fb55..eedce3366f64be29fd9c8d7a0643bd2d45a56a7e 100644 (file)
@@ -5,8 +5,8 @@ CFLAGS += -I$(CURDIR)
 CFLAGS += -D SELFTEST
 CFLAGS += -maltivec
 
-# Use our CFLAGS for the implicit .S rule
-ASFLAGS = $(CFLAGS)
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
 
 TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
 EXTRA_SOURCES := validate.c ../harness.c
index 5c72ff978f2784babc71464b2baeb62c44089ce2..c0e45d2dde25d115b73ae2e14002a8b24167abbc 100644 (file)
@@ -4,7 +4,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
 
 TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
        tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
-       $(SIGNAL_CONTEXT_CHK_TESTS)
+       $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
 
 include ../../lib.mk
 
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
new file mode 100644 (file)
index 0000000..85d6344
--- /dev/null
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2015, Laurent Dufour, IBM Corp.
+ *
+ * Test the kernel's signal return code to check that reclaim is done when
+ * sigreturn() is called while in a transaction (suspended, since the active
+ * state has already been dropped on the way through the system call path).
+ *
+ * The kernel must discard the transaction when entering sigreturn, since
+ * restoring the potential TM SPRs from the signal frame requires not being
+ * in a transaction.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "tm.h"
+#include "utils.h"
+
+
+void handler(int sig)
+{
+       uint64_t ret;
+
+       asm __volatile__(
+               "li             3,1             ;"
+               "tbegin.                        ;"
+               "beq            1f              ;"
+               "li             3,0             ;"
+               "tsuspend.                      ;"
+               "1:                             ;"
+               "std%X[ret]     3, %[ret]       ;"
+               : [ret] "=m"(ret)
+               :
+               : "memory", "3", "cr0");
+
+       if (ret)
+               exit(1);
+
+       /*
+        * We return from the signal handler while in a suspended transaction.
+        */
+}
+
+
+int tm_sigreturn(void)
+{
+       struct sigaction sa;
+       uint64_t ret = 0;
+
+       SKIP_IF(!have_htm());
+
+       memset(&sa, 0, sizeof(sa));
+       sa.sa_handler = handler;
+       sigemptyset(&sa.sa_mask);
+
+       if (sigaction(SIGSEGV, &sa, NULL))
+               exit(1);
+
+       asm __volatile__(
+               "tbegin.                        ;"
+               "beq            1f              ;"
+               "li             3,0             ;"
+               "std            3,0(3)          ;" /* trigger SEGV */
+               "li             3,1             ;"
+               "std%X[ret]     3,%[ret]        ;"
+               "tend.                          ;"
+               "b              2f              ;"
+               "1:                             ;"
+               "li             3,2             ;"
+               "std%X[ret]     3,%[ret]        ;"
+               "2:                             ;"
+               : [ret] "=m"(ret)
+               :
+               : "memory", "3", "cr0");
+
+       if (ret != 2)
+               exit(1);
+
+       exit(0);
+}
+
+int main(void)
+{
+       return test_harness(tm_sigreturn, "tm_sigreturn");
+}
index e6a0fad2bfd019beb398e9c024c4ccc584f8e4d7..156c8e750259b07f9280b39b3a4fbd24acd71e87 100644 (file)
@@ -80,7 +80,7 @@ bool is_failure(uint64_t condition_reg)
        return ((condition_reg >> 28) & 0xa) == 0xa;
 }
 
-void *ping(void *input)
+void *tm_una_ping(void *input)
 {
 
        /*
@@ -280,7 +280,7 @@ void *ping(void *input)
 }
 
 /* Thread to force context switch */
-void *pong(void *not_used)
+void *tm_una_pong(void *not_used)
 {
        /* Wait for this thread to get its name "pong". */
        if (DEBUG)
@@ -311,11 +311,11 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
        do {
                int rc;
 
-               /* Bind 'ping' to CPU 0, as specified in 'attr'. */
-               rc = pthread_create(&t0, attr, ping, (void *) &flags);
+               /* Bind to CPU 0, as specified in 'attr'. */
+               rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags);
                if (rc)
                        pr_err(rc, "pthread_create()");
-               rc = pthread_setname_np(t0, "ping");
+               rc = pthread_setname_np(t0, "tm_una_ping");
                if (rc)
                        pr_warn(rc, "pthread_setname_np");
                rc = pthread_join(t0, &ret_value);
@@ -333,13 +333,15 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
        }
 }
 
-int main(int argc, char **argv)
+int tm_unavailable_test(void)
 {
        int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
        pthread_t t1;
        pthread_attr_t attr;
        cpu_set_t cpuset;
 
+       SKIP_IF(!have_htm());
+
        /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
        CPU_ZERO(&cpuset);
        CPU_SET(0, &cpuset);
@@ -354,12 +356,12 @@ int main(int argc, char **argv)
        if (rc)
                pr_err(rc, "pthread_attr_setaffinity_np()");
 
-       rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+       rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL);
        if (rc)
                pr_err(rc, "pthread_create()");
 
        /* Name it for systemtap convenience */
-       rc = pthread_setname_np(t1, "pong");
+       rc = pthread_setname_np(t1, "tm_una_pong");
        if (rc)
                pr_warn(rc, "pthread_setname_np()");
 
@@ -394,3 +396,9 @@ int main(int argc, char **argv)
                exit(0);
        }
 }
+
+int main(int argc, char **argv)
+{
+       test_harness_set_timeout(220);
+       return test_harness(tm_unavailable_test, "tm_unavailable_test");
+}