powerpc/64s: Allocate LPPACAs individually

author Nicholas Piggin <npiggin@gmail.com>

Tue, 13 Feb 2018 15:08:13 +0000 (01:08 +1000)

committer Michael Ellerman <mpe@ellerman.id.au>

Fri, 30 Mar 2018 12:34:24 +0000 (23:34 +1100)
author Nicholas Piggin <npiggin@gmail.com>
Tue, 13 Feb 2018 15:08:13 +0000 (01:08 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Fri, 30 Mar 2018 12:34:24 +0000 (23:34 +1100)
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h

index 6e4589eee2dae653a904575ad148bd235cdac773..65d589689f013e67416a4150e99bf64367d380e4 100644 (file)
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -36,14 +36,16 @@
  #include <asm/mmu.h>
  
  /*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#define NR_LPPACAS     1
-
-/*
- * The Hypervisor barfs if the lppaca crosses a page boundary.  A 1k
- * alignment is sufficient to prevent this
+ * The lppaca is the "virtual processor area" registered with the hypervisor,
+ * H_REGISTER_VPA etc.
+ *
+ * According to PAPR, the structure is 640 bytes long, must be L1 cache line
+ * aligned, and must not cross a 4kB boundary. Its size field must be at
+ * least 640 bytes (but may be more).
+ *
+ * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than
+ * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep
+ * this structure as the canonical 640 byte size.
   */
  struct lppaca {
         /* cacheline 1 contains read-only data */
@@ -97,11 +99,9 @@ struct lppaca {
  
         __be32  page_ins;               /* CMO Hint - # page ins by OS */
         u8      reserved11[148];
-       volatile __be64 dtl_idx;                /* Dispatch Trace Log head index */
+       volatile __be64 dtl_idx;        /* Dispatch Trace Log head index */
         u8      reserved12[96];
-} __attribute__((__aligned__(0x400)));
-
-extern struct lppaca lppaca[];
+} ____cacheline_aligned;
  
  #define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
  
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c

index a250e3331f948d7c8ac281b14ea72ec0c136e265..1044bf15d5eda6c06f7a739bf5f521a99142521d 100644 (file)
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -323,17 +323,24 @@ void default_machine_kexec(struct kimage *image)
         kexec_stack.thread_info.cpu = current_thread_info()->cpu;
  
         /* We need a static PACA, too; copy this CPU's PACA over and switch to
-        * it.  Also poison per_cpu_offset to catch anyone using non-static
-        * data.
+        * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+        * non-static data.
          */
         memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
         kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
+#ifdef CONFIG_PPC_PSERIES
+       kexec_paca.lppaca_ptr = NULL;
+#endif
         paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
         setup_paca(&kexec_paca);
  
-       /* XXX: If anyone does 'dynamic lppacas' this will also need to be
-        * switched to a static version!
+       /*
+        * The lppaca should be unregistered at this point so the HV won't
+        * touch it. In the case of a crash, none of the lppacas are
+        * unregistered so there is not much we can do about it here.
          */
+
         /*
          * On Book3S, the copy must happen with the MMU off if we are either
          * using Radix page tables or we are not in an LPAR since we can
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c

index eef4891c9af6854c0de87b1348ab484acdd73b2b..6cddb9bdc151f35a5cbf0e3ee4c8e69e6064a143 100644 (file)
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -23,82 +23,50 @@
  #ifdef CONFIG_PPC_PSERIES
  
  /*
- * The structure which the hypervisor knows about - this structure
- * should not cross a page boundary.  The vpa_init/register_vpa call
- * is now known to fail if the lppaca structure crosses a page
- * boundary.  The lppaca is also used on POWER5 pSeries boxes.
- * The lppaca is 640 bytes long, and cannot readily
- * change since the hypervisor knows its layout, so a 1kB alignment
- * will suffice to ensure that it doesn't cross a page boundary.
+ * See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must be 1kB in size, L1 cache line aligned,
+ * and not cross a 4kB boundary. A 1kB size and 1kB alignment will satisfy
+ * these requirements.
   */
-struct lppaca lppaca[] = {
-       [0 ... (NR_LPPACAS-1)] = {
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+       BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+       *lppaca = (struct lppaca) {
                 .desc = cpu_to_be32(0xd397d781),        /* "LpPa" */
-               .size = cpu_to_be16(sizeof(struct lppaca)),
+               .size = cpu_to_be16(0x400),
                 .fpregs_in_use = 1,
                 .slb_count = cpu_to_be16(64),
                 .vmxregs_in_use = 0,
-               .page_ins = 0,
-       },
+               .page_ins = 0, };
  };
  
-static struct lppaca *extra_lppacas;
-static long __initdata lppaca_size;
-
-static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
-{
-       if (early_cpu_has_feature(CPU_FTR_HVMODE))
-               return;
-
-       if (nr_cpus <= NR_LPPACAS)
-               return;
-
-       lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
-                                (nr_cpus - NR_LPPACAS));
-       extra_lppacas = __va(memblock_alloc_base(lppaca_size,
-                                                PAGE_SIZE, limit));
-}
-
-static struct lppaca * __init new_lppaca(int cpu)
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
  {
         struct lppaca *lp;
+       size_t size = 0x400;
+
+       BUILD_BUG_ON(size < sizeof(struct lppaca));
  
         if (early_cpu_has_feature(CPU_FTR_HVMODE))
                 return NULL;
  
-       if (cpu < NR_LPPACAS)
-               return &lppaca[cpu];
-
-       lp = extra_lppacas + (cpu - NR_LPPACAS);
-       *lp = lppaca[0];
+       lp = __va(memblock_alloc_base(size, 0x400, limit));
+       init_lppaca(lp);
  
         return lp;
  }
  
-static void __init free_lppacas(void)
+static void __init free_lppaca(struct lppaca *lp)
  {
-       long new_size = 0, nr;
+       size_t size = 0x400;
  
         if (early_cpu_has_feature(CPU_FTR_HVMODE))
                 return;
  
-       if (!lppaca_size)
-               return;
-       nr = num_possible_cpus() - NR_LPPACAS;
-       if (nr > 0)
-               new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
-       if (new_size >= lppaca_size)
-               return;
-
-       memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
-       lppaca_size = new_size;
+       memblock_free(__pa(lp), size);
  }
-
-#else
-
-static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
-static inline void free_lppacas(void) { }
-
  #endif /* CONFIG_PPC_BOOK3S */
  
  #ifdef CONFIG_PPC_BOOK3S_64
@@ -167,7 +135,7 @@ EXPORT_SYMBOL(paca_ptrs);
  void __init initialise_paca(struct paca_struct *new_paca, int cpu)
  {
  #ifdef CONFIG_PPC_PSERIES
-       new_paca->lppaca_ptr = new_lppaca(cpu);
+       new_paca->lppaca_ptr = NULL;
  #endif
  #ifdef CONFIG_PPC_BOOK3E
         new_paca->kernel_pgd = swapper_pg_dir;
@@ -254,13 +222,15 @@ void __init allocate_pacas(void)
         printk(KERN_DEBUG "Allocated %lu bytes for %u pacas\n",
                         size, nr_cpu_ids);
  
-       allocate_lppacas(nr_cpu_ids, limit);
-
         allocate_slb_shadows(nr_cpu_ids, limit);
  
         /* Can't use for_each_*_cpu, as they aren't functional yet */
-       for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+       for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
                 initialise_paca(paca_ptrs[cpu], cpu);
+#ifdef CONFIG_PPC_PSERIES
+               paca_ptrs[cpu]->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+       }
  }
  
  void __init free_unused_pacas(void)
@@ -272,6 +242,9 @@ void __init free_unused_pacas(void)
         for (cpu = 0; cpu < paca_nr_cpu_ids; cpu++) {
                 if (!cpu_possible(cpu)) {
                         unsigned long pa = __pa(paca_ptrs[cpu]);
+#ifdef CONFIG_PPC_PSERIES
+                       free_lppaca(paca_ptrs[cpu]->lppaca_ptr);
+#endif
                         memblock_free(pa, sizeof(struct paca_struct));
                         paca_ptrs[cpu] = NULL;
                         size += sizeof(struct paca_struct);
@@ -288,8 +261,6 @@ void __init free_unused_pacas(void)
         if (size)
                 printk(KERN_DEBUG "Freed %lu bytes for unused pacas\n", size);
  
-       free_lppacas();
-
         paca_nr_cpu_ids = nr_cpu_ids;
         paca_ptrs_size = new_ptrs_size;
  }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index 41fce69714d56aeec7e3887eb0f5a41717c98ce7..9b48d4a191ff9769c22e35759ff9bd442eaa568a 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -498,7 +498,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
                  * use 640 bytes of the structure though, so we should accept
                  * clients that set a size of 640.
                  */
-               if (len < 640)
+               BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+               if (len < sizeof(struct lppaca))
                         break;
                 vpap = &tvcpu->arch.vpa;
                 err = 0;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c

index 314d19ab9385e038a4f38c18a50364da4873eb86..e9ec465068f1c9bc6e40f151086bb3b29567bbae 100644 (file)
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1110,7 +1110,7 @@ static void setup_cpu_associativity_change_counters(void)
         for_each_possible_cpu(cpu) {
                 int i;
                 u8 *counts = vphn_cpu_change_counts[cpu];
-               volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+               volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
  
                 for (i = 0; i < distance_ref_points_depth; i++)
                         counts[i] = hypervisor_counts[i];
@@ -1136,7 +1136,7 @@ static int update_cpu_associativity_changes_mask(void)
         for_each_possible_cpu(cpu) {
                 int i, changed = 0;
                 u8 *counts = vphn_cpu_change_counts[cpu];
-               volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
+               volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
  
                 for (i = 0; i < distance_ref_points_depth; i++) {
                         if (hypervisor_counts[i] != counts[i]) {
diff --git a/arch/powerpc/platforms/pseries/kexec.c b/arch/powerpc/platforms/pseries/kexec.c

index eeb13429d68535e50e2d80f05ce8d89ba01fbc4c..3fe12679697549154b9ebe3905ed062da1f40e93 100644 (file)
--- a/arch/powerpc/platforms/pseries/kexec.c
+++ b/arch/powerpc/platforms/pseries/kexec.c
@@ -23,7 +23,12 @@
  
  void pseries_kexec_cpu_down(int crash_shutdown, int secondary)
  {
-       /* Don't risk a hypervisor call if we're crashing */
+       /*
+        * Don't risk a hypervisor call if we're crashing.
+        * XXX: Why? The hypervisor is not crashing. It might be better
+        * to at least attempt to unregister to avoid the hypervisor stepping
+        * on our memory.
+        */
         if (firmware_has_feature(FW_FEATURE_SPLPAR) && !crash_shutdown) {
                 int ret;
                 int cpu = smp_processor_id();
author	Nicholas Piggin <npiggin@gmail.com>
	Tue, 13 Feb 2018 15:08:13 +0000 (01:08 +1000)
committer	Michael Ellerman <mpe@ellerman.id.au>
	Fri, 30 Mar 2018 12:34:24 +0000 (23:34 +1100)
arch/powerpc/include/asm/lppaca.h		patch \| blob \| history
arch/powerpc/kernel/machine_kexec_64.c		patch \| blob \| history
arch/powerpc/kernel/paca.c		patch \| blob \| history
arch/powerpc/kvm/book3s_hv.c		patch \| blob \| history
arch/powerpc/mm/numa.c		patch \| blob \| history
arch/powerpc/platforms/pseries/kexec.c		patch \| blob \| history