arch/x86/mm/cpu_entry_area.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 #include <linux/spinlock.h>
   4 #include <linux/percpu.h>
   5 #include <linux/kallsyms.h>
   6 #include <linux/kcore.h>
   7
   8 #include <asm/cpu_entry_area.h>
   9 #include <asm/pgtable.h>
  10 #include <asm/fixmap.h>
  11 #include <asm/desc.h>
  12
  13 static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
  14
  15 #ifdef CONFIG_X86_64
  16 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
  17 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
  18 #endif
  19
  20 #if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
  21 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
  22 #endif
  23
  24 struct cpu_entry_area *get_cpu_entry_area(int cpu)
  25 {
  26         unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
  27         BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
  28
  29         return (struct cpu_entry_area *) va;
  30 }
  31 EXPORT_SYMBOL(get_cpu_entry_area);
  32
  33 void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
  34 {
  35         unsigned long va = (unsigned long) cea_vaddr;
  36         pte_t pte = pfn_pte(pa >> PAGE_SHIFT, flags);
  37
  38         /*
  39          * The cpu_entry_area is shared between the user and kernel
  40          * page tables.  All of its ptes can safely be global.
  41          * _PAGE_GLOBAL gets reused to help indicate PROT_NONE for
  42          * non-present PTEs, so be careful not to set it in that
  43          * case to avoid confusion.
  44          */
  45         if (boot_cpu_has(X86_FEATURE_PGE) &&
  46             (pgprot_val(flags) & _PAGE_PRESENT))
  47                 pte = pte_set_flags(pte, _PAGE_GLOBAL);
  48
  49         set_pte_vaddr(va, pte);
  50 }
  51
  52 static void __init
  53 cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
  54 {
  55         for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
  56                 cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
  57 }
  58
  59 static void __init percpu_setup_debug_store(unsigned int cpu)
  60 {
  61 #ifdef CONFIG_CPU_SUP_INTEL
  62         unsigned int npages;
  63         void *cea;
  64
  65         if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
  66                 return;
  67
  68         cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
  69         npages = sizeof(struct debug_store) / PAGE_SIZE;
  70         BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
  71         cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
  72                              PAGE_KERNEL);
  73
  74         cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
  75         /*
  76          * Force the population of PMDs for not yet allocated per cpu
  77          * memory like debug store buffers.
  78          */
  79         npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
  80         for (; npages; npages--, cea += PAGE_SIZE)
  81                 cea_set_pte(cea, 0, PAGE_NONE);
  82 #endif
  83 }
  84
  85 #ifdef CONFIG_X86_64
  86
  87 #define cea_map_stack(name) do {                                        \
  88         npages = sizeof(estacks->name## _stack) / PAGE_SIZE;            \
  89         cea_map_percpu_pages(cea->estacks.name## _stack,                \
  90                         estacks->name## _stack, npages, PAGE_KERNEL);   \
  91         } while (0)
  92
  93 static void __init percpu_setup_exception_stacks(unsigned int cpu)
  94 {
  95         struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
  96         struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
  97         unsigned int npages;
  98
  99         BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
 100
 101         per_cpu(cea_exception_stacks, cpu) = &cea->estacks;
 102
 103         /*
 104          * The exceptions stack mappings in the per cpu area are protected
 105          * by guard pages so each stack must be mapped separately. DB2 is
 106          * not mapped; it just exists to catch triple nesting of #DB.
 107          */
 108         cea_map_stack(DF);
 109         cea_map_stack(NMI);
 110         cea_map_stack(DB1);
 111         cea_map_stack(DB);
 112         cea_map_stack(MCE);
 113 }
 114 #else
 115 static inline void percpu_setup_exception_stacks(unsigned int cpu)
 116 {
 117 #ifdef CONFIG_DOUBLEFAULT
 118         struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
 119
 120         cea_map_percpu_pages(&cea->doublefault_stack,
 121                              &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL);
 122 #endif
 123 }
 124 #endif
 125
 126 /* Setup the fixmap mappings only once per-processor */
 127 static void __init setup_cpu_entry_area(unsigned int cpu)
 128 {
 129         struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
 130 #ifdef CONFIG_X86_64
 131         /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
 132         pgprot_t gdt_prot = PAGE_KERNEL_RO;
 133         pgprot_t tss_prot = PAGE_KERNEL_RO;
 134 #else
 135         /*
 136          * On native 32-bit systems, the GDT cannot be read-only because
 137          * our double fault handler uses a task gate, and entering through
 138          * a task gate needs to change an available TSS to busy.  If the
 139          * GDT is read-only, that will triple fault.  The TSS cannot be
 140          * read-only because the CPU writes to it on task switches.
 141          *
 142          * On Xen PV, the GDT must be read-only because the hypervisor
 143          * requires it.
 144          */
 145         pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
 146                 PAGE_KERNEL_RO : PAGE_KERNEL;
 147         pgprot_t tss_prot = PAGE_KERNEL;
 148 #endif
 149
 150         cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
 151
 152         cea_map_percpu_pages(&cea->entry_stack_page,
 153                              per_cpu_ptr(&entry_stack_storage, cpu), 1,
 154                              PAGE_KERNEL);
 155
 156         /*
 157          * The Intel SDM says (Volume 3, 7.2.1):
 158          *
 159          *  Avoid placing a page boundary in the part of the TSS that the
 160          *  processor reads during a task switch (the first 104 bytes). The
 161          *  processor may not correctly perform address translations if a
 162          *  boundary occurs in this area. During a task switch, the processor
 163          *  reads and writes into the first 104 bytes of each TSS (using
 164          *  contiguous physical addresses beginning with the physical address
 165          *  of the first byte of the TSS). So, after TSS access begins, if
 166          *  part of the 104 bytes is not physically contiguous, the processor
 167          *  will access incorrect information without generating a page-fault
 168          *  exception.
 169          *
 170          * There are also a lot of errata involving the TSS spanning a page
 171          * boundary.  Assert that we're not doing that.
 172          */
 173         BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
 174                       offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
 175         BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
 176         /*
 177          * VMX changes the host TR limit to 0x67 after a VM exit. This is
 178          * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure
 179          * that this is correct.
 180          */
 181         BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0);
 182         BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68);
 183
 184         cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
 185                              sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
 186
 187 #ifdef CONFIG_X86_32
 188         per_cpu(cpu_entry_area, cpu) = cea;
 189 #endif
 190
 191         percpu_setup_exception_stacks(cpu);
 192
 193         percpu_setup_debug_store(cpu);
 194 }
 195
 196 static __init void setup_cpu_entry_area_ptes(void)
 197 {
 198 #ifdef CONFIG_X86_32
 199         unsigned long start, end;
 200
 201         /* The +1 is for the readonly IDT: */
 202         BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
 203         BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
 204         BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
 205
 206         start = CPU_ENTRY_AREA_BASE;
 207         end = start + CPU_ENTRY_AREA_MAP_SIZE;
 208
 209         /* Careful here: start + PMD_SIZE might wrap around */
 210         for (; start < end && start >= CPU_ENTRY_AREA_BASE; start += PMD_SIZE)
 211                 populate_extra_pte(start);
 212 #endif
 213 }
 214
 215 void __init setup_cpu_entry_areas(void)
 216 {
 217         unsigned int cpu;
 218
 219         setup_cpu_entry_area_ptes();
 220
 221         for_each_possible_cpu(cpu)
 222                 setup_cpu_entry_area(cpu);
 223
 224         /*
 225          * This is the last essential update to swapper_pgdir which needs
 226          * to be synchronized to initial_page_table on 32bit.
 227          */
 228         sync_initial_page_table();
 229 }