mm, locking: Rework {set,clear,mm}_tlb_flush_pending()

Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Wed, 7 Jun 2017 16:05:07 +0000 (18:05 +0200)
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 10 Aug 2017 10:29:01 +0000 (12:29 +0200)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 7f384bb62d8ec6bc7eafa25828b0716be63c7ccb..36ea3cf7d85e625bd5fe4259cfeae4217d5e6be7 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -531,23 +531,44 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
   */
  static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
  {
-       barrier();
+       /*
+        * Must be called with PTL held; such that our PTL acquire will have
+        * observed the store from set_tlb_flush_pending().
+        */
         return mm->tlb_flush_pending;
  }
  static inline void set_tlb_flush_pending(struct mm_struct *mm)
  {
         mm->tlb_flush_pending = true;
-
         /*
-        * Guarantee that the tlb_flush_pending store does not leak into the
-        * critical section updating the page tables
+        * The only time this value is relevant is when there are indeed pages
+        * to flush. And we'll only flush pages after changing them, which
+        * requires the PTL.
+        *
+        * So the ordering here is:
+        *
+        *      mm->tlb_flush_pending = true;
+        *      spin_lock(&ptl);
+        *      ...
+        *      set_pte_at();
+        *      spin_unlock(&ptl);
+        *
+        *                              spin_lock(&ptl)
+        *                              mm_tlb_flush_pending();
+        *                              ....
+        *                              spin_unlock(&ptl);
+        *
+        *      flush_tlb_range();
+        *      mm->tlb_flush_pending = false;
+        *
+        * So the =true store is constrained by the PTL unlock, and the =false
+        * store is constrained by the TLB invalidate.
          */
-       smp_mb__before_spinlock();
  }
  /* Clearing is done after a TLB flush, which also provides a barrier. */
  static inline void clear_tlb_flush_pending(struct mm_struct *mm)
  {
-       barrier();
+       /* see set_tlb_flush_pending */
         mm->tlb_flush_pending = false;
  }
  #else
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 86975dec0ba160feadfb8aa0d13b8f2be943638d..c76a720b936b8afbe525a4db8b22fcbee2d1736c 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1410,6 +1410,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
         unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
         int page_nid = -1, this_nid = numa_node_id();
         int target_nid, last_cpupid = -1;
+       bool need_flush = false;
         bool page_locked;
         bool migrated = false;
         bool was_writable;
@@ -1495,11 +1496,30 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
                 goto clear_pmdnuma;
         }
  
+       /*
+        * Since we took the NUMA fault, we must have observed the !accessible
+        * bit. Make sure all other CPUs agree with that, to avoid them
+        * modifying the page we're about to migrate.
+        *
+        * Must be done under PTL such that we'll observe the relevant
+        * set_tlb_flush_pending().
+        */
+       if (mm_tlb_flush_pending(vma->vm_mm))
+               need_flush = true;
+
         /*
          * Migrate the THP to the requested node, returns with page unlocked
          * and access rights restored.
          */
         spin_unlock(vmf->ptl);
+
+       /*
+        * We are not sure a pending tlb flush here is for a huge page
+        * mapping or not. Hence use the tlb range variant
+        */
+       if (need_flush)
+               flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
         migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
                                 vmf->pmd, pmd, vmf->address, page, target_nid);
         if (migrated) {
diff --git a/mm/migrate.c b/mm/migrate.c

index 62767155187356d54d1fa7333ad402e76183ca0b..d68a41da6abb0743d6b09cc49c5c9524463715c3 100644 (file)
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1937,12 +1937,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
                 put_page(new_page);
                 goto out_fail;
         }
-       /*
-        * We are not sure a pending tlb flush here is for a huge page
-        * mapping or not. Hence use the tlb range variant
-        */
-       if (mm_tlb_flush_pending(mm))
-               flush_tlb_range(vma, mmun_start, mmun_end);
  
         /* Prepare a page as a migration target */
         __SetPageLocked(new_page);
author	Peter Zijlstra <peterz@infradead.org>
	Wed, 7 Jun 2017 16:05:07 +0000 (18:05 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Thu, 10 Aug 2017 10:29:01 +0000 (12:29 +0200)
include/linux/mm_types.h		patch | blob | history
mm/huge_memory.c		patch | blob | history
mm/migrate.c		patch | blob | history