Merge branch 'linus' into locking/core, to resolve conflicts
author    Ingo Molnar <mingo@kernel.org>
          Fri, 11 Aug 2017 11:51:59 +0000 (13:51 +0200)
committer Ingo Molnar <mingo@kernel.org>
          Fri, 11 Aug 2017 11:51:59 +0000 (13:51 +0200)
 Conflicts:
include/linux/mm_types.h
mm/huge_memory.c

I removed the smp_mb__before_spinlock() like the following commit does:

  8b1b436dd1cc ("mm, locking: Rework {set,clear,mm}_tlb_flush_pending()")

and fixed up the affected commits.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
fs/userfaultfd.c
include/linux/mm_types.h
kernel/fork.c
mm/huge_memory.c
mm/page_alloc.c
net/ipv4/udp.c
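
To make the resolution below easier to follow: "removed the
smp_mb__before_spinlock()" means the boolean tlb_flush_pending plus explicit
barrier pattern is gone, replaced by an atomic counter whose ordering
piggybacks on the page-table lock. A minimal before/after sketch of an update
site (illustrative only: 'ptl' and both functions are stand-ins, only the
helper names match the diff):

#include <linux/mm_types.h>
#include <linux/spinlock.h>
#include <asm/tlbflush.h>

/* Pre-rework pattern: boolean flag, explicit barrier before taking the PTL. */
static void range_update_old(struct mm_struct *mm, struct vm_area_struct *vma,
                             spinlock_t *ptl, unsigned long start, unsigned long end)
{
        mm->tlb_flush_pending = true;
        smp_mb__before_spinlock();              /* the barrier this merge removes */
        spin_lock(ptl);
        /* ... set_pte_at() over the range ... */
        spin_unlock(ptl);
        flush_tlb_range(vma, start, end);
        mm->tlb_flush_pending = false;
}

/* Post-rework pattern (8b1b436dd1cc): atomic counter, ordering supplied by
 * the PTL acquire/release and by dec_tlb_flush_pending() itself. */
static void range_update_new(struct mm_struct *mm, struct vm_area_struct *vma,
                             spinlock_t *ptl, unsigned long start, unsigned long end)
{
        inc_tlb_flush_pending(mm);
        spin_lock(ptl);
        /* ... set_pte_at() over the range ... */
        spin_unlock(ptl);
        flush_tlb_range(vma, start, end);
        dec_tlb_flush_pending(mm);              /* smp_mb__before_atomic() inside */
}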

diff --combined fs/userfaultfd.c
index 44fcbefd84a2008209bf48ecdd2321799ca452f0,b0d5897bc4e6d0e019c79f65b6d41df1d3b0d050..886085b47c75e6914cb1084cb4e8ec681af92945
@@@ -109,24 -109,27 +109,24 @@@ static int userfaultfd_wake_function(wa
                goto out;
        WRITE_ONCE(uwq->waken, true);
        /*
 -       * The implicit smp_mb__before_spinlock in try_to_wake_up()
 -       * renders uwq->waken visible to other CPUs before the task is
 -       * waken.
 +       * The Program-Order guarantees provided by the scheduler
 +       * ensure uwq->waken is visible before the task is woken.
         */
        ret = wake_up_state(wq->private, mode);
 -      if (ret)
 +      if (ret) {
                /*
                 * Wake only once, autoremove behavior.
                 *
 -               * After the effect of list_del_init is visible to the
 -               * other CPUs, the waitqueue may disappear from under
 -               * us, see the !list_empty_careful() in
 -               * handle_userfault(). try_to_wake_up() has an
 -               * implicit smp_mb__before_spinlock, and the
 -               * wq->private is read before calling the extern
 -               * function "wake_up_state" (which in turns calls
 -               * try_to_wake_up). While the spin_lock;spin_unlock;
 -               * wouldn't be enough, the smp_mb__before_spinlock is
 -               * enough to avoid an explicit smp_mb() here.
 +               * After the effect of list_del_init is visible to the other
 +               * CPUs, the waitqueue may disappear from under us, see the
 +               * !list_empty_careful() in handle_userfault().
 +               *
 +               * try_to_wake_up() has an implicit smp_mb(), and the
 +               * wq->private is read before calling the extern function
 +               * "wake_up_state" (which in turn calls try_to_wake_up).
                 */
                list_del_init(&wq->entry);
 +      }
  out:
        return ret;
  }
@@@ -1597,7 -1600,7 +1597,7 @@@ static int userfaultfd_copy(struct user
                                   uffdio_copy.len);
                mmput(ctx->mm);
        } else {
-               return -ENOSPC;
+               return -ESRCH;
        }
        if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
                return -EFAULT;
@@@ -1644,7 -1647,7 +1644,7 @@@ static int userfaultfd_zeropage(struct 
                                     uffdio_zeropage.range.len);
                mmput(ctx->mm);
        } else {
-               return -ENOSPC;
+               return -ESRCH;
        }
        if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
                return -EFAULT;
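
The ordering argument in the reworked userfaultfd_wake_function() comment
reduces to the autoremove pattern below. This is a sketch only; the upstream
function takes the usual wait_queue_func_t arguments and additionally checks
the faulting range before waking:

#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/list.h>

static int wake_and_autoremove_sketch(wait_queue_entry_t *wq, unsigned mode)
{
        int ret;

        /*
         * try_to_wake_up() (via wake_up_state()) implies a full smp_mb(),
         * and wq->private is read before the external call, so no explicit
         * barrier is needed ahead of the list removal below.
         */
        ret = wake_up_state(wq->private, mode);
        if (ret) {
                /*
                 * Wake only once: as soon as list_del_init() is visible the
                 * waitqueue may disappear from under us, cf. the
                 * !list_empty_careful() check in handle_userfault().
                 */
                list_del_init(&wq->entry);
        }
        return ret;
}
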
diff --combined include/linux/mm_types.h
index 36ea3cf7d85e625bd5fe4259cfeae4217d5e6be7,3cadee0a350889f748e7b1a999b449ae003e9c3f..dc1edec05a3fa7459cd34d3ac934478daecb7fdb
@@@ -487,14 -487,12 +487,12 @@@ struct mm_struct 
        /* numa_scan_seq prevents two threads setting pte_numa */
        int numa_scan_seq;
  #endif
- #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
        /*
         * An operation with batched TLB flushing is going on. Anything that
         * can move process memory needs to flush the TLB when moving a
         * PROT_NONE or PROT_NUMA mapped page.
         */
-       bool tlb_flush_pending;
- #endif
+       atomic_t tlb_flush_pending;
  #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
        /* See flush_tlb_batched_pending() */
        bool tlb_flush_batched;
@@@ -522,67 -520,60 +520,83 @@@ static inline cpumask_t *mm_cpumask(str
        return mm->cpu_vm_mask_var;
  }
  
- #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
+ struct mmu_gather;
+ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+                               unsigned long start, unsigned long end);
+ extern void tlb_finish_mmu(struct mmu_gather *tlb,
+                               unsigned long start, unsigned long end);
  /*
   * Memory barriers to keep this state in sync are graciously provided by
   * the page table locks, outside of which no page table modifications happen.
-  * The barriers below prevent the compiler from re-ordering the instructions
-  * around the memory barriers that are already present in the code.
+  * The barriers ensure that updates to tlb_flush_pending, which happen while
+  * the lock is not taken, are serialized against the PTE updates, which
+  * happen while the lock is taken.
   */
  static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
  {
-       return mm->tlb_flush_pending;
 +      /*
 +       * Must be called with PTL held, such that our PTL acquire will have
 +       * observed the store from inc_tlb_flush_pending().
 +       */
+       return atomic_read(&mm->tlb_flush_pending) > 0;
+ }
+ /*
+  * Returns true if two or more of the above TLB batching threads run in parallel.
+  */
+ static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+ {
+       return atomic_read(&mm->tlb_flush_pending) > 1;
+ }
+ static inline void init_tlb_flush_pending(struct mm_struct *mm)
+ {
+       atomic_set(&mm->tlb_flush_pending, 0);
  }
- static inline void set_tlb_flush_pending(struct mm_struct *mm)
+ static inline void inc_tlb_flush_pending(struct mm_struct *mm)
  {
-       mm->tlb_flush_pending = true;
+       atomic_inc(&mm->tlb_flush_pending);
        /*
 -       * Guarantee that the tlb_flush_pending increase does not leak into the
 -       * critical section updating the page tables
 +       * The only time this value is relevant is when there are indeed pages
 +       * to flush. And we'll only flush pages after changing them, which
 +       * requires the PTL.
 +       *
 +       * So the ordering here is:
 +       *
-        *      mm->tlb_flush_pending = true;
++       *      atomic_inc(&mm->tlb_flush_pending);
 +       *      spin_lock(&ptl);
 +       *      ...
 +       *      set_pte_at();
 +       *      spin_unlock(&ptl);
 +       *
 +       *                              spin_lock(&ptl)
 +       *                              mm_tlb_flush_pending();
 +       *                              ....
 +       *                              spin_unlock(&ptl);
 +       *
 +       *      flush_tlb_range();
-        *      mm->tlb_flush_pending = false;
++       *      atomic_dec(&mm->tlb_flush_pending);
 +       *
 +       * So the increment is constrained by the PTL unlock, and the
 +       * decrement is constrained by the TLB invalidate.
         */
 -      smp_mb__before_spinlock();
  }
  /* Clearing is done after a TLB flush, which also provides a barrier. */
- static inline void clear_tlb_flush_pending(struct mm_struct *mm)
- {
-       /* see set_tlb_flush_pending */
-       mm->tlb_flush_pending = false;
- }
- #else
- static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
- {
-       return false;
- }
- static inline void set_tlb_flush_pending(struct mm_struct *mm)
- {
- }
- static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+ static inline void dec_tlb_flush_pending(struct mm_struct *mm)
  {
+       /*
+        * Guarantee that the tlb_flush_pending does not leak into the
+        * critical section, since we must order the PTE change and changes to
+        * the pending TLB flush indication. We could have relied on TLB flush
+        * as a memory barrier, but this behavior is not clearly documented.
+        */
+       smp_mb__before_atomic();
+       atomic_dec(&mm->tlb_flush_pending);
  }
- #endif
  
  struct vm_fault;
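
A usage note on the new mm_tlb_flush_nested() helper: a count above one means
some other thread is concurrently batching TLB flushes against this mm, so a
finishing batch cannot trust its own (possibly empty) gathered range. A sketch
of that consumer-side decision (illustrative; the intended consumer is the
mmu_gather code behind the tlb_gather_mmu()/tlb_finish_mmu() declarations
above, which tracks more state than shown here):

#include <linux/mm_types.h>

/* Sketch: should a finishing TLB batch flush conservatively? */
static bool finish_needs_forced_flush(struct mm_struct *mm, bool gathered_pages)
{
        /*
         * With two or more batching threads in flight, another thread may
         * have cleared PTEs in our range after we walked it, so flush even
         * if this batch gathered nothing.
         */
        if (mm_tlb_flush_nested(mm))
                return true;

        return gathered_pages;
}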
  
diff --combined kernel/fork.c
index cbf2221ee81a45dfd94be55f38f4c38fdece04ca,e075b7780421dee1d8243b9dc178248398c5f189..5fc09911fbb937bf01ab303f7ea7a94528c3d1d5
@@@ -484,8 -484,6 +484,8 @@@ void __init fork_init(void
        cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
                          NULL, free_vm_stack_cache);
  #endif
 +
 +      lockdep_init_task(&init_task);
  }
  
  int __weak arch_dup_task_struct(struct task_struct *dst,
@@@ -809,7 -807,7 +809,7 @@@ static struct mm_struct *mm_init(struc
        mm_init_aio(mm);
        mm_init_owner(mm, p);
        mmu_notifier_mm_init(mm);
-       clear_tlb_flush_pending(mm);
+       init_tlb_flush_pending(mm);
  #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
        mm->pmd_huge_pte = NULL;
  #endif
@@@ -1693,7 -1691,6 +1693,7 @@@ static __latent_entropy struct task_str
        p->lockdep_depth = 0; /* no locks held yet */
        p->curr_chain_key = 0;
        p->lockdep_recursion = 0;
 +      lockdep_init_task(p);
  #endif
  
  #ifdef CONFIG_DEBUG_MUTEXES
@@@ -1952,7 -1949,6 +1952,7 @@@ bad_fork_cleanup_audit
  bad_fork_cleanup_perf:
        perf_event_free_task(p);
  bad_fork_cleanup_policy:
 +      lockdep_free_task(p);
  #ifdef CONFIG_NUMA
        mpol_put(p->mempolicy);
  bad_fork_cleanup_threadgroup_lock:
diff --combined mm/huge_memory.c
index c76a720b936b8afbe525a4db8b22fcbee2d1736c,216114f6ef0b7f8c09378edd3615d6a39527ead0..ce883459e2466d70b8484d529f75697efdde96c4
@@@ -1410,7 -1410,6 +1410,7 @@@ int do_huge_pmd_numa_page(struct vm_fau
        unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
        int page_nid = -1, this_nid = numa_node_id();
        int target_nid, last_cpupid = -1;
 +      bool need_flush = false;
        bool page_locked;
        bool migrated = false;
        bool was_writable;
                goto clear_pmdnuma;
        }
  
+       /*
+        * The page_table_lock above provides a memory barrier
+        * with change_protection_range.
+        */
+       if (mm_tlb_flush_pending(vma->vm_mm))
+               flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
 +      /*
 +       * Since we took the NUMA fault, we must have observed the !accessible
 +       * bit. Make sure all other CPUs agree with that, to avoid them
 +       * modifying the page we're about to migrate.
 +       *
 +       * Must be done under PTL such that we'll observe the relevant
 +       * inc_tlb_flush_pending().
 +       */
 +      if (mm_tlb_flush_pending(vma->vm_mm))
 +              need_flush = true;
 +
        /*
         * Migrate the THP to the requested node, returns with page unlocked
         * and access rights restored.
         */
        spin_unlock(vmf->ptl);
 +
 +      /*
 +       * We are not sure a pending tlb flush here is for a huge page
 +       * mapping or not. Hence use the tlb range variant
 +       */
 +      if (need_flush)
 +              flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
 +
        migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
                                vmf->pmd, pmd, vmf->address, page, target_nid);
        if (migrated) {
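
Reduced to a sketch, the need_flush logic above does the following
(illustrative only; argument plumbing is simplified and the real
do_huge_pmd_numa_page() interleaves this with the migration setup):

#include <linux/mm.h>
#include <linux/huge_mm.h>
#include <asm/tlbflush.h>

static void numa_fault_flush_sketch(struct vm_fault *vmf,
                                    struct vm_area_struct *vma,
                                    unsigned long haddr)
{
        bool need_flush = false;

        /*
         * Sample the counter under vmf->ptl: only the PTL acquire orders us
         * against a concurrent inc_tlb_flush_pending().
         */
        if (mm_tlb_flush_pending(vma->vm_mm))
                need_flush = true;

        spin_unlock(vmf->ptl);

        /*
         * The pending flush may be for a huge or a small mapping, so use the
         * range variant covering the whole PMD before migrating the page.
         */
        if (need_flush)
                flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
}
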
diff --combined mm/page_alloc.c
index c20d8960180261cd96adc483519fd0ad1fe674d2,6d00f746c2fd96452661fde3f704289eed7f1f70..6acf612fdd990005e4a2dd3f05f0105ddf4f1513
@@@ -66,7 -66,6 +66,7 @@@
  #include <linux/kthread.h>
  #include <linux/memcontrol.h>
  #include <linux/ftrace.h>
 +#include <linux/lockdep.h>
  
  #include <asm/sections.h>
  #include <asm/tlbflush.h>
@@@ -3491,47 -3490,6 +3491,47 @@@ should_compact_retry(struct alloc_conte
  }
  #endif /* CONFIG_COMPACTION */
  
 +#ifdef CONFIG_LOCKDEP
 +struct lockdep_map __fs_reclaim_map =
 +      STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
 +
 +static bool __need_fs_reclaim(gfp_t gfp_mask)
 +{
 +      gfp_mask = current_gfp_context(gfp_mask);
 +
 +      /* no reclaim without waiting on it */
 +      if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
 +              return false;
 +
 +      /* this guy won't enter reclaim */
 +      if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
 +              return false;
 +
 +      /* We're only interested in __GFP_FS allocations for now */
 +      if (!(gfp_mask & __GFP_FS))
 +              return false;
 +
 +      if (gfp_mask & __GFP_NOLOCKDEP)
 +              return false;
 +
 +      return true;
 +}
 +
 +void fs_reclaim_acquire(gfp_t gfp_mask)
 +{
 +      if (__need_fs_reclaim(gfp_mask))
 +              lock_map_acquire(&__fs_reclaim_map);
 +}
 +EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
 +
 +void fs_reclaim_release(gfp_t gfp_mask)
 +{
 +      if (__need_fs_reclaim(gfp_mask))
 +              lock_map_release(&__fs_reclaim_map);
 +}
 +EXPORT_SYMBOL_GPL(fs_reclaim_release);
 +#endif
 +
  /* Perform direct synchronous page reclaim */
  static int
  __perform_reclaim(gfp_t gfp_mask, unsigned int order,
        /* We now go into synchronous reclaim */
        cpuset_memory_pressure_bump();
        noreclaim_flag = memalloc_noreclaim_save();
 -      lockdep_set_current_reclaim_state(gfp_mask);
 +      fs_reclaim_acquire(gfp_mask);
        reclaim_state.reclaimed_slab = 0;
        current->reclaim_state = &reclaim_state;
  
                                                                ac->nodemask);
  
        current->reclaim_state = NULL;
 -      lockdep_clear_current_reclaim_state();
 +      fs_reclaim_release(gfp_mask);
        memalloc_noreclaim_restore(noreclaim_flag);
  
        cond_resched();
@@@ -4083,8 -4041,7 +4083,8 @@@ static inline bool prepare_alloc_pages(
                        *alloc_flags |= ALLOC_CPUSET;
        }
  
 -      lockdep_trace_alloc(gfp_mask);
 +      fs_reclaim_acquire(gfp_mask);
 +      fs_reclaim_release(gfp_mask);
  
        might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
  
@@@ -4501,8 -4458,9 +4501,9 @@@ long si_mem_available(void
         * Part of the reclaimable slab consists of items that are in use,
         * and cannot be freed. Cap this estimate at the low watermark.
         */
-       available += global_page_state(NR_SLAB_RECLAIMABLE) -
-                    min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+       available += global_node_page_state(NR_SLAB_RECLAIMABLE) -
+                    min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
+                        wmark_low);
  
        if (available < 0)
                available = 0;
@@@ -4645,8 -4603,8 +4646,8 @@@ void show_free_areas(unsigned int filte
                global_node_page_state(NR_FILE_DIRTY),
                global_node_page_state(NR_WRITEBACK),
                global_node_page_state(NR_UNSTABLE_NFS),
-               global_page_state(NR_SLAB_RECLAIMABLE),
-               global_page_state(NR_SLAB_UNRECLAIMABLE),
+               global_node_page_state(NR_SLAB_RECLAIMABLE),
+               global_node_page_state(NR_SLAB_UNRECLAIMABLE),
                global_node_page_state(NR_FILE_MAPPED),
                global_node_page_state(NR_SHMEM),
                global_page_state(NR_PAGETABLE),
@@@ -7711,7 -7669,7 +7712,7 @@@ int alloc_contig_range(unsigned long st
  
        /* Make sure the range is really isolated. */
        if (test_pages_isolated(outer_start, end, false)) {
-               pr_info("%s: [%lx, %lx) PFNs busy\n",
+               pr_info_ratelimited("%s: [%lx, %lx) PFNs busy\n",
                        __func__, outer_start, end);
                ret = -EBUSY;
                goto done;
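
To illustrate what the fs_reclaim annotation added above buys (a sketch under
assumptions: demo_lock is a made-up lock, while alloc_pages() is the real
entry point that now passes through fs_reclaim_acquire()/fs_reclaim_release()
in prepare_alloc_pages()):

#include <linux/gfp.h>
#include <linux/mm_types.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(demo_lock);         /* hypothetical fs-side lock */

static struct page *alloc_under_demo_lock(void)
{
        struct page *page;

        mutex_lock(&demo_lock);
        /*
         * GFP_KERNEL includes __GFP_FS and __GFP_DIRECT_RECLAIM, so even if
         * no reclaim actually runs here, prepare_alloc_pages() records the
         * demo_lock -> fs_reclaim dependency. Since __perform_reclaim() holds
         * fs_reclaim while reclaiming, any reclaim path that can ever take
         * demo_lock now gives lockdep an inversion to report up front.
         */
        page = alloc_pages(GFP_KERNEL, 0);
        mutex_unlock(&demo_lock);

        return page;
}

The point of the acquire/release pair in prepare_alloc_pages() is that the
dependency is recorded on every such allocation, not only on the rare
occasions direct reclaim actually runs.
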
diff --combined net/ipv4/udp.c
index 3037339ed8b8a1cbf5673d204f78e1844073cb56,a7c804f73990a0610bc85c02fc2dd76858973c22..b34f09b20fef1f69fd2d95d776e572eed6485a04
@@@ -802,7 -802,7 +802,7 @@@ static int udp_send_skb(struct sk_buff 
        if (is_udplite)                                  /*     UDP-Lite      */
                csum = udplite_csum(skb);
  
-       else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
+       else if (sk->sk_no_check_tx && !skb_is_gso(skb)) {   /* UDP csum off */
  
                skb->ip_summed = CHECKSUM_NONE;
                goto send;
@@@ -1809,7 -1809,8 +1809,7 @@@ static int __udp_queue_rcv_skb(struct s
  static struct static_key udp_encap_needed __read_mostly;
  void udp_encap_enable(void)
  {
 -      if (!static_key_enabled(&udp_encap_needed))
 -              static_key_slow_inc(&udp_encap_needed);
 +      static_key_enable(&udp_encap_needed);
  }
  EXPORT_SYMBOL(udp_encap_enable);
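
Finally, the udp_encap_enable() hunk replaces an open-coded enabled-check plus
static_key_slow_inc() with static_key_enable(). The difference, sketched with
a local stand-in key rather than udp_encap_needed:

#include <linux/cache.h>
#include <linux/jump_label.h>

static struct static_key demo_key __read_mostly;  /* stand-in for udp_encap_needed */

static void demo_enable_old(void)
{
        /*
         * Two concurrent callers can both observe the key as disabled and
         * both increment it, leaving the count at 2 for what is meant to be
         * a boolean-style key.
         */
        if (!static_key_enabled(&demo_key))
                static_key_slow_inc(&demo_key);
}

static void demo_enable_new(void)
{
        /*
         * static_key_enable() performs the check and the enable under the
         * jump-label machinery's own locking and is idempotent, so repeated
         * or concurrent calls leave the key enabled exactly once.
         */
        static_key_enable(&demo_key);
}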