Merge tag 'mlx5-fixes-2019-10-18' of git://git.kernel.org/pub/scm/linux/kernel/git...

[linux.git] / mm / gup.c
diff --git a/mm/gup.c b/mm/gup.c

index 98f13ab37bacc1d206760e6e6ab554a2536ff7a7..8f236a335ae9d25a81665f282f7ef9ccba7802dc 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,85 +29,70 @@ struct follow_page_context {
         unsigned int page_mask;
  };
  
-typedef int (*set_dirty_func_t)(struct page *page);
-
-static void __put_user_pages_dirty(struct page **pages,
-                                  unsigned long npages,
-                                  set_dirty_func_t sdf)
-{
-       unsigned long index;
-
-       for (index = 0; index < npages; index++) {
-               struct page *page = compound_head(pages[index]);
-
-               /*
-                * Checking PageDirty at this point may race with
-                * clear_page_dirty_for_io(), but that's OK. Two key cases:
-                *
-                * 1) This code sees the page as already dirty, so it skips
-                * the call to sdf(). That could happen because
-                * clear_page_dirty_for_io() called page_mkclean(),
-                * followed by set_page_dirty(). However, now the page is
-                * going to get written back, which meets the original
-                * intention of setting it dirty, so all is well:
-                * clear_page_dirty_for_io() goes on to call
-                * TestClearPageDirty(), and write the page back.
-                *
-                * 2) This code sees the page as clean, so it calls sdf().
-                * The page stays dirty, despite being written back, so it
-                * gets written back again in the next writeback cycle.
-                * This is harmless.
-                */
-               if (!PageDirty(page))
-                       sdf(page);
-
-               put_user_page(page);
-       }
-}
-
  /**
- * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
+ * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
+ * @pages:  array of pages to be maybe marked dirty, and definitely released.
   * @npages: number of pages in the @pages array.
+ * @make_dirty: whether to mark the pages dirty
   *
   * "gup-pinned page" refers to a page that has had one of the get_user_pages()
   * variants called on that page.
   *
   * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
+ * compound page) dirty, if @make_dirty is true, and if the page was previously
+ * listed as clean. In any case, releases all pages using put_user_page(),
+ * possibly via put_user_pages(), for the non-dirty case.
   *
   * Please see the put_user_page() documentation for details.
   *
- * set_page_dirty(), which does not lock the page, is used here.
- * Therefore, it is the caller's responsibility to ensure that this is
- * safe. If not, then put_user_pages_dirty_lock() should be called instead.
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty(), put_user_page().
   *
   */
-void put_user_pages_dirty(struct page **pages, unsigned long npages)
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+                              bool make_dirty)
  {
-       __put_user_pages_dirty(pages, npages, set_page_dirty);
-}
-EXPORT_SYMBOL(put_user_pages_dirty);
+       unsigned long index;
  
-/**
- * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
- * @npages: number of pages in the @pages array.
- *
- * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
- *
- * Please see the put_user_page() documentation for details.
- *
- * This is just like put_user_pages_dirty(), except that it invokes
- * set_page_dirty_lock(), instead of set_page_dirty().
- *
- */
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
-{
-       __put_user_pages_dirty(pages, npages, set_page_dirty_lock);
+       /*
+        * TODO: this can be optimized for huge pages: if a series of pages is
+        * physically contiguous and part of the same compound page, then a
+        * single operation to the head page should suffice.
+        */
+
+       if (!make_dirty) {
+               put_user_pages(pages, npages);
+               return;
+       }
+
+       for (index = 0; index < npages; index++) {
+               struct page *page = compound_head(pages[index]);
+               /*
+                * Checking PageDirty at this point may race with
+                * clear_page_dirty_for_io(), but that's OK. Two key
+                * cases:
+                *
+                * 1) This code sees the page as already dirty, so it
+                * skips the call to set_page_dirty_lock(). That could
+                * happen because clear_page_dirty_for_io() called
+                * page_mkclean(), followed by set_page_dirty().
+                * However, now the page is going to get written back,
+                * which meets the original intention of setting it
+                * dirty, so all is well: clear_page_dirty_for_io() goes
+                * on to call TestClearPageDirty(), and write the page
+                * back.
+                *
+                * 2) This code sees the page as clean, so it calls
+                * set_page_dirty_lock(). The page stays dirty, despite
+                * being written back, so it gets written back again in
+                * the next writeback cycle. This is harmless.
+                */
+               if (!PageDirty(page))
+                       set_page_dirty_lock(page);
+               put_user_page(page);
+       }
  }
  EXPORT_SYMBOL(put_user_pages_dirty_lock);
  
@@ -399,7 +384,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
                 spin_unlock(ptl);
                 return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
         }
-       if (flags & FOLL_SPLIT) {
+       if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
                 int ret;
                 page = pmd_page(*pmd);
                 if (is_huge_zero_page(page)) {
@@ -408,7 +393,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
                         split_huge_pmd(vma, pmd, address);
                         if (pmd_trans_unstable(pmd))
                                 ret = -EBUSY;
-               } else {
+               } else if (flags & FOLL_SPLIT) {
                         if (unlikely(!try_get_page(page))) {
                                 spin_unlock(ptl);
                                 return ERR_PTR(-ENOMEM);
@@ -420,6 +405,10 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
                         put_page(page);
                         if (pmd_none(*pmd))
                                 return no_page_table(vma, flags);
+               } else {  /* flags & FOLL_SPLIT_PMD */
+                       spin_unlock(ptl);
+                       split_huge_pmd(vma, pmd, address);
+                       ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
                 }
  
                 return ret ? ERR_PTR(ret) :
@@ -799,6 +788,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
         if (!nr_pages)
                 return 0;
  
+       start = untagged_addr(start);
+
         VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
  
         /*
@@ -961,6 +952,8 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
         struct vm_area_struct *vma;
         vm_fault_t ret, major = 0;
  
+       address = untagged_addr(address);
+
         if (unlocked)
                 fault_flags |= FAULT_FLAG_ALLOW_RETRY;
  
@@ -1460,7 +1453,7 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk,
                  * gup may start from a tail page. Advance step by the left
                  * part.
                  */
-               step = (1 << compound_order(head)) - (pages[i] - head);
+               step = compound_nr(head) - (pages[i] - head);
                 /*
                  * If we get a page from the CMA zone, since we are going to
                  * be pinning these entries, we might as well move them out
@@ -1980,7 +1973,8 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
  }
  
  static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
-                      unsigned long end, int write, struct page **pages, int *nr)
+                      unsigned long end, unsigned int flags,
+                      struct page **pages, int *nr)
  {
         unsigned long pte_end;
         struct page *head, *page;
@@ -1993,7 +1987,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
  
         pte = READ_ONCE(*ptep);
  
-       if (!pte_access_permitted(pte, write))
+       if (!pte_access_permitted(pte, flags & FOLL_WRITE))
                 return 0;
  
         /* hugepages are never "special" */
@@ -2030,7 +2024,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
  }
  
  static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
-               unsigned int pdshift, unsigned long end, int write,
+               unsigned int pdshift, unsigned long end, unsigned int flags,
                 struct page **pages, int *nr)
  {
         pte_t *ptep;
@@ -2040,7 +2034,7 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
         ptep = hugepte_offset(hugepd, addr, pdshift);
         do {
                 next = hugepte_addr_end(addr, end, sz);
-               if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+               if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
                         return 0;
         } while (ptep++, addr = next, addr != end);
  
@@ -2048,7 +2042,7 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
  }
  #else
  static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
-               unsigned pdshift, unsigned long end, int write,
+               unsigned int pdshift, unsigned long end, unsigned int flags,
                 struct page **pages, int *nr)
  {
         return 0;
@@ -2056,7 +2050,8 @@ static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
  #endif /* CONFIG_ARCH_HAS_HUGEPD */
  
  static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
-               unsigned long end, unsigned int flags, struct page **pages, int *nr)
+                       unsigned long end, unsigned int flags,
+                       struct page **pages, int *nr)
  {
         struct page *head, *page;
         int refs;