diff --git a/mm/madvise.c b/mm/madvise.c
index 968df3aa069fda3ca88121769ddad04e7390fe6e..88babcc384b9d4362b929d1771921dda30a80b67 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
 #include <linux/userfaultfd_k.h>
 #include <linux/hugetlb.h>
 #include <linux/falloc.h>
+#include <linux/fadvise.h>
 #include <linux/sched.h>
 #include <linux/ksm.h>
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/pagewalk.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/shmem_fs.h>
@@ -225,19 +227,9 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
        return 0;
 }
 
-static void force_swapin_readahead(struct vm_area_struct *vma,
-               unsigned long start, unsigned long end)
-{
-       struct mm_walk walk = {
-               .mm = vma->vm_mm,
-               .pmd_entry = swapin_walk_pmd_entry,
-               .private = vma,
-       };
-
-       walk_page_range(start, end, &walk);
-
-       lru_add_drain();        /* Push any new pages onto the LRU now */
-}
+static const struct mm_walk_ops swapin_walk_ops = {
+       .pmd_entry              = swapin_walk_pmd_entry,
+};
 
 static void force_shm_swapin_readahead(struct vm_area_struct *vma,
                unsigned long start, unsigned long end,
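
For reference, the hunk above is part of a tree-wide conversion of the pagewalk API: the old stack-built struct mm_walk (callbacks, mm, and private state bundled per call) is split so that the constant callbacks live in a static const struct mm_walk_ops from <linux/pagewalk.h> (hence the new include), while per-call state goes straight to walk_page_range(). A minimal sketch of a caller under the new API, assuming only what this patch shows; my_pmd_entry, my_walk_ops, and my_walk are hypothetical names, not part of the patch:

#include <linux/pagewalk.h>

/* Hypothetical callback: per-call state now arrives through
 * walk->private instead of a caller-local struct mm_walk. */
static int my_pmd_entry(pmd_t *pmd, unsigned long addr,
			unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->private;

	pr_debug("visiting %lx-%lx in vma %px\n", addr, end, vma);
	return 0;
}

/* Callbacks are constant, so the ops table can live in rodata
 * and be shared by every caller. */
static const struct mm_walk_ops my_walk_ops = {
	.pmd_entry	= my_pmd_entry,
};

static void my_walk(struct vm_area_struct *vma,
		    unsigned long start, unsigned long end)
{
	/* New signature: mm, range, const ops table, private cookie. */
	walk_page_range(vma->vm_mm, start, end, &my_walk_ops, vma);
}

Making the ops table const means it is shared rather than rebuilt on the stack for every walk, which is exactly why the two helper wrappers below become unnecessary and are deleted.
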
@@ -275,11 +267,13 @@ static long madvise_willneed(struct vm_area_struct *vma,
                             unsigned long start, unsigned long end)
 {
        struct file *file = vma->vm_file;
+       loff_t offset;
 
        *prev = vma;
 #ifdef CONFIG_SWAP
        if (!file) {
-               force_swapin_readahead(vma, start, end);
+               walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma);
+               lru_add_drain(); /* Push any new pages onto the LRU now */
                return 0;
        }
 
@@ -298,12 +292,20 @@ static long madvise_willneed(struct vm_area_struct *vma,
                return 0;
        }
 
-       start = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-       if (end > vma->vm_end)
-               end = vma->vm_end;
-       end = ((end - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-
-       force_page_cache_readahead(file->f_mapping, file, start, end - start);
+       /*
+        * Filesystem's fadvise may need to take various locks.  We need to
+        * explicitly grab a reference because the vma (and hence the
+        * vma's reference to the file) can go away as soon as we drop
+        * mmap_sem.
+        */
+       *prev = NULL;   /* tell sys_madvise we drop mmap_sem */
+       get_file(file);
+       up_read(&current->mm->mmap_sem);
+       offset = (loff_t)(start - vma->vm_start)
+                       + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+       vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
+       fput(file);
+       down_read(&current->mm->mmap_sem);
        return 0;
 }
 
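
The hunk above stops open-coding readahead through force_page_cache_readahead() and routes MADV_WILLNEED through vfs_fadvise(POSIX_FADV_WILLNEED) instead, so filesystems that implement ->fadvise() can apply their own locking and policy. Because that path may take locks that cannot nest under mmap_sem, the file is pinned with get_file() before the semaphore is dropped, and *prev = NULL tells the madvise core that the lock was released. The offset arithmetic maps a user virtual address back to a file offset; here is a small userspace sketch of the same computation, with made-up values and 4 KiB pages assumed:

#include <stdio.h>

#define PAGE_SHIFT 12			/* assuming 4 KiB pages */

int main(void)
{
	/* Made-up values for illustration only: */
	unsigned long long vm_start = 0x7f0000000000ULL; /* VMA start */
	unsigned long long vm_pgoff = 2;  /* VMA maps file from page 2 */
	unsigned long long start = vm_start + 0x3000;    /* madvise addr */

	/* Same arithmetic as the patch; the 64-bit casts keep a large
	 * vm_pgoff from overflowing on 32-bit machines: */
	long long offset = (long long)(start - vm_start)
			+ ((long long)vm_pgoff << PAGE_SHIFT);

	/* 0x3000 into the mapping + 2 pages into the file = 0x5000 */
	printf("file offset = 0x%llx\n", (unsigned long long)offset);
	return 0;
}
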
@@ -440,20 +442,9 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
        return 0;
 }
 
-static void madvise_free_page_range(struct mmu_gather *tlb,
-                            struct vm_area_struct *vma,
-                            unsigned long addr, unsigned long end)
-{
-       struct mm_walk free_walk = {
-               .pmd_entry = madvise_free_pte_range,
-               .mm = vma->vm_mm,
-               .private = tlb,
-       };
-
-       tlb_start_vma(tlb, vma);
-       walk_page_range(addr, end, &free_walk);
-       tlb_end_vma(tlb, vma);
-}
+static const struct mm_walk_ops madvise_free_walk_ops = {
+       .pmd_entry              = madvise_free_pte_range,
+};
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
                        unsigned long start_addr, unsigned long end_addr)
@@ -480,7 +471,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
        update_hiwater_rss(mm);
 
        mmu_notifier_invalidate_range_start(&range);
-       madvise_free_page_range(&tlb, vma, range.start, range.end);
+       tlb_start_vma(&tlb, vma);
+       walk_page_range(vma->vm_mm, range.start, range.end,
+                       &madvise_free_walk_ops, &tlb);
+       tlb_end_vma(&tlb, vma);
        mmu_notifier_invalidate_range_end(&range);
        tlb_finish_mmu(&tlb, range.start, range.end);
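
With this last hunk, madvise_free_single_vma() inlines the TLB bracketing around the generic walk instead of hiding it in a helper. A condensed sketch of the resulting shape; sketch_free_range is a hypothetical condensation, and the real function also initializes the mmu_notifier_range and bails out for non-anonymous VMAs:

#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/pagewalk.h>
#include <linux/swap.h>
#include <asm/tlb.h>

/* Hypothetical condensation, not the literal function body: the
 * generic walk sits inside the usual mmu_gather/mmu_notifier
 * bracketing, with the gather handed to the callback as 'private'. */
static void sketch_free_range(struct vm_area_struct *vma,
			      struct mmu_notifier_range *range)
{
	struct mm_struct *mm = vma->vm_mm;
	struct mmu_gather tlb;

	lru_add_drain();		/* drain pagevecs to the LRU first */
	tlb_gather_mmu(&tlb, mm, range->start, range->end);
	update_hiwater_rss(mm);

	mmu_notifier_invalidate_range_start(range);	/* secondary MMUs */
	tlb_start_vma(&tlb, vma);
	walk_page_range(mm, range->start, range->end,
			&madvise_free_walk_ops, &tlb);	/* &tlb == private */
	tlb_end_vma(&tlb, vma);
	mmu_notifier_invalidate_range_end(range);
	tlb_finish_mmu(&tlb, range->start, range->end);	/* flush and free */
}
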