asedeno.scripts.mit.edu Git - linux.git/blobdiff - mm/filemap.c
Merge branch 'irq-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux.git] / mm / filemap.c
index 6b36516bc31d85bb79db543e354734306381854e..81adec8ee02cc3bdb765625e28c3d765f203e512 100644 (file)
@@ -36,6 +36,8 @@
 #include <linux/cleancache.h>
 #include <linux/shmem_fs.h>
 #include <linux/rmap.h>
+#include <linux/delayacct.h>
+#include <linux/psi.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -894,12 +896,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
                 * data from the working set, only to cache data that will
                 * get overwritten with something else, is a waste of memory.
                 */
-               if (!(gfp_mask & __GFP_WRITE) &&
-                   shadow && workingset_refault(shadow)) {
-                       SetPageActive(page);
-                       workingset_activation(page);
-               } else
-                       ClearPageActive(page);
+               WARN_ON_ONCE(PageActive(page));
+               if (!(gfp_mask & __GFP_WRITE) && shadow)
+                       workingset_refault(page, shadow);
                lru_cache_add(page);
        }
        return ret;
@@ -1055,8 +1054,18 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 {
        struct wait_page_queue wait_page;
        wait_queue_entry_t *wait = &wait_page.wait;
+       bool thrashing = false;
+       unsigned long pflags;
        int ret = 0;
 
+       if (bit_nr == PG_locked &&
+           !PageUptodate(page) && PageWorkingset(page)) {
+               if (!PageSwapBacked(page))
+                       delayacct_thrashing_start();
+               psi_memstall_enter(&pflags);
+               thrashing = true;
+       }
+
        init_wait(wait);
        wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
        wait->func = wake_page_function;
@@ -1095,6 +1104,12 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 
        finish_wait(q, wait);
 
+       if (thrashing) {
+               if (!PageSwapBacked(page))
+                       delayacct_thrashing_end();
+               psi_memstall_leave(&pflags);
+       }
+
        /*
         * A signal could leave PageWaiters set. Clearing it here if
         * !waitqueue_active would be possible (by open-coding finish_wait),
@@ -2034,7 +2049,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
                                        !mapping->a_ops->is_partially_uptodate)
                                goto page_not_up_to_date;
                        /* pipes can't handle partially uptodate pages */
-                       if (unlikely(iter->type & ITER_PIPE))
+                       if (unlikely(iov_iter_is_pipe(iter)))
                                goto page_not_up_to_date;
                        if (!trylock_page(page))
                                goto page_not_up_to_date;
@@ -2493,9 +2508,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
         * system is low on memory, or a problem occurs while trying
         * to schedule I/O.
         */
-       if (error == -ENOMEM)
-               return VM_FAULT_OOM;
-       return VM_FAULT_SIGBUS;
+       return vmf_error(error);
 
 page_not_uptodate:
        /*
@@ -2644,9 +2657,9 @@ int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
        return generic_file_mmap(file, vma);
 }
 #else
-int filemap_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
 {
-       return -ENOSYS;
+       return VM_FAULT_SIGBUS;
 }
 int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
 {
@@ -2811,6 +2824,42 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
+/*
+ * Don't operate on ranges the page cache doesn't support, and don't exceed the
+ * LFS limits.  If pos is under the limit it becomes a short access.  If it
+ * exceeds the limit we return -EFBIG.
+ */
+static int generic_access_check_limits(struct file *file, loff_t pos,
+                                      loff_t *count)
+{
+       struct inode *inode = file->f_mapping->host;
+       loff_t max_size = inode->i_sb->s_maxbytes;
+
+       if (!(file->f_flags & O_LARGEFILE))
+               max_size = MAX_NON_LFS;
+
+       if (unlikely(pos >= max_size))
+               return -EFBIG;
+       *count = min(*count, max_size - pos);
+       return 0;
+}
+
+static int generic_write_check_limits(struct file *file, loff_t pos,
+                                     loff_t *count)
+{
+       loff_t limit = rlimit(RLIMIT_FSIZE);
+
+       if (limit != RLIM_INFINITY) {
+               if (pos >= limit) {
+                       send_sig(SIGXFSZ, current, 0);
+                       return -EFBIG;
+               }
+               *count = min(*count, limit - pos);
+       }
+
+       return generic_access_check_limits(file, pos, count);
+}
+
 /*
  * Performs necessary checks before doing a write
  *
@@ -2822,8 +2871,8 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
-       unsigned long limit = rlimit(RLIMIT_FSIZE);
-       loff_t pos;
+       loff_t count;
+       int ret;
 
        if (!iov_iter_count(from))
                return 0;
@@ -2832,43 +2881,99 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
        if (iocb->ki_flags & IOCB_APPEND)
                iocb->ki_pos = i_size_read(inode);
 
-       pos = iocb->ki_pos;
-
        if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
                return -EINVAL;
 
-       if (limit != RLIM_INFINITY) {
-               if (iocb->ki_pos >= limit) {
-                       send_sig(SIGXFSZ, current, 0);
-                       return -EFBIG;
-               }
-               iov_iter_truncate(from, limit - (unsigned long)pos);
-       }
+       count = iov_iter_count(from);
+       ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+       if (ret)
+               return ret;
+
+       iov_iter_truncate(from, count);
+       return iov_iter_count(from);
+}
+EXPORT_SYMBOL(generic_write_checks);
+
+/*
+ * Performs necessary checks before doing a clone.
+ *
+ * Can adjust amount of bytes to clone.
+ * Returns appropriate error code that caller should return or
+ * zero in case the clone should be allowed.
+ */
+int generic_remap_checks(struct file *file_in, loff_t pos_in,
+                        struct file *file_out, loff_t pos_out,
+                        loff_t *req_count, unsigned int remap_flags)
+{
+       struct inode *inode_in = file_in->f_mapping->host;
+       struct inode *inode_out = file_out->f_mapping->host;
+       uint64_t count = *req_count;
+       uint64_t bcount;
+       loff_t size_in, size_out;
+       loff_t bs = inode_out->i_sb->s_blocksize;
+       int ret;
+
+       /* The start of both ranges must be aligned to an fs block. */
+       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
+               return -EINVAL;
+
+       /* Ensure offsets don't wrap. */
+       if (pos_in + count < pos_in || pos_out + count < pos_out)
+               return -EINVAL;
+
+       size_in = i_size_read(inode_in);
+       size_out = i_size_read(inode_out);
+
+       /* Dedupe requires both ranges to be within EOF. */
+       if ((remap_flags & REMAP_FILE_DEDUP) &&
+           (pos_in >= size_in || pos_in + count > size_in ||
+            pos_out >= size_out || pos_out + count > size_out))
+               return -EINVAL;
+
+       /* Ensure the infile range is within the infile. */
+       if (pos_in >= size_in)
+               return -EINVAL;
+       count = min(count, size_in - (uint64_t)pos_in);
+
+       ret = generic_access_check_limits(file_in, pos_in, &count);
+       if (ret)
+               return ret;
+
+       ret = generic_write_check_limits(file_out, pos_out, &count);
+       if (ret)
+               return ret;
 
        /*
-        * LFS rule
+        * If the user wanted us to link to the infile's EOF, round up to the
+        * next block boundary for this check.
+        *
+        * Otherwise, make sure the count is also block-aligned, having
+        * already confirmed the starting offsets' block alignment.
         */
-       if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
-                               !(file->f_flags & O_LARGEFILE))) {
-               if (pos >= MAX_NON_LFS)
-                       return -EFBIG;
-               iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
+       if (pos_in + count == size_in) {
+               bcount = ALIGN(size_in, bs) - pos_in;
+       } else {
+               if (!IS_ALIGNED(count, bs))
+                       count = ALIGN_DOWN(count, bs);
+               bcount = count;
        }
 
+       /* Don't allow overlapped cloning within the same file. */
+       if (inode_in == inode_out &&
+           pos_out + bcount > pos_in &&
+           pos_out < pos_in + bcount)
+               return -EINVAL;
+
        /*
-        * Are we about to exceed the fs block limit ?
-        *
-        * If we have written data it becomes a short write.  If we have
-        * exceeded without writing data we send a signal and return EFBIG.
-        * Linus frestrict idea will clean these up nicely..
+        * We shortened the request but the caller can't deal with that, so
+        * bounce the request back to userspace.
         */
-       if (unlikely(pos >= inode->i_sb->s_maxbytes))
-               return -EFBIG;
+       if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
+               return -EINVAL;
 
-       iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos);
-       return iov_iter_count(from);
+       *req_count = count;
+       return 0;
 }
-EXPORT_SYMBOL(generic_write_checks);
 
 int pagecache_write_begin(struct file *file, struct address_space *mapping,
                                loff_t pos, unsigned len, unsigned flags,
@@ -2908,7 +3013,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
        if (iocb->ki_flags & IOCB_NOWAIT) {
                /* If there are pages to writeback, return */
                if (filemap_range_has_page(inode->i_mapping, pos,
-                                          pos + iov_iter_count(from)))
+                                          pos + write_len))
                        return -EAGAIN;
        } else {
                written = filemap_write_and_wait_range(mapping, pos,