ext4: fix race between writepages and enabling EXT4_EXTENTS_FL

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ee83fe7c98aaf5bbb3267e1d67004a6d0491984f..954013d6076b629330773edcc894edf1a2f6dbfe 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -161,8 +161,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
  *  - ENOMEM
  *  - EIO
  */
-int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
-                    struct inode *inode, struct ext4_ext_path *path)
+static int __ext4_ext_dirty(const char *where, unsigned int line,
+                           handle_t *handle, struct inode *inode,
+                           struct ext4_ext_path *path)
 {
        int err;
 
@@ -179,6 +180,9 @@ int __ext4_ext_dirty(const char *where, unsigned int line, handle_t *handle,
        return err;
 }
 
+#define ext4_ext_dirty(handle, inode, path) \
+               __ext4_ext_dirty(__func__, __LINE__, (handle), (inode), (path))
+
 static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                              struct ext4_ext_path *path,
                              ext4_lblk_t block)
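
The first two hunks make __ext4_ext_dirty() static and route all callers through
an ext4_ext_dirty() macro, so every call site automatically reports its own
function name and line number. A minimal standalone sketch of the same
__func__/__LINE__ wrapper pattern (the my_mark_dirty() names are hypothetical;
only the macro technique mirrors the ext4 code):

    #include <stdio.h>

    /* Out-of-line worker: receives the caller's location for diagnostics. */
    static int __my_mark_dirty(const char *where, unsigned int line, int handle)
    {
            fprintf(stderr, "mark_dirty from %s:%u (handle %d)\n",
                    where, line, handle);
            return 0;
    }

    /* Wrapper macro: captures __func__/__LINE__ at each call site. */
    #define my_mark_dirty(handle) \
            __my_mark_dirty(__func__, __LINE__, (handle))
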
@@ -309,53 +313,6 @@ ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
                        (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
 }
 
-/*
- * Calculate the number of metadata blocks needed
- * to allocate @blocks
- * Worse case is one block per extent
- */
-int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
-{
-       struct ext4_inode_info *ei = EXT4_I(inode);
-       int idxs;
-
-       idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
-               / sizeof(struct ext4_extent_idx));
-
-       /*
-        * If the new delayed allocation block is contiguous with the
-        * previous da block, it can share index blocks with the
-        * previous block, so we only need to allocate a new index
-        * block every idxs leaf blocks.  At ldxs**2 blocks, we need
-        * an additional index block, and at ldxs**3 blocks, yet
-        * another index blocks.
-        */
-       if (ei->i_da_metadata_calc_len &&
-           ei->i_da_metadata_calc_last_lblock+1 == lblock) {
-               int num = 0;
-
-               if ((ei->i_da_metadata_calc_len % idxs) == 0)
-                       num++;
-               if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
-                       num++;
-               if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
-                       num++;
-                       ei->i_da_metadata_calc_len = 0;
-               } else
-                       ei->i_da_metadata_calc_len++;
-               ei->i_da_metadata_calc_last_lblock++;
-               return num;
-       }
-
-       /*
-        * In the worst case we need a new set of index blocks at
-        * every level of the inode's extent tree.
-        */
-       ei->i_da_metadata_calc_len = 1;
-       ei->i_da_metadata_calc_last_lblock = lblock;
-       return ext_depth(inode) + 1;
-}
-
 static int
 ext4_ext_max_entries(struct inode *inode, int depth)
 {
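
ext4_ext_calc_metadata_amount() is deleted outright; it appears to have no
remaining callers once the old per-inode delalloc metadata bookkeeping
(i_da_metadata_calc_len and friends) went away. For reference, the estimate it
computed: idxs is the number of index entries that fit in one block. Assuming
4 KiB blocks and the 12-byte on-disk sizes of struct ext4_extent_header and
struct ext4_extent_idx, a quick check:

    #include <stdio.h>

    int main(void)
    {
            int blocksize = 4096;        /* assumed fs block size       */
            int hdr = 12, idx = 12;      /* on-disk header/index sizes  */
            int idxs = (blocksize - hdr) / idx;

            /* 340: a new index block is needed roughly every 340
             * contiguous leaf blocks, with additional higher-level
             * blocks at idxs^2 and idxs^3 boundaries. */
            printf("idxs = %d\n", idxs);
            return 0;
    }
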
@@ -511,6 +468,30 @@ int ext4_ext_check_inode(struct inode *inode)
        return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
 }
 
+static void ext4_cache_extents(struct inode *inode,
+                              struct ext4_extent_header *eh)
+{
+       struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
+       ext4_lblk_t prev = 0;
+       int i;
+
+       for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
+               unsigned int status = EXTENT_STATUS_WRITTEN;
+               ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
+               int len = ext4_ext_get_actual_len(ex);
+
+               if (prev && (prev != lblk))
+                       ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
+                                            EXTENT_STATUS_HOLE);
+
+               if (ext4_ext_is_unwritten(ex))
+                       status = EXTENT_STATUS_UNWRITTEN;
+               ext4_es_cache_extent(inode, lblk, len,
+                                    ext4_ext_pblock(ex), status);
+               prev = lblk + len;
+       }
+}
+
 static struct buffer_head *
 __read_extent_tree_block(const char *function, unsigned int line,
                         struct inode *inode, ext4_fsblk_t pblk, int depth,
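
The open-coded caching loop from __read_extent_tree_block() (removed in the
next hunk) is hoisted into the new ext4_cache_extents() helper. It caches each
on-disk extent as WRITTEN or UNWRITTEN in the extent status tree, and records
the gap before it as a HOLE. A worked example of the hole detection, on a
hypothetical layout:

    /* extent A: lblk 0,  len 10  -> prev becomes 10
     * extent B: lblk 20, len 10  -> prev (10) != lblk (20), so the gap
     *           [10, 20) is cached as EXTENT_STATUS_HOLE with length
     *           lblk - prev == 10 before extent B itself is cached.   */
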
@@ -545,26 +526,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
         */
        if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
                struct ext4_extent_header *eh = ext_block_hdr(bh);
-               struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
-               ext4_lblk_t prev = 0;
-               int i;
-
-               for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
-                       unsigned int status = EXTENT_STATUS_WRITTEN;
-                       ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
-                       int len = ext4_ext_get_actual_len(ex);
-
-                       if (prev && (prev != lblk))
-                               ext4_es_cache_extent(inode, prev,
-                                                    lblk - prev, ~0,
-                                                    EXTENT_STATUS_HOLE);
-
-                       if (ext4_ext_is_unwritten(ex))
-                               status = EXTENT_STATUS_UNWRITTEN;
-                       ext4_es_cache_extent(inode, lblk, len,
-                                            ext4_ext_pblock(ex), status);
-                       prev = lblk + len;
-               }
+               ext4_cache_extents(inode, eh);
        }
        return bh;
 errout:
@@ -650,8 +612,9 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
        ext_debug("path:");
        for (k = 0; k <= l; k++, path++) {
                if (path->p_idx) {
-                 ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
-                           ext4_idx_pblock(path->p_idx));
+                       ext_debug("  %d->%llu",
+                                 le32_to_cpu(path->p_idx->ei_block),
+                                 ext4_idx_pblock(path->p_idx));
                } else if (path->p_ext) {
                        ext_debug("  %d:[%d]%d:%llu ",
                                  le32_to_cpu(path->p_ext->ee_block),
@@ -732,11 +695,12 @@ void ext4_ext_drop_refs(struct ext4_ext_path *path)
        if (!path)
                return;
        depth = path->p_depth;
-       for (i = 0; i <= depth; i++, path++)
+       for (i = 0; i <= depth; i++, path++) {
                if (path->p_bh) {
                        brelse(path->p_bh);
                        path->p_bh = NULL;
                }
+       }
 }
 
 /*
@@ -778,8 +742,8 @@ ext4_ext_binsearch_idx(struct inode *inode,
 
                chix = ix = EXT_FIRST_INDEX(eh);
                for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
-                 if (k != 0 &&
-                     le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
+                       if (k != 0 && le32_to_cpu(ix->ei_block) <=
+                           le32_to_cpu(ix[-1].ei_block)) {
                                printk(KERN_DEBUG "k=%d, ix=0x%p, "
                                       "first=0x%p\n", k,
                                       ix, EXT_FIRST_INDEX(eh));
@@ -912,6 +876,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
        path[0].p_bh = NULL;
 
        i = depth;
+       if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
+               ext4_cache_extents(inode, eh);
        /* walk through the tree */
        while (i) {
                ext_debug("depth %d: num %d, max %d\n",
@@ -1633,17 +1599,16 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
                return EXT_MAX_BLOCKS;
 
        while (depth >= 0) {
+               struct ext4_ext_path *p = &path[depth];
+
                if (depth == path->p_depth) {
                        /* leaf */
-                       if (path[depth].p_ext &&
-                               path[depth].p_ext !=
-                                       EXT_LAST_EXTENT(path[depth].p_hdr))
-                         return le32_to_cpu(path[depth].p_ext[1].ee_block);
+                       if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
+                               return le32_to_cpu(p->p_ext[1].ee_block);
                } else {
                        /* index */
-                       if (path[depth].p_idx !=
-                                       EXT_LAST_INDEX(path[depth].p_hdr))
-                         return le32_to_cpu(path[depth].p_idx[1].ei_block);
+                       if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
+                               return le32_to_cpu(p->p_idx[1].ei_block);
                }
                depth--;
        }
@@ -1743,9 +1708,9 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
        return err;
 }
 
-int
-ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
-                               struct ext4_extent *ex2)
+static int ext4_can_extents_be_merged(struct inode *inode,
+                                     struct ext4_extent *ex1,
+                                     struct ext4_extent *ex2)
 {
        unsigned short ext1_ee_len, ext2_ee_len;
 
@@ -1759,11 +1724,6 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
                        le32_to_cpu(ex2->ee_block))
                return 0;
 
-       /*
-        * To allow future support for preallocated extents to be added
-        * as an RO_COMPAT feature, refuse to merge to extents if
-        * this can result in the top bit of ee_len being set.
-        */
        if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
                return 0;
 
@@ -1871,13 +1831,14 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
 }
 
 /*
- * This function tries to merge the @ex extent to neighbours in the tree.
- * return 1 if merge left else 0.
+ * This function tries to merge the @ex extent to neighbours in the tree, then
+ * tries to collapse the extent tree into the inode.
  */
 static void ext4_ext_try_to_merge(handle_t *handle,
                                  struct inode *inode,
                                  struct ext4_ext_path *path,
-                                 struct ext4_extent *ex) {
+                                 struct ext4_extent *ex)
+{
        struct ext4_extent_header *eh;
        unsigned int depth;
        int merge_done = 0;
@@ -3719,9 +3680,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                max_zeroout = sbi->s_extent_max_zeroout_kb >>
                        (inode->i_sb->s_blocksize_bits - 10);
 
-       if (IS_ENCRYPTED(inode))
-               max_zeroout = 0;
-
        /*
         * five cases:
         * 1. split the extent into three extents.
@@ -4707,6 +4665,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        return ret > 0 ? ret2 : ret;
 }
 
+static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
+
+static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
+
 static long ext4_zero_range(struct file *file, loff_t offset,
                            loff_t len, int mode)
 {
@@ -4724,9 +4686,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
        trace_ext4_zero_range(inode, offset, len, mode);
 
-       if (!S_ISREG(inode->i_mode))
-               return -EINVAL;
-
        /* Call ext4_force_commit to flush all data in case of data=journal. */
        if (ext4_should_journal_data(inode)) {
                ret = ext4_force_commit(inode->i_sb);
@@ -4766,7 +4725,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        }
 
        if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-           (offset + len > i_size_read(inode) ||
+           (offset + len > inode->i_size ||
             offset + len > EXT4_I(inode)->i_disksize)) {
                new_size = offset + len;
                ret = inode_newsize_ok(inode, new_size);
@@ -4850,7 +4809,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                * Mark that we allocate beyond EOF so the subsequent truncate
                * can proceed even if the new size is the same as i_size.
                */
-               if ((offset + len) > i_size_read(inode))
+               if (offset + len > inode->i_size)
                        ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
        }
        ext4_mark_inode_dirty(handle, inode);
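
Several hunks in the fallocate paths (here and below) replace
i_size_read(inode) with a plain inode->i_size load. The helper exists for
lockless readers (loff_t reads are not atomic on 32-bit SMP); these call sites
run with the inode lock held, where i_size is stable, so the direct read is
equivalent:

    /* Pattern behind the i_size_read() -> inode->i_size change:
     *
     *     inode_lock(inode);
     *     ...
     *     if (offset + len > inode->i_size)   // stable under the lock
     *             ...
     *     inode_unlock(inode);
     */
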
@@ -4891,14 +4850,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
         * range since we would need to re-encrypt blocks with a
         * different IV or XTS tweak (which are based on the logical
         * block number).
-        *
-        * XXX It's not clear why zero range isn't working, but we'll
-        * leave it disabled for encrypted inodes for now.  This is a
-        * bug we should fix....
         */
        if (IS_ENCRYPTED(inode) &&
-           (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
-                    FALLOC_FL_ZERO_RANGE)))
+           (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
                return -EOPNOTSUPP;
 
        /* Return error if mode is not supported */
@@ -4942,7 +4896,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        }
 
        if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-           (offset + len > i_size_read(inode) ||
+           (offset + len > inode->i_size ||
             offset + len > EXT4_I(inode)->i_disksize)) {
                new_size = offset + len;
                ret = inode_newsize_ok(inode, new_size);
@@ -5269,7 +5223,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
 {
        int depth, err = 0;
        struct ext4_extent *ex_start, *ex_last;
-       bool update = 0;
+       bool update = false;
        depth = path->p_depth;
 
        while (depth >= 0) {
@@ -5285,7 +5239,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                                goto out;
 
                        if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
-                               update = 1;
+                               update = true;
 
                        while (ex_start <= ex_last) {
                                if (SHIFT == SHIFT_LEFT) {
@@ -5473,7 +5427,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
  * This implements the fallocate's collapse range functionality for ext4
  * Returns: 0 and non-zero on error.
  */
-int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 {
        struct super_block *sb = inode->i_sb;
        ext4_lblk_t punch_start, punch_stop;
@@ -5490,12 +5444,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return -EOPNOTSUPP;
 
-       /* Collapse range works only on fs block size aligned offsets. */
-       if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
-           len & (EXT4_CLUSTER_SIZE(sb) - 1))
-               return -EINVAL;
-
-       if (!S_ISREG(inode->i_mode))
+       /* Collapse range works only on fs cluster size aligned regions. */
+       if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
                return -EINVAL;
 
        trace_ext4_collapse_range(inode, offset, len);
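
The two alignment tests collapse into one IS_ALIGNED(offset | len, ...) check,
and the comment is corrected from "block size" to "cluster size" to match the
code. The OR trick is valid because the cluster size is a power of two: if
either operand has a bit set below the alignment, the OR preserves it. A
standalone sketch (MY_IS_ALIGNED mirrors the kernel's IS_ALIGNED):

    #include <assert.h>
    #include <stdint.h>

    #define MY_IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

    int main(void)
    {
            uint64_t sz = 4096;                      /* power of two */
            assert( MY_IS_ALIGNED(8192 | 4096, sz)); /* both aligned */
            assert(!MY_IS_ALIGNED(8192 | 4097, sz)); /* one is not   */
            return 0;
    }
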
@@ -5515,7 +5465,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
         * There is no need to overlap collapse range with EOF, in which case
         * it is effectively a truncate operation
         */
-       if (offset + len >= i_size_read(inode)) {
+       if (offset + len >= inode->i_size) {
                ret = -EINVAL;
                goto out_mutex;
        }
@@ -5593,7 +5543,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_stop;
        }
 
-       new_size = i_size_read(inode) - len;
+       new_size = inode->i_size - len;
        i_size_write(inode, new_size);
        EXT4_I(inode)->i_disksize = new_size;
 
@@ -5621,7 +5571,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
  * by len bytes.
  * Returns 0 on success, error otherwise.
  */
-int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
+static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 {
        struct super_block *sb = inode->i_sb;
        handle_t *handle;
@@ -5640,14 +5590,10 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return -EOPNOTSUPP;
 
-       /* Insert range works only on fs block size aligned offsets. */
-       if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) ||
-                       len & (EXT4_CLUSTER_SIZE(sb) - 1))
+       /* Insert range works only on fs cluster size aligned regions. */
+       if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
                return -EINVAL;
 
-       if (!S_ISREG(inode->i_mode))
-               return -EOPNOTSUPP;
-
        trace_ext4_insert_range(inode, offset, len);
 
        offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
@@ -5667,14 +5613,14 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
 
-       /* Check for wrap through zero */
-       if (inode->i_size + len > inode->i_sb->s_maxbytes) {
+       /* Check whether the maximum file size would be exceeded */
+       if (len > inode->i_sb->s_maxbytes - inode->i_size) {
                ret = -EFBIG;
                goto out_mutex;
        }
 
-       /* Offset should be less than i_size */
-       if (offset >= i_size_read(inode)) {
+       /* Offset must be less than i_size */
+       if (offset >= inode->i_size) {
                ret = -EINVAL;
                goto out_mutex;
        }
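
The rewritten insert-range check also guards against arithmetic wrap:
inode->i_size + len can overflow loff_t for a huge len, letting the old
comparison pass, whereas s_maxbytes - inode->i_size cannot underflow (i_size
never exceeds s_maxbytes here), so the new form is overflow-safe. A
demonstration with unsigned stand-ins (kernel loff_t is signed, where the
overflow would be undefined behavior rather than a wrap, but the failure mode
is analogous):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long maxbytes = 1ULL << 46; /* fs size limit */
            unsigned long long i_size   = 1ULL << 45; /* current size  */
            unsigned long long len      = ~0ULL - i_size + 2; /* huge  */

            printf("old: %d\n", i_size + len > maxbytes); /* 0: wrapped */
            printf("new: %d\n", len > maxbytes - i_size); /* 1: caught  */
            return 0;
    }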