Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 8 Jun 2014 20:03:35 +0000 (13:03 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 8 Jun 2014 20:03:35 +0000 (13:03 -0700)
Pull ext4 updates from Ted Ts'o:
 "Clean ups and miscellaneous bug fixes, in particular for the new
  collapse_range and zero_range fallocate functions.  In addition,
  improve the scalability of adding and remove inodes from the orphan
  list"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
  ext4: handle symlink properly with inline_data
  ext4: fix wrong assert in ext4_mb_normalize_request()
  ext4: fix zeroing of page during writeback
  ext4: remove unused local variable "stored" from ext4_readdir(...)
  ext4: fix ZERO_RANGE test failure in data journalling
  ext4: reduce contention on s_orphan_lock
  ext4: use sbi in ext4_orphan_{add|del}()
  ext4: use EXT_MAX_BLOCKS in ext4_es_can_be_merged()
  ext4: add missing BUFFER_TRACE before ext4_journal_get_write_access
  ext4: remove unnecessary double parentheses
  ext4: do not destroy ext4_groupinfo_caches if ext4_mb_init() fails
  ext4: make local functions static
  ext4: fix block bitmap validation when bigalloc, ^flex_bg
  ext4: fix block bitmap initialization under sparse_super2
  ext4: find the group descriptors on a 1k-block bigalloc,meta_bg filesystem
  ext4: avoid unneeded lookup when xattr name is invalid
  ext4: fix data integrity sync in ordered mode
  ext4: remove obsoleted check
  ext4: add a new spinlock i_raw_lock to protect the ext4's raw inode
  ext4: fix locking for O_APPEND writes
  ...

fs/ext4/mballoc.c
fs/ext4/page-io.c
fs/ext4/resize.c
include/linux/page-flags.h
mm/page-writeback.c

diff --combined fs/ext4/mballoc.c
index afe8a133e3d1eee604378774b3fa2f2b3925c82e,11dc4e0033cfdefb3adb3234a83493a806e94fa8..59e31622cc6ef41cdd8474d47e43e1e634da676a
@@@ -1044,8 -1044,6 +1044,8 @@@ int ext4_mb_init_group(struct super_blo
         * allocating. If we are looking at the buddy cache we would
         * have taken a reference using ext4_mb_load_buddy and that
         * would have pinned buddy page to page cache.
 +       * The call to ext4_mb_get_buddy_page_lock will mark the
 +       * page accessed.
         */
        ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
        if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
                ret = -EIO;
                goto err;
        }
 -      mark_page_accessed(page);
  
        if (e4b.bd_buddy_page == NULL) {
                /*
                ret = -EIO;
                goto err;
        }
 -      mark_page_accessed(page);
  err:
        ext4_mb_put_buddy_page_lock(&e4b);
        return ret;
@@@ -1141,7 -1141,7 +1141,7 @@@ ext4_mb_load_buddy(struct super_block *
  
        /* we could use find_or_create_page(), but it locks the page,
         * which we'd like to avoid in the fast path ... */
 -      page = find_get_page(inode->i_mapping, pnum);
 +      page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
        if (page == NULL || !PageUptodate(page)) {
                if (page)
                        /*
                ret = -EIO;
                goto err;
        }
 +
 +      /* Pages marked accessed already */
        e4b->bd_bitmap_page = page;
        e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
 -      mark_page_accessed(page);
  
        block++;
        pnum = block / blocks_per_page;
        poff = block % blocks_per_page;
  
 -      page = find_get_page(inode->i_mapping, pnum);
 +      page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
        if (page == NULL || !PageUptodate(page)) {
                if (page)
                        page_cache_release(page);
                ret = -EIO;
                goto err;
        }
 +
 +      /* Pages marked accessed already */
        e4b->bd_buddy_page = page;
        e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
 -      mark_page_accessed(page);
  
        BUG_ON(e4b->bd_bitmap_page == NULL);
        BUG_ON(e4b->bd_buddy_page == NULL);
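
The change above folds the reference-bit update into the page-cache
lookup itself: passing FGP_ACCESSED to find_get_page_flags() makes the
lookup mark the page accessed, so the caller no longer follows every
successful lookup with a separate mark_page_accessed().  Reduced to a
sketch (not compilable on its own; mapping and index stand in for the
values used above):

	/* old: look the page up, then touch its reference state */
	page = find_get_page(mapping, index);
	if (page)
		mark_page_accessed(page);

	/* new: the lookup marks the page accessed as a side effect */
	page = find_get_page_flags(mapping, index, FGP_ACCESSED);
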
@@@ -2619,7 -2617,7 +2619,7 @@@ int ext4_mb_init(struct super_block *sb
        sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
        if (sbi->s_locality_groups == NULL) {
                ret = -ENOMEM;
-               goto out_free_groupinfo_slab;
+               goto out;
        }
        for_each_possible_cpu(i) {
                struct ext4_locality_group *lg;
  out_free_locality_groups:
        free_percpu(sbi->s_locality_groups);
        sbi->s_locality_groups = NULL;
- out_free_groupinfo_slab:
-       ext4_groupinfo_destroy_slabs();
  out:
        kfree(sbi->s_mb_offsets);
        sbi->s_mb_offsets = NULL;
@@@ -2878,6 -2874,7 +2876,7 @@@ ext4_mb_mark_diskspace_used(struct ext4
        if (!bitmap_bh)
                goto out_err;
  
+       BUFFER_TRACE(bitmap_bh, "getting write access");
        err = ext4_journal_get_write_access(handle, bitmap_bh);
        if (err)
                goto out_err;
        ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
                        ext4_free_group_clusters(sb, gdp));
  
+       BUFFER_TRACE(gdp_bh, "get_write_access");
        err = ext4_journal_get_write_access(handle, gdp_bh);
        if (err)
                goto out_err;
@@@ -3147,7 -3145,7 +3147,7 @@@ ext4_mb_normalize_request(struct ext4_a
        }
        BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
                        start > ac->ac_o_ex.fe_logical);
-       BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+       BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
  
        /* now prepare goal request */
  
diff --combined fs/ext4/page-io.c
index 1a64e7a52b841be24ddd3daa56cb47939f07656c,b6a3804a98551a5bfd002bc296bf360bea4a45a0..b24a2541a9baaa0d4c22e80a75050af2517a417d
@@@ -401,7 -401,8 +401,8 @@@ submit_and_retry
  int ext4_bio_write_page(struct ext4_io_submit *io,
                        struct page *page,
                        int len,
-                       struct writeback_control *wbc)
+                       struct writeback_control *wbc,
+                       bool keep_towrite)
  {
        struct inode *inode = page->mapping->host;
        unsigned block_start, blocksize;
        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
  
-       set_page_writeback(page);
+       if (keep_towrite)
+               set_page_writeback_keepwrite(page);
+       else
+               set_page_writeback(page);
        ClearPageError(page);
  
+       /*
 -       * Comments copied from block_write_full_page_endio:
++       * Comments copied from block_write_full_page:
+        *
+        * The page straddles i_size.  It must be zeroed out on each and every
+        * writepage invocation because it may be mmapped.  "A file is mapped
+        * in multiples of the page size.  For a file that is not a multiple of
+        * the page size, the remaining memory is zeroed when mapped, and
+        * writes to that region are not written out to the file."
+        */
+       if (len < PAGE_CACHE_SIZE)
+               zero_user_segment(page, len, PAGE_CACHE_SIZE);
        /*
         * In the first loop we prepare and mark buffers to submit. We have to
         * mark all buffers in the page before submitting so that
        do {
                block_start = bh_offset(bh);
                if (block_start >= len) {
-                       /*
-                        * Comments copied from block_write_full_page:
-                        *
-                        * The page straddles i_size.  It must be zeroed out on
-                        * each and every writepage invocation because it may
-                        * be mmapped.  "A file is mapped in multiples of the
-                        * page size.  For a file that is not a multiple of
-                        * the  page size, the remaining memory is zeroed when
-                        * mapped, and writes to that region are not written
-                        * out to the file."
-                        */
-                       zero_user_segment(page, block_start,
-                                         block_start + blocksize);
                        clear_buffer_dirty(bh);
                        set_buffer_uptodate(bh);
                        continue;
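
The new keep_towrite flag exists for data-integrity sync: when only
part of a page is written in this pass, clearing the radix-tree
PAGECACHE_TAG_TOWRITE tag would let a subsequent WB_SYNC_ALL writeback
skip the page even though some of its buffers still need to reach
disk.  A sketch of the caller-side decision (simplified; the predicate
name is hypothetical, not the actual ext4 logic):

	if (page_has_buffers_left_dirty(page)) {  /* hypothetical predicate */
		/* partial writeout: keep TOWRITE so sync revisits the page */
		ext4_bio_write_page(io, page, len, wbc, true);
	} else {
		/* whole page goes out: normal writeback tagging */
		ext4_bio_write_page(io, page, len, wbc, false);
	}
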
diff --combined fs/ext4/resize.c
index 08b3c116915b8289faccc489c8876382a565351d,bb9adabb7ec69015c0873c3348ebaff7a1f161f7..bb0e80f03e2eb7291c91b9808f96d5469edfd903
@@@ -42,7 -42,7 +42,7 @@@ int ext4_resize_begin(struct super_bloc
  void ext4_resize_end(struct super_block *sb)
  {
        clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags);
 -      smp_mb__after_clear_bit();
 +      smp_mb__after_atomic();
  }
  
  static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
@@@ -348,6 -348,7 +348,7 @@@ static struct buffer_head *bclean(handl
        bh = sb_getblk(sb, blk);
        if (unlikely(!bh))
                return ERR_PTR(-ENOMEM);
+       BUFFER_TRACE(bh, "get_write_access");
        if ((err = ext4_journal_get_write_access(handle, bh))) {
                brelse(bh);
                bh = ERR_PTR(err);
@@@ -426,6 -427,7 +427,7 @@@ static int set_flexbg_block_bitmap(stru
                if (unlikely(!bh))
                        return -ENOMEM;
  
+               BUFFER_TRACE(bh, "get_write_access");
                err = ext4_journal_get_write_access(handle, bh);
                if (err)
                        return err;
@@@ -518,6 -520,7 +520,7 @@@ static int setup_new_flex_group_blocks(
                                goto out;
                        }
  
+                       BUFFER_TRACE(gdb, "get_write_access");
                        err = ext4_journal_get_write_access(handle, gdb);
                        if (err) {
                                brelse(gdb);
@@@ -790,14 -793,17 +793,17 @@@ static int add_new_gdb(handle_t *handle
                goto exit_dind;
        }
  
+       BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
        if (unlikely(err))
                goto exit_dind;
  
+       BUFFER_TRACE(gdb_bh, "get_write_access");
        err = ext4_journal_get_write_access(handle, gdb_bh);
        if (unlikely(err))
                goto exit_dind;
  
+       BUFFER_TRACE(dind, "get_write_access");
        err = ext4_journal_get_write_access(handle, dind);
        if (unlikely(err))
                ext4_std_error(sb, err);
@@@ -902,6 -908,7 +908,7 @@@ static int add_new_gdb_meta_bg(struct s
        EXT4_SB(sb)->s_group_desc = n_group_desc;
        EXT4_SB(sb)->s_gdb_count++;
        ext4_kvfree(o_group_desc);
+       BUFFER_TRACE(gdb_bh, "get_write_access");
        err = ext4_journal_get_write_access(handle, gdb_bh);
        if (unlikely(err))
                brelse(gdb_bh);
@@@ -977,6 -984,7 +984,7 @@@ static int reserve_backup_gdb(handle_t 
        }
  
        for (i = 0; i < reserved_gdb; i++) {
+               BUFFER_TRACE(primary[i], "get_write_access");
                if ((err = ext4_journal_get_write_access(handle, primary[i])))
                        goto exit_bh;
        }
@@@ -1084,6 -1092,7 +1092,7 @@@ static void update_backups(struct super
                ext4_debug("update metadata backup %llu(+%llu)\n",
                           backup_block, backup_block -
                           ext4_group_first_block_no(sb, group));
+               BUFFER_TRACE(bh, "get_write_access");
                if ((err = ext4_journal_get_write_access(handle, bh)))
                        break;
                lock_buffer(bh);
@@@ -1163,6 -1172,7 +1172,7 @@@ static int ext4_add_new_descs(handle_t 
                 */
                if (gdb_off) {
                        gdb_bh = sbi->s_group_desc[gdb_num];
+                       BUFFER_TRACE(gdb_bh, "get_write_access");
                        err = ext4_journal_get_write_access(handle, gdb_bh);
  
                        if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
@@@ -1433,6 -1443,7 +1443,7 @@@ static int ext4_flex_group_add(struct s
                goto exit;
        }
  
+       BUFFER_TRACE(sbi->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, sbi->s_sbh);
        if (err)
                goto exit_journal;
@@@ -1645,6 -1656,7 +1656,7 @@@ static int ext4_group_extend_no_check(s
                return err;
        }
  
+       BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
        if (err) {
                ext4_warning(sb, "error %d on journal write access", err);
@@@ -1804,6 -1816,7 +1816,7 @@@ static int ext4_convert_meta_bg(struct 
        if (IS_ERR(handle))
                return PTR_ERR(handle);
  
+       BUFFER_TRACE(sbi->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, sbi->s_sbh);
        if (err)
                goto errout;
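
The BUFFER_TRACE() additions throughout this file implement the
"ext4: add missing BUFFER_TRACE before ext4_journal_get_write_access"
commit from the shortlog.  BUFFER_TRACE() is a jbd2 debugging hook
that is normally compiled out; the convention is to annotate the
buffer_head immediately before requesting journal write access, so
that buffer tracing, when enabled, records who took the handle:

	BUFFER_TRACE(bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, bh);
	if (err)
		goto out;	/* error label varies per call site */
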
diff --combined include/linux/page-flags.h
index 2093eb72785ea499a32c4ab70d8f20d1715d4530,ca71a1d347a0870666faad2f76cbf55230bbe2e4..3c545b48aeabdd177a09920fceda7b550baae54d
@@@ -198,7 -198,6 +198,7 @@@ struct page;       /* forward declaration */
  TESTPAGEFLAG(Locked, locked)
  PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
  PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 +      __SETPAGEFLAG(Referenced, referenced)
  PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
  PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
  PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
@@@ -209,7 -208,6 +209,7 @@@ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pin
  PAGEFLAG(SavePinned, savepinned);                     /* Xen */
  PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
  PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 +      __SETPAGEFLAG(SwapBacked, swapbacked)
  
  __PAGEFLAG(SlobFree, slob_free)
  
@@@ -319,13 -317,23 +319,23 @@@ CLEARPAGEFLAG(Uptodate, uptodate
  extern void cancel_dirty_page(struct page *page, unsigned int account_size);
  
  int test_clear_page_writeback(struct page *page);
- int test_set_page_writeback(struct page *page);
+ int __test_set_page_writeback(struct page *page, bool keep_write);
+ #define test_set_page_writeback(page)                 \
+       __test_set_page_writeback(page, false)
+ #define test_set_page_writeback_keepwrite(page)       \
+       __test_set_page_writeback(page, true)
  
  static inline void set_page_writeback(struct page *page)
  {
        test_set_page_writeback(page);
  }
  
+ static inline void set_page_writeback_keepwrite(struct page *page)
+ {
+       test_set_page_writeback_keepwrite(page);
+ }
  #ifdef CONFIG_PAGEFLAGS_EXTENDED
  /*
   * System with lots of page flags available. This allows separate
diff --combined mm/page-writeback.c
index 7d9a4ef0a0788025ddff7905698a7245eb135f73,d8691d9de3c427b6347f92115726eda825ffa01b..518e2c3f4c75f0014a03817b4910cf4050a2f480
@@@ -155,6 -155,24 +155,6 @@@ static unsigned long writeout_period_ti
   */
  #define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
  
 -/*
 - * Work out the current dirty-memory clamping and background writeout
 - * thresholds.
 - *
 - * The main aim here is to lower them aggressively if there is a lot of mapped
 - * memory around.  To avoid stressing page reclaim with lots of unreclaimable
 - * pages.  It is better to clamp down on writers than to start swapping, and
 - * performing lots of scanning.
 - *
 - * We only allow 1/2 of the currently-unmapped memory to be dirtied.
 - *
 - * We don't permit the clamping level to fall below 5% - that is getting rather
 - * excessive.
 - *
 - * We make sure that the background writeout level is below the adjusted
 - * clamping level.
 - */
 -
  /*
   * In a memory zone, there is a certain amount of pages we consider
   * available for the page cache, which is essentially the number of
@@@ -575,14 -593,14 +575,14 @@@ unsigned long bdi_dirty_limit(struct ba
   * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
   *     => fast response on large errors; small oscillation near setpoint
   */
 -static inline long long pos_ratio_polynom(unsigned long setpoint,
 +static long long pos_ratio_polynom(unsigned long setpoint,
                                          unsigned long dirty,
                                          unsigned long limit)
  {
        long long pos_ratio;
        long x;
  
 -      x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
 +      x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
                    limit - setpoint + 1);
        pos_ratio = x;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
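
In RATELIMIT_CALC_SHIFT fixed point, this function evaluates the cubic

	x         = (setpoint - dirty) / (limit - setpoint + 1)
	pos_ratio = 1 + x^3

which is exactly 1.0 at the setpoint and falls toward 0 as dirty
approaches the limit.  The switch from div_s64() to div64_s64() is
needed because div_s64() takes only a 32-bit divisor: on a machine
with enough dirtyable memory, limit - setpoint + 1 (counted in pages)
can exceed 32 bits and be truncated, in the worst case to zero,
turning the division into a divide-by-zero.  The same reasoning
applies to the div_u64() to div64_u64() change in the next hunk.
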
@@@ -824,7 -842,7 +824,7 @@@ static unsigned long bdi_position_ratio
        x_intercept = bdi_setpoint + span;
  
        if (bdi_dirty < x_intercept - span / 4) {
 -              pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
 +              pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
                                    x_intercept - bdi_setpoint + 1);
        } else
                pos_ratio /= 4;
@@@ -1605,7 -1623,7 +1605,7 @@@ void balance_dirty_pages_ratelimited(st
         * 1000+ tasks, all of them start dirtying pages at exactly the same
         * time, hence all honoured too large initial task->nr_dirtied_pause.
         */
 -      p =  &__get_cpu_var(bdp_ratelimits);
 +      p =  this_cpu_ptr(&bdp_ratelimits);
        if (unlikely(current->nr_dirtied >= ratelimit))
                *p = 0;
        else if (unlikely(*p >= ratelimit_pages)) {
         * short-lived tasks (eg. gcc invocations in a kernel build) escaping
         * the dirty throttling and livelock other long-run dirtiers.
         */
 -      p = &__get_cpu_var(dirty_throttle_leaks);
 +      p = this_cpu_ptr(&dirty_throttle_leaks);
        if (*p > 0 && current->nr_dirtied < ratelimit) {
                unsigned long nr_pages_dirtied;
                nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
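
The two hunks above are part of the tree-wide removal of
__get_cpu_var() in favour of the this_cpu accessors; both forms reach
the same per-cpu storage.  A sketch of the equivalence (bdp_ratelimits
is the real per-cpu variable used above):

	DEFINE_PER_CPU(int, bdp_ratelimits);
	int *p;

	p = &__get_cpu_var(bdp_ratelimits);	/* old idiom, being removed */
	p = this_cpu_ptr(&bdp_ratelimits);	/* new, preferred accessor */
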
@@@ -1664,7 -1682,7 +1664,7 @@@ void throttle_vm_writeout(gfp_t gfp_mas
  /*
   * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
   */
 -int dirty_writeback_centisecs_handler(ctl_table *table, int write,
 +int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
  {
        proc_dointvec(table, write, buffer, length, ppos);
@@@ -2380,7 -2398,7 +2380,7 @@@ int test_clear_page_writeback(struct pa
        return ret;
  }
  
- int test_set_page_writeback(struct page *page)
+ int __test_set_page_writeback(struct page *page, bool keep_write)
  {
        struct address_space *mapping = page_mapping(page);
        int ret;
                        radix_tree_tag_clear(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_DIRTY);
-               radix_tree_tag_clear(&mapping->page_tree,
-                                    page_index(page),
-                                    PAGECACHE_TAG_TOWRITE);
+               if (!keep_write)
+                       radix_tree_tag_clear(&mapping->page_tree,
+                                               page_index(page),
+                                               PAGECACHE_TAG_TOWRITE);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
        } else {
                ret = TestSetPageWriteback(page);
        return ret;
  
  }
- EXPORT_SYMBOL(test_set_page_writeback);
+ EXPORT_SYMBOL(__test_set_page_writeback);
  
  /*
   * Return true if any of the pages in the mapping are marked with the