Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 8 Jun 2014 20:03:35 +0000 (13:03 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sun, 8 Jun 2014 20:03:35 +0000 (13:03 -0700)
Pull ext4 updates from Ted Ts'o:
 "Clean ups and miscellaneous bug fixes, in particular for the new
  collapse_range and zero_range fallocate functions.  In addition,
  improve the scalability of adding and remove inodes from the orphan
  list"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
  ext4: handle symlink properly with inline_data
  ext4: fix wrong assert in ext4_mb_normalize_request()
  ext4: fix zeroing of page during writeback
  ext4: remove unused local variable "stored" from ext4_readdir(...)
  ext4: fix ZERO_RANGE test failure in data journalling
  ext4: reduce contention on s_orphan_lock
  ext4: use sbi in ext4_orphan_{add|del}()
  ext4: use EXT_MAX_BLOCKS in ext4_es_can_be_merged()
  ext4: add missing BUFFER_TRACE before ext4_journal_get_write_access
  ext4: remove unnecessary double parentheses
  ext4: do not destroy ext4_groupinfo_caches if ext4_mb_init() fails
  ext4: make local functions static
  ext4: fix block bitmap validation when bigalloc, ^flex_bg
  ext4: fix block bitmap initialization under sparse_super2
  ext4: find the group descriptors on a 1k-block bigalloc,meta_bg filesystem
  ext4: avoid unneeded lookup when xattr name is invalid
  ext4: fix data integrity sync in ordered mode
  ext4: remove obsoleted check
  ext4: add a new spinlock i_raw_lock to protect the ext4's raw inode
  ext4: fix locking for O_APPEND writes
  ...

fs/ext4/mballoc.c
fs/ext4/page-io.c
fs/ext4/resize.c
include/linux/page-flags.h
mm/page-writeback.c

diff --combined fs/ext4/mballoc.c
index afe8a133e3d1eee604378774b3fa2f2b3925c82e,11dc4e0033cfdefb3adb3234a83493a806e94fa8..59e31622cc6ef41cdd8474d47e43e1e634da676a
@@@ -1044,8 -1044,6 +1044,8 @@@ int ext4_mb_init_group(struct super_blo
         * allocating. If we are looking at the buddy cache we would
         * have taken a reference using ext4_mb_load_buddy and that
         * would have pinned buddy page to page cache.
 +       * The call to ext4_mb_get_buddy_page_lock will mark the
 +       * page accessed.
         */
        ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
        if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
                ret = -EIO;
                goto err;
        }
 -      mark_page_accessed(page);
  
        if (e4b.bd_buddy_page == NULL) {
                /*
                ret = -EIO;
                goto err;
        }
 -      mark_page_accessed(page);
  err:
        ext4_mb_put_buddy_page_lock(&e4b);
        return ret;
@@@ -1141,7 -1141,7 +1141,7 @@@ ext4_mb_load_buddy(struct super_block *
  
        /* we could use find_or_create_page(), but it locks the page,
         * which we'd like to avoid in the fast path ... */
 -      page = find_get_page(inode->i_mapping, pnum);
 +      page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
        if (page == NULL || !PageUptodate(page)) {
                if (page)
                        /*
                ret = -EIO;
                goto err;
        }
 +
 +      /* Pages marked accessed already */
        e4b->bd_bitmap_page = page;
        e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
 -      mark_page_accessed(page);
  
        block++;
        pnum = block / blocks_per_page;
        poff = block % blocks_per_page;
  
 -      page = find_get_page(inode->i_mapping, pnum);
 +      page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
        if (page == NULL || !PageUptodate(page)) {
                if (page)
                        page_cache_release(page);
                ret = -EIO;
                goto err;
        }
 +
 +      /* Pages marked accessed already */
        e4b->bd_buddy_page = page;
        e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
 -      mark_page_accessed(page);
  
        BUG_ON(e4b->bd_bitmap_page == NULL);
        BUG_ON(e4b->bd_buddy_page == NULL);
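
The change above folds the reference-bit update into the page-cache
lookup itself: passing FGP_ACCESSED to find_get_page_flags() makes the
lookup mark the page accessed, so the caller no longer follows every
successful lookup with a separate mark_page_accessed().  Reduced to a
sketch (not compilable on its own; mapping and index stand in for the
values used above):

	/* old: look the page up, then touch its reference state */
	page = find_get_page(mapping, index);
	if (page)
		mark_page_accessed(page);

	/* new: the lookup marks the page accessed as a side effect */
	page = find_get_page_flags(mapping, index, FGP_ACCESSED);
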
@@@ -2619,7 -2617,7 +2619,7 @@@ int ext4_mb_init(struct super_block *sb
        sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
        if (sbi->s_locality_groups == NULL) {
                ret = -ENOMEM;
-               goto out_free_groupinfo_slab;
+               goto out;
        }
        for_each_possible_cpu(i) {
                struct ext4_locality_group *lg;
  out_free_locality_groups:
        free_percpu(sbi->s_locality_groups);
        sbi->s_locality_groups = NULL;
- out_free_groupinfo_slab:
-       ext4_groupinfo_destroy_slabs();
  out:
        kfree(sbi->s_mb_offsets);
        sbi->s_mb_offsets = NULL;
@@@ -2878,6 -2874,7 +2876,7 @@@ ext4_mb_mark_diskspace_used(struct ext4
        if (!bitmap_bh)
                goto out_err;
  
+       BUFFER_TRACE(bitmap_bh, "getting write access");
        err = ext4_journal_get_write_access(handle, bitmap_bh);
        if (err)
                goto out_err;
        ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
                        ext4_free_group_clusters(sb, gdp));
  
+       BUFFER_TRACE(gdp_bh, "get_write_access");
        err = ext4_journal_get_write_access(handle, gdp_bh);
        if (err)
                goto out_err;
@@@ -3147,7 -3145,7 +3147,7 @@@ ext4_mb_normalize_request(struct ext4_a
        }
        BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
                        start > ac->ac_o_ex.fe_logical);
-       BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+       BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
  
        /* now prepare goal request */
  
diff --combined fs/ext4/page-io.c
index 1a64e7a52b841be24ddd3daa56cb47939f07656c,b6a3804a98551a5bfd002bc296bf360bea4a45a0..b24a2541a9baaa0d4c22e80a75050af2517a417d
@@@ -401,7 -401,8 +401,8 @@@ submit_and_retry
  int ext4_bio_write_page(struct ext4_io_submit *io,
                        struct page *page,
                        int len,
-                       struct writeback_control *wbc)
+                       struct writeback_control *wbc,
+                       bool keep_towrite)
  {
        struct inode *inode = page->mapping->host;
        unsigned block_start, blocksize;
        BUG_ON(!PageLocked(page));
        BUG_ON(PageWriteback(page));
  
-       set_page_writeback(page);
+       if (keep_towrite)
+               set_page_writeback_keepwrite(page);
+       else
+               set_page_writeback(page);
        ClearPageError(page);
  
+       /*
 -       * Comments copied from block_write_full_page_endio:
++       * Comments copied from block_write_full_page:
+        *
+        * The page straddles i_size.  It must be zeroed out on each and every
+        * writepage invocation because it may be mmapped.  "A file is mapped
+        * in multiples of the page size.  For a file that is not a multiple of
+        * the page size, the remaining memory is zeroed when mapped, and
+        * writes to that region are not written out to the file."
+        */
+       if (len < PAGE_CACHE_SIZE)
+               zero_user_segment(page, len, PAGE_CACHE_SIZE);
        /*
         * In the first loop we prepare and mark buffers to submit. We have to
         * mark all buffers in the page before submitting so that
        do {
                block_start = bh_offset(bh);
                if (block_start >= len) {
-                       /*
-                        * Comments copied from block_write_full_page:
-                        *
-                        * The page straddles i_size.  It must be zeroed out on
-                        * each and every writepage invocation because it may
-                        * be mmapped.  "A file is mapped in multiples of the
-                        * page size.  For a file that is not a multiple of
-                        * the  page size, the remaining memory is zeroed when
-                        * mapped, and writes to that region are not written
-                        * out to the file."
-                        */
-                       zero_user_segment(page, block_start,
-                                         block_start + blocksize);
                        clear_buffer_dirty(bh);
                        set_buffer_uptodate(bh);
                        continue;
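
The new keep_towrite flag exists for data-integrity sync: when only
part of a page is written in this pass, clearing the radix-tree
PAGECACHE_TAG_TOWRITE tag would let a subsequent WB_SYNC_ALL writeback
skip the page even though some of its buffers still need to reach
disk.  A sketch of the caller-side decision (simplified; the predicate
name is hypothetical, not the actual ext4 logic):

	if (page_has_buffers_left_dirty(page)) {  /* hypothetical predicate */
		/* partial writeout: keep TOWRITE so sync revisits the page */
		ext4_bio_write_page(io, page, len, wbc, true);
	} else {
		/* whole page goes out: normal writeback tagging */
		ext4_bio_write_page(io, page, len, wbc, false);
	}
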
diff --combined fs/ext4/resize.c
index 08b3c116915b8289faccc489c8876382a565351d,bb9adabb7ec69015c0873c3348ebaff7a1f161f7..bb0e80f03e2eb7291c91b9808f96d5469edfd903
@@@ -42,7 -42,7 +42,7 @@@ int ext4_resize_begin(struct super_bloc
  void ext4_resize_end(struct super_block *sb)
  {
        clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags);
 -      smp_mb__after_clear_bit();
 +      smp_mb__after_atomic();
  }
  
  static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
@@@ -348,6 -348,7 +348,7 @@@ static struct buffer_head *bclean(handl
        bh = sb_getblk(sb, blk);
        if (unlikely(!bh))
                return ERR_PTR(-ENOMEM);
+       BUFFER_TRACE(bh, "get_write_access");
        if ((err = ext4_journal_get_write_access(handle, bh))) {
                brelse(bh);
                bh = ERR_PTR(err);
@@@ -426,6 -427,7 +427,7 @@@ static int set_flexbg_block_bitmap(stru
                if (unlikely(!bh))
                        return -ENOMEM;
  
+               BUFFER_TRACE(bh, "get_write_access");
                err = ext4_journal_get_write_access(handle, bh);
                if (err)
                        return err;
@@@ -518,6 -520,7 +520,7 @@@ static int setup_new_flex_group_blocks(
                                goto out;
                        }
  
+                       BUFFER_TRACE(gdb, "get_write_access");
                        err = ext4_journal_get_write_access(handle, gdb);
                        if (err) {
                                brelse(gdb);
@@@ -790,14 -793,17 +793,17 @@@ static int add_new_gdb(handle_t *handle
                goto exit_dind;
        }
  
+       BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
        if (unlikely(err))
                goto exit_dind;
  
+       BUFFER_TRACE(gdb_bh, "get_write_access");
        err = ext4_journal_get_write_access(handle, gdb_bh);
        if (unlikely(err))
                goto exit_dind;
  
+       BUFFER_TRACE(dind, "get_write_access");
        err = ext4_journal_get_write_access(handle, dind);
        if (unlikely(err))
                ext4_std_error(sb, err);
@@@ -902,6 -908,7 +908,7 @@@ static int add_new_gdb_meta_bg(struct s
        EXT4_SB(sb)->s_group_desc = n_group_desc;
        EXT4_SB(sb)->s_gdb_count++;
        ext4_kvfree(o_group_desc);
+       BUFFER_TRACE(gdb_bh, "get_write_access");
        err = ext4_journal_get_write_access(handle, gdb_bh);
        if (unlikely(err))
                brelse(gdb_bh);
@@@ -977,6 -984,7 +984,7 @@@ static int reserve_backup_gdb(handle_t 
        }
  
        for (i = 0; i < reserved_gdb; i++) {
+               BUFFER_TRACE(primary[i], "get_write_access");
                if ((err = ext4_journal_get_write_access(handle, primary[i])))
                        goto exit_bh;
        }
@@@ -1084,6 -1092,7 +1092,7 @@@ static void update_backups(struct super
                ext4_debug("update metadata backup %llu(+%llu)\n",
                           backup_block, backup_block -
                           ext4_group_first_block_no(sb, group));
+               BUFFER_TRACE(bh, "get_write_access");
                if ((err = ext4_journal_get_write_access(handle, bh)))
                        break;
                lock_buffer(bh);
@@@ -1163,6 -1172,7 +1172,7 @@@ static int ext4_add_new_descs(handle_t 
                 */
                if (gdb_off) {
                        gdb_bh = sbi->s_group_desc[gdb_num];
+                       BUFFER_TRACE(gdb_bh, "get_write_access");
                        err = ext4_journal_get_write_access(handle, gdb_bh);
  
                        if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
@@@ -1433,6 -1443,7 +1443,7 @@@ static int ext4_flex_group_add(struct s
                goto exit;
        }
  
+       BUFFER_TRACE(sbi->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, sbi->s_sbh);
        if (err)
                goto exit_journal;
@@@ -1645,6 -1656,7 +1656,7 @@@ static int ext4_group_extend_no_check(s
                return err;
        }
  
+       BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
        if (err) {
                ext4_warning(sb, "error %d on journal write access", err);
@@@ -1804,6 -1816,7 +1816,7 @@@ static int ext4_convert_meta_bg(struct 
        if (IS_ERR(handle))
                return PTR_ERR(handle);
  
+       BUFFER_TRACE(sbi->s_sbh, "get_write_access");
        err = ext4_journal_get_write_access(handle, sbi->s_sbh);
        if (err)
                goto errout;
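
The BUFFER_TRACE() additions throughout this file implement the
"ext4: add missing BUFFER_TRACE before ext4_journal_get_write_access"
commit from the shortlog.  BUFFER_TRACE() is a jbd2 debugging hook
that is normally compiled out; the convention is to annotate the
buffer_head immediately before requesting journal write access, so
that buffer tracing, when enabled, records who took the handle:

	BUFFER_TRACE(bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, bh);
	if (err)
		goto out;	/* error label varies per call site */
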
diff --combined include/linux/page-flags.h
index 2093eb72785ea499a32c4ab70d8f20d1715d4530,ca71a1d347a0870666faad2f76cbf55230bbe2e4..3c545b48aeabdd177a09920fceda7b550baae54d
@@@ -198,7 -198,6 +198,7 @@@ struct page;       /* forward declaration */
  TESTPAGEFLAG(Locked, locked)
  PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
  PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 +      __SETPAGEFLAG(Referenced, referenced)
  PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
  PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
  PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
@@@ -209,7 -208,6 +209,7 @@@ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pin
  PAGEFLAG(SavePinned, savepinned);                     /* Xen */
  PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
  PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 +      __SETPAGEFLAG(SwapBacked, swapbacked)
  
  __PAGEFLAG(SlobFree, slob_free)
  
@@@ -319,13 -317,23 +319,23 @@@ CLEARPAGEFLAG(Uptodate, uptodate
  extern void cancel_dirty_page(struct page *page, unsigned int account_size);
  
  int test_clear_page_writeback(struct page *page);
- int test_set_page_writeback(struct page *page);
+ int __test_set_page_writeback(struct page *page, bool keep_write);
+ #define test_set_page_writeback(page)                 \
+       __test_set_page_writeback(page, false)
+ #define test_set_page_writeback_keepwrite(page)       \
+       __test_set_page_writeback(page, true)
  
  static inline void set_page_writeback(struct page *page)
  {
        test_set_page_writeback(page);
  }
  
+ static inline void set_page_writeback_keepwrite(struct page *page)
+ {
+       test_set_page_writeback_keepwrite(page);
+ }
  #ifdef CONFIG_PAGEFLAGS_EXTENDED
  /*
   * System with lots of page flags available. This allows separate
diff --combined mm/page-writeback.c
index 7d9a4ef0a0788025ddff7905698a7245eb135f73,d8691d9de3c427b6347f92115726eda825ffa01b..518e2c3f4c75f0014a03817b4910cf4050a2f480
@@@ -155,6 -155,24 +155,6 @@@ static unsigned long writeout_period_ti
   */
  #define VM_COMPLETIONS_PERIOD_LEN (3*HZ)
  
 -/*
 - * Work out the current dirty-memory clamping and background writeout
 - * thresholds.
 - *
 - * The main aim here is to lower them aggressively if there is a lot of mapped
 - * memory around.  To avoid stressing page reclaim with lots of unreclaimable
 - * pages.  It is better to clamp down on writers than to start swapping, and
 - * performing lots of scanning.
 - *
 - * We only allow 1/2 of the currently-unmapped memory to be dirtied.
 - *
 - * We don't permit the clamping level to fall below 5% - that is getting rather
 - * excessive.
 - *
 - * We make sure that the background writeout level is below the adjusted
 - * clamping level.
 - */
 -
  /*
   * In a memory zone, there is a certain amount of pages we consider
   * available for the page cache, which is essentially the number of
@@@ -575,14 -593,14 +575,14 @@@ unsigned long bdi_dirty_limit(struct ba
   * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
   *     => fast response on large errors; small oscillation near setpoint
   */
 -static inline long long pos_ratio_polynom(unsigned long setpoint,
 +static long long pos_ratio_polynom(unsigned long setpoint,
                                          unsigned long dirty,
                                          unsigned long limit)
  {
        long long pos_ratio;
        long x;
  
 -      x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
 +      x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
                    limit - setpoint + 1);
        pos_ratio = x;
        pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
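
In RATELIMIT_CALC_SHIFT fixed point, this function evaluates the cubic

	x         = (setpoint - dirty) / (limit - setpoint + 1)
	pos_ratio = 1 + x^3

which is exactly 1.0 at the setpoint and falls toward 0 as dirty
approaches the limit.  The switch from div_s64() to div64_s64() is
needed because div_s64() takes only a 32-bit divisor: on a machine
with enough dirtyable memory, limit - setpoint + 1 (counted in pages)
can exceed 32 bits and be truncated, in the worst case to zero,
turning the division into a divide-by-zero.  The same reasoning
applies to the div_u64() to div64_u64() change in the next hunk.
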
@@@ -824,7 -842,7 +824,7 @@@ static unsigned long bdi_position_ratio
        x_intercept = bdi_setpoint + span;
  
        if (bdi_dirty < x_intercept - span / 4) {
 -              pos_ratio = div_u64(pos_ratio * (x_intercept - bdi_dirty),
 +              pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
                                    x_intercept - bdi_setpoint + 1);
        } else
                pos_ratio /= 4;
@@@ -1605,7 -1623,7 +1605,7 @@@ void balance_dirty_pages_ratelimited(st
         * 1000+ tasks, all of them start dirtying pages at exactly the same
         * time, hence all honoured too large initial task->nr_dirtied_pause.
         */
 -      p =  &__get_cpu_var(bdp_ratelimits);
 +      p =  this_cpu_ptr(&bdp_ratelimits);
        if (unlikely(current->nr_dirtied >= ratelimit))
                *p = 0;
        else if (unlikely(*p >= ratelimit_pages)) {
         * short-lived tasks (eg. gcc invocations in a kernel build) escaping
         * the dirty throttling and livelock other long-run dirtiers.
         */
 -      p = &__get_cpu_var(dirty_throttle_leaks);
 +      p = this_cpu_ptr(&dirty_throttle_leaks);
        if (*p > 0 && current->nr_dirtied < ratelimit) {
                unsigned long nr_pages_dirtied;
                nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
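
The two hunks above are part of the tree-wide removal of
__get_cpu_var() in favour of the this_cpu accessors; both forms reach
the same per-cpu storage.  A sketch of the equivalence (bdp_ratelimits
is the real per-cpu variable used above):

	DEFINE_PER_CPU(int, bdp_ratelimits);
	int *p;

	p = &__get_cpu_var(bdp_ratelimits);	/* old idiom, being removed */
	p = this_cpu_ptr(&bdp_ratelimits);	/* new, preferred accessor */
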
@@@ -1664,7 -1682,7 +1664,7 @@@ void throttle_vm_writeout(gfp_t gfp_mas
  /*
   * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
   */
 -int dirty_writeback_centisecs_handler(ctl_table *table, int write,
 +int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
  {
        proc_dointvec(table, write, buffer, length, ppos);
@@@ -2380,7 -2398,7 +2380,7 @@@ int test_clear_page_writeback(struct pa
        return ret;
  }
  
- int test_set_page_writeback(struct page *page)
+ int __test_set_page_writeback(struct page *page, bool keep_write)
  {
        struct address_space *mapping = page_mapping(page);
        int ret;
                        radix_tree_tag_clear(&mapping->page_tree,
                                                page_index(page),
                                                PAGECACHE_TAG_DIRTY);
-               radix_tree_tag_clear(&mapping->page_tree,
-                                    page_index(page),
-                                    PAGECACHE_TAG_TOWRITE);
+               if (!keep_write)
+                       radix_tree_tag_clear(&mapping->page_tree,
+                                               page_index(page),
+                                               PAGECACHE_TAG_TOWRITE);
                spin_unlock_irqrestore(&mapping->tree_lock, flags);
        } else {
                ret = TestSetPageWriteback(page);
        return ret;
  
  }
- EXPORT_SYMBOL(test_set_page_writeback);
+ EXPORT_SYMBOL(__test_set_page_writeback);
  
  /*
   * Return true if any of the pages in the mapping are marked with the