]> asedeno.scripts.mit.edu Git - linux.git/commitdiff
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Jan 2012 23:49:54 +0000 (15:49 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 17 Jan 2012 23:49:54 +0000 (15:49 -0800)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (62 commits)
  Btrfs: use larger system chunks
  Btrfs: add a delalloc mutex to inodes for delalloc reservations
  Btrfs: space leak tracepoints
  Btrfs: protect orphan block rsv with spin_lock
  Btrfs: add allocator tracepoints
  Btrfs: don't call btrfs_throttle in file write
  Btrfs: release space on error in page_mkwrite
  Btrfs: fix btrfsck error 400 when truncating a compressed
  Btrfs: do not use btrfs_end_transaction_throttle everywhere
  Btrfs: add balance progress reporting
  Btrfs: allow for resuming restriper after it was paused
  Btrfs: allow for canceling restriper
  Btrfs: allow for pausing restriper
  Btrfs: add skip_balance mount option
  Btrfs: recover balance on mount
  Btrfs: save balance parameters to disk
  Btrfs: soft profile changing mode (aka soft convert)
  Btrfs: implement online profile changing
  Btrfs: do not reduce profile in do_chunk_alloc()
  Btrfs: virtual address space subset filter
  ...

Fix up trivial conflict in fs/btrfs/ioctl.c due to the use of the new
mnt_drop_write_file() helper.

1  2 
fs/btrfs/disk-io.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/super.c

diff --combined fs/btrfs/disk-io.c
index d8525662ca7a721b18e197191816ea16b095eb19,da4457f84d78d7246ac8ce4f1640664f90d4e92f..89e99eb384db54a8006ae991bd83ea2e61abe0eb
@@@ -43,6 -43,7 +43,7 @@@
  #include "tree-log.h"
  #include "free-space-cache.h"
  #include "inode-map.h"
+ #include "check-integrity.h"
  
  static struct extent_io_ops btree_extent_io_ops;
  static void end_workqueue_fn(struct btrfs_work *work);
@@@ -872,8 -873,7 +873,8 @@@ static int btree_submit_bio_hook(struc
  
  #ifdef CONFIG_MIGRATION
  static int btree_migratepage(struct address_space *mapping,
 -                      struct page *newpage, struct page *page)
 +                      struct page *newpage, struct page *page,
 +                      enum migrate_mode mode)
  {
        /*
         * we can't safely write a btree page from here,
        if (page_has_private(page) &&
            !try_to_release_page(page, GFP_KERNEL))
                return -EAGAIN;
 -      return migrate_page(mapping, newpage, page);
 +      return migrate_page(mapping, newpage, page, mode);
  }
  #endif
  
@@@ -1244,7 -1244,8 +1245,8 @@@ static struct btrfs_root *alloc_log_tre
        root->ref_cows = 0;
  
        leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
-                                     BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0);
+                                     BTRFS_TREE_LOG_OBJECTID, NULL,
+                                     0, 0, 0, 0);
        if (IS_ERR(leaf)) {
                kfree(root);
                return ERR_CAST(leaf);
@@@ -1580,7 -1581,9 +1582,7 @@@ static int cleaner_kthread(void *arg
                        btrfs_run_defrag_inodes(root->fs_info);
                }
  
 -              if (freezing(current)) {
 -                      refrigerator();
 -              } else {
 +              if (!try_to_freeze()) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        if (!kthread_should_stop())
                                schedule();
@@@ -1634,7 -1637,9 +1636,7 @@@ sleep
                wake_up_process(root->fs_info->cleaner_kthread);
                mutex_unlock(&root->fs_info->transaction_kthread_mutex);
  
 -              if (freezing(current)) {
 -                      refrigerator();
 -              } else {
 +              if (!try_to_freeze()) {
                        set_current_state(TASK_INTERRUPTIBLE);
                        if (!kthread_should_stop() &&
                            !btrfs_transaction_blocked(root->fs_info))
@@@ -1998,6 -2003,17 +2000,17 @@@ struct btrfs_root *open_ctree(struct su
        init_waitqueue_head(&fs_info->scrub_pause_wait);
        init_rwsem(&fs_info->scrub_super_lock);
        fs_info->scrub_workers_refcnt = 0;
+ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+       fs_info->check_integrity_print_mask = 0;
+ #endif
+       spin_lock_init(&fs_info->balance_lock);
+       mutex_init(&fs_info->balance_mutex);
+       atomic_set(&fs_info->balance_running, 0);
+       atomic_set(&fs_info->balance_pause_req, 0);
+       atomic_set(&fs_info->balance_cancel_req, 0);
+       fs_info->balance_ctl = NULL;
+       init_waitqueue_head(&fs_info->balance_wait_q);
  
        sb->s_blocksize = 4096;
        sb->s_blocksize_bits = blksize_bits(4096);
           (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
           BTRFS_UUID_SIZE);
  
-       mutex_lock(&fs_info->chunk_mutex);
        ret = btrfs_read_chunk_tree(chunk_root);
-       mutex_unlock(&fs_info->chunk_mutex);
        if (ret) {
                printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
                       sb->s_id);
@@@ -2318,9 -2332,6 +2329,6 @@@ retry_root_backup
  
        fs_info->generation = generation;
        fs_info->last_trans_committed = generation;
-       fs_info->data_alloc_profile = (u64)-1;
-       fs_info->metadata_alloc_profile = (u64)-1;
-       fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
  
        ret = btrfs_init_space_info(fs_info);
        if (ret) {
                btrfs_set_opt(fs_info->mount_opt, SSD);
        }
  
+ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+       if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
+               ret = btrfsic_mount(tree_root, fs_devices,
+                                   btrfs_test_opt(tree_root,
+                                       CHECK_INTEGRITY_INCLUDING_EXTENT_DATA) ?
+                                   1 : 0,
+                                   fs_info->check_integrity_print_mask);
+               if (ret)
+                       printk(KERN_WARNING "btrfs: failed to initialize"
+                              " integrity check module %s\n", sb->s_id);
+       }
+ #endif
        /* do not make disk changes in broken FS */
        if (btrfs_super_log_root(disk_super) != 0 &&
            !(fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)) {
                if (!err)
                        err = btrfs_orphan_cleanup(fs_info->tree_root);
                up_read(&fs_info->cleanup_work_sem);
+               if (!err)
+                       err = btrfs_recover_balance(fs_info->tree_root);
                if (err) {
                        close_ctree(tree_root);
                        return ERR_PTR(err);
@@@ -2631,7 -2659,7 +2656,7 @@@ static int write_dev_supers(struct btrf
                 * we fua the first super.  The others we allow
                 * to go down lazy.
                 */
-               ret = submit_bh(WRITE_FUA, bh);
+               ret = btrfsic_submit_bh(WRITE_FUA, bh);
                if (ret)
                        errors++;
        }
@@@ -2708,7 -2736,7 +2733,7 @@@ static int write_dev_flush(struct btrfs
        device->flush_bio = bio;
  
        bio_get(bio);
-       submit_bio(WRITE_FLUSH, bio);
+       btrfsic_submit_bio(WRITE_FLUSH, bio);
  
        return 0;
  }
@@@ -2972,6 -3000,9 +2997,9 @@@ int close_ctree(struct btrfs_root *root
        fs_info->closing = 1;
        smp_mb();
  
+       /* pause restriper - we want to resume on mount */
+       btrfs_pause_balance(root->fs_info);
        btrfs_scrub_cancel(root);
  
        /* wait for any defraggers to finish */
        btrfs_stop_workers(&fs_info->caching_workers);
        btrfs_stop_workers(&fs_info->readahead_workers);
  
+ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+       if (btrfs_test_opt(root, CHECK_INTEGRITY))
+               btrfsic_unmount(root, fs_info->fs_devices);
+ #endif
        btrfs_close_devices(fs_info->fs_devices);
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
  
diff --combined fs/btrfs/file.c
index 034d985032296cd4dfbc80e4d6717ac8b4ea77c4,0f61e11a2998d87bbf246447eae3161b79aad278..859ba2dd88903ba207c7b0e448e0f5ce7f99e46e
@@@ -678,7 -678,7 +678,7 @@@ next_slot
                                                disk_bytenr, num_bytes, 0,
                                                root->root_key.objectid,
                                                new_key.objectid,
-                                               start - extent_offset);
+                                               start - extent_offset, 0);
                                BUG_ON(ret);
                                *hint_byte = disk_bytenr;
                        }
                                                disk_bytenr, num_bytes, 0,
                                                root->root_key.objectid,
                                                key.objectid, key.offset -
-                                               extent_offset);
+                                               extent_offset, 0);
                                BUG_ON(ret);
                                inode_sub_bytes(inode,
                                                extent_end - key.offset);
@@@ -962,7 -962,7 +962,7 @@@ again
  
                ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
                                           root->root_key.objectid,
-                                          ino, orig_offset);
+                                          ino, orig_offset, 0);
                BUG_ON(ret);
  
                if (split == start) {
                del_nr++;
                ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
                                        0, root->root_key.objectid,
-                                       ino, orig_offset);
+                                       ino, orig_offset, 0);
                BUG_ON(ret);
        }
        other_start = 0;
                del_nr++;
                ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
                                        0, root->root_key.objectid,
-                                       ino, orig_offset);
+                                       ino, orig_offset, 0);
                BUG_ON(ret);
        }
        if (del_nr == 0) {
@@@ -1081,7 -1081,7 +1081,7 @@@ static noinline int prepare_pages(struc
  again:
        for (i = 0; i < num_pages; i++) {
                pages[i] = find_or_create_page(inode->i_mapping, index + i,
 -                                             mask);
 +                                             mask | __GFP_WRITE);
                if (!pages[i]) {
                        faili = i - 1;
                        err = -ENOMEM;
                                     GFP_NOFS);
        }
        for (i = 0; i < num_pages; i++) {
 -              clear_page_dirty_for_io(pages[i]);
 +              if (clear_page_dirty_for_io(pages[i]))
 +                      account_page_redirty(pages[i]);
                set_page_extent_mapped(pages[i]);
                WARN_ON(!PageLocked(pages[i]));
        }
@@@ -1274,7 -1273,6 +1274,6 @@@ static noinline ssize_t __btrfs_buffere
                                                   dirty_pages);
                if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
                        btrfs_btree_balance_dirty(root, 1);
-               btrfs_throttle(root);
  
                pos += copied;
                num_written += copied;
index 9a897bf795380808e728c8d074cb58c5bcd9cf89,efe20032e4a1385f3c8ada815eab5c82c93fce9b..d20ff87ca603bba8255b04a95f34d11bb385ab33
@@@ -319,9 -319,11 +319,11 @@@ static void io_ctl_drop_pages(struct io
        io_ctl_unmap_page(io_ctl);
  
        for (i = 0; i < io_ctl->num_pages; i++) {
-               ClearPageChecked(io_ctl->pages[i]);
-               unlock_page(io_ctl->pages[i]);
-               page_cache_release(io_ctl->pages[i]);
+               if (io_ctl->pages[i]) {
+                       ClearPageChecked(io_ctl->pages[i]);
+                       unlock_page(io_ctl->pages[i]);
+                       page_cache_release(io_ctl->pages[i]);
+               }
        }
  }
  
@@@ -423,7 -425,7 +425,7 @@@ static void io_ctl_set_crc(struct io_ct
        }
  
        if (index == 0)
 -              offset = sizeof(u32) * io_ctl->num_pages;;
 +              offset = sizeof(u32) * io_ctl->num_pages;
  
        crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
                              PAGE_CACHE_SIZE - offset);
@@@ -635,7 -637,10 +637,10 @@@ int __load_free_space_cache(struct btrf
        if (!num_entries)
                return 0;
  
-       io_ctl_init(&io_ctl, inode, root);
+       ret = io_ctl_init(&io_ctl, inode, root);
+       if (ret)
+               return ret;
        ret = readahead_cache(inode);
        if (ret)
                goto out;
@@@ -838,7 -843,7 +843,7 @@@ int __btrfs_write_out_cache(struct btrf
        struct io_ctl io_ctl;
        struct list_head bitmap_list;
        struct btrfs_key key;
-       u64 start, end, len;
+       u64 start, extent_start, extent_end, len;
        int entries = 0;
        int bitmaps = 0;
        int ret;
        if (!i_size_read(inode))
                return -1;
  
-       io_ctl_init(&io_ctl, inode, root);
+       ret = io_ctl_init(&io_ctl, inode, root);
+       if (ret)
+               return -1;
  
        /* Get the cluster for this block_group if it exists */
        if (block_group && !list_empty(&block_group->cluster_list))
                                     struct btrfs_free_cluster,
                                     block_group_list);
  
-       /*
-        * We shouldn't have switched the pinned extents yet so this is the
-        * right one
-        */
-       unpin = root->fs_info->pinned_extents;
        /* Lock all pages first so we can lock the extent safely. */
        io_ctl_prepare_pages(&io_ctl, inode, 0);
  
        lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
                         0, &cached_state, GFP_NOFS);
  
-       /*
-        * When searching for pinned extents, we need to start at our start
-        * offset.
-        */
-       if (block_group)
-               start = block_group->key.objectid;
        node = rb_first(&ctl->free_space_offset);
        if (!node && cluster) {
                node = rb_first(&cluster->root);
         * We want to add any pinned extents to our free space cache
         * so we don't leak the space
         */
+       /*
+        * We shouldn't have switched the pinned extents yet so this is the
+        * right one
+        */
+       unpin = root->fs_info->pinned_extents;
+       if (block_group)
+               start = block_group->key.objectid;
        while (block_group && (start < block_group->key.objectid +
                               block_group->key.offset)) {
-               ret = find_first_extent_bit(unpin, start, &start, &end,
+               ret = find_first_extent_bit(unpin, start,
+                                           &extent_start, &extent_end,
                                            EXTENT_DIRTY);
                if (ret) {
                        ret = 0;
                }
  
                /* This pinned extent is out of our range */
-               if (start >= block_group->key.objectid +
+               if (extent_start >= block_group->key.objectid +
                    block_group->key.offset)
                        break;
  
-               len = block_group->key.objectid +
-                       block_group->key.offset - start;
-               len = min(len, end + 1 - start);
+               extent_start = max(extent_start, start);
+               extent_end = min(block_group->key.objectid +
+                                block_group->key.offset, extent_end + 1);
+               len = extent_end - extent_start;
  
                entries++;
-               ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
+               ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
                if (ret)
                        goto out_nospc;
  
-               start = end + 1;
+               start = extent_end;
        }
  
        /* Write out the bitmaps */
  static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
                                struct btrfs_free_space *entry,
                                struct btrfs_free_cluster *cluster,
-                               u64 offset, u64 bytes, u64 min_bytes)
+                               u64 offset, u64 bytes,
+                               u64 cont1_bytes, u64 min_bytes)
  {
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        unsigned long next_zero;
        unsigned long i;
-       unsigned long search_bits;
-       unsigned long total_bits;
+       unsigned long want_bits;
+       unsigned long min_bits;
        unsigned long found_bits;
        unsigned long start = 0;
        unsigned long total_found = 0;
        int ret;
-       bool found = false;
  
        i = offset_to_bit(entry->offset, block_group->sectorsize,
                          max_t(u64, offset, entry->offset));
-       search_bits = bytes_to_bits(bytes, block_group->sectorsize);
-       total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
+       want_bits = bytes_to_bits(bytes, block_group->sectorsize);
+       min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
  
  again:
        found_bits = 0;
             i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
                next_zero = find_next_zero_bit(entry->bitmap,
                                               BITS_PER_BITMAP, i);
-               if (next_zero - i >= search_bits) {
+               if (next_zero - i >= min_bits) {
                        found_bits = next_zero - i;
                        break;
                }
        if (!found_bits)
                return -ENOSPC;
  
-       if (!found) {
+       if (!total_found) {
                start = i;
                cluster->max_size = 0;
-               found = true;
        }
  
        total_found += found_bits;
        if (cluster->max_size < found_bits * block_group->sectorsize)
                cluster->max_size = found_bits * block_group->sectorsize;
  
-       if (total_found < total_bits) {
-               i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
-               if (i - start > total_bits * 2) {
-                       total_found = 0;
-                       cluster->max_size = 0;
-                       found = false;
-               }
+       if (total_found < want_bits || cluster->max_size < cont1_bytes) {
+               i = next_zero + 1;
                goto again;
        }
  
                                 &entry->offset_index, 1);
        BUG_ON(ret);
  
+       trace_btrfs_setup_cluster(block_group, cluster,
+                                 total_found * block_group->sectorsize, 1);
        return 0;
  }
  
  /*
   * This searches the block group for just extents to fill the cluster with.
+  * Try to find a cluster with at least bytes total bytes, at least one
+  * extent of cont1_bytes, and other clusters of at least min_bytes.
   */
  static noinline int
  setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
                        struct btrfs_free_cluster *cluster,
                        struct list_head *bitmaps, u64 offset, u64 bytes,
-                       u64 min_bytes)
+                       u64 cont1_bytes, u64 min_bytes)
  {
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *first = NULL;
        struct btrfs_free_space *entry = NULL;
-       struct btrfs_free_space *prev = NULL;
        struct btrfs_free_space *last;
        struct rb_node *node;
        u64 window_start;
        u64 window_free;
        u64 max_extent;
-       u64 max_gap = 128 * 1024;
+       u64 total_size = 0;
  
        entry = tree_search_offset(ctl, offset, 0, 1);
        if (!entry)
         * We don't want bitmaps, so just move along until we find a normal
         * extent entry.
         */
-       while (entry->bitmap) {
-               if (list_empty(&entry->list))
+       while (entry->bitmap || entry->bytes < min_bytes) {
+               if (entry->bitmap && list_empty(&entry->list))
                        list_add_tail(&entry->list, bitmaps);
                node = rb_next(&entry->offset_index);
                if (!node)
        max_extent = entry->bytes;
        first = entry;
        last = entry;
-       prev = entry;
  
-       while (window_free <= min_bytes) {
-               node = rb_next(&entry->offset_index);
-               if (!node)
-                       return -ENOSPC;
+       for (node = rb_next(&entry->offset_index); node;
+            node = rb_next(&entry->offset_index)) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
  
                if (entry->bitmap) {
                        continue;
                }
  
-               /*
-                * we haven't filled the empty size and the window is
-                * very large.  reset and try again
-                */
-               if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
-                   entry->offset - window_start > (min_bytes * 2)) {
-                       first = entry;
-                       window_start = entry->offset;
-                       window_free = entry->bytes;
-                       last = entry;
+               if (entry->bytes < min_bytes)
+                       continue;
+               last = entry;
+               window_free += entry->bytes;
+               if (entry->bytes > max_extent)
                        max_extent = entry->bytes;
-               } else {
-                       last = entry;
-                       window_free += entry->bytes;
-                       if (entry->bytes > max_extent)
-                               max_extent = entry->bytes;
-               }
-               prev = entry;
        }
  
+       if (window_free < bytes || max_extent < cont1_bytes)
+               return -ENOSPC;
        cluster->window_start = first->offset;
  
        node = &first->offset_index;
  
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                node = rb_next(&entry->offset_index);
-               if (entry->bitmap)
+               if (entry->bitmap || entry->bytes < min_bytes)
                        continue;
  
                rb_erase(&entry->offset_index, &ctl->free_space_offset);
                ret = tree_insert_offset(&cluster->root, entry->offset,
                                         &entry->offset_index, 0);
+               total_size += entry->bytes;
                BUG_ON(ret);
        } while (node && entry != last);
  
        cluster->max_size = max_extent;
+       trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
        return 0;
  }
  
@@@ -2460,7 -2453,7 +2453,7 @@@ static noinline in
  setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
                     struct btrfs_free_cluster *cluster,
                     struct list_head *bitmaps, u64 offset, u64 bytes,
-                    u64 min_bytes)
+                    u64 cont1_bytes, u64 min_bytes)
  {
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct btrfs_free_space *entry;
                if (entry->bytes < min_bytes)
                        continue;
                ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
-                                          bytes, min_bytes);
+                                          bytes, cont1_bytes, min_bytes);
                if (!ret)
                        return 0;
        }
  
  /*
   * here we try to find a cluster of blocks in a block group.  The goal
-  * is to find at least bytes free and up to empty_size + bytes free.
+  * is to find at least bytes+empty_size.
   * We might not find them all in one contiguous area.
   *
   * returns zero and sets up cluster if things worked out, otherwise
@@@ -2515,23 -2508,24 +2508,24 @@@ int btrfs_find_space_cluster(struct btr
        struct btrfs_free_space *entry, *tmp;
        LIST_HEAD(bitmaps);
        u64 min_bytes;
+       u64 cont1_bytes;
        int ret;
  
-       /* for metadata, allow allocates with more holes */
+       /*
+        * Choose the minimum extent size we'll require for this
+        * cluster.  For SSD_SPREAD, don't allow any fragmentation.
+        * For metadata, allow allocates with smaller extents.  For
+        * data, keep it dense.
+        */
        if (btrfs_test_opt(root, SSD_SPREAD)) {
-               min_bytes = bytes + empty_size;
+               cont1_bytes = min_bytes = bytes + empty_size;
        } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
-               /*
-                * we want to do larger allocations when we are
-                * flushing out the delayed refs, it helps prevent
-                * making more work as we go along.
-                */
-               if (trans->transaction->delayed_refs.flushing)
-                       min_bytes = max(bytes, (bytes + empty_size) >> 1);
-               else
-                       min_bytes = max(bytes, (bytes + empty_size) >> 4);
-       } else
-               min_bytes = max(bytes, (bytes + empty_size) >> 2);
+               cont1_bytes = bytes;
+               min_bytes = block_group->sectorsize;
+       } else {
+               cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
+               min_bytes = block_group->sectorsize;
+       }
  
        spin_lock(&ctl->tree_lock);
  
         * If we know we don't have enough space to make a cluster don't even
         * bother doing all the work to try and find one.
         */
-       if (ctl->free_space < min_bytes) {
+       if (ctl->free_space < bytes) {
                spin_unlock(&ctl->tree_lock);
                return -ENOSPC;
        }
                goto out;
        }
  
+       trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
+                                min_bytes);
+       INIT_LIST_HEAD(&bitmaps);
        ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
-                                     bytes, min_bytes);
+                                     bytes + empty_size,
+                                     cont1_bytes, min_bytes);
        if (ret)
                ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
-                                          offset, bytes, min_bytes);
+                                          offset, bytes + empty_size,
+                                          cont1_bytes, min_bytes);
  
        /* Clear our temporary list */
        list_for_each_entry_safe(entry, tmp, &bitmaps, list)
                list_add_tail(&cluster->block_group_list,
                              &block_group->cluster_list);
                cluster->block_group = block_group;
+       } else {
+               trace_btrfs_failed_cluster_setup(block_group);
        }
  out:
        spin_unlock(&cluster->lock);
@@@ -2588,17 -2590,57 +2590,57 @@@ void btrfs_init_free_cluster(struct btr
        cluster->block_group = NULL;
  }
  
- int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
-                          u64 *trimmed, u64 start, u64 end, u64 minlen)
+ static int do_trimming(struct btrfs_block_group_cache *block_group,
+                      u64 *total_trimmed, u64 start, u64 bytes,
+                      u64 reserved_start, u64 reserved_bytes)
  {
-       struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
-       struct btrfs_free_space *entry = NULL;
+       struct btrfs_space_info *space_info = block_group->space_info;
        struct btrfs_fs_info *fs_info = block_group->fs_info;
-       u64 bytes = 0;
-       u64 actually_trimmed;
-       int ret = 0;
+       int ret;
+       int update = 0;
+       u64 trimmed = 0;
  
-       *trimmed = 0;
+       spin_lock(&space_info->lock);
+       spin_lock(&block_group->lock);
+       if (!block_group->ro) {
+               block_group->reserved += reserved_bytes;
+               space_info->bytes_reserved += reserved_bytes;
+               update = 1;
+       }
+       spin_unlock(&block_group->lock);
+       spin_unlock(&space_info->lock);
+       ret = btrfs_error_discard_extent(fs_info->extent_root,
+                                        start, bytes, &trimmed);
+       if (!ret)
+               *total_trimmed += trimmed;
+       btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
+       if (update) {
+               spin_lock(&space_info->lock);
+               spin_lock(&block_group->lock);
+               if (block_group->ro)
+                       space_info->bytes_readonly += reserved_bytes;
+               block_group->reserved -= reserved_bytes;
+               space_info->bytes_reserved -= reserved_bytes;
+               spin_unlock(&space_info->lock);
+               spin_unlock(&block_group->lock);
+       }
+       return ret;
+ }
+ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
+                         u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+ {
+       struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+       struct btrfs_free_space *entry;
+       struct rb_node *node;
+       int ret = 0;
+       u64 extent_start;
+       u64 extent_bytes;
+       u64 bytes;
  
        while (start < end) {
                spin_lock(&ctl->tree_lock);
                }
  
                entry = tree_search_offset(ctl, start, 0, 1);
-               if (!entry)
-                       entry = tree_search_offset(ctl,
-                                                  offset_to_bitmap(ctl, start),
-                                                  1, 1);
-               if (!entry || entry->offset >= end) {
+               if (!entry) {
                        spin_unlock(&ctl->tree_lock);
                        break;
                }
  
-               if (entry->bitmap) {
-                       ret = search_bitmap(ctl, entry, &start, &bytes);
-                       if (!ret) {
-                               if (start >= end) {
-                                       spin_unlock(&ctl->tree_lock);
-                                       break;
-                               }
-                               bytes = min(bytes, end - start);
-                               bitmap_clear_bits(ctl, entry, start, bytes);
-                               if (entry->bytes == 0)
-                                       free_bitmap(ctl, entry);
-                       } else {
-                               start = entry->offset + BITS_PER_BITMAP *
-                                       block_group->sectorsize;
+               /* skip bitmaps */
+               while (entry->bitmap) {
+                       node = rb_next(&entry->offset_index);
+                       if (!node) {
                                spin_unlock(&ctl->tree_lock);
-                               ret = 0;
-                               continue;
+                               goto out;
                        }
-               } else {
-                       start = entry->offset;
-                       bytes = min(entry->bytes, end - start);
-                       unlink_free_space(ctl, entry);
-                       kmem_cache_free(btrfs_free_space_cachep, entry);
+                       entry = rb_entry(node, struct btrfs_free_space,
+                                        offset_index);
                }
  
+               if (entry->offset >= end) {
+                       spin_unlock(&ctl->tree_lock);
+                       break;
+               }
+               extent_start = entry->offset;
+               extent_bytes = entry->bytes;
+               start = max(start, extent_start);
+               bytes = min(extent_start + extent_bytes, end) - start;
+               if (bytes < minlen) {
+                       spin_unlock(&ctl->tree_lock);
+                       goto next;
+               }
+               unlink_free_space(ctl, entry);
+               kmem_cache_free(btrfs_free_space_cachep, entry);
                spin_unlock(&ctl->tree_lock);
  
-               if (bytes >= minlen) {
-                       struct btrfs_space_info *space_info;
-                       int update = 0;
-                       space_info = block_group->space_info;
-                       spin_lock(&space_info->lock);
-                       spin_lock(&block_group->lock);
-                       if (!block_group->ro) {
-                               block_group->reserved += bytes;
-                               space_info->bytes_reserved += bytes;
-                               update = 1;
-                       }
-                       spin_unlock(&block_group->lock);
-                       spin_unlock(&space_info->lock);
-                       ret = btrfs_error_discard_extent(fs_info->extent_root,
-                                                        start,
-                                                        bytes,
-                                                        &actually_trimmed);
-                       btrfs_add_free_space(block_group, start, bytes);
-                       if (update) {
-                               spin_lock(&space_info->lock);
-                               spin_lock(&block_group->lock);
-                               if (block_group->ro)
-                                       space_info->bytes_readonly += bytes;
-                               block_group->reserved -= bytes;
-                               space_info->bytes_reserved -= bytes;
-                               spin_unlock(&space_info->lock);
-                               spin_unlock(&block_group->lock);
-                       }
+               ret = do_trimming(block_group, total_trimmed, start, bytes,
+                                 extent_start, extent_bytes);
+               if (ret)
+                       break;
+ next:
+               start += bytes;
  
-                       if (ret)
-                               break;
-                       *trimmed += actually_trimmed;
+               if (fatal_signal_pending(current)) {
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+               cond_resched();
+       }
+ out:
+       return ret;
+ }
+ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
+                       u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+ {
+       struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
+       struct btrfs_free_space *entry;
+       int ret = 0;
+       int ret2;
+       u64 bytes;
+       u64 offset = offset_to_bitmap(ctl, start);
+       while (offset < end) {
+               bool next_bitmap = false;
+               spin_lock(&ctl->tree_lock);
+               if (ctl->free_space < minlen) {
+                       spin_unlock(&ctl->tree_lock);
+                       break;
+               }
+               entry = tree_search_offset(ctl, offset, 1, 0);
+               if (!entry) {
+                       spin_unlock(&ctl->tree_lock);
+                       next_bitmap = true;
+                       goto next;
+               }
+               bytes = minlen;
+               ret2 = search_bitmap(ctl, entry, &start, &bytes);
+               if (ret2 || start >= end) {
+                       spin_unlock(&ctl->tree_lock);
+                       next_bitmap = true;
+                       goto next;
+               }
+               bytes = min(bytes, end - start);
+               if (bytes < minlen) {
+                       spin_unlock(&ctl->tree_lock);
+                       goto next;
+               }
+               bitmap_clear_bits(ctl, entry, start, bytes);
+               if (entry->bytes == 0)
+                       free_bitmap(ctl, entry);
+               spin_unlock(&ctl->tree_lock);
+               ret = do_trimming(block_group, total_trimmed, start, bytes,
+                                 start, bytes);
+               if (ret)
+                       break;
+ next:
+               if (next_bitmap) {
+                       offset += BITS_PER_BITMAP * ctl->unit;
+               } else {
+                       start += bytes;
+                       if (start >= offset + BITS_PER_BITMAP * ctl->unit)
+                               offset += BITS_PER_BITMAP * ctl->unit;
                }
-               start += bytes;
-               bytes = 0;
  
                if (fatal_signal_pending(current)) {
                        ret = -ERESTARTSYS;
        return ret;
  }
  
+ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+                          u64 *trimmed, u64 start, u64 end, u64 minlen)
+ {
+       int ret;
+       *trimmed = 0;
+       ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
+       if (ret)
+               return ret;
+       ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
+       return ret;
+ }
  /*
   * Find the left-most item in the cache tree, and then return the
   * smallest inode number in the item.
diff --combined fs/btrfs/inode.c
index 81b235a61f8c4149dd880d1d7d1238ab690a4e09,5977987abdb1e0794ac6098d6f46643cbeb6c1ac..0da19a0ea00d5ac1e854cfe1a861ee828ccca15f
@@@ -1944,19 -1944,35 +1944,35 @@@ enum btrfs_orphan_cleanup_state 
  };
  
  /*
 - * This is called in transaction commmit time. If there are no orphan
 + * This is called in transaction commit time. If there are no orphan
   * files in the subvolume, it removes orphan item and frees block_rsv
   * structure.
   */
  void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root)
  {
+       struct btrfs_block_rsv *block_rsv;
        int ret;
  
        if (!list_empty(&root->orphan_list) ||
            root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
                return;
  
+       spin_lock(&root->orphan_lock);
+       if (!list_empty(&root->orphan_list)) {
+               spin_unlock(&root->orphan_lock);
+               return;
+       }
+       if (root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) {
+               spin_unlock(&root->orphan_lock);
+               return;
+       }
+       block_rsv = root->orphan_block_rsv;
+       root->orphan_block_rsv = NULL;
+       spin_unlock(&root->orphan_lock);
        if (root->orphan_item_inserted &&
            btrfs_root_refs(&root->root_item) > 0) {
                ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
                root->orphan_item_inserted = 0;
        }
  
-       if (root->orphan_block_rsv) {
-               WARN_ON(root->orphan_block_rsv->size > 0);
-               btrfs_free_block_rsv(root, root->orphan_block_rsv);
-               root->orphan_block_rsv = NULL;
+       if (block_rsv) {
+               WARN_ON(block_rsv->size > 0);
+               btrfs_free_block_rsv(root, block_rsv);
        }
  }
  
@@@ -2224,14 -2239,7 +2239,7 @@@ int btrfs_orphan_cleanup(struct btrfs_r
                                continue;
                        }
                        nr_truncate++;
-                       /*
-                        * Need to hold the imutex for reservation purposes, not
-                        * a huge deal here but I have a WARN_ON in
-                        * btrfs_delalloc_reserve_space to catch offenders.
-                        */
-                       mutex_lock(&inode->i_mutex);
                        ret = btrfs_truncate(inode);
-                       mutex_unlock(&inode->i_mutex);
                } else {
                        nr_unlink++;
                }
@@@ -2845,7 -2853,7 +2853,7 @@@ static void __unlink_end_trans(struct b
                BUG_ON(!root->fs_info->enospc_unlink);
                root->fs_info->enospc_unlink = 0;
        }
-       btrfs_end_transaction_throttle(trans, root);
+       btrfs_end_transaction(trans, root);
  }
  
  static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
@@@ -3009,7 -3017,6 +3017,6 @@@ int btrfs_truncate_inode_items(struct b
        int pending_del_nr = 0;
        int pending_del_slot = 0;
        int extent_type = -1;
-       int encoding;
        int ret;
        int err = 0;
        u64 ino = btrfs_ino(inode);
@@@ -3059,7 -3066,6 +3066,6 @@@ search_again
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                found_type = btrfs_key_type(&found_key);
-               encoding = 0;
  
                if (found_key.objectid != ino)
                        break;
                        fi = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_file_extent_item);
                        extent_type = btrfs_file_extent_type(leaf, fi);
-                       encoding = btrfs_file_extent_compression(leaf, fi);
-                       encoding |= btrfs_file_extent_encryption(leaf, fi);
-                       encoding |= btrfs_file_extent_other_encoding(leaf, fi);
                        if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
                                item_end +=
                                    btrfs_file_extent_num_bytes(leaf, fi);
                if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
                        u64 num_dec;
                        extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
-                       if (!del_item && !encoding) {
+                       if (!del_item) {
                                u64 orig_num_bytes =
                                        btrfs_file_extent_num_bytes(leaf, fi);
                                extent_num_bytes = new_size -
@@@ -3179,7 -3181,7 +3181,7 @@@ delete
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes, 0,
                                                btrfs_header_owner(leaf),
-                                               ino, extent_offset);
+                                               ino, extent_offset, 0);
                        BUG_ON(ret);
                }
  
@@@ -3434,7 -3436,7 +3436,7 @@@ static int btrfs_setsize(struct inode *
                i_size_write(inode, newsize);
                btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
                ret = btrfs_update_inode(trans, root, inode);
-               btrfs_end_transaction_throttle(trans, root);
+               btrfs_end_transaction(trans, root);
        } else {
  
                /*
@@@ -4412,8 -4414,8 +4414,8 @@@ static struct inode *btrfs_new_inode(st
                                     struct btrfs_root *root,
                                     struct inode *dir,
                                     const char *name, int name_len,
 -                                   u64 ref_objectid, u64 objectid, int mode,
 -                                   u64 *index)
 +                                   u64 ref_objectid, u64 objectid,
 +                                   umode_t mode, u64 *index)
  {
        struct inode *inode;
        struct btrfs_inode_item *inode_item;
@@@ -4596,7 -4598,7 +4598,7 @@@ static int btrfs_add_nondir(struct btrf
  }
  
  static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 -                      int mode, dev_t rdev)
 +                      umode_t mode, dev_t rdev)
  {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        }
  out_unlock:
        nr = trans->blocks_used;
-       btrfs_end_transaction_throttle(trans, root);
+       btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root, nr);
        if (drop_inode) {
                inode_dec_link_count(inode);
  }
  
  static int btrfs_create(struct inode *dir, struct dentry *dentry,
 -                      int mode, struct nameidata *nd)
 +                      umode_t mode, struct nameidata *nd)
  {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        }
  out_unlock:
        nr = trans->blocks_used;
-       btrfs_end_transaction_throttle(trans, root);
+       btrfs_end_transaction(trans, root);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
@@@ -4782,7 -4784,7 +4784,7 @@@ static int btrfs_link(struct dentry *ol
        }
  
        nr = trans->blocks_used;
-       btrfs_end_transaction_throttle(trans, root);
+       btrfs_end_transaction(trans, root);
  fail:
        if (drop_inode) {
                inode_dec_link_count(inode);
        return err;
  }
  
 -static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 +static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  {
        struct inode *inode = NULL;
        struct btrfs_trans_handle *trans;
  
  out_fail:
        nr = trans->blocks_used;
-       btrfs_end_transaction_throttle(trans, root);
+       btrfs_end_transaction(trans, root);
        if (drop_on_err)
                iput(inode);
        btrfs_btree_balance_dirty(root, nr);
@@@ -5121,7 -5123,7 +5123,7 @@@ again
                        }
                        flush_dcache_page(page);
                } else if (create && PageUptodate(page)) {
-                       WARN_ON(1);
+                       BUG();
                        if (!trans) {
                                kunmap(page);
                                free_extent_map(em);
@@@ -6402,10 -6404,7 +6404,7 @@@ int btrfs_page_mkwrite(struct vm_area_s
        u64 page_start;
        u64 page_end;
  
-       /* Need this to keep space reservations serialized */
-       mutex_lock(&inode->i_mutex);
        ret  = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
-       mutex_unlock(&inode->i_mutex);
        if (!ret)
                ret = btrfs_update_time(vma->vm_file);
        if (ret) {
@@@ -6494,8 -6493,8 +6493,8 @@@ out_unlock
        if (!ret)
                return VM_FAULT_LOCKED;
        unlock_page(page);
-       btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
  out:
+       btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
        return ret;
  }
  
@@@ -6668,7 -6667,7 +6667,7 @@@ end_trans
                        err = ret;
  
                nr = trans->blocks_used;
-               ret = btrfs_end_transaction_throttle(trans, root);
+               ret = btrfs_end_transaction(trans, root);
                btrfs_btree_balance_dirty(root, nr);
        }
  
@@@ -6749,6 -6748,7 +6748,7 @@@ struct inode *btrfs_alloc_inode(struct 
        extent_io_tree_init(&ei->io_tree, &inode->i_data);
        extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
        mutex_init(&ei->log_mutex);
+       mutex_init(&ei->delalloc_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
        INIT_LIST_HEAD(&ei->i_orphan);
        INIT_LIST_HEAD(&ei->delalloc_inodes);
  static void btrfs_i_callback(struct rcu_head *head)
  {
        struct inode *inode = container_of(head, struct inode, i_rcu);
 -      INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
  }
  
@@@ -7074,7 -7075,7 +7074,7 @@@ static int btrfs_rename(struct inode *o
                btrfs_end_log_trans(root);
        }
  out_fail:
-       btrfs_end_transaction_throttle(trans, root);
+       btrfs_end_transaction(trans, root);
  out_notrans:
        if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
                up_read(&root->fs_info->subvol_sem);
@@@ -7246,7 -7247,7 +7246,7 @@@ out_unlock
        if (!err)
                d_instantiate(dentry, inode);
        nr = trans->blocks_used;
-       btrfs_end_transaction_throttle(trans, root);
+       btrfs_end_transaction(trans, root);
        if (drop_inode) {
                inode_dec_link_count(inode);
                iput(inode);
diff --combined fs/btrfs/ioctl.c
index 5441ff1480fdbbd9ce9fe4c9caa6fc0c62392a3c,6834be4c8709568e1f44fa8d42584a10228cdc07..2db7c1455c7f8e2f0c526662edd921679fa189b5
@@@ -176,6 -176,8 +176,8 @@@ static int btrfs_ioctl_setflags(struct 
        struct btrfs_trans_handle *trans;
        unsigned int flags, oldflags;
        int ret;
+       u64 ip_oldflags;
+       unsigned int i_oldflags;
  
        if (btrfs_root_readonly(root))
                return -EROFS;
  
        mutex_lock(&inode->i_mutex);
  
+       ip_oldflags = ip->flags;
+       i_oldflags = inode->i_flags;
        flags = btrfs_mask_flags(inode->i_mode, flags);
        oldflags = btrfs_flags_to_ioctl(ip->flags);
        if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
                }
        }
  
 -      ret = mnt_want_write(file->f_path.mnt);
 +      ret = mnt_want_write_file(file);
        if (ret)
                goto out_unlock;
  
                ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
        }
  
-       trans = btrfs_join_transaction(root);
-       BUG_ON(IS_ERR(trans));
+       trans = btrfs_start_transaction(root, 1);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
+               goto out_drop;
+       }
  
        btrfs_update_iflags(inode);
        inode->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode(trans, root, inode);
-       BUG_ON(ret);
  
        btrfs_end_transaction(trans, root);
+  out_drop:
+       if (ret) {
+               ip->flags = ip_oldflags;
+               inode->i_flags = i_oldflags;
+       }
  
 -      mnt_drop_write(file->f_path.mnt);
 +      mnt_drop_write_file(file);
-       ret = 0;
   out_unlock:
        mutex_unlock(&inode->i_mutex);
        return ret;
@@@ -358,7 -368,7 +368,7 @@@ static noinline int create_subvol(struc
                return PTR_ERR(trans);
  
        leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
-                                     0, objectid, NULL, 0, 0, 0);
+                                     0, objectid, NULL, 0, 0, 0, 0);
        if (IS_ERR(leaf)) {
                ret = PTR_ERR(leaf);
                goto fail;
@@@ -858,10 -868,8 +868,8 @@@ static int cluster_pages_for_defrag(str
                return 0;
        file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
  
-       mutex_lock(&inode->i_mutex);
        ret = btrfs_delalloc_reserve_space(inode,
                                           num_pages << PAGE_CACHE_SHIFT);
-       mutex_unlock(&inode->i_mutex);
        if (ret)
                return ret;
  again:
@@@ -1203,13 -1211,21 +1211,21 @@@ static noinline int btrfs_ioctl_resize(
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
  
+       mutex_lock(&root->fs_info->volume_mutex);
+       if (root->fs_info->balance_ctl) {
+               printk(KERN_INFO "btrfs: balance in progress\n");
+               ret = -EINVAL;
+               goto out;
+       }
        vol_args = memdup_user(arg, sizeof(*vol_args));
-       if (IS_ERR(vol_args))
-               return PTR_ERR(vol_args);
+       if (IS_ERR(vol_args)) {
+               ret = PTR_ERR(vol_args);
+               goto out;
+       }
  
        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
  
-       mutex_lock(&root->fs_info->volume_mutex);
        sizestr = vol_args->name;
        devstr = strchr(sizestr, ':');
        if (devstr) {
                printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
                       (unsigned long long)devid);
                ret = -EINVAL;
-               goto out_unlock;
+               goto out_free;
        }
        if (!strcmp(sizestr, "max"))
                new_size = device->bdev->bd_inode->i_size;
                new_size = memparse(sizestr, NULL);
                if (new_size == 0) {
                        ret = -EINVAL;
-                       goto out_unlock;
+                       goto out_free;
                }
        }
  
        if (mod < 0) {
                if (new_size > old_size) {
                        ret = -EINVAL;
-                       goto out_unlock;
+                       goto out_free;
                }
                new_size = old_size - new_size;
        } else if (mod > 0) {
  
        if (new_size < 256 * 1024 * 1024) {
                ret = -EINVAL;
-               goto out_unlock;
+               goto out_free;
        }
        if (new_size > device->bdev->bd_inode->i_size) {
                ret = -EFBIG;
-               goto out_unlock;
+               goto out_free;
        }
  
        do_div(new_size, root->sectorsize);
                trans = btrfs_start_transaction(root, 0);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
-                       goto out_unlock;
+                       goto out_free;
                }
                ret = btrfs_grow_device(trans, device, new_size);
                btrfs_commit_transaction(trans, root);
                ret = btrfs_shrink_device(device, new_size);
        }
  
- out_unlock:
-       mutex_unlock(&root->fs_info->volume_mutex);
+ out_free:
        kfree(vol_args);
+ out:
+       mutex_unlock(&root->fs_info->volume_mutex);
        return ret;
  }
  
@@@ -1855,7 -1872,7 +1872,7 @@@ static noinline int btrfs_ioctl_snap_de
                goto out;
        }
  
 -      err = mnt_want_write(file->f_path.mnt);
 +      err = mnt_want_write_file(file);
        if (err)
                goto out;
  
@@@ -1971,7 -1988,7 +1988,7 @@@ out_dput
        dput(dentry);
  out_unlock_dir:
        mutex_unlock(&dir->i_mutex);
 -      mnt_drop_write(file->f_path.mnt);
 +      mnt_drop_write_file(file);
  out:
        kfree(vol_args);
        return err;
@@@ -1987,7 -2004,7 +2004,7 @@@ static int btrfs_ioctl_defrag(struct fi
        if (btrfs_root_readonly(root))
                return -EROFS;
  
 -      ret = mnt_want_write(file->f_path.mnt);
 +      ret = mnt_want_write_file(file);
        if (ret)
                return ret;
  
                ret = -EINVAL;
        }
  out:
 -      mnt_drop_write(file->f_path.mnt);
 +      mnt_drop_write_file(file);
        return ret;
  }
  
@@@ -2052,14 -2069,25 +2069,25 @@@ static long btrfs_ioctl_add_dev(struct 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
  
+       mutex_lock(&root->fs_info->volume_mutex);
+       if (root->fs_info->balance_ctl) {
+               printk(KERN_INFO "btrfs: balance in progress\n");
+               ret = -EINVAL;
+               goto out;
+       }
        vol_args = memdup_user(arg, sizeof(*vol_args));
-       if (IS_ERR(vol_args))
-               return PTR_ERR(vol_args);
+       if (IS_ERR(vol_args)) {
+               ret = PTR_ERR(vol_args);
+               goto out;
+       }
  
        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
        ret = btrfs_init_new_device(root, vol_args->name);
  
        kfree(vol_args);
+ out:
+       mutex_unlock(&root->fs_info->volume_mutex);
        return ret;
  }
  
@@@ -2074,14 -2102,25 +2102,25 @@@ static long btrfs_ioctl_rm_dev(struct b
        if (root->fs_info->sb->s_flags & MS_RDONLY)
                return -EROFS;
  
+       mutex_lock(&root->fs_info->volume_mutex);
+       if (root->fs_info->balance_ctl) {
+               printk(KERN_INFO "btrfs: balance in progress\n");
+               ret = -EINVAL;
+               goto out;
+       }
        vol_args = memdup_user(arg, sizeof(*vol_args));
-       if (IS_ERR(vol_args))
-               return PTR_ERR(vol_args);
+       if (IS_ERR(vol_args)) {
+               ret = PTR_ERR(vol_args);
+               goto out;
+       }
  
        vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
        ret = btrfs_rm_device(root, vol_args->name);
  
        kfree(vol_args);
+ out:
+       mutex_unlock(&root->fs_info->volume_mutex);
        return ret;
  }
  
@@@ -2195,7 -2234,7 +2234,7 @@@ static noinline long btrfs_ioctl_clone(
        if (btrfs_root_readonly(root))
                return -EROFS;
  
 -      ret = mnt_want_write(file->f_path.mnt);
 +      ret = mnt_want_write_file(file);
        if (ret)
                return ret;
  
                                                        disko, diskl, 0,
                                                        root->root_key.objectid,
                                                        btrfs_ino(inode),
-                                                       new_key.offset - datao);
+                                                       new_key.offset - datao,
+                                                       0);
                                        BUG_ON(ret);
                                }
                        } else if (type == BTRFS_FILE_EXTENT_INLINE) {
@@@ -2510,7 -2550,7 +2550,7 @@@ out_unlock
  out_fput:
        fput(src_file);
  out_drop_write:
 -      mnt_drop_write(file->f_path.mnt);
 +      mnt_drop_write_file(file);
        return ret;
  }
  
@@@ -2549,7 -2589,7 +2589,7 @@@ static long btrfs_ioctl_trans_start(str
        if (btrfs_root_readonly(root))
                goto out;
  
 -      ret = mnt_want_write(file->f_path.mnt);
 +      ret = mnt_want_write_file(file);
        if (ret)
                goto out;
  
  
  out_drop:
        atomic_dec(&root->fs_info->open_ioctl_trans);
 -      mnt_drop_write(file->f_path.mnt);
 +      mnt_drop_write_file(file);
  out:
        return ret;
  }
@@@ -2800,7 -2840,7 +2840,7 @@@ long btrfs_ioctl_trans_end(struct file 
  
        atomic_dec(&root->fs_info->open_ioctl_trans);
  
 -      mnt_drop_write(file->f_path.mnt);
 +      mnt_drop_write_file(file);
        return 0;
  }
  
@@@ -2977,7 -3017,7 +3017,7 @@@ static long btrfs_ioctl_logical_to_ino(
  {
        int ret = 0;
        int size;
-       u64 extent_offset;
+       u64 extent_item_pos;
        struct btrfs_ioctl_logical_ino_args *loi;
        struct btrfs_data_container *inodes = NULL;
        struct btrfs_path *path = NULL;
        }
  
        ret = extent_from_logical(root->fs_info, loi->logical, path, &key);
+       btrfs_release_path(path);
  
        if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                ret = -ENOENT;
        if (ret < 0)
                goto out;
  
-       extent_offset = loi->logical - key.objectid;
+       extent_item_pos = loi->logical - key.objectid;
        ret = iterate_extent_inodes(root->fs_info, path, key.objectid,
-                                       extent_offset, build_ino_list, inodes);
+                                       extent_item_pos, build_ino_list,
+                                       inodes);
  
        if (ret < 0)
                goto out;
        return ret;
  }
  
+ void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+                              struct btrfs_ioctl_balance_args *bargs)
+ {
+       struct btrfs_balance_control *bctl = fs_info->balance_ctl;
+       bargs->flags = bctl->flags;
+       if (atomic_read(&fs_info->balance_running))
+               bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
+       if (atomic_read(&fs_info->balance_pause_req))
+               bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
+       if (atomic_read(&fs_info->balance_cancel_req))
+               bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;
+       memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
+       memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
+       memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
+       if (lock) {
+               spin_lock(&fs_info->balance_lock);
+               memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+               spin_unlock(&fs_info->balance_lock);
+       } else {
+               memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
+       }
+ }
+ static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
+ {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_ioctl_balance_args *bargs;
+       struct btrfs_balance_control *bctl;
+       int ret;
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (fs_info->sb->s_flags & MS_RDONLY)
+               return -EROFS;
+       mutex_lock(&fs_info->volume_mutex);
+       mutex_lock(&fs_info->balance_mutex);
+       if (arg) {
+               bargs = memdup_user(arg, sizeof(*bargs));
+               if (IS_ERR(bargs)) {
+                       ret = PTR_ERR(bargs);
+                       goto out;
+               }
+               if (bargs->flags & BTRFS_BALANCE_RESUME) {
+                       if (!fs_info->balance_ctl) {
+                               ret = -ENOTCONN;
+                               goto out_bargs;
+                       }
+                       bctl = fs_info->balance_ctl;
+                       spin_lock(&fs_info->balance_lock);
+                       bctl->flags |= BTRFS_BALANCE_RESUME;
+                       spin_unlock(&fs_info->balance_lock);
+                       goto do_balance;
+               }
+       } else {
+               bargs = NULL;
+       }
+       if (fs_info->balance_ctl) {
+               ret = -EINPROGRESS;
+               goto out_bargs;
+       }
+       bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+       if (!bctl) {
+               ret = -ENOMEM;
+               goto out_bargs;
+       }
+       bctl->fs_info = fs_info;
+       if (arg) {
+               memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
+               memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
+               memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));
+               bctl->flags = bargs->flags;
+       } else {
+               /* balance everything - no filters */
+               bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
+       }
+ do_balance:
+       ret = btrfs_balance(bctl, bargs);
+       /*
+        * bctl is freed in __cancel_balance or in free_fs_info if
+        * restriper was paused all the way until unmount
+        */
+       if (arg) {
+               if (copy_to_user(arg, bargs, sizeof(*bargs)))
+                       ret = -EFAULT;
+       }
+ out_bargs:
+       kfree(bargs);
+ out:
+       mutex_unlock(&fs_info->balance_mutex);
+       mutex_unlock(&fs_info->volume_mutex);
+       return ret;
+ }
+ static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
+ {
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       switch (cmd) {
+       case BTRFS_BALANCE_CTL_PAUSE:
+               return btrfs_pause_balance(root->fs_info);
+       case BTRFS_BALANCE_CTL_CANCEL:
+               return btrfs_cancel_balance(root->fs_info);
+       }
+       return -EINVAL;
+ }
+ static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
+                                        void __user *arg)
+ {
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_ioctl_balance_args *bargs;
+       int ret = 0;
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       mutex_lock(&fs_info->balance_mutex);
+       if (!fs_info->balance_ctl) {
+               ret = -ENOTCONN;
+               goto out;
+       }
+       bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
+       if (!bargs) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       update_ioctl_balance_args(fs_info, 1, bargs);
+       if (copy_to_user(arg, bargs, sizeof(*bargs)))
+               ret = -EFAULT;
+       kfree(bargs);
+ out:
+       mutex_unlock(&fs_info->balance_mutex);
+       return ret;
+ }
  long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
  {
        case BTRFS_IOC_DEV_INFO:
                return btrfs_ioctl_dev_info(root, argp);
        case BTRFS_IOC_BALANCE:
-               return btrfs_balance(root->fs_info->dev_root);
+               return btrfs_ioctl_balance(root, NULL);
        case BTRFS_IOC_CLONE:
                return btrfs_ioctl_clone(file, arg, 0, 0, 0);
        case BTRFS_IOC_CLONE_RANGE:
                return btrfs_ioctl_scrub_cancel(root, argp);
        case BTRFS_IOC_SCRUB_PROGRESS:
                return btrfs_ioctl_scrub_progress(root, argp);
+       case BTRFS_IOC_BALANCE_V2:
+               return btrfs_ioctl_balance(root, argp);
+       case BTRFS_IOC_BALANCE_CTL:
+               return btrfs_ioctl_balance_ctl(root, arg);
+       case BTRFS_IOC_BALANCE_PROGRESS:
+               return btrfs_ioctl_balance_progress(root, argp);
        }
  
        return -ENOTTY;
diff --combined fs/btrfs/super.c
index ae488aa1966a1b32d6a3c4438424eb690740d4c9,61717a4eb14f78e3aee3902a38659a754637767d..8ffaaa8e3df80fff13a75b9d6443373624e70753
@@@ -40,6 -40,7 +40,6 @@@
  #include <linux/magic.h>
  #include <linux/slab.h>
  #include <linux/cleancache.h>
 -#include <linux/mnt_namespace.h>
  #include <linux/ratelimit.h>
  #include "compat.h"
  #include "delayed-inode.h"
@@@ -163,8 -164,11 +163,11 @@@ enum 
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-       Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
-       Opt_inode_cache, Opt_no_space_cache, Opt_recovery, Opt_err,
+       Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
+       Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
+       Opt_check_integrity, Opt_check_integrity_including_extent_data,
+       Opt_check_integrity_print_mask,
+       Opt_err,
  };
  
  static match_table_t tokens = {
        {Opt_inode_cache, "inode_cache"},
        {Opt_no_space_cache, "nospace_cache"},
        {Opt_recovery, "recovery"},
+       {Opt_skip_balance, "skip_balance"},
+       {Opt_check_integrity, "check_int"},
+       {Opt_check_integrity_including_extent_data, "check_int_data"},
+       {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
        {Opt_err, NULL},
  };
  
@@@ -397,6 -405,40 +404,40 @@@ int btrfs_parse_options(struct btrfs_ro
                        printk(KERN_INFO "btrfs: enabling auto recovery");
                        btrfs_set_opt(info->mount_opt, RECOVERY);
                        break;
+               case Opt_skip_balance:
+                       btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
+                       break;
+ #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+               case Opt_check_integrity_including_extent_data:
+                       printk(KERN_INFO "btrfs: enabling check integrity"
+                              " including extent data\n");
+                       btrfs_set_opt(info->mount_opt,
+                                     CHECK_INTEGRITY_INCLUDING_EXTENT_DATA);
+                       btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
+                       break;
+               case Opt_check_integrity:
+                       printk(KERN_INFO "btrfs: enabling check integrity\n");
+                       btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
+                       break;
+               case Opt_check_integrity_print_mask:
+                       intarg = 0;
+                       match_int(&args[0], &intarg);
+                       if (intarg) {
+                               info->check_integrity_print_mask = intarg;
+                               printk(KERN_INFO "btrfs:"
+                                      " check_integrity_print_mask 0x%x\n",
+                                      info->check_integrity_print_mask);
+                       }
+                       break;
+ #else
+               case Opt_check_integrity_including_extent_data:
+               case Opt_check_integrity:
+               case Opt_check_integrity_print_mask:
+                       printk(KERN_ERR "btrfs: support for check_integrity*"
+                              " not compiled in!\n");
+                       ret = -EINVAL;
+                       goto out;
+ #endif
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@@ -661,9 -703,9 +702,9 @@@ int btrfs_sync_fs(struct super_block *s
        return ret;
  }
  
 -static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 +static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
  {
 -      struct btrfs_root *root = btrfs_sb(vfs->mnt_sb);
 +      struct btrfs_root *root = btrfs_sb(dentry->d_sb);
        struct btrfs_fs_info *info = root->fs_info;
        char *compress_type;
  
                seq_puts(seq, ",autodefrag");
        if (btrfs_test_opt(root, INODE_MAP_CACHE))
                seq_puts(seq, ",inode_cache");
+       if (btrfs_test_opt(root, SKIP_BALANCE))
+               seq_puts(seq, ",skip_balance");
        return 0;
  }