asedeno.scripts.mit.edu Git - linux.git/blobdiff - fs/btrfs/inode.c
Btrfs: fix race between using extent maps and merging them
[linux.git] / fs / btrfs / inode.c
index 1f7a280481e42574c997e9d382eee58eb459ff36..5b3ec93ff911d7af07127fe3a1824024eedcf511 100644 (file)
@@ -2189,6 +2189,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 /* see btrfs_writepage_start_hook for details on why this is required */
 struct btrfs_writepage_fixup {
        struct page *page;
+       struct inode *inode;
        struct btrfs_work work;
 };
 
@@ -2203,9 +2204,20 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
        u64 page_start;
        u64 page_end;
        int ret = 0;
+       bool free_delalloc_space = true;
 
        fixup = container_of(work, struct btrfs_writepage_fixup, work);
        page = fixup->page;
+       inode = fixup->inode;
+       page_start = page_offset(page);
+       page_end = page_offset(page) + PAGE_SIZE - 1;
+
+       /*
+        * This is similar to page_mkwrite, we need to reserve the space before
+        * we take the page lock.
+        */
+       ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
+                                          PAGE_SIZE);
 again:
        lock_page(page);
 
@@ -2214,25 +2226,48 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
         * page->mapping may go NULL, but it shouldn't be moved to a different
         * address space.
         */
-       if (!page->mapping || !PageDirty(page) || !PageChecked(page))
+       if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
+               /*
+                * Unfortunately this is a little tricky, either
+                *
+                * 1) We got here and our page had already been dealt with and
+                *    we reserved our space, thus ret == 0, so we need to just
+                *    drop our space reservation and bail.  This can happen the
+                *    first time we come into the fixup worker, or could happen
+                *    while waiting for the ordered extent.
+                * 2) Our page was already dealt with, but we happened to get an
+                *    ENOSPC above from the btrfs_delalloc_reserve_space.  In
+                *    this case we obviously don't have anything to release, but
+                *    because the page was already dealt with we don't want to
+                *    mark the page with an error, so make sure we're resetting
+                *    ret to 0.  This is why we have this check _before_ the ret
+                *    check, because we do not want to have a surprise ENOSPC
+                *    when the page was already properly dealt with.
+                */
+               if (!ret) {
+                       btrfs_delalloc_release_extents(BTRFS_I(inode),
+                                                      PAGE_SIZE);
+                       btrfs_delalloc_release_space(inode, data_reserved,
+                                                    page_start, PAGE_SIZE,
+                                                    true);
+               }
+               ret = 0;
                goto out_page;
+       }
 
        /*
-        * We keep the PageChecked() bit set until we're done with the
-        * btrfs_start_ordered_extent() dance that we do below.  That drops and
-        * retakes the page lock, so we don't want new fixup workers queued for
-        * this page during the churn.
+        * We can't mess with the page state unless it is locked, so now that
+        * it is locked bail if we failed to make our space reservation.
         */
-       inode = page->mapping->host;
-       page_start = page_offset(page);
-       page_end = page_offset(page) + PAGE_SIZE - 1;
+       if (ret)
+               goto out_page;
 
        lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
                         &cached_state);
 
        /* already ordered? We're done */
        if (PagePrivate2(page))
-               goto out;
+               goto out_reserved;
 
        ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
                                        PAGE_SIZE);
@@ -2245,11 +2280,6 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
                goto again;
        }
 
-       ret = btrfs_delalloc_reserve_space(inode, &data_reserved, page_start,
-                                          PAGE_SIZE);
-       if (ret)
-               goto out;
-
        ret = btrfs_set_extent_delalloc(inode, page_start, page_end, 0,
                                        &cached_state);
        if (ret)
@@ -2263,12 +2293,12 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
         * The page was dirty when we started, nothing should have cleaned it.
         */
        BUG_ON(!PageDirty(page));
+       free_delalloc_space = false;
 out_reserved:
        btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
-       if (ret)
+       if (free_delalloc_space)
                btrfs_delalloc_release_space(inode, data_reserved, page_start,
                                             PAGE_SIZE, true);
-out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
                             &cached_state);
 out_page:
@@ -2287,6 +2317,12 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
        put_page(page);
        kfree(fixup);
        extent_changeset_free(data_reserved);
+       /*
+        * As a precaution, do a delayed iput in case it would be the last iput
+        * that could need flushing space. Recursing back to fixup worker would
+        * deadlock.
+        */
+       btrfs_add_delayed_iput(inode);
 }
 
 /*
@@ -2324,10 +2360,18 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
        if (!fixup)
                return -EAGAIN;
 
+       /*
+        * We are already holding a reference to this inode from
+        * write_cache_pages.  We need to hold it because the space reservation
+        * takes place outside of the page lock, and we can't trust
+        * page->mapping outside of the page lock.
+        */
+       ihold(inode);
        SetPageChecked(page);
        get_page(page);
        btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
        fixup->page = page;
+       fixup->inode = inode;
        btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
 
        return -EAGAIN;