]> asedeno.scripts.mit.edu Git - linux.git/blobdiff - fs/btrfs/volumes.c
Merge tag 'for-5.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
[linux.git] / fs / btrfs / volumes.c
index 9b78e720c6973c6e141fff069e212cd388fd58f8..9cfc668f91f4398bee6c5fb621ff2f52ea7a9ddf 100644 (file)
@@ -30,6 +30,7 @@
 #include "tree-checker.h"
 #include "space-info.h"
 #include "block-group.h"
+#include "discard.h"
 
 const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID10] = {
@@ -66,6 +67,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 2,
                .devs_increment = 3,
                .ncopies        = 3,
+               .nparity        = 0,
                .raid_name      = "raid1c3",
                .bg_flag        = BTRFS_BLOCK_GROUP_RAID1C3,
                .mindev_error   = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
@@ -78,6 +80,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .tolerated_failures = 3,
                .devs_increment = 4,
                .ncopies        = 4,
+               .nparity        = 0,
                .raid_name      = "raid1c4",
                .bg_flag        = BTRFS_BLOCK_GROUP_RAID1C4,
                .mindev_error   = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
@@ -438,39 +441,6 @@ static noinline struct btrfs_fs_devices *find_fsid(
 
        ASSERT(fsid);
 
-       if (metadata_fsid) {
-               /*
-                * Handle scanned device having completed its fsid change but
-                * belonging to a fs_devices that was created by first scanning
-                * a device which didn't have its fsid/metadata_uuid changed
-                * at all and the CHANGING_FSID_V2 flag set.
-                */
-               list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
-                       if (fs_devices->fsid_change &&
-                           memcmp(metadata_fsid, fs_devices->fsid,
-                                  BTRFS_FSID_SIZE) == 0 &&
-                           memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
-                                  BTRFS_FSID_SIZE) == 0) {
-                               return fs_devices;
-                       }
-               }
-               /*
-                * Handle scanned device having completed its fsid change but
-                * belonging to a fs_devices that was created by a device that
-                * has an outdated pair of fsid/metadata_uuid and
-                * CHANGING_FSID_V2 flag set.
-                */
-               list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
-                       if (fs_devices->fsid_change &&
-                           memcmp(fs_devices->metadata_uuid,
-                                  fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
-                           memcmp(metadata_fsid, fs_devices->metadata_uuid,
-                                  BTRFS_FSID_SIZE) == 0) {
-                               return fs_devices;
-                       }
-               }
-       }
-
        /* Handle non-split brain cases */
        list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
                if (metadata_fsid) {
@@ -486,6 +456,47 @@ static noinline struct btrfs_fs_devices *find_fsid(
        return NULL;
 }
 
+static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
+                               struct btrfs_super_block *disk_super)
+{
+
+       struct btrfs_fs_devices *fs_devices;
+
+       /*
+        * Handle scanned device having completed its fsid change but
+        * belonging to a fs_devices that was created by first scanning
+        * a device which didn't have its fsid/metadata_uuid changed
+        * at all and the CHANGING_FSID_V2 flag set.
+        */
+       list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+               if (fs_devices->fsid_change &&
+                   memcmp(disk_super->metadata_uuid, fs_devices->fsid,
+                          BTRFS_FSID_SIZE) == 0 &&
+                   memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
+                          BTRFS_FSID_SIZE) == 0) {
+                       return fs_devices;
+               }
+       }
+       /*
+        * Handle scanned device having completed its fsid change but
+        * belonging to a fs_devices that was created by a device that
+        * has an outdated pair of fsid/metadata_uuid and
+        * CHANGING_FSID_V2 flag set.
+        */
+       list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+               if (fs_devices->fsid_change &&
+                   memcmp(fs_devices->metadata_uuid,
+                          fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
+                   memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
+                          BTRFS_FSID_SIZE) == 0) {
+                       return fs_devices;
+               }
+       }
+
+       return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
+}
+
+
 static int
 btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
                      int flush, struct block_device **bdev,
@@ -669,7 +680,9 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 
 /*
  * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
- * being created with a disk that has already completed its fsid change.
+ * being created with a disk that has already completed its fsid change. Such
+ * disk can belong to an fs which has its FSID changed or to one which doesn't.
+ * Handle both cases here.
  */
 static struct btrfs_fs_devices *find_fsid_inprogress(
                                        struct btrfs_super_block *disk_super)
@@ -685,7 +698,7 @@ static struct btrfs_fs_devices *find_fsid_inprogress(
                }
        }
 
-       return NULL;
+       return find_fsid(disk_super->fsid, NULL);
 }
 
 
@@ -697,17 +710,54 @@ static struct btrfs_fs_devices *find_fsid_changed(
        /*
         * Handles the case where scanned device is part of an fs that had
         * multiple successful changes of FSID but curently device didn't
-        * observe it. Meaning our fsid will be different than theirs.
+        * observe it. Meaning our fsid will be different than theirs. We need
+        * to handle two subcases:
+        *  1 - The fs still continues to have different METADATA/FSID uuids.
+        *  2 - The fs is switched back to its original FSID (METADATA/FSID
+        *  are equal).
         */
        list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+               /* Changed UUIDs */
                if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
                           BTRFS_FSID_SIZE) != 0 &&
                    memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
                           BTRFS_FSID_SIZE) == 0 &&
                    memcmp(fs_devices->fsid, disk_super->fsid,
-                          BTRFS_FSID_SIZE) != 0) {
+                          BTRFS_FSID_SIZE) != 0)
+                       return fs_devices;
+
+               /* Unchanged UUIDs */
+               if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
+                          BTRFS_FSID_SIZE) == 0 &&
+                   memcmp(fs_devices->fsid, disk_super->metadata_uuid,
+                          BTRFS_FSID_SIZE) == 0)
+                       return fs_devices;
+       }
+
+       return NULL;
+}
+
+static struct btrfs_fs_devices *find_fsid_reverted_metadata(
+                               struct btrfs_super_block *disk_super)
+{
+       struct btrfs_fs_devices *fs_devices;
+
+       /*
+        * Handle the case where the scanned device is part of an fs whose last
+        * metadata UUID change reverted it to the original FSID. At the same
+        * time, fs_devices was first created by another constituent device
+        * which didn't fully observe the operation. This results in a
+        * btrfs_fs_devices created with metadata/fsid different AND
+        * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the
+        * fs_devices equal to the FSID of the disk.
+        */
+       list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
+               if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
+                          BTRFS_FSID_SIZE) != 0 &&
+                   memcmp(fs_devices->metadata_uuid, disk_super->fsid,
+                          BTRFS_FSID_SIZE) == 0 &&
+                   fs_devices->fsid_change)
                        return fs_devices;
-               }
        }
 
        return NULL;
@@ -734,24 +784,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
                                        BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
 
        if (fsid_change_in_progress) {
-               if (!has_metadata_uuid) {
-                       /*
-                        * When we have an image which has CHANGING_FSID_V2 set
-                        * it might belong to either a filesystem which has
-                        * disks with completed fsid change or it might belong
-                        * to fs with no UUID changes in effect, handle both.
-                        */
+               if (!has_metadata_uuid)
                        fs_devices = find_fsid_inprogress(disk_super);
-                       if (!fs_devices)
-                               fs_devices = find_fsid(disk_super->fsid, NULL);
-               } else {
+               else
                        fs_devices = find_fsid_changed(disk_super);
-               }
        } else if (has_metadata_uuid) {
-               fs_devices = find_fsid(disk_super->fsid,
-                                      disk_super->metadata_uuid);
+               fs_devices = find_fsid_with_metadata_uuid(disk_super);
        } else {
-               fs_devices = find_fsid(disk_super->fsid, NULL);
+               fs_devices = find_fsid_reverted_metadata(disk_super);
+               if (!fs_devices)
+                       fs_devices = find_fsid(disk_super->fsid, NULL);
        }
 
 
@@ -781,12 +823,18 @@ static noinline struct btrfs_device *device_list_add(const char *path,
                 * a device which had the CHANGING_FSID_V2 flag then replace the
                 * metadata_uuid/fsid values of the fs_devices.
                 */
-               if (has_metadata_uuid && fs_devices->fsid_change &&
+               if (fs_devices->fsid_change &&
                    found_transid > fs_devices->latest_generation) {
                        memcpy(fs_devices->fsid, disk_super->fsid,
                                        BTRFS_FSID_SIZE);
-                       memcpy(fs_devices->metadata_uuid,
-                                       disk_super->metadata_uuid, BTRFS_FSID_SIZE);
+
+                       if (has_metadata_uuid)
+                               memcpy(fs_devices->metadata_uuid,
+                                      disk_super->metadata_uuid,
+                                      BTRFS_FSID_SIZE);
+                       else
+                               memcpy(fs_devices->metadata_uuid,
+                                      disk_super->fsid, BTRFS_FSID_SIZE);
 
                        fs_devices->fsid_change = false;
                }
@@ -1064,11 +1112,6 @@ static void btrfs_close_bdev(struct btrfs_device *device)
 static void btrfs_close_one_device(struct btrfs_device *device)
 {
        struct btrfs_fs_devices *fs_devices = device->fs_devices;
-       struct btrfs_device *new_device;
-       struct rcu_string *name;
-
-       if (device->bdev)
-               fs_devices->open_devices--;
 
        if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
            device->devid != BTRFS_DEV_REPLACE_DEVID) {
@@ -1080,23 +1123,22 @@ static void btrfs_close_one_device(struct btrfs_device *device)
                fs_devices->missing_devices--;
 
        btrfs_close_bdev(device);
-
-       new_device = btrfs_alloc_device(NULL, &device->devid,
-                                       device->uuid);
-       BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
-
-       /* Safe because we are under uuid_mutex */
-       if (device->name) {
-               name = rcu_string_strdup(device->name->str, GFP_NOFS);
-               BUG_ON(!name); /* -ENOMEM */
-               rcu_assign_pointer(new_device->name, name);
+       if (device->bdev) {
+               fs_devices->open_devices--;
+               device->bdev = NULL;
        }
+       clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 
-       list_replace_rcu(&device->dev_list, &new_device->dev_list);
-       new_device->fs_devices = device->fs_devices;
+       device->fs_info = NULL;
+       atomic_set(&device->dev_stats_ccnt, 0);
+       extent_io_tree_release(&device->alloc_state);
 
-       synchronize_rcu();
-       btrfs_free_device(device);
+       /* Verify the device is back in a pristine state */
+       ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
+       ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
+       ASSERT(list_empty(&device->dev_alloc_list));
+       ASSERT(list_empty(&device->post_commit_list));
+       ASSERT(atomic_read(&device->reada_in_flight) == 0);
 }
 
 static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
@@ -2130,7 +2172,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
 {
        struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
 
-       WARN_ON(!tgtdev);
        mutex_lock(&fs_devices->device_list_mutex);
 
        btrfs_sysfs_rm_device_link(fs_devices, tgtdev);
@@ -2875,6 +2916,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 {
        struct btrfs_root *root = fs_info->chunk_root;
        struct btrfs_trans_handle *trans;
+       struct btrfs_block_group *block_group;
        int ret;
 
        /*
@@ -2898,6 +2940,12 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
        if (ret)
                return ret;
 
+       block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
+       if (!block_group)
+               return -ENOENT;
+       btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
+       btrfs_put_block_group(block_group);
+
        trans = btrfs_start_trans_remove_block_group(root->fs_info,
                                                     chunk_offset);
        if (IS_ERR(trans)) {
@@ -6111,75 +6159,6 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
        return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
 }
 
-int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
-                    u64 physical, u64 **logical, int *naddrs, int *stripe_len)
-{
-       struct extent_map *em;
-       struct map_lookup *map;
-       u64 *buf;
-       u64 bytenr;
-       u64 length;
-       u64 stripe_nr;
-       u64 rmap_len;
-       int i, j, nr = 0;
-
-       em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
-       if (IS_ERR(em))
-               return -EIO;
-
-       map = em->map_lookup;
-       length = em->len;
-       rmap_len = map->stripe_len;
-
-       if (map->type & BTRFS_BLOCK_GROUP_RAID10)
-               length = div_u64(length, map->num_stripes / map->sub_stripes);
-       else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
-               length = div_u64(length, map->num_stripes);
-       else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-               length = div_u64(length, nr_data_stripes(map));
-               rmap_len = map->stripe_len * nr_data_stripes(map);
-       }
-
-       buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
-       BUG_ON(!buf); /* -ENOMEM */
-
-       for (i = 0; i < map->num_stripes; i++) {
-               if (map->stripes[i].physical > physical ||
-                   map->stripes[i].physical + length <= physical)
-                       continue;
-
-               stripe_nr = physical - map->stripes[i].physical;
-               stripe_nr = div64_u64(stripe_nr, map->stripe_len);
-
-               if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
-                       stripe_nr = stripe_nr * map->num_stripes + i;
-                       stripe_nr = div_u64(stripe_nr, map->sub_stripes);
-               } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-                       stripe_nr = stripe_nr * map->num_stripes + i;
-               } /* else if RAID[56], multiply by nr_data_stripes().
-                  * Alternatively, just use rmap_len below instead of
-                  * map->stripe_len */
-
-               bytenr = chunk_start + stripe_nr * rmap_len;
-               WARN_ON(nr >= map->num_stripes);
-               for (j = 0; j < nr; j++) {
-                       if (buf[j] == bytenr)
-                               break;
-               }
-               if (j == nr) {
-                       WARN_ON(nr >= map->num_stripes);
-                       buf[nr++] = bytenr;
-               }
-       }
-
-       *logical = buf;
-       *naddrs = nr;
-       *stripe_len = rmap_len;
-
-       free_extent_map(em);
-       return 0;
-}
-
 static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
 {
        bio->bi_private = bbio->private;
@@ -6480,19 +6459,14 @@ static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
 {
        int index = btrfs_bg_flags_to_raid_index(type);
        int ncopies = btrfs_raid_array[index].ncopies;
+       const int nparity = btrfs_raid_array[index].nparity;
        int data_stripes;
 
-       switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
-       case BTRFS_BLOCK_GROUP_RAID5:
-               data_stripes = num_stripes - 1;
-               break;
-       case BTRFS_BLOCK_GROUP_RAID6:
-               data_stripes = num_stripes - 2;
-               break;
-       default:
+       if (nparity)
+               data_stripes = num_stripes - nparity;
+       else
                data_stripes = num_stripes / ncopies;
-               break;
-       }
+
        return div_u64(chunk_len, data_stripes);
 }
 
@@ -7331,6 +7305,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
                        else
                                btrfs_dev_stat_set(dev, i, 0);
                }
+               btrfs_info(fs_info, "device stats zeroed by %s (%d)",
+                          current->comm, task_pid_nr(current));
        } else {
                for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
                        if (stats->nr_items > i)