asedeno.scripts.mit.edu Git - linux.git/blobdiff - drivers/md/md.c
Merge tag 'vfio-v4.20-rc1.v2' of git://github.com/awilliam/linux-vfio
[linux.git] / drivers / md / md.c
index 29b0cd9ec951ee4603279656e7148ba2a5b8d763..fc488cb30a94780d8a4fee0674d718aa901b047f 100644 (file)
@@ -204,10 +204,6 @@ static int start_readonly;
  */
 static bool create_on_open = true;
 
-/* bio_clone_mddev
- * like bio_clone_bioset, but with a local bio set
- */
-
 struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                            struct mddev *mddev)
 {
@@ -335,6 +331,7 @@ EXPORT_SYMBOL(md_handle_request);
 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
        const int rw = bio_data_dir(bio);
+       const int sgrp = op_stat_group(bio_op(bio));
        struct mddev *mddev = q->queuedata;
        unsigned int sectors;
        int cpu;
@@ -363,8 +360,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
        md_handle_request(mddev, bio);
 
        cpu = part_stat_lock();
-       part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
-       part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+       part_stat_inc(cpu, &mddev->gendisk->part0, ios[sgrp]);
+       part_stat_add(cpu, &mddev->gendisk->part0, sectors[sgrp], sectors);
        part_stat_unlock();
 
        return BLK_QC_T_NONE;
@@ -455,10 +452,11 @@ static void md_end_flush(struct bio *fbio)
        rdev_dec_pending(rdev, mddev);
 
        if (atomic_dec_and_test(&fi->flush_pending)) {
-               if (bio->bi_iter.bi_size == 0)
+               if (bio->bi_iter.bi_size == 0) {
                        /* an empty barrier - all done */
                        bio_endio(bio);
-               else {
+                       mempool_free(fi, mddev->flush_pool);
+               } else {
                        INIT_WORK(&fi->flush_work, submit_flushes);
                        queue_work(md_wq, &fi->flush_work);
                }
@@ -512,10 +510,11 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
        rcu_read_unlock();
 
        if (atomic_dec_and_test(&fi->flush_pending)) {
-               if (bio->bi_iter.bi_size == 0)
+               if (bio->bi_iter.bi_size == 0) {
                        /* an empty barrier - all done */
                        bio_endio(bio);
-               else {
+                       mempool_free(fi, mddev->flush_pool);
+               } else {
                        INIT_WORK(&fi->flush_work, submit_flushes);
                        queue_work(md_wq, &fi->flush_work);
                }
@@ -2571,7 +2570,7 @@ void md_update_sb(struct mddev *mddev, int force_change)
        if (mddev->queue)
                blk_add_trace_msg(mddev->queue, "md md_update_sb");
 rewrite:
-       bitmap_update_sb(mddev->bitmap);
+       md_bitmap_update_sb(mddev->bitmap);
        rdev_for_each(rdev, mddev) {
                char b[BDEVNAME_SIZE];
 
@@ -4384,10 +4383,10 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len)
                        if (buf == end) break;
                }
                if (*end && !isspace(*end)) break;
-               bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
+               md_bitmap_dirty_bits(mddev->bitmap, chunk, end_chunk);
                buf = skip_spaces(end);
        }
-       bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
+       md_bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
 out:
        mddev_unlock(mddev);
        return len;
@@ -5547,7 +5546,8 @@ int md_run(struct mddev *mddev)
                else
                        pr_warn("md: personality for level %s is not loaded!\n",
                                mddev->clevel);
-               return -EINVAL;
+               err = -EINVAL;
+               goto abort;
        }
        spin_unlock(&pers_lock);
        if (mddev->level != pers->level) {
@@ -5560,7 +5560,8 @@ int md_run(struct mddev *mddev)
            pers->start_reshape == NULL) {
                /* This personality cannot handle reshaping... */
                module_put(pers->owner);
-               return -EINVAL;
+               err = -EINVAL;
+               goto abort;
        }
 
        if (pers->sync_request) {
@@ -5613,7 +5614,7 @@ int md_run(struct mddev *mddev)
            (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
                struct bitmap *bitmap;
 
-               bitmap = bitmap_create(mddev, -1);
+               bitmap = md_bitmap_create(mddev, -1);
                if (IS_ERR(bitmap)) {
                        err = PTR_ERR(bitmap);
                        pr_warn("%s: failed to create bitmap (%d)\n",
@@ -5628,8 +5629,8 @@ int md_run(struct mddev *mddev)
                        pers->free(mddev, mddev->private);
                mddev->private = NULL;
                module_put(pers->owner);
-               bitmap_destroy(mddev);
-               return err;
+               md_bitmap_destroy(mddev);
+               goto abort;
        }
        if (mddev->queue) {
                bool nonrot = true;
@@ -5713,9 +5714,9 @@ static int do_md_run(struct mddev *mddev)
        err = md_run(mddev);
        if (err)
                goto out;
-       err = bitmap_load(mddev);
+       err = md_bitmap_load(mddev);
        if (err) {
-               bitmap_destroy(mddev);
+               md_bitmap_destroy(mddev);
                goto out;
        }
 
@@ -5857,7 +5858,7 @@ static void __md_stop_writes(struct mddev *mddev)
                mddev->pers->quiesce(mddev, 1);
                mddev->pers->quiesce(mddev, 0);
        }
-       bitmap_flush(mddev);
+       md_bitmap_flush(mddev);
 
        if (mddev->ro == 0 &&
            ((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
@@ -5879,7 +5880,7 @@ EXPORT_SYMBOL_GPL(md_stop_writes);
 
 static void mddev_detach(struct mddev *mddev)
 {
-       bitmap_wait_behind_writes(mddev);
+       md_bitmap_wait_behind_writes(mddev);
        if (mddev->pers && mddev->pers->quiesce) {
                mddev->pers->quiesce(mddev, 1);
                mddev->pers->quiesce(mddev, 0);
@@ -5892,7 +5893,7 @@ static void mddev_detach(struct mddev *mddev)
 static void __md_stop(struct mddev *mddev)
 {
        struct md_personality *pers = mddev->pers;
-       bitmap_destroy(mddev);
+       md_bitmap_destroy(mddev);
        mddev_detach(mddev);
        /* Ensure ->event_work is done */
        flush_workqueue(md_misc_wq);
@@ -5905,14 +5906,6 @@ static void __md_stop(struct mddev *mddev)
                mddev->to_remove = &md_redundancy_group;
        module_put(pers->owner);
        clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-}
-
-void md_stop(struct mddev *mddev)
-{
-       /* stop the array and free an attached data structures.
-        * This is called from dm-raid
-        */
-       __md_stop(mddev);
        if (mddev->flush_bio_pool) {
                mempool_destroy(mddev->flush_bio_pool);
                mddev->flush_bio_pool = NULL;
@@ -5921,6 +5914,14 @@ void md_stop(struct mddev *mddev)
                mempool_destroy(mddev->flush_pool);
                mddev->flush_pool = NULL;
        }
+}
+
+void md_stop(struct mddev *mddev)
+{
+       /* stop the array and free an attached data structures.
+        * This is called from dm-raid
+        */
+       __md_stop(mddev);
        bioset_exit(&mddev->bio_set);
        bioset_exit(&mddev->sync_set);
 }
@@ -6711,21 +6712,21 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
                if (fd >= 0) {
                        struct bitmap *bitmap;
 
-                       bitmap = bitmap_create(mddev, -1);
+                       bitmap = md_bitmap_create(mddev, -1);
                        mddev_suspend(mddev);
                        if (!IS_ERR(bitmap)) {
                                mddev->bitmap = bitmap;
-                               err = bitmap_load(mddev);
+                               err = md_bitmap_load(mddev);
                        } else
                                err = PTR_ERR(bitmap);
                        if (err) {
-                               bitmap_destroy(mddev);
+                               md_bitmap_destroy(mddev);
                                fd = -1;
                        }
                        mddev_resume(mddev);
                } else if (fd < 0) {
                        mddev_suspend(mddev);
-                       bitmap_destroy(mddev);
+                       md_bitmap_destroy(mddev);
                        mddev_resume(mddev);
                }
        }
@@ -7011,15 +7012,15 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                                mddev->bitmap_info.default_offset;
                        mddev->bitmap_info.space =
                                mddev->bitmap_info.default_space;
-                       bitmap = bitmap_create(mddev, -1);
+                       bitmap = md_bitmap_create(mddev, -1);
                        mddev_suspend(mddev);
                        if (!IS_ERR(bitmap)) {
                                mddev->bitmap = bitmap;
-                               rv = bitmap_load(mddev);
+                               rv = md_bitmap_load(mddev);
                        } else
                                rv = PTR_ERR(bitmap);
                        if (rv)
-                               bitmap_destroy(mddev);
+                               md_bitmap_destroy(mddev);
                        mddev_resume(mddev);
                } else {
                        /* remove the bitmap */
@@ -7044,7 +7045,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                                md_cluster_ops->leave(mddev);
                        }
                        mddev_suspend(mddev);
-                       bitmap_destroy(mddev);
+                       md_bitmap_destroy(mddev);
                        mddev_resume(mddev);
                        mddev->bitmap_info.offset = 0;
                }
@@ -7678,6 +7679,23 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
                resync -= atomic_read(&mddev->recovery_active);
 
        if (resync == 0) {
+               if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
+                       struct md_rdev *rdev;
+
+                       rdev_for_each(rdev, mddev)
+                               if (rdev->raid_disk >= 0 &&
+                                   !test_bit(Faulty, &rdev->flags) &&
+                                   rdev->recovery_offset != MaxSector &&
+                                   rdev->recovery_offset) {
+                                       seq_printf(seq, "\trecover=REMOTE");
+                                       return 1;
+                               }
+                       if (mddev->reshape_position != MaxSector)
+                               seq_printf(seq, "\treshape=REMOTE");
+                       else
+                               seq_printf(seq, "\tresync=REMOTE");
+                       return 1;
+               }
                if (mddev->recovery_cp < MaxSector) {
                        seq_printf(seq, "\tresync=PENDING");
                        return 1;
@@ -7907,7 +7925,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
                } else
                        seq_printf(seq, "\n       ");
 
-               bitmap_status(seq, mddev->bitmap);
+               md_bitmap_status(seq, mddev->bitmap);
 
                seq_printf(seq, "\n");
        }
@@ -8044,8 +8062,7 @@ static int is_mddev_idle(struct mddev *mddev, int init)
        rcu_read_lock();
        rdev_for_each_rcu(rdev, mddev) {
                struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
-               curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
-                             (int)part_stat_read(&disk->part0, sectors[1]) -
+               curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
                              atomic_read(&disk->sync_io);
                /* sync IO will cause sync_io to increase before the disk_stats
                 * as sync_io is counted when a request starts, and
@@ -8355,9 +8372,17 @@ void md_do_sync(struct md_thread *thread)
                else if (!mddev->bitmap)
                        j = mddev->recovery_cp;
 
-       } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+       } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
                max_sectors = mddev->resync_max_sectors;
-       else {
+               /*
+                * If the original node aborts reshaping then we continue the
+                * reshaping, so set j again to avoid restart reshape from the
+                * first beginning
+                */
+               if (mddev_is_clustered(mddev) &&
+                   mddev->reshape_position != MaxSector)
+                       j = mddev->reshape_position;
+       } else {
                /* recovery follows the physical size of devices */
                max_sectors = mddev->dev_sectors;
                j = MaxSector;
@@ -8608,8 +8633,10 @@ void md_do_sync(struct md_thread *thread)
                mddev_lock_nointr(mddev);
                md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
                mddev_unlock(mddev);
-               set_capacity(mddev->gendisk, mddev->array_sectors);
-               revalidate_disk(mddev->gendisk);
+               if (!mddev_is_clustered(mddev)) {
+                       set_capacity(mddev->gendisk, mddev->array_sectors);
+                       revalidate_disk(mddev->gendisk);
+               }
        }
 
        spin_lock(&mddev->lock);
@@ -8775,11 +8802,23 @@ static void md_start_sync(struct work_struct *ws)
  */
 void md_check_recovery(struct mddev *mddev)
 {
+       if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
+               /* Write superblock - thread that called mddev_suspend()
+                * holds reconfig_mutex for us.
+                */
+               set_bit(MD_UPDATING_SB, &mddev->flags);
+               smp_mb__after_atomic();
+               if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
+                       md_update_sb(mddev, 0);
+               clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
+               wake_up(&mddev->sb_wait);
+       }
+
        if (mddev->suspended)
                return;
 
        if (mddev->bitmap)
-               bitmap_daemon_work(mddev);
+               md_bitmap_daemon_work(mddev);
 
        if (signal_pending(current)) {
                if (mddev->pers->sync_request && !mddev->external) {
@@ -8916,7 +8955,7 @@ void md_check_recovery(struct mddev *mddev)
                                 * which has the bitmap stored on all devices.
                                 * So make sure all bitmap pages get written
                                 */
-                               bitmap_write_all(mddev->bitmap);
+                               md_bitmap_write_all(mddev->bitmap);
                        }
                        INIT_WORK(&mddev->del_work, md_start_sync);
                        queue_work(md_misc_wq, &mddev->del_work);
@@ -8934,16 +8973,6 @@ void md_check_recovery(struct mddev *mddev)
        unlock:
                wake_up(&mddev->sb_wait);
                mddev_unlock(mddev);
-       } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
-               /* Write superblock - thread that called mddev_suspend()
-                * holds reconfig_mutex for us.
-                */
-               set_bit(MD_UPDATING_SB, &mddev->flags);
-               smp_mb__after_atomic();
-               if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
-                       md_update_sb(mddev, 0);
-               clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
-               wake_up(&mddev->sb_wait);
        }
 }
 EXPORT_SYMBOL(md_check_recovery);
@@ -8951,6 +8980,8 @@ EXPORT_SYMBOL(md_check_recovery);
 void md_reap_sync_thread(struct mddev *mddev)
 {
        struct md_rdev *rdev;
+       sector_t old_dev_sectors = mddev->dev_sectors;
+       bool is_reshaped = false;
 
        /* resync has finished, collect result */
        md_unregister_thread(&mddev->sync_thread);
@@ -8965,8 +8996,11 @@ void md_reap_sync_thread(struct mddev *mddev)
                }
        }
        if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-           mddev->pers->finish_reshape)
+           mddev->pers->finish_reshape) {
                mddev->pers->finish_reshape(mddev);
+               if (mddev_is_clustered(mddev))
+                       is_reshaped = true;
+       }
 
        /* If array is no-longer degraded, then any saved_raid_disk
         * information must be scrapped.
@@ -8987,6 +9021,14 @@ void md_reap_sync_thread(struct mddev *mddev)
        clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
        clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
        clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+       /*
+        * We call md_cluster_ops->update_size here because sync_size could
+        * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
+        * so it is time to update size across cluster.
+        */
+       if (mddev_is_clustered(mddev) && is_reshaped
+                                     && !test_bit(MD_CLOSING, &mddev->flags))
+               md_cluster_ops->update_size(mddev, old_dev_sectors);
        wake_up(&resync_wait);
        /* flag recovery needed just to double check */
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -9164,7 +9206,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                if (ret)
                        pr_info("md-cluster: resize failed\n");
                else
-                       bitmap_update_sb(mddev->bitmap);
+                       md_bitmap_update_sb(mddev->bitmap);
        }
 
        /* Check for change of roles in the active devices */
@@ -9186,8 +9228,12 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
                }
 
                if (role != rdev2->raid_disk) {
-                       /* got activated */
-                       if (rdev2->raid_disk == -1 && role != 0xffff) {
+                       /*
+                        * got activated except reshape is happening.
+                        */
+                       if (rdev2->raid_disk == -1 && role != 0xffff &&
+                           !(le32_to_cpu(sb->feature_map) &
+                             MD_FEATURE_RESHAPE_ACTIVE)) {
                                rdev2->saved_raid_disk = role;
                                ret = remove_and_add_spares(mddev, rdev2);
                                pr_info("Activated spare: %s\n",
@@ -9213,6 +9259,30 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
        if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
                update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
 
+       /*
+        * Since mddev->delta_disks has already updated in update_raid_disks,
+        * so it is time to check reshape.
+        */
+       if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
+           (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
+               /*
+                * reshape is happening in the remote node, we need to
+                * update reshape_position and call start_reshape.
+                */
+               mddev->reshape_position = sb->reshape_position;
+               if (mddev->pers->update_reshape_pos)
+                       mddev->pers->update_reshape_pos(mddev);
+               if (mddev->pers->start_reshape)
+                       mddev->pers->start_reshape(mddev);
+       } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
+                  mddev->reshape_position != MaxSector &&
+                  !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
+               /* reshape is just done in another node. */
+               mddev->reshape_position = MaxSector;
+               if (mddev->pers->update_reshape_pos)
+                       mddev->pers->update_reshape_pos(mddev);
+       }
+
        /* Finally set the event to be up to date */
        mddev->events = le64_to_cpu(sb->events);
 }