Merge tag 'fuse-update-4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/mszered...

[linux.git] / drivers / md / raid10.c
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index d6f7978b4449e92aba522035941ff51137e2ce38..b98e746e7fc4fd05fb8c0eaf2118f9f6a4f778d3 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -25,6 +25,7 @@
  #include <linux/seq_file.h>
  #include <linux/ratelimit.h>
  #include <linux/kthread.h>
+#include <linux/raid/md_p.h>
  #include <trace/events/block.h>
  #include "md.h"
  #include "raid10.h"
@@ -1808,6 +1809,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
                 first = last = rdev->raid_disk;
  
         if (rdev->saved_raid_disk >= first &&
+           rdev->saved_raid_disk < conf->geo.raid_disks &&
             conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
                 mirror = rdev->saved_raid_disk;
         else
@@ -3079,6 +3081,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         sector_t sect;
                         int must_sync;
                         int any_working;
+                       int need_recover = 0;
+                       int need_replace = 0;
                         struct raid10_info *mirror = &conf->mirrors[i];
                         struct md_rdev *mrdev, *mreplace;
  
@@ -3086,11 +3090,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                         mrdev = rcu_dereference(mirror->rdev);
                         mreplace = rcu_dereference(mirror->replacement);
  
-                       if ((mrdev == NULL ||
-                            test_bit(Faulty, &mrdev->flags) ||
-                            test_bit(In_sync, &mrdev->flags)) &&
-                           (mreplace == NULL ||
-                            test_bit(Faulty, &mreplace->flags))) {
+                       if (mrdev != NULL &&
+                           !test_bit(Faulty, &mrdev->flags) &&
+                           !test_bit(In_sync, &mrdev->flags))
+                               need_recover = 1;
+                       if (mreplace != NULL &&
+                           !test_bit(Faulty, &mreplace->flags))
+                               need_replace = 1;
+
+                       if (!need_recover && !need_replace) {
                                 rcu_read_unlock();
                                 continue;
                         }
@@ -3213,7 +3221,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                 r10_bio->devs[1].devnum = i;
                                 r10_bio->devs[1].addr = to_addr;
  
-                               if (!test_bit(In_sync, &mrdev->flags)) {
+                               if (need_recover) {
                                         bio = r10_bio->devs[1].bio;
                                         bio->bi_next = biolist;
                                         biolist = bio;
@@ -3230,16 +3238,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
                                 bio = r10_bio->devs[1].repl_bio;
                                 if (bio)
                                         bio->bi_end_io = NULL;
-                               /* Note: if mreplace != NULL, then bio
+                               /* Note: if need_replace, then bio
                                  * cannot be NULL as r10buf_pool_alloc will
                                  * have allocated it.
-                                * So the second test here is pointless.
-                                * But it keeps semantic-checkers happy, and
-                                * this comment keeps human reviewers
-                                * happy.
                                  */
-                               if (mreplace == NULL || bio == NULL ||
-                                   test_bit(Faulty, &mreplace->flags))
+                               if (!need_replace)
                                         break;
                                 bio->bi_next = biolist;
                                 biolist = bio;
@@ -4286,12 +4289,46 @@ static int raid10_start_reshape(struct mddev *mddev)
         spin_unlock_irq(&conf->device_lock);
  
         if (mddev->delta_disks && mddev->bitmap) {
-               ret = md_bitmap_resize(mddev->bitmap,
-                                      raid10_size(mddev, 0, conf->geo.raid_disks),
-                                      0, 0);
+               struct mdp_superblock_1 *sb = NULL;
+               sector_t oldsize, newsize;
+
+               oldsize = raid10_size(mddev, 0, 0);
+               newsize = raid10_size(mddev, 0, conf->geo.raid_disks);
+
+               if (!mddev_is_clustered(mddev)) {
+                       ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
+                       if (ret)
+                               goto abort;
+                       else
+                               goto out;
+               }
+
+               rdev_for_each(rdev, mddev) {
+                       if (rdev->raid_disk > -1 &&
+                           !test_bit(Faulty, &rdev->flags))
+                               sb = page_address(rdev->sb_page);
+               }
+
+               /*
+                * some node is already performing reshape, and no need to
+                * call md_bitmap_resize again since it should be called when
+                * receiving BITMAP_RESIZE msg
+                */
+               if ((sb && (le32_to_cpu(sb->feature_map) &
+                           MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize))
+                       goto out;
+
+               ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
                 if (ret)
                         goto abort;
+
+               ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
+               if (ret) {
+                       md_bitmap_resize(mddev->bitmap, oldsize, 0, 0);
+                       goto abort;
+               }
         }
+out:
         if (mddev->delta_disks > 0) {
                 rdev_for_each(rdev, mddev)
                         if (rdev->raid_disk < 0 &&
@@ -4568,6 +4605,32 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
         r10_bio->master_bio = read_bio;
         r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
  
+       /*
+        * Broadcast RESYNC message to other nodes, so all nodes would not
+        * write to the region to avoid conflict.
+       */
+       if (mddev_is_clustered(mddev) && conf->cluster_sync_high <= sector_nr) {
+               struct mdp_superblock_1 *sb = NULL;
+               int sb_reshape_pos = 0;
+
+               conf->cluster_sync_low = sector_nr;
+               conf->cluster_sync_high = sector_nr + CLUSTER_RESYNC_WINDOW_SECTORS;
+               sb = page_address(rdev->sb_page);
+               if (sb) {
+                       sb_reshape_pos = le64_to_cpu(sb->reshape_position);
+                       /*
+                        * Set cluster_sync_low again if next address for array
+                        * reshape is less than cluster_sync_low. Since we can't
+                        * update cluster_sync_low until it has finished reshape.
+                        */
+                       if (sb_reshape_pos < conf->cluster_sync_low)
+                               conf->cluster_sync_low = sb_reshape_pos;
+               }
+
+               md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
+                                                         conf->cluster_sync_high);
+       }
+
         /* Now find the locations in the new layout */
         __raid10_find_phys(&conf->geo, r10_bio);
  
@@ -4719,6 +4782,19 @@ static void end_reshape(struct r10conf *conf)
         conf->fullsync = 0;
  }
  
+static void raid10_update_reshape_pos(struct mddev *mddev)
+{
+       struct r10conf *conf = mddev->private;
+       sector_t lo, hi;
+
+       md_cluster_ops->resync_info_get(mddev, &lo, &hi);
+       if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo))
+           || mddev->reshape_position == MaxSector)
+               conf->reshape_progress = mddev->reshape_position;
+       else
+               WARN_ON_ONCE(1);
+}
+
  static int handle_reshape_read_error(struct mddev *mddev,
                                      struct r10bio *r10_bio)
  {
@@ -4887,6 +4963,7 @@ static struct md_personality raid10_personality =
         .check_reshape  = raid10_check_reshape,
         .start_reshape  = raid10_start_reshape,
         .finish_reshape = raid10_finish_reshape,
+       .update_reshape_pos = raid10_update_reshape_pos,
         .congested      = raid10_congested,
  };