asedeno.scripts.mit.edu Git - linux.git/commitdiff
rbd: introduce image request state machine
author Ilya Dryomov <idryomov@gmail.com>
Thu, 16 May 2019 13:06:56 +0000 (15:06 +0200)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 8 Jul 2019 12:01:44 +0000 (14:01 +0200)
Make it possible to schedule image requests on a workqueue.  This fixes
parent chain recursion added in the previous commit and lays the ground
for exclusive lock wait/wake improvements.

The "wait for pending subrequests and report first nonzero result" code
is generalized so that it can be used by the object request state machine.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
drivers/block/rbd.c

index 9c6be82353c018765cd68cd2f6947f4f0fce6585..51dd1b99c2428920f40ac5643d717a06ab45588d 100644 (file)
@@ -203,6 +203,11 @@ struct rbd_client {
        struct list_head        node;
 };
 
+struct pending_result {
+       int                     result;         /* first nonzero result */
+       int                     num_pending;    /* results still awaited */
+};
+
 struct rbd_img_request;
 
 enum obj_request_type {
@@ -295,11 +300,18 @@ enum img_req_flags {
        IMG_REQ_LAYERED,        /* ENOENT handling: normal = 0, layered = 1 */
 };
 
+enum rbd_img_state {
+       RBD_IMG_START = 1,              /* object requests not yet issued */
+       __RBD_IMG_OBJECT_REQUESTS,      /* object requests still pending */
+       RBD_IMG_OBJECT_REQUESTS,        /* no object requests pending */
+};
+
 struct rbd_img_request {
        struct rbd_device       *rbd_dev;
        enum obj_operation_type op_type;
        enum obj_request_type   data_type;
        unsigned long           flags;
+       enum rbd_img_state      state;
        union {
                u64                     snap_id;        /* for reads */
                struct ceph_snap_context *snapc;        /* for writes */
@@ -308,12 +320,13 @@ struct rbd_img_request {
                struct request          *rq;            /* block request */
                struct rbd_obj_request  *obj_request;   /* obj req initiator */
        };
-       spinlock_t              completion_lock;
-       int                     result; /* first nonzero obj_request result */
 
        struct list_head        object_extents; /* obj_req.ex structs */
-       u32                     pending_count;
 
+       struct mutex            state_mutex;
+       struct pending_result   pending;
+       struct work_struct      work;
+       int                     work_result;
        struct kref             kref;
 };
 
@@ -592,6 +605,23 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
                u64 *snap_features);
 
 static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result);
+static void rbd_img_handle_request(struct rbd_img_request *img_req, int result);
+
+/*
+ * Return true and the first nonzero result in *result once nothing is pending.
+ */
+static bool pending_result_dec(struct pending_result *pending, int *result)
+{
+       rbd_assert(pending->num_pending > 0);
+
+       if (*result && !pending->result) /* keep only the first nonzero result */
+               pending->result = *result;
+       if (--pending->num_pending) /* others still outstanding */
+               return false;
+
+       *result = pending->result;
+       return true;
+}
 
 static int rbd_open(struct block_device *bdev, fmode_t mode)
 {
@@ -1350,13 +1380,6 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
        kref_put(&obj_request->kref, rbd_obj_request_destroy);
 }
 
-static void rbd_img_request_get(struct rbd_img_request *img_request)
-{
-       dout("%s: img %p (was %d)\n", __func__, img_request,
-            kref_read(&img_request->kref));
-       kref_get(&img_request->kref);
-}
-
 static void rbd_img_request_destroy(struct kref *kref);
 static void rbd_img_request_put(struct rbd_img_request *img_request)
 {
@@ -1373,7 +1396,6 @@ static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
 
        /* Image request now owns object's original reference */
        obj_request->img_request = img_request;
-       img_request->pending_count++;
        dout("%s: img %p obj %p\n", __func__, img_request, obj_request);
 }
 
@@ -1694,8 +1716,8 @@ static struct rbd_img_request *rbd_img_request_create(
        if (rbd_dev_parent_get(rbd_dev))
                img_request_layered_set(img_request);
 
-       spin_lock_init(&img_request->completion_lock);
        INIT_LIST_HEAD(&img_request->object_extents);
+       mutex_init(&img_request->state_mutex);
        kref_init(&img_request->kref);
 
        dout("%s: rbd_dev %p %s -> img %p\n", __func__, rbd_dev,
@@ -2061,7 +2083,6 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
                if (ret < 0)
                        return ret;
                if (ret > 0) {
-                       img_req->pending_count--;
                        rbd_img_obj_request_del(img_req, obj_req);
                        continue;
                }
@@ -2071,6 +2092,7 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
                        return ret;
        }
 
+       img_req->state = RBD_IMG_START;
        return 0;
 }
 
@@ -2359,17 +2381,19 @@ static int rbd_img_fill_from_bvecs(struct rbd_img_request *img_req,
                                         &it);
 }
 
-static void rbd_img_request_submit(struct rbd_img_request *img_request)
+static void rbd_img_handle_request_work(struct work_struct *work)
 {
-       struct rbd_obj_request *obj_request;
+       struct rbd_img_request *img_req =
+           container_of(work, struct rbd_img_request, work);
 
-       dout("%s: img %p\n", __func__, img_request);
-
-       rbd_img_request_get(img_request);
-       for_each_obj_request(img_request, obj_request)
-               rbd_obj_handle_request(obj_request, 0);
+       rbd_img_handle_request(img_req, img_req->work_result);
+}
 
-       rbd_img_request_put(img_request);
+static void rbd_img_schedule(struct rbd_img_request *img_req, int result)
+{
+       INIT_WORK(&img_req->work, rbd_img_handle_request_work);
+       img_req->work_result = result;
+       queue_work(rbd_wq, &img_req->work);
 }
 
 static int rbd_obj_read_object(struct rbd_obj_request *obj_req)
@@ -2421,7 +2445,8 @@ static int rbd_obj_read_from_parent(struct rbd_obj_request *obj_req)
                return ret;
        }
 
-       rbd_img_request_submit(child_img_req);
+       /* avoid parent chain recursion */
+       rbd_img_schedule(child_img_req, 0);
        return 0;
 }
 
@@ -2756,6 +2781,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req,
                                     int *result)
 {
        struct rbd_img_request *img_req = obj_req->img_request;
+       struct rbd_device *rbd_dev = img_req->rbd_dev;
        bool done;
 
        mutex_lock(&obj_req->state_mutex);
@@ -2765,59 +2791,113 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req,
                done = rbd_obj_advance_write(obj_req, result);
        mutex_unlock(&obj_req->state_mutex);
 
+       if (done && *result) {
+               rbd_assert(*result < 0);
+               rbd_warn(rbd_dev, "%s at objno %llu %llu~%llu result %d",
+                        obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
+                        obj_req->ex.oe_off, obj_req->ex.oe_len, *result);
+       }
        return done;
 }
 
-static void rbd_obj_end_request(struct rbd_obj_request *obj_req, int result)
+/*
+ * This is open-coded in rbd_img_handle_request() to avoid parent chain
+ * recursion.
+ */
+static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result)
+{
+       if (__rbd_obj_handle_request(obj_req, &result))
+               rbd_img_handle_request(obj_req->img_request, result);
+}
+
+static void rbd_img_object_requests(struct rbd_img_request *img_req)
 {
-       struct rbd_img_request *img_req = obj_req->img_request;
+       struct rbd_obj_request *obj_req;
 
-       rbd_assert(result <= 0);
-       if (!result)
-               return;
+       rbd_assert(!img_req->pending.result && !img_req->pending.num_pending);
+
+       for_each_obj_request(img_req, obj_req) {
+               int result = 0;
 
-       rbd_warn(img_req->rbd_dev, "%s at objno %llu %llu~%llu result %d",
-                obj_op_name(img_req->op_type), obj_req->ex.oe_objno,
-                obj_req->ex.oe_off, obj_req->ex.oe_len, result);
-       if (!img_req->result)
-               img_req->result = result;
+               if (__rbd_obj_handle_request(obj_req, &result)) {
+                       if (result) {
+                               img_req->pending.result = result;
+                               return;
+                       }
+               } else {
+                       img_req->pending.num_pending++;
+               }
+       }
 }
 
-static void rbd_img_end_request(struct rbd_img_request *img_req)
+static bool rbd_img_advance(struct rbd_img_request *img_req, int *result)
 {
-       rbd_assert(!test_bit(IMG_REQ_CHILD, &img_req->flags));
+again:
+       switch (img_req->state) {
+       case RBD_IMG_START:
+               rbd_assert(!*result);
 
-       blk_mq_end_request(img_req->rq,
-                          errno_to_blk_status(img_req->result));
-       rbd_img_request_put(img_req);
+               rbd_img_object_requests(img_req);
+               if (!img_req->pending.num_pending) {
+                       *result = img_req->pending.result;
+                       img_req->state = RBD_IMG_OBJECT_REQUESTS;
+                       goto again;
+               }
+               img_req->state = __RBD_IMG_OBJECT_REQUESTS;
+               return false;
+       case __RBD_IMG_OBJECT_REQUESTS:
+               if (!pending_result_dec(&img_req->pending, result))
+                       return false;
+               /* fall through */
+       case RBD_IMG_OBJECT_REQUESTS:
+               return true;
+       default:
+               BUG();
+       }
 }
 
-static void rbd_obj_handle_request(struct rbd_obj_request *obj_req, int result)
+/*
+ * Return true if @img_req is completed.
+ */
+static bool __rbd_img_handle_request(struct rbd_img_request *img_req,
+                                    int *result)
 {
-       struct rbd_img_request *img_req;
+       struct rbd_device *rbd_dev = img_req->rbd_dev;
+       bool done;
 
-again:
-       if (!__rbd_obj_handle_request(obj_req, &result))
-               return;
+       mutex_lock(&img_req->state_mutex);
+       done = rbd_img_advance(img_req, result);
+       mutex_unlock(&img_req->state_mutex);
 
-       img_req = obj_req->img_request;
-       spin_lock(&img_req->completion_lock);
-       rbd_obj_end_request(obj_req, result);
-       rbd_assert(img_req->pending_count);
-       if (--img_req->pending_count) {
-               spin_unlock(&img_req->completion_lock);
-               return;
+       if (done && *result) {
+               rbd_assert(*result < 0);
+               rbd_warn(rbd_dev, "%s%s result %d",
+                     test_bit(IMG_REQ_CHILD, &img_req->flags) ? "child " : "",
+                     obj_op_name(img_req->op_type), *result);
        }
+       return done;
+}
+
+static void rbd_img_handle_request(struct rbd_img_request *img_req, int result)
+{
+again:
+       if (!__rbd_img_handle_request(img_req, &result))
+               return;
 
-       spin_unlock(&img_req->completion_lock);
-       rbd_assert(img_req->result <= 0);
        if (test_bit(IMG_REQ_CHILD, &img_req->flags)) {
-               obj_req = img_req->obj_request;
-               result = img_req->result;
+               struct rbd_obj_request *obj_req = img_req->obj_request;
+
                rbd_img_request_put(img_req);
-               goto again;
+               if (__rbd_obj_handle_request(obj_req, &result)) {
+                       img_req = obj_req->img_request;
+                       goto again;
+               }
+       } else {
+               struct request *rq = img_req->rq;
+
+               rbd_img_request_put(img_req);
+               blk_mq_end_request(rq, errno_to_blk_status(result));
        }
-       rbd_img_end_request(img_req);
 }
 
 static const struct rbd_client_id rbd_empty_cid;
@@ -3933,10 +4013,10 @@ static void rbd_queue_workfn(struct work_struct *work)
        else
                result = rbd_img_fill_from_bio(img_request, offset, length,
                                               rq->bio);
-       if (result || !img_request->pending_count)
+       if (result)
                goto err_img_request;
 
-       rbd_img_request_submit(img_request);
+       rbd_img_handle_request(img_request, 0);
        if (must_be_locked)
                up_read(&rbd_dev->lock_rwsem);
        return;