diff --git a/block/blk-mq.c b/block/blk-mq.c
index fc60ed7e940ead5ae7d7332ee9f64b9ffe922aca..ce0f5f4ede70cdc55e102359601ff0149fc075e6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Block multiqueue core code
  *
@@ -143,13 +144,14 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
 
 void blk_freeze_queue_start(struct request_queue *q)
 {
-       int freeze_depth;
-
-       freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
-       if (freeze_depth == 1) {
+       mutex_lock(&q->mq_freeze_lock);
+       if (++q->mq_freeze_depth == 1) {
                percpu_ref_kill(&q->q_usage_counter);
+               mutex_unlock(&q->mq_freeze_lock);
                if (queue_is_mq(q))
                        blk_mq_run_hw_queues(q, false);
+       } else {
+               mutex_unlock(&q->mq_freeze_lock);
        }
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
@@ -198,14 +200,14 @@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
 
 void blk_mq_unfreeze_queue(struct request_queue *q)
 {
-       int freeze_depth;
-
-       freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
-       WARN_ON_ONCE(freeze_depth < 0);
-       if (!freeze_depth) {
+       mutex_lock(&q->mq_freeze_lock);
+       q->mq_freeze_depth--;
+       WARN_ON_ONCE(q->mq_freeze_depth < 0);
+       if (!q->mq_freeze_depth) {
                percpu_ref_resurrect(&q->q_usage_counter);
                wake_up_all(&q->mq_freeze_wq);
        }
+       mutex_unlock(&q->mq_freeze_lock);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
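[Note: the two hunks above replace the lock-free atomic_inc_return()/atomic_dec_return() handling of mq_freeze_depth with plain increments and decrements serialized by the new q->mq_freeze_lock mutex, so the kill and resurrect of q_usage_counter stay properly paired even when freezes and unfreezes run concurrently. Below is a rough userspace sketch of the same pattern, not part of the patch; struct and function names are invented for illustration.]

#include <pthread.h>
#include <stdio.h>

struct fake_queue {
        pthread_mutex_t freeze_lock;
        int freeze_depth;
};

void fake_freeze_start(struct fake_queue *q)
{
        pthread_mutex_lock(&q->freeze_lock);
        if (++q->freeze_depth == 1) {
                /* first freezer: the real code kills q_usage_counter here */
                pthread_mutex_unlock(&q->freeze_lock);
                printf("freeze: depth 0 -> 1\n");
        } else {
                pthread_mutex_unlock(&q->freeze_lock);
        }
}

void fake_unfreeze(struct fake_queue *q)
{
        pthread_mutex_lock(&q->freeze_lock);
        if (--q->freeze_depth == 0) {
                /* last unfreezer: the real code resurrects the ref and wakes waiters */
                printf("unfreeze: depth 1 -> 0\n");
        }
        pthread_mutex_unlock(&q->freeze_lock);
}

int main(void)
{
        struct fake_queue q = { PTHREAD_MUTEX_INITIALIZER, 0 };

        fake_freeze_start(&q);
        fake_freeze_start(&q);  /* nested freeze, no transition */
        fake_unfreeze(&q);
        fake_unfreeze(&q);      /* depth back to zero, queue usable again */
        return 0;
}

[As in the hunk, the first-freezer branch drops the lock before doing further work: the kernel code unlocks mq_freeze_lock before calling blk_mq_run_hw_queues().]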
@@ -2062,7 +2064,7 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                list_del_init(&page->lru);
                /*
                 * Remove kmemleak object previously allocated in
-                * blk_mq_init_rq_map().
+                * blk_mq_alloc_rqs().
                 */
                kmemleak_free(page_address(page));
                __free_pages(page, page->private);
@@ -2267,12 +2269,11 @@ static void blk_mq_exit_hctx(struct request_queue *q,
        if (set->ops->exit_hctx)
                set->ops->exit_hctx(hctx, hctx_idx);
 
-       if (hctx->flags & BLK_MQ_F_BLOCKING)
-               cleanup_srcu_struct(hctx->srcu);
-
        blk_mq_remove_cpuhp(hctx);
-       blk_free_flush_queue(hctx->fq);
-       sbitmap_free(&hctx->ctx_map);
+
+       spin_lock(&q->unused_hctx_lock);
+       list_add(&hctx->hctx_list, &q->unused_hctx_list);
+       spin_unlock(&q->unused_hctx_lock);
 }
 
 static void blk_mq_exit_hw_queues(struct request_queue *q,
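[Note: this hunk stops tearing the hardware context down in blk_mq_exit_hctx(). Instead the hctx is parked on the new q->unused_hctx_list under q->unused_hctx_lock, so it can later be reused by blk_mq_alloc_and_init_hctx() (see below) or released for good in blk_mq_release(). A minimal userspace sketch of the parking side follows; all names are made up and a singly linked list stands in for the kernel list head.]

#include <pthread.h>

struct fake_hctx {
        int numa_node;
        struct fake_hctx *next;
};

struct fake_queue {
        pthread_mutex_t unused_lock;
        struct fake_hctx *unused_list;  /* parked, not freed */
};

/* exit path: park the context for possible reuse instead of freeing it */
void fake_exit_hctx(struct fake_queue *q, struct fake_hctx *hctx)
{
        pthread_mutex_lock(&q->unused_lock);
        hctx->next = q->unused_list;
        q->unused_list = hctx;
        pthread_mutex_unlock(&q->unused_lock);
}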
@@ -2289,15 +2290,65 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
        }
 }
 
+static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
+{
+       int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
+
+       BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
+                          __alignof__(struct blk_mq_hw_ctx)) !=
+                    sizeof(struct blk_mq_hw_ctx));
+
+       if (tag_set->flags & BLK_MQ_F_BLOCKING)
+               hw_ctx_size += sizeof(struct srcu_struct);
+
+       return hw_ctx_size;
+}
+
 static int blk_mq_init_hctx(struct request_queue *q,
                struct blk_mq_tag_set *set,
                struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
 {
-       int node;
+       hctx->queue_num = hctx_idx;
+
+       cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
+
+       hctx->tags = set->tags[hctx_idx];
+
+       if (set->ops->init_hctx &&
+           set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
+               goto unregister_cpu_notifier;
+
+       if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx,
+                               hctx->numa_node))
+               goto exit_hctx;
+       return 0;
+
+ exit_hctx:
+       if (set->ops->exit_hctx)
+               set->ops->exit_hctx(hctx, hctx_idx);
+ unregister_cpu_notifier:
+       blk_mq_remove_cpuhp(hctx);
+       return -1;
+}
+
+static struct blk_mq_hw_ctx *
+blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
+               int node)
+{
+       struct blk_mq_hw_ctx *hctx;
+       gfp_t gfp = GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY;
 
-       node = hctx->numa_node;
+       hctx = kzalloc_node(blk_mq_hw_ctx_size(set), gfp, node);
+       if (!hctx)
+               goto fail_alloc_hctx;
+
+       if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node))
+               goto free_hctx;
+
+       atomic_set(&hctx->nr_active, 0);
        if (node == NUMA_NO_NODE)
-               node = hctx->numa_node = set->numa_node;
+               node = set->numa_node;
+       hctx->numa_node = node;
 
        INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
        spin_lock_init(&hctx->lock);
@@ -2305,58 +2356,47 @@ static int blk_mq_init_hctx(struct request_queue *q,
        hctx->queue = q;
        hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
-       cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
-
-       hctx->tags = set->tags[hctx_idx];
+       INIT_LIST_HEAD(&hctx->hctx_list);
 
        /*
         * Allocate space for all possible cpus to avoid allocation at
         * runtime
         */
        hctx->ctxs = kmalloc_array_node(nr_cpu_ids, sizeof(void *),
-                       GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node);
+                       gfp, node);
        if (!hctx->ctxs)
-               goto unregister_cpu_notifier;
+               goto free_cpumask;
 
        if (sbitmap_init_node(&hctx->ctx_map, nr_cpu_ids, ilog2(8),
-                               GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY, node))
+                               gfp, node))
                goto free_ctxs;
-
        hctx->nr_ctx = 0;
 
        spin_lock_init(&hctx->dispatch_wait_lock);
        init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
        INIT_LIST_HEAD(&hctx->dispatch_wait.entry);
 
-       if (set->ops->init_hctx &&
-           set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
-               goto free_bitmap;
-
        hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size,
-                       GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY);
+                       gfp);
        if (!hctx->fq)
-               goto exit_hctx;
-
-       if (blk_mq_init_request(set, hctx->fq->flush_rq, hctx_idx, node))
-               goto free_fq;
+               goto free_bitmap;
 
        if (hctx->flags & BLK_MQ_F_BLOCKING)
                init_srcu_struct(hctx->srcu);
+       blk_mq_hctx_kobj_init(hctx);
 
-       return 0;
+       return hctx;
 
- free_fq:
-       blk_free_flush_queue(hctx->fq);
- exit_hctx:
-       if (set->ops->exit_hctx)
-               set->ops->exit_hctx(hctx, hctx_idx);
  free_bitmap:
        sbitmap_free(&hctx->ctx_map);
  free_ctxs:
        kfree(hctx->ctxs);
- unregister_cpu_notifier:
-       blk_mq_remove_cpuhp(hctx);
-       return -1;
+ free_cpumask:
+       free_cpumask_var(hctx->cpumask);
+ free_hctx:
+       kfree(hctx);
+ fail_alloc_hctx:
+       return NULL;
 }
 
 static void blk_mq_init_cpu_queues(struct request_queue *q,
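[Note: these hunks split the old blk_mq_init_hctx() in two. blk_mq_alloc_hctx() now does only memory allocation (hctx, cpumask, ctxs, ctx_map, flush queue, srcu init), while blk_mq_init_hctx() does the per-queue registration work (CPU hotplug instance, tags, the driver's ->init_hctx(), flush_rq init), so an already-allocated hctx can be reused. blk_mq_hw_ctx_size() itself is only moved up in the file: it sizes the allocation so that an srcu_struct can be appended when BLK_MQ_F_BLOCKING is set, which is why the BUILD_BUG_ON insists that srcu is the last member. Below is a standalone sketch of that optional-trailing-member sizing trick, with invented names, not the kernel definitions.]

#include <stddef.h>
#include <stdlib.h>

#define FAKE_F_BLOCKING 0x1

struct fake_srcu { int state; };

struct fake_hctx {
        unsigned long flags;
        /* must stay last: the optional srcu area is appended past here */
        struct fake_srcu srcu[];
};

size_t fake_hctx_size(unsigned long flags)
{
        size_t size = sizeof(struct fake_hctx);

        if (flags & FAKE_F_BLOCKING)
                size += sizeof(struct fake_srcu);
        return size;
}

struct fake_hctx *fake_alloc_hctx(unsigned long flags)
{
        /* one allocation covers the struct plus the optional srcu area */
        struct fake_hctx *hctx = calloc(1, fake_hctx_size(flags));

        if (hctx)
                hctx->flags = flags;
        return hctx;
}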
@@ -2631,13 +2671,17 @@ static int blk_mq_alloc_ctxs(struct request_queue *q)
  */
 void blk_mq_release(struct request_queue *q)
 {
-       struct blk_mq_hw_ctx *hctx;
-       unsigned int i;
+       struct blk_mq_hw_ctx *hctx, *next;
+       int i;
 
-       /* hctx kobj stays in hctx */
-       queue_for_each_hw_ctx(q, hctx, i) {
-               if (!hctx)
-                       continue;
+       cancel_delayed_work_sync(&q->requeue_work);
+
+       queue_for_each_hw_ctx(q, hctx, i)
+               WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
+
+       /* all hctx are in .unused_hctx_list now */
+       list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) {
+               list_del_init(&hctx->hctx_list);
                kobject_put(&hctx->kobj);
        }
 
@@ -2700,51 +2744,38 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
 }
 EXPORT_SYMBOL(blk_mq_init_sq_queue);
 
-static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
-{
-       int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
-
-       BUILD_BUG_ON(ALIGN(offsetof(struct blk_mq_hw_ctx, srcu),
-                          __alignof__(struct blk_mq_hw_ctx)) !=
-                    sizeof(struct blk_mq_hw_ctx));
-
-       if (tag_set->flags & BLK_MQ_F_BLOCKING)
-               hw_ctx_size += sizeof(struct srcu_struct);
-
-       return hw_ctx_size;
-}
-
 static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
                struct blk_mq_tag_set *set, struct request_queue *q,
                int hctx_idx, int node)
 {
-       struct blk_mq_hw_ctx *hctx;
+       struct blk_mq_hw_ctx *hctx = NULL, *tmp;
 
-       hctx = kzalloc_node(blk_mq_hw_ctx_size(set),
-                       GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-                       node);
-       if (!hctx)
-               return NULL;
-
-       if (!zalloc_cpumask_var_node(&hctx->cpumask,
-                               GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-                               node)) {
-               kfree(hctx);
-               return NULL;
+       /* reuse dead hctx first */
+       spin_lock(&q->unused_hctx_lock);
+       list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) {
+               if (tmp->numa_node == node) {
+                       hctx = tmp;
+                       break;
+               }
        }
+       if (hctx)
+               list_del_init(&hctx->hctx_list);
+       spin_unlock(&q->unused_hctx_lock);
 
-       atomic_set(&hctx->nr_active, 0);
-       hctx->numa_node = node;
-       hctx->queue_num = hctx_idx;
+       if (!hctx)
+               hctx = blk_mq_alloc_hctx(q, set, node);
+       if (!hctx)
+               goto fail;
 
-       if (blk_mq_init_hctx(q, set, hctx, hctx_idx)) {
-               free_cpumask_var(hctx->cpumask);
-               kfree(hctx);
-               return NULL;
-       }
-       blk_mq_hctx_kobj_init(hctx);
+       if (blk_mq_init_hctx(q, set, hctx, hctx_idx))
+               goto free_hctx;
 
        return hctx;
+
+ free_hctx:
+       kobject_put(&hctx->kobj);
+ fail:
+       return NULL;
 }
 
 static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
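[Note: with the hctx lifetime decoupled from a single queue map, blk_mq_alloc_and_init_hctx() first tries to pick a previously parked context with a matching NUMA node off q->unused_hctx_list and only falls back to blk_mq_alloc_hctx() when none is found; on init failure the context is dropped via kobject_put() rather than freed directly. A userspace sketch of the reuse-first lookup follows, repeating the made-up types from the sketch after the blk_mq_exit_hctx hunk so this block stands alone.]

#include <pthread.h>
#include <stdlib.h>

struct fake_hctx {
        int numa_node;
        struct fake_hctx *next;
};

struct fake_queue {
        pthread_mutex_t unused_lock;
        struct fake_hctx *unused_list;  /* contexts parked by the exit path */
};

/* prefer a parked context on the requested NUMA node, else allocate fresh */
struct fake_hctx *fake_get_hctx(struct fake_queue *q, int node)
{
        struct fake_hctx **pp, *hctx = NULL;

        pthread_mutex_lock(&q->unused_lock);
        for (pp = &q->unused_list; *pp; pp = &(*pp)->next) {
                if ((*pp)->numa_node == node) {
                        hctx = *pp;
                        *pp = hctx->next;       /* unlink the match */
                        break;
                }
        }
        pthread_mutex_unlock(&q->unused_lock);

        if (!hctx) {
                hctx = calloc(1, sizeof(*hctx));
                if (hctx)
                        hctx->numa_node = node;
        }
        return hctx;
}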
@@ -2770,10 +2801,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 
                hctx = blk_mq_alloc_and_init_hctx(set, q, i, node);
                if (hctx) {
-                       if (hctxs[i]) {
+                       if (hctxs[i])
                                blk_mq_exit_hctx(q, set, hctxs[i], i);
-                               kobject_put(&hctxs[i]->kobj);
-                       }
                        hctxs[i] = hctx;
                } else {
                        if (hctxs[i])
@@ -2804,9 +2833,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
                        if (hctx->tags)
                                blk_mq_free_map_and_requests(set, j);
                        blk_mq_exit_hctx(q, set, hctx, j);
-                       kobject_put(&hctx->kobj);
                        hctxs[j] = NULL;
-
                }
        }
        mutex_unlock(&q->sysfs_lock);
@@ -2838,7 +2865,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
                goto err_exit;
 
        if (blk_mq_alloc_ctxs(q))
-               goto err_exit;
+               goto err_poll;
 
        /* init q->mq_kobj and sw queues' kobjects */
        blk_mq_sysfs_init(q);
@@ -2849,6 +2876,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        if (!q->queue_hw_ctx)
                goto err_sys_init;
 
+       INIT_LIST_HEAD(&q->unused_hctx_list);
+       spin_lock_init(&q->unused_hctx_lock);
+
        blk_mq_realloc_hw_ctxs(set, q);
        if (!q->nr_hw_queues)
                goto err_hctxs;
@@ -2899,13 +2929,17 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        kfree(q->queue_hw_ctx);
 err_sys_init:
        blk_mq_sysfs_deinit(q);
+err_poll:
+       blk_stat_free_callback(q->poll_cb);
+       q->poll_cb = NULL;
 err_exit:
        q->mq_ops = NULL;
        return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(blk_mq_init_allocated_queue);
 
-void blk_mq_free_queue(struct request_queue *q)
+/* tags can _not_ be used after returning from blk_mq_exit_queue */
+void blk_mq_exit_queue(struct request_queue *q)
 {
        struct blk_mq_tag_set   *set = q->tag_set;