blk-mq: introduce blk_mq_hw_queue_first_cpu() to figure out first cpu
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 16e83e6df404a24fd1a59baeb77b9c7b7cc9890c..e05bd10d5c84423bffe894c43848617258deff3b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -194,11 +194,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  */
 void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       queue_flag_set(QUEUE_FLAG_QUIESCED, q);
-       spin_unlock_irqrestore(q->queue_lock, flags);
+       blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
 
@@ -239,11 +235,7 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
  */
 void blk_mq_unquiesce_queue(struct request_queue *q)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(q->queue_lock, flags);
-       queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
-       spin_unlock_irqrestore(q->queue_lock, flags);
+       blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
 
        /* dispatch requests which are inserted during quiescing */
        blk_mq_run_hw_queues(q, true);
@@ -986,9 +978,9 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
        struct blk_mq_hw_ctx *hctx = flush_data->hctx;
        struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
 
-       sbitmap_clear_bit(sb, bitnr);
        spin_lock(&ctx->lock);
        list_splice_tail_init(&ctx->rq_list, flush_data->list);
+       sbitmap_clear_bit(sb, bitnr);
        spin_unlock(&ctx->lock);
        return true;
 }
@@ -1188,7 +1180,12 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                struct blk_mq_queue_data bd;
 
                rq = list_first_entry(list, struct request, queuelist);
-               if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
+
+               hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+               if (!got_budget && !blk_mq_get_dispatch_budget(hctx))
+                       break;
+
+               if (!blk_mq_get_driver_tag(rq, NULL, false)) {
                        /*
                         * The initial allocation attempt failed, so we need to
                         * rerun the hardware queue when a tag is freed. The
@@ -1197,8 +1194,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                         * we'll re-run it below.
                         */
                        if (!blk_mq_mark_tag_wait(&hctx, rq)) {
-                               if (got_budget)
-                                       blk_mq_put_dispatch_budget(hctx);
+                               blk_mq_put_dispatch_budget(hctx);
                                /*
                                 * For non-shared tags, the RESTART check
                                 * will suffice.
@@ -1209,11 +1205,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
                        }
                }
 
-               if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) {
-                       blk_mq_put_driver_tag(rq);
-                       break;
-               }
-
                list_del_init(&rq->queuelist);
 
                bd.rq = rq;
@@ -1344,6 +1335,15 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
        hctx_unlock(hctx, srcu_idx);
 }
 
+static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx)
+{
+       int cpu = cpumask_first_and(hctx->cpumask, cpu_online_mask);
+
+       if (cpu >= nr_cpu_ids)
+               cpu = cpumask_first(hctx->cpumask);
+       return cpu;
+}
+
 /*
  * It'd be great if the workqueue API had a way to pass
  * in a mask and had some smarts for more clever placement.
@@ -1353,26 +1353,17 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 {
        bool tried = false;
+       int next_cpu = hctx->next_cpu;
 
        if (hctx->queue->nr_hw_queues == 1)
                return WORK_CPU_UNBOUND;
 
        if (--hctx->next_cpu_batch <= 0) {
-               int next_cpu;
 select_cpu:
-               next_cpu = cpumask_next_and(hctx->next_cpu, hctx->cpumask,
+               next_cpu = cpumask_next_and(next_cpu, hctx->cpumask,
                                cpu_online_mask);
                if (next_cpu >= nr_cpu_ids)
-                       next_cpu = cpumask_first_and(hctx->cpumask,cpu_online_mask);
-
-               /*
-                * No online CPU is found, so have to make sure hctx->next_cpu
-                * is set correctly for not breaking workqueue.
-                */
-               if (next_cpu >= nr_cpu_ids)
-                       hctx->next_cpu = cpumask_first(hctx->cpumask);
-               else
-                       hctx->next_cpu = next_cpu;
+                       next_cpu = blk_mq_first_mapped_cpu(hctx);
                hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
        }
 
@@ -1380,7 +1371,7 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
         * Do unbound schedule if we can't find a online CPU for this hctx,
         * and it should only happen in the path of handling CPU DEAD.
         */
-       if (!cpu_online(hctx->next_cpu)) {
+       if (!cpu_online(next_cpu)) {
                if (!tried) {
                        tried = true;
                        goto select_cpu;
@@ -1390,10 +1381,13 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
                 * Make sure to re-select CPU next time once after CPUs
                 * in hctx->cpumask become online again.
                 */
+               hctx->next_cpu = next_cpu;
                hctx->next_cpu_batch = 1;
                return WORK_CPU_UNBOUND;
        }
-       return hctx->next_cpu;
+
+       hctx->next_cpu = next_cpu;
+       return next_cpu;
 }
 
 static void __blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async,
@@ -1812,11 +1806,11 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
        if (q->elevator && !bypass_insert)
                goto insert;
 
-       if (!blk_mq_get_driver_tag(rq, NULL, false))
+       if (!blk_mq_get_dispatch_budget(hctx))
                goto insert;
 
-       if (!blk_mq_get_dispatch_budget(hctx)) {
-               blk_mq_put_driver_tag(rq);
+       if (!blk_mq_get_driver_tag(rq, NULL, false)) {
+               blk_mq_put_dispatch_budget(hctx);
                goto insert;
        }
 
@@ -2438,8 +2432,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
                /*
                 * Initialize batch roundrobin counts
                 */
-               hctx->next_cpu = cpumask_first_and(hctx->cpumask,
-                               cpu_online_mask);
+               hctx->next_cpu = blk_mq_first_mapped_cpu(hctx);
                hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
        }
 }
@@ -2556,7 +2549,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
        struct request_queue *uninit_q, *q;
 
-       uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
+       uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);
        if (!uninit_q)
                return ERR_PTR(-ENOMEM);
 
@@ -2678,7 +2671,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
        if (!(set->flags & BLK_MQ_F_SG_MERGE))
-               q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE;
+               queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
 
        q->sg_reserved_size = INT_MAX;
 
@@ -3005,7 +2998,7 @@ EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
 static bool blk_poll_stats_enable(struct request_queue *q)
 {
        if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
-           test_and_set_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
+           blk_queue_flag_test_and_set(QUEUE_FLAG_POLL_STATS, q))
                return true;
        blk_stat_add_callback(q, q->poll_cb);
        return false;