diff --git a/block/blk-core.c b/block/blk-core.c
index 8340f69670d89625a8adc55a24e9c4e547555d7f..875e8d105067a2248b7829e487bd66c5f4c9281c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -36,6 +36,7 @@
 #include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
+#include <linux/psi.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -117,9 +118,47 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
        rq->internal_tag = -1;
        rq->start_time_ns = ktime_get_ns();
        rq->part = NULL;
+       refcount_set(&rq->ref, 1);
 }
 EXPORT_SYMBOL(blk_rq_init);
 
+#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
+static const char *const blk_op_name[] = {
+       REQ_OP_NAME(READ),
+       REQ_OP_NAME(WRITE),
+       REQ_OP_NAME(FLUSH),
+       REQ_OP_NAME(DISCARD),
+       REQ_OP_NAME(SECURE_ERASE),
+       REQ_OP_NAME(ZONE_RESET),
+       REQ_OP_NAME(ZONE_RESET_ALL),
+       REQ_OP_NAME(WRITE_SAME),
+       REQ_OP_NAME(WRITE_ZEROES),
+       REQ_OP_NAME(SCSI_IN),
+       REQ_OP_NAME(SCSI_OUT),
+       REQ_OP_NAME(DRV_IN),
+       REQ_OP_NAME(DRV_OUT),
+};
+#undef REQ_OP_NAME
+
+/**
+ * blk_op_str - Return the string XXX for a given REQ_OP_XXX.
+ * @op: REQ_OP_XXX.
+ *
+ * Description: Centralized block layer helper to convert a REQ_OP_XXX value
+ * into its string representation. Useful when debugging or tracing a bio or
+ * request. For an invalid REQ_OP_XXX it returns the string "UNKNOWN".
+ */
+inline const char *blk_op_str(unsigned int op)
+{
+       const char *op_str = "UNKNOWN";
+
+       if (op < ARRAY_SIZE(blk_op_name) && blk_op_name[op])
+               op_str = blk_op_name[op];
+
+       return op_str;
+}
+EXPORT_SYMBOL_GPL(blk_op_str);
+
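The new helper gives bio and request tracing code a single place to stringify an opcode. A minimal usage sketch (my_trace_rq and the pr_info() message are illustrative, not part of this patch):

    #include <linux/blkdev.h>

    /* hypothetical caller, for illustration only */
    static void my_trace_rq(struct request *rq)
    {
            /* prints e.g. "op WRITE (0x1)" for a write request */
            pr_info("op %s (0x%x)\n", blk_op_str(req_op(rq)), req_op(rq));
    }
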
 static const struct {
        int             errno;
        const char      *name;
@@ -167,18 +206,23 @@ int blk_status_to_errno(blk_status_t status)
 }
 EXPORT_SYMBOL_GPL(blk_status_to_errno);
 
-static void print_req_error(struct request *req, blk_status_t status)
+static void print_req_error(struct request *req, blk_status_t status,
+               const char *caller)
 {
        int idx = (__force int)status;
 
        if (WARN_ON_ONCE(idx >= ARRAY_SIZE(blk_errors)))
                return;
 
-       printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu flags %x\n",
-                               __func__, blk_errors[idx].name,
-                               req->rq_disk ?  req->rq_disk->disk_name : "?",
-                               (unsigned long long)blk_rq_pos(req),
-                               req->cmd_flags);
+       printk_ratelimited(KERN_ERR
+               "%s: %s error, dev %s, sector %llu op 0x%x:(%s) flags 0x%x "
+               "phys_seg %u prio class %u\n",
+               caller, blk_errors[idx].name,
+               req->rq_disk ? req->rq_disk->disk_name : "?",
+               blk_rq_pos(req), req_op(req), blk_op_str(req_op(req)),
+               req->cmd_flags & ~REQ_OP_MASK,
+               req->nr_phys_segments,
+               IOPRIO_PRIO_CLASS(req->ioprio));
 }
 
 static void req_bio_endio(struct request *rq, struct bio *bio,
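With the extra fields, the ratelimited message now carries the opcode, its name via blk_op_str(), the remaining command flags, the segment count, and the I/O priority class. Since callers pass __func__, a failed write ends up logged along these lines (device and numbers illustrative):

    blk_update_request: I/O error, dev sda, sector 1024 op 0x1:(WRITE) flags 0x8800 phys_seg 1 prio class 0
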
@@ -302,7 +346,8 @@ void blk_cleanup_queue(struct request_queue *q)
 
        /*
         * Drain all requests queued before DYING marking. Set DEAD flag to
-        * prevent that q->request_fn() gets invoked after draining finished.
+        * prevent blk_mq_run_hw_queues() from accessing the hardware queues
+        * after draining has finished.
         */
        blk_freeze_queue(q);
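blk_freeze_queue() blocks new submissions and waits for every in-flight request to complete. Drivers use the exported freeze/unfreeze pair the same way whenever queue state must change atomically with respect to I/O; a sketch:

    blk_mq_freeze_queue(q);         /* drain all in-flight requests */
    /* ... update state the submission path must not observe mid-change ... */
    blk_mq_unfreeze_queue(q);       /* allow new I/O again */
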
 
@@ -437,7 +482,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        if (!q)
                return NULL;
 
-       INIT_LIST_HEAD(&q->queue_head);
        q->last_merge = NULL;
 
        q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
@@ -476,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
        mutex_init(&q->blk_trace_mutex);
 #endif
        mutex_init(&q->sysfs_lock);
+       mutex_init(&q->sysfs_dir_lock);
        spin_lock_init(&q->queue_lock);
 
        init_waitqueue_head(&q->mq_freeze_wq);
@@ -550,15 +595,16 @@ void blk_put_request(struct request *req)
 }
 EXPORT_SYMBOL(blk_put_request);
 
-bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
-                           struct bio *bio)
+bool bio_attempt_back_merge(struct request *req, struct bio *bio,
+               unsigned int nr_segs)
 {
        const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
 
-       if (!ll_back_merge_fn(q, req, bio))
+       if (!ll_back_merge_fn(req, bio, nr_segs))
                return false;
 
-       trace_block_bio_backmerge(q, req, bio);
+       trace_block_bio_backmerge(req->q, req, bio);
+       rq_qos_merge(req->q, req, bio);
 
        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
                blk_rq_set_mixed_merge(req);
@@ -571,15 +617,16 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
        return true;
 }
 
-bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
-                            struct bio *bio)
+bool bio_attempt_front_merge(struct request *req, struct bio *bio,
+               unsigned int nr_segs)
 {
        const int ff = bio->bi_opf & REQ_FAILFAST_MASK;
 
-       if (!ll_front_merge_fn(q, req, bio))
+       if (!ll_front_merge_fn(req, bio, nr_segs))
                return false;
 
-       trace_block_bio_frontmerge(q, req, bio);
+       trace_block_bio_frontmerge(req->q, req, bio);
+       rq_qos_merge(req->q, req, bio);
 
        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
                blk_rq_set_mixed_merge(req);
@@ -605,6 +652,8 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
            blk_rq_get_max_sectors(req, blk_rq_pos(req)))
                goto no_merge;
 
+       rq_qos_merge(q, req, bio);
+
        req->biotail->bi_next = bio;
        req->biotail = bio;
        req->__data_len += bio->bi_iter.bi_size;
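The rq_qos_merge() calls added to all three merge paths let registered rq_qos policies (wbt, io.latency, and friends) account for a bio that is folded into an existing request instead of allocating a new one. The helper itself is roughly the following (a sketch based on block/blk-rq-qos.h of this era):

    static inline void rq_qos_merge(struct request_queue *q,
                                    struct request *rq, struct bio *bio)
    {
            if (q->rq_qos)                  /* any policy attached? */
                    __rq_qos_merge(q->rq_qos, rq, bio);
    }
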
@@ -621,6 +670,7 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
  * blk_attempt_plug_merge - try to merge with %current's plugged list
  * @q: request_queue new bio is being queued at
  * @bio: new bio being queued
+ * @nr_segs: number of segments in @bio
  * @same_queue_rq: pointer to &struct request that gets filled in when
  * another request associated with @q is found on the plug list
  * (optional, may be %NULL)
@@ -639,13 +689,13 @@ bool bio_attempt_discard_merge(struct request_queue *q, struct request *req,
  * Caller must ensure !blk_queue_nomerges(q) beforehand.
  */
 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-                           struct request **same_queue_rq)
+               unsigned int nr_segs, struct request **same_queue_rq)
 {
        struct blk_plug *plug;
        struct request *rq;
        struct list_head *plug_list;
 
-       plug = current->plug;
+       plug = blk_mq_plug(q, bio);
        if (!plug)
                return false;
 
@@ -668,10 +718,10 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 
                switch (blk_try_merge(rq, bio)) {
                case ELEVATOR_BACK_MERGE:
-                       merged = bio_attempt_back_merge(q, rq, bio);
+                       merged = bio_attempt_back_merge(rq, bio, nr_segs);
                        break;
                case ELEVATOR_FRONT_MERGE:
-                       merged = bio_attempt_front_merge(q, rq, bio);
+                       merged = bio_attempt_front_merge(rq, bio, nr_segs);
                        break;
                case ELEVATOR_DISCARD_MERGE:
                        merged = bio_attempt_discard_merge(q, rq, bio);
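Replacing current->plug with blk_mq_plug(q, bio) matters for zoned block devices: plugging writes to a zoned device could reorder them and break the sequential-write constraint. The helper behaves approximately like this (a sketch; the real definition lives in block/blk-mq.h):

    static inline struct blk_plug *blk_mq_plug(struct request_queue *q,
                                               struct bio *bio)
    {
            /* reads, and writes to regular devices, may use the task plug */
            if (!blk_queue_is_zoned(q) || !op_is_write(bio_op(bio)))
                    return current->plug;

            /* never plug writes to zoned devices */
            return NULL;
    }
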
@@ -687,18 +737,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
        return false;
 }
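For context, the plug list this function scans is set up by the submitter; the usual pattern around a batch of submissions is:

    struct blk_plug plug;

    blk_start_plug(&plug);          /* batch requests on current->plug */
    submit_bio(bio);                /* may merge into a plugged request */
    blk_finish_plug(&plug);         /* flush the batch to the device */
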
 
-void blk_init_request_from_bio(struct request *req, struct bio *bio)
-{
-       if (bio->bi_opf & REQ_RAHEAD)
-               req->cmd_flags |= REQ_FAILFAST_MASK;
-
-       req->__sector = bio->bi_iter.bi_sector;
-       req->ioprio = bio_prio(bio);
-       req->write_hint = bio->bi_write_hint;
-       blk_rq_bio_prep(req->q, req, bio);
-}
-EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
-
 static void handle_bad_sector(struct bio *bio, sector_t maxsector)
 {
        char b[BDEVNAME_SIZE];
@@ -900,6 +938,10 @@ generic_make_request_checks(struct bio *bio)
                if (!blk_queue_is_zoned(q))
                        goto not_supported;
                break;
+       case REQ_OP_ZONE_RESET_ALL:
+               if (!blk_queue_is_zoned(q) || !blk_queue_zone_resetall(q))
+                       goto not_supported;
+               break;
        case REQ_OP_WRITE_ZEROES:
                if (!q->limits.max_write_zeroes_sectors)
                        goto not_supported;
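REQ_OP_ZONE_RESET_ALL is accepted only when the queue is zoned and advertises the reset-all capability. Such bios are normally built by the block layer itself; at this point in the tree, a full-device reset through blkdev_reset_zones() is the path that may be collapsed into a single RESET_ALL command (illustrative call, error handling omitted):

    /* reset every zone; may be issued as one REQ_OP_ZONE_RESET_ALL bio
     * when the queue supports it */
    ret = blkdev_reset_zones(bdev, 0, get_capacity(bdev->bd_disk),
                             GFP_KERNEL);
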
@@ -1097,6 +1139,13 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+       bool workingset_read = false;
+       unsigned long pflags;
+       blk_qc_t ret;
+
+       if (blkcg_punt_bio_submit(bio))
+               return BLK_QC_T_NONE;
+
        /*
         * If it's a regular read/write or a barrier with data attached,
         * go through the normal accounting stuff before submission.
@@ -1112,6 +1161,8 @@ blk_qc_t submit_bio(struct bio *bio)
                if (op_is_write(bio_op(bio))) {
                        count_vm_events(PGPGOUT, count);
                } else {
+                       if (bio_flagged(bio, BIO_WORKINGSET))
+                               workingset_read = true;
                        task_io_account_read(bio->bi_iter.bi_size);
                        count_vm_events(PGPGIN, count);
                }
@@ -1126,7 +1177,21 @@ blk_qc_t submit_bio(struct bio *bio)
                }
        }
 
-       return generic_make_request(bio);
+       /*
+        * If we're reading data that is part of the userspace
+        * workingset, count submission time as memory stall. When the
+        * device is congested, or the submitting cgroup IO-throttled,
+        * submission can be a significant part of overall IO time.
+        */
+       if (workingset_read)
+               psi_memstall_enter(&pflags);
+
+       ret = generic_make_request(bio);
+
+       if (workingset_read)
+               psi_memstall_leave(&pflags);
+
+       return ret;
 }
 EXPORT_SYMBOL(submit_bio);
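psi_memstall_enter()/psi_memstall_leave() bracket a region whose duration should be charged as memory pressure for the current task; pflags preserves the per-task flag state across nesting. The general pattern, independent of this call site:

    unsigned long pflags;

    psi_memstall_enter(&pflags);    /* mark the task as stalled on memory */
    /* ... work attributable to memory pressure, e.g. refault reads ... */
    psi_memstall_leave(&pflags);
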
 
@@ -1163,7 +1228,7 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
         * Recalculate it to check the request correctly on this queue's
         * limitation.
         */
-       blk_recalc_rq_segments(rq);
+       rq->nr_phys_segments = blk_recalc_rq_segments(rq);
        if (rq->nr_phys_segments > queue_max_segments(q)) {
                printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
                        __func__, rq->nr_phys_segments, queue_max_segments(q));
@@ -1348,7 +1413,7 @@ EXPORT_SYMBOL_GPL(blk_steal_bios);
  *
  *     This special helper function is only for request stacking drivers
  *     (e.g. request-based dm) so that they can handle partial completion.
- *     Actual device drivers should use blk_end_request instead.
+ *     Actual device drivers should use blk_mq_end_request instead.
  *
  *     Passing the result of blk_rq_bytes() as @nr_bytes guarantees
  *     %false return from this function.
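A regular (non-stacking) blk-mq driver completes a request in one shot instead; a minimal sketch of such a completion path (function name hypothetical):

    static void my_driver_complete_rq(struct request *rq)
    {
            /* completes all bytes of rq and ends its bios */
            blk_mq_end_request(rq, BLK_STS_OK);
    }
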
@@ -1373,7 +1438,7 @@ bool blk_update_request(struct request *req, blk_status_t error,
 
        if (unlikely(error && !blk_rq_is_passthrough(req) &&
                     !(req->rq_flags & RQF_QUIET)))
-               print_req_error(req, error);
+               print_req_error(req, error, __func__);
 
        blk_account_io_completion(req, nr_bytes);
 
@@ -1432,28 +1497,13 @@ bool blk_update_request(struct request *req, blk_status_t error,
                }
 
                /* recalculate the number of segments */
-               blk_recalc_rq_segments(req);
+               req->nr_phys_segments = blk_recalc_rq_segments(req);
        }
 
        return true;
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
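blk_update_request() is also the first half of a normal completion: blk_mq_end_request() is essentially built on top of it, along these lines (a sketch of the core logic in block/blk-mq.c of this era):

    void blk_mq_end_request(struct request *rq, blk_status_t error)
    {
            /* full byte count: blk_update_request() must consume everything */
            if (blk_update_request(rq, error, blk_rq_bytes(rq)))
                    BUG();
            __blk_mq_end_request(rq, error);
    }
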
 
-void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
-                    struct bio *bio)
-{
-       if (bio_has_data(bio))
-               rq->nr_phys_segments = bio_phys_segments(q, bio);
-       else if (bio_op(bio) == REQ_OP_DISCARD)
-               rq->nr_phys_segments = 1;
-
-       rq->__data_len = bio->bi_iter.bi_size;
-       rq->bio = rq->biotail = bio;
-
-       if (bio->bi_disk)
-               rq->rq_disk = bio->bi_disk;
-}
-
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 /**
  * rq_flush_dcache_pages - Helper function to flush all pages in a request