nvme-fabrics: allow nvmf_connect_io_queue to poll
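
This blobdiff spans the change that threads a polling flag through the fabrics I/O connect path; nvme_rdma_start_queue (below) now passes false, and the RDMA driver drops its own .poll handler in the same range. A minimal sketch of the assumed updated helper prototype (presumably declared in drivers/nvme/host/fabrics.h, which is not part of this blobdiff):

	/* Assumed signature after this change: the added 'poll' argument lets the
	 * caller issue the fabrics Connect command on a polled queue. */
	int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid, bool poll);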
[linux.git] / drivers / nvme / host / rdma.c
index d181cafedc584916d0b04db2e08dd9e0802cba0c..b907ed43814f6e69763ff781b7db8bc007ccdf46 100644 (file)
@@ -184,6 +184,7 @@ static int nvme_rdma_alloc_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
        qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
        if (ib_dma_mapping_error(ibdev, qe->dma)) {
                kfree(qe->data);
+               qe->data = NULL;
                return -ENOMEM;
        }
 
@@ -597,7 +598,7 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
        int ret;
 
        if (idx)
-               ret = nvmf_connect_io_queue(&ctrl->ctrl, idx);
+               ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, false);
        else
                ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 
@@ -644,6 +645,8 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
        nr_io_queues = min_t(unsigned int, nr_io_queues,
                                ibdev->num_comp_vectors);
 
+       nr_io_queues += min(opts->nr_write_queues, num_online_cpus());
+
        ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
        if (ret)
                return ret;
@@ -693,7 +696,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
                set->ops = &nvme_rdma_admin_mq_ops;
                set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
                set->reserved_tags = 2; /* connect + keep-alive */
-               set->numa_node = NUMA_NO_NODE;
+               set->numa_node = nctrl->numa_node;
                set->cmd_size = sizeof(struct nvme_rdma_request) +
                        SG_CHUNK_SIZE * sizeof(struct scatterlist);
                set->driver_data = ctrl;
@@ -706,13 +709,14 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
                set->ops = &nvme_rdma_mq_ops;
                set->queue_depth = nctrl->sqsize + 1;
                set->reserved_tags = 1; /* fabric connect */
-               set->numa_node = NUMA_NO_NODE;
+               set->numa_node = nctrl->numa_node;
                set->flags = BLK_MQ_F_SHOULD_MERGE;
                set->cmd_size = sizeof(struct nvme_rdma_request) +
                        SG_CHUNK_SIZE * sizeof(struct scatterlist);
                set->driver_data = ctrl;
                set->nr_hw_queues = nctrl->queue_count - 1;
                set->timeout = NVME_IO_TIMEOUT;
+               set->nr_maps = 2 /* default + read */;
        }
 
        ret = blk_mq_alloc_tag_set(set);
@@ -762,6 +766,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
                return error;
 
        ctrl->device = ctrl->queues[0].device;
+       ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
 
        ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
@@ -823,6 +828,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 out_free_async_qe:
        nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
                sizeof(struct nvme_command), DMA_TO_DEVICE);
+       ctrl->async_event_sqe.data = NULL;
 out_free_queue:
        nvme_rdma_free_queue(&ctrl->queues[0]);
        return error;
@@ -1409,12 +1415,11 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
        WARN_ON_ONCE(ret);
 }
 
-static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
-               struct nvme_completion *cqe, struct ib_wc *wc, int tag)
+static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
+               struct nvme_completion *cqe, struct ib_wc *wc)
 {
        struct request *rq;
        struct nvme_rdma_request *req;
-       int ret = 0;
 
        rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
        if (!rq) {
@@ -1422,7 +1427,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                        "tag 0x%x on QP %#x not found\n",
                        cqe->command_id, queue->qp->qp_num);
                nvme_rdma_error_recovery(queue->ctrl);
-               return ret;
+               return;
        }
        req = blk_mq_rq_to_pdu(rq);
 
@@ -1437,6 +1442,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                        nvme_rdma_error_recovery(queue->ctrl);
                }
        } else if (req->mr) {
+               int ret;
+
                ret = nvme_rdma_inv_rkey(queue, req);
                if (unlikely(ret < 0)) {
                        dev_err(queue->ctrl->ctrl.device,
@@ -1445,19 +1452,14 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                        nvme_rdma_error_recovery(queue->ctrl);
                }
                /* the local invalidation completion will end the request */
-               return 0;
+               return;
        }
 
-       if (refcount_dec_and_test(&req->ref)) {
-               if (rq->tag == tag)
-                       ret = 1;
+       if (refcount_dec_and_test(&req->ref))
                nvme_end_request(rq, req->status, req->result);
-       }
-
-       return ret;
 }
 
-static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
+static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
        struct nvme_rdma_qe *qe =
                container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
@@ -1465,11 +1467,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
        struct ib_device *ibdev = queue->device->dev;
        struct nvme_completion *cqe = qe->data;
        const size_t len = sizeof(struct nvme_completion);
-       int ret = 0;
 
        if (unlikely(wc->status != IB_WC_SUCCESS)) {
                nvme_rdma_wr_error(cq, wc, "RECV");
-               return 0;
+               return;
        }
 
        ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
@@ -1484,16 +1485,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
                nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                &cqe->result);
        else
-               ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag);
+               nvme_rdma_process_nvme_rsp(queue, cqe, wc);
        ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);
 
        nvme_rdma_post_recv(queue, qe);
-       return ret;
-}
-
-static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
-{
-       __nvme_rdma_recv_done(cq, wc, -1);
 }
 
 static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
@@ -1747,27 +1742,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
        return BLK_STS_IOERR;
 }
 
-static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
-{
-       struct nvme_rdma_queue *queue = hctx->driver_data;
-       struct ib_cq *cq = queue->ib_cq;
-       struct ib_wc wc;
-       int found = 0;
-
-       while (ib_poll_cq(cq, 1, &wc) > 0) {
-               struct ib_cqe *cqe = wc.wr_cqe;
-
-               if (cqe) {
-                       if (cqe->done == nvme_rdma_recv_done)
-                               found |= __nvme_rdma_recv_done(cq, &wc, tag);
-                       else
-                               cqe->done(cq, &wc);
-               }
-       }
-
-       return found;
-}
-
 static void nvme_rdma_complete_rq(struct request *rq)
 {
        struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
@@ -1780,7 +1754,25 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 {
        struct nvme_rdma_ctrl *ctrl = set->driver_data;
 
-       return blk_mq_rdma_map_queues(set, ctrl->device->dev, 0);
+       set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+       set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues;
+       if (ctrl->ctrl.opts->nr_write_queues) {
+               /* separate read/write queues */
+               set->map[HCTX_TYPE_DEFAULT].nr_queues =
+                               ctrl->ctrl.opts->nr_write_queues;
+               set->map[HCTX_TYPE_READ].queue_offset =
+                               ctrl->ctrl.opts->nr_write_queues;
+       } else {
+               /* mixed read/write queues */
+               set->map[HCTX_TYPE_DEFAULT].nr_queues =
+                               ctrl->ctrl.opts->nr_io_queues;
+               set->map[HCTX_TYPE_READ].queue_offset = 0;
+       }
+       blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
+                       ctrl->device->dev, 0);
+       blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
+                       ctrl->device->dev, 0);
+       return 0;
 }
 
 static const struct blk_mq_ops nvme_rdma_mq_ops = {
@@ -1789,7 +1781,6 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
        .init_request   = nvme_rdma_init_request,
        .exit_request   = nvme_rdma_exit_request,
        .init_hctx      = nvme_rdma_init_hctx,
-       .poll           = nvme_rdma_poll,
        .timeout        = nvme_rdma_timeout,
        .map_queues     = nvme_rdma_map_queues,
 };
@@ -1936,7 +1927,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
        INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
-       ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
+       ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1;
        ctrl->ctrl.sqsize = opts->queue_size - 1;
        ctrl->ctrl.kato = opts->kato;
 
@@ -1987,7 +1978,8 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
        .module         = THIS_MODULE,
        .required_opts  = NVMF_OPT_TRADDR,
        .allowed_opts   = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
-                         NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO,
+                         NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
+                         NVMF_OPT_NR_WRITE_QUEUES,
        .create_ctrl    = nvme_rdma_create_ctrl,
 };