Add simple polling support via the socket busy_poll interface.
Although we do not shut down interrupts but simply hammer
the socket poll, we can sometimes find completions faster
than the normal interrupt-driven RX path.
We add a per-queue nr_cqe counter that is reset every time
the RX path is invoked, so that the .poll callback can return
it and stay consistent with that callback's semantics.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
#include <net/tcp.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
#include <net/tcp.h>
#include <linux/blk-mq.h>
#include <crypto/hash.h>
+#include <net/busy_poll.h>
#include "nvme.h"
#include "fabrics.h"
#include "nvme.h"
#include "fabrics.h"
int pdu_offset;
size_t data_remaining;
size_t ddgst_remaining;
int pdu_offset;
size_t data_remaining;
size_t ddgst_remaining;
/* send state */
struct nvme_tcp_request *request;
/* send state */
struct nvme_tcp_request *request;
}
nvme_end_request(rq, cqe->status, cqe->result);
}
nvme_end_request(rq, cqe->status, cqe->result);
nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
- if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS)
+ if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
nvme_tcp_init_recv_ctx(queue);
}
}
nvme_tcp_init_recv_ctx(queue);
}
}
pdu->command_id);
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
pdu->command_id);
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
}
nvme_tcp_init_recv_ctx(queue);
}
nvme_tcp_init_recv_ctx(queue);
rd_desc.arg.data = queue;
rd_desc.count = 1;
lock_sock(sk);
rd_desc.arg.data = queue;
rd_desc.count = 1;
lock_sock(sk);
consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
release_sock(sk);
return consumed;
consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
release_sock(sk);
return consumed;
queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
queue->sock->sk->sk_state_change = nvme_tcp_state_change;
queue->sock->sk->sk_write_space = nvme_tcp_write_space;
queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
queue->sock->sk->sk_state_change = nvme_tcp_state_change;
queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+ queue->sock->sk->sk_ll_usec = 1;
write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0;
write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0;
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
- set->nr_maps = 2 /* default + read */;
+ set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
}
ret = blk_mq_alloc_tag_set(set);
}
ret = blk_mq_alloc_tag_set(set);
nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
nr_io_queues = min(ctrl->opts->nr_io_queues, num_online_cpus());
nr_io_queues += min(ctrl->opts->nr_write_queues, num_online_cpus());
+ nr_io_queues += min(ctrl->opts->nr_poll_queues, num_online_cpus());
min(opts->nr_io_queues, nr_io_queues);
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
}
min(opts->nr_io_queues, nr_io_queues);
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
}
+
+ if (opts->nr_poll_queues && nr_io_queues) {
+ /* map dedicated poll queues only if we have queues left */
+ ctrl->io_queues[HCTX_TYPE_POLL] =
+ min(opts->nr_poll_queues, nr_io_queues);
+ }
}
static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
}
static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+ if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
+ /* map dedicated poll queues only if we have queues left */
+ set->map[HCTX_TYPE_POLL].nr_queues =
+ ctrl->io_queues[HCTX_TYPE_POLL];
+ set->map[HCTX_TYPE_POLL].queue_offset =
+ ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+ ctrl->io_queues[HCTX_TYPE_READ];
+ blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+ }
+
dev_info(ctrl->ctrl.device,
dev_info(ctrl->ctrl.device,
- "mapped %d/%d default/read queues.\n",
+ "mapped %d/%d/%d default/read/poll queues.\n",
ctrl->io_queues[HCTX_TYPE_DEFAULT],
ctrl->io_queues[HCTX_TYPE_DEFAULT],
- ctrl->io_queues[HCTX_TYPE_READ]);
+ ctrl->io_queues[HCTX_TYPE_READ],
+ ctrl->io_queues[HCTX_TYPE_POLL]);
+static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
+{
+ struct nvme_tcp_queue *queue = hctx->driver_data;
+ struct sock *sk = queue->sock->sk;
+ /*
+  * Polling entry point for the queue stored in hctx->driver_data.
+  * Busy-loop on the queue's socket only if sk_can_busy_loop() permits it
+  * and the socket receive queue is currently empty, then always run the
+  * receive path. The return value is queue->nr_cqe as left by that
+  * receive pass (presumably the number of completions it reaped --
+  * confirm against where nr_cqe is updated).
+  */
+ if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue))
+ sk_busy_loop(sk, true);
+ nvme_tcp_try_recv(queue);
+ return queue->nr_cqe;
+}
+
static struct blk_mq_ops nvme_tcp_mq_ops = {
.queue_rq = nvme_tcp_queue_rq,
.complete = nvme_complete_rq,
static struct blk_mq_ops nvme_tcp_mq_ops = {
.queue_rq = nvme_tcp_queue_rq,
.complete = nvme_complete_rq,
.init_hctx = nvme_tcp_init_hctx,
.timeout = nvme_tcp_timeout,
.map_queues = nvme_tcp_map_queues,
.init_hctx = nvme_tcp_init_hctx,
.timeout = nvme_tcp_timeout,
.map_queues = nvme_tcp_map_queues,
};
static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
};
static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
INIT_LIST_HEAD(&ctrl->list);
ctrl->ctrl.opts = opts;
INIT_LIST_HEAD(&ctrl->list);
ctrl->ctrl.opts = opts;
- ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1;
+ ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
+ opts->nr_poll_queues + 1;
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
- NVMF_OPT_NR_WRITE_QUEUES,
+ NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES,
.create_ctrl = nvme_tcp_create_ctrl,
};
.create_ctrl = nvme_tcp_create_ctrl,
};