/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "
#define DRV_VERSION	"2.0"
#define DRV_RELDATE	"July 26, 2015"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_INFO(release_date, DRV_RELDATE);

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");
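
/*
 * Example (illustrative, not part of the original source): the read-only
 * parameters above can be set at module load time, e.g.:
 *
 *	modprobe ib_srp cmd_sg_entries=255 allow_ext_sg=y
 */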

static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate transport layer errors. After this time has been"
		 " exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");

static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
		const char *opname);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	if (tmo >= 0)
		return sprintf(buffer, "%d", tmo);
	else
		return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo = -1;
	int res;

	res = srp_parse_tmo(&tmo, val);
	if (res)
		goto out;

	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
				    srp_dev_loss_tmo);
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
	else
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
				    tmo);
	if (res)
		goto out;
	*(int *)kp->arg = tmo;

out:
	return res;
}

static const struct kernel_param_ops srp_tmo_ops = {
	.get = srp_tmo_get,
	.set = srp_tmo_set,
};
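
/*
 * Example (illustrative): because the three timeouts above are registered
 * with module_param_cb(), they can be read and updated through sysfs at
 * runtime, with srp_tmo_get()/srp_tmo_set() doing the (de)serialization:
 *
 *	cat /sys/module/ib_srp/parameters/dev_loss_tmo
 *	echo off > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 */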

static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
	return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
	return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

	return topspin_workarounds &&
		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
				   gfp_t gfp_mask,
				   enum dma_data_direction direction)
{
	struct srp_iu *iu;

	iu = kmalloc(sizeof *iu, gfp_mask);
	if (!iu)
		goto out;

	iu->buf = kzalloc(size, gfp_mask);
	if (!iu->buf)
		goto out_free_iu;

	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
				    direction);
	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
		goto out_free_buf;

	iu->size      = size;
	iu->direction = direction;

	return iu;

out_free_buf:
	kfree(iu->buf);
out_free_iu:
	kfree(iu);
out:
	return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
	if (!iu)
		return;

	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
			    iu->direction);
	kfree(iu->buf);
	kfree(iu);
}

static void srp_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %s (%d)\n",
		 ib_event_msg(event->event), event->event);
}

static int srp_init_qp(struct srp_target_port *target,
		       struct ib_qp *qp)
{
	struct ib_qp_attr *attr;
	int ret;

	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
				  target->srp_host->port,
				  be16_to_cpu(target->pkey),
				  &attr->pkey_index);
	if (ret)
		goto out;

	attr->qp_state        = IB_QPS_INIT;
	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
				 IB_ACCESS_REMOTE_WRITE);
	attr->port_num        = target->srp_host->port;

	ret = ib_modify_qp(qp, attr,
			   IB_QP_STATE		|
			   IB_QP_PKEY_INDEX	|
			   IB_QP_ACCESS_FLAGS	|
			   IB_QP_PORT);

out:
	kfree(attr);
	return ret;
}

static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct ib_cm_id *new_cm_id;

	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
				    srp_cm_handler, ch);
	if (IS_ERR(new_cm_id))
		return PTR_ERR(new_cm_id);

	if (ch->cm_id)
		ib_destroy_cm_id(ch->cm_id);
	ch->cm_id = new_cm_id;
	ch->path.sgid = target->sgid;
	ch->path.dgid = target->orig_dgid;
	ch->path.pkey = target->pkey;
	ch->path.service_id = target->service_id;

	return 0;
}

static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_fmr_pool_param fmr_param;

	memset(&fmr_param, 0, sizeof(fmr_param));
	fmr_param.pool_size	    = target->mr_pool_size;
	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
	fmr_param.cache		    = 1;
	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
				       IB_ACCESS_REMOTE_WRITE |
				       IB_ACCESS_REMOTE_READ);

	return ib_create_fmr_pool(dev->pd, &fmr_param);
}

/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
	int i;
	struct srp_fr_desc *d;

	if (!pool)
		return;

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		if (d->mr)
			ib_dereg_mr(d->mr);
	}
	kfree(pool);
}

/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device:            IB device to allocate fast registration descriptors for.
 * @pd:                Protection domain associated with the FR descriptors.
 * @pool_size:         Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
					      struct ib_pd *pd, int pool_size,
					      int max_page_list_len)
{
	struct srp_fr_pool *pool;
	struct srp_fr_desc *d;
	struct ib_mr *mr;
	int i, ret = -EINVAL;

	if (pool_size <= 0)
		goto err;
	ret = -ENOMEM;
	pool = kzalloc(sizeof(struct srp_fr_pool) +
		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
	if (!pool)
		goto err;
	pool->size = pool_size;
	pool->max_page_list_len = max_page_list_len;
	spin_lock_init(&pool->lock);
	INIT_LIST_HEAD(&pool->free_list);

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
				 max_page_list_len);
		if (IS_ERR(mr)) {
			ret = PTR_ERR(mr);
			goto destroy_pool;
		}
		d->mr = mr;
		list_add_tail(&d->entry, &pool->free_list);
	}

out:
	return pool;

destroy_pool:
	srp_destroy_fr_pool(pool);

err:
	pool = ERR_PTR(ret);
	goto out;
}

/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
	struct srp_fr_desc *d = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	if (!list_empty(&pool->free_list)) {
		d = list_first_entry(&pool->free_list, typeof(*d), entry);
		list_del(&d->entry);
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	return d;
}

/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
			    int n)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&pool->lock, flags);
	for (i = 0; i < n; i++)
		list_add(&desc[i]->entry, &pool->free_list);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;

	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
				  dev->max_pages_per_mr);
}

/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @qp: RDMA queue pair.
 *
 * Drain the qp before destroying it. This prevents the receive completion
 * handler from accessing the queue pair while it is being destroyed.
 */
static void srp_destroy_qp(struct ib_qp *qp)
{
	ib_drain_rq(qp);
	ib_destroy_qp(qp);
}

static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_qp_init_attr *init_attr;
	struct ib_cq *recv_cq, *send_cq;
	struct ib_qp *qp;
	struct ib_fmr_pool *fmr_pool = NULL;
	struct srp_fr_pool *fr_pool = NULL;
	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
	int ret;

	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
	if (!init_attr)
		return -ENOMEM;

	/* queue_size + 1 for ib_drain_rq() */
	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
			      ch->comp_vector, IB_POLL_SOFTIRQ);
	if (IS_ERR(recv_cq)) {
		ret = PTR_ERR(recv_cq);
		goto err;
	}

	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
			      ch->comp_vector, IB_POLL_DIRECT);
	if (IS_ERR(send_cq)) {
		ret = PTR_ERR(send_cq);
		goto err_recv_cq;
	}

	init_attr->event_handler       = srp_qp_event;
	init_attr->cap.max_send_wr     = m * target->queue_size;
	init_attr->cap.max_recv_wr     = target->queue_size + 1;
	init_attr->cap.max_recv_sge    = 1;
	init_attr->cap.max_send_sge    = 1;
	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
	init_attr->qp_type             = IB_QPT_RC;
	init_attr->send_cq             = send_cq;
	init_attr->recv_cq             = recv_cq;

	qp = ib_create_qp(dev->pd, init_attr);
	if (IS_ERR(qp)) {
		ret = PTR_ERR(qp);
		goto err_send_cq;
	}

	ret = srp_init_qp(target, qp);
	if (ret)
		goto err_qp;

	if (dev->use_fast_reg) {
		fr_pool = srp_alloc_fr_pool(target);
		if (IS_ERR(fr_pool)) {
			ret = PTR_ERR(fr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
	} else if (dev->use_fmr) {
		fmr_pool = srp_alloc_fmr_pool(target);
		if (IS_ERR(fmr_pool)) {
			ret = PTR_ERR(fmr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FMR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
	}

	if (ch->qp)
		srp_destroy_qp(ch->qp);
	if (ch->recv_cq)
		ib_free_cq(ch->recv_cq);
	if (ch->send_cq)
		ib_free_cq(ch->send_cq);

	ch->qp = qp;
	ch->recv_cq = recv_cq;
	ch->send_cq = send_cq;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
		ch->fr_pool = fr_pool;
	} else if (dev->use_fmr) {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
		ch->fmr_pool = fmr_pool;
	}

	kfree(init_attr);
	return 0;

err_qp:
	srp_destroy_qp(qp);

err_send_cq:
	ib_free_cq(send_cq);

err_recv_cq:
	ib_free_cq(recv_cq);

err:
	kfree(init_attr);
	return ret;
}

/**
 * srp_free_ch_ib() - destroy all channel-specific IB resources
 * @target: SRP target port.
 * @ch: SRP RDMA channel.
 *
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
			   struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	int i;

	if (!ch->target)
		return;

	if (ch->cm_id) {
		ib_destroy_cm_id(ch->cm_id);
		ch->cm_id = NULL;
	}

	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
	if (!ch->qp)
		return;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
	} else if (dev->use_fmr) {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
	}

	srp_destroy_qp(ch->qp);
	ib_free_cq(ch->send_cq);
	ib_free_cq(ch->recv_cq);

	/*
	 * Prevent the SCSI error handler from using this channel after it
	 * has been freed. The SCSI error handler can continue trying to
	 * perform recovery actions after scsi_remove_host() has returned.
	 */
	ch->target = NULL;

	ch->qp = NULL;
	ch->send_cq = ch->recv_cq = NULL;

	if (ch->rx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->rx_ring[i]);
		kfree(ch->rx_ring);
		ch->rx_ring = NULL;
	}
	if (ch->tx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->tx_ring[i]);
		kfree(ch->tx_ring);
		ch->tx_ring = NULL;
	}
}

static void srp_path_rec_completion(int status,
				    struct ib_sa_path_rec *pathrec,
				    void *ch_ptr)
{
	struct srp_rdma_ch *ch = ch_ptr;
	struct srp_target_port *target = ch->target;

	ch->status = status;
	if (status)
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Got failed path rec status %d\n", status);
	else
		ch->path = *pathrec;
	complete(&ch->done);
}

static int srp_lookup_path(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	int ret;

	ch->path.numb_path = 1;

	init_completion(&ch->done);

	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
					       target->srp_host->srp_dev->dev,
					       target->srp_host->port,
					       &ch->path,
					       IB_SA_PATH_REC_SERVICE_ID |
					       IB_SA_PATH_REC_DGID	 |
					       IB_SA_PATH_REC_SGID	 |
					       IB_SA_PATH_REC_NUMB_PATH	 |
					       IB_SA_PATH_REC_PKEY,
					       SRP_PATH_REC_TIMEOUT_MS,
					       GFP_KERNEL,
					       srp_path_rec_completion,
					       ch, &ch->path_query);
	if (ch->path_query_id < 0)
		return ch->path_query_id;

	ret = wait_for_completion_interruptible(&ch->done);
	if (ret < 0)
		return ret;

	if (ch->status < 0)
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Path record query failed\n");

	return ch->status;
}

static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	struct {
		struct ib_cm_req_param param;
		struct srp_login_req   priv;
	} *req = NULL;
	int status;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	req->param.primary_path		      = &ch->path;
	req->param.alternate_path	      = NULL;
	req->param.service_id		      = target->service_id;
	req->param.qp_num		      = ch->qp->qp_num;
	req->param.qp_type		      = ch->qp->qp_type;
	req->param.private_data		      = &req->priv;
	req->param.private_data_len	      = sizeof req->priv;
	req->param.flow_control		      = 1;

	get_random_bytes(&req->param.starting_psn, 4);
	req->param.starting_psn		     &= 0xffffff;

	/*
	 * Pick some arbitrary defaults here; we could make these
	 * module parameters if anyone cared about setting them.
	 */
	req->param.responder_resources	      = 4;
	req->param.remote_cm_response_timeout = 20;
	req->param.local_cm_response_timeout  = 20;
	req->param.retry_count		      = target->tl_retry_count;
	req->param.rnr_retry_count	      = 7;
	req->param.max_cm_retries	      = 15;

	req->priv.opcode	= SRP_LOGIN_REQ;
	req->priv.tag		= 0;
	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
	req->priv.req_buf_fmt	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
					      SRP_BUF_FORMAT_INDIRECT);
	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
				   SRP_MULTICHAN_SINGLE);
	/*
	 * In the published SRP specification (draft rev. 16a), the
	 * port identifier format is 8 bytes of ID extension followed
	 * by 8 bytes of GUID.  Older drafts put the two halves in the
	 * opposite order, so that the GUID comes first.
	 *
	 * Targets conforming to these obsolete drafts can be
	 * recognized by the I/O Class they report.
	 */
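	/*
	 * Byte layout of the 16-byte port identifiers (illustrative summary
	 * of the two cases handled below):
	 *
	 *	rev. 16a:    bytes 0..7 = ID extension, bytes 8..15 = GUID
	 *	old drafts:  bytes 0..7 = GUID,         bytes 8..15 = ID extension
	 */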
	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
		memcpy(req->priv.initiator_port_id,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->initiator_ext, 8);
		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
	} else {
		memcpy(req->priv.initiator_port_id,
		       &target->initiator_ext, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
	}

	/*
	 * Topspin/Cisco SRP targets will reject our login unless we
	 * zero out the first 8 bytes of our initiator port ID and set
	 * the second 8 bytes to the local node GUID.
	 */
	if (srp_target_is_topspin(target)) {
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Topspin/Cisco initiator port ID workaround "
			     "activated for target GUID %016llx\n",
			     be64_to_cpu(target->ioc_guid));
		memset(req->priv.initiator_port_id, 0, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->srp_host->srp_dev->dev->node_guid, 8);
	}

	status = ib_send_cm_req(ch->cm_id, &req->param);

	kfree(req);

	return status;
}

static bool srp_queue_remove_work(struct srp_target_port *target)
{
	bool changed = false;

	spin_lock_irq(&target->lock);
	if (target->state != SRP_TARGET_REMOVED) {
		target->state = SRP_TARGET_REMOVED;
		changed = true;
	}
	spin_unlock_irq(&target->lock);

	if (changed)
		queue_work(srp_remove_wq, &target->remove_work);

	return changed;
}

static void srp_disconnect_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	/* XXX should send SRP_I_LOGOUT request */

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ch->connected = false;
		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
			shost_printk(KERN_DEBUG, target->scsi_host,
				     PFX "Sending CM DREQ failed\n");
		}
	}
}

static void srp_free_req_data(struct srp_target_port *target,
			      struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	struct srp_request *req;
	int i;

	if (!ch->req_ring)
		return;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		if (dev->use_fast_reg) {
			kfree(req->fr_list);
		} else {
			kfree(req->fmr_list);
			kfree(req->map_page);
		}
		if (req->indirect_dma_addr) {
			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
					    target->indirect_size,
					    DMA_TO_DEVICE);
		}
		kfree(req->indirect_desc);
	}

	kfree(ch->req_ring);
	ch->req_ring = NULL;
}

static int srp_alloc_req_data(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *srp_dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	struct srp_request *req;
	void *mr_list;
	dma_addr_t dma_addr;
	int i, ret = -ENOMEM;

	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
			       GFP_KERNEL);
	if (!ch->req_ring)
		goto out;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
				  GFP_KERNEL);
		if (!mr_list)
			goto out;
		if (srp_dev->use_fast_reg) {
			req->fr_list = mr_list;
		} else {
			req->fmr_list = mr_list;
			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
						sizeof(void *), GFP_KERNEL);
			if (!req->map_page)
				goto out;
		}
		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
		if (!req->indirect_desc)
			goto out;

		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
					     target->indirect_size,
					     DMA_TO_DEVICE);
		if (ib_dma_mapping_error(ibdev, dma_addr))
			goto out;

		req->indirect_dma_addr = dma_addr;
	}
	ret = 0;

out:
	return ret;
}

/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template and that did not exist
 * before invocation of this function will be ignored.
 */
static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
{
	struct device_attribute **attr;

	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
		device_remove_file(&shost->shost_dev, *attr);
}

static void srp_remove_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_del_scsi_host_attr(target->scsi_host);
	srp_rport_get(target->rport);
	srp_remove_host(target->scsi_host);
	scsi_remove_host(target->scsi_host);
	srp_stop_rport_timers(target->rport);
	srp_disconnect_target(target);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
	}
	cancel_work_sync(&target->tl_err_work);
	srp_rport_put(target->rport);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_req_data(target, ch);
	}
	kfree(target->ch);
	target->ch = NULL;

	spin_lock(&target->srp_host->target_lock);
	list_del(&target->list);
	spin_unlock(&target->srp_host->target_lock);

	scsi_host_put(target->scsi_host);
}

static void srp_remove_work(struct work_struct *work)
{
	struct srp_target_port *target =
		container_of(work, struct srp_target_port, remove_work);

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_remove_target(target);
}

static void srp_rport_delete(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;

	srp_queue_remove_work(target);
}

/**
 * srp_connected_ch() - number of connected channels
 * @target: SRP target port.
 */
static int srp_connected_ch(struct srp_target_port *target)
{
	int i, c = 0;

	for (i = 0; i < target->ch_count; i++)
		c += target->ch[i].connected;

	return c;
}

static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	int ret;

	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);

	ret = srp_lookup_path(ch);
	if (ret)
		goto out;

	while (1) {
		init_completion(&ch->done);
		ret = srp_send_req(ch, multich);
		if (ret)
			goto out;
		ret = wait_for_completion_interruptible(&ch->done);
		if (ret < 0)
			goto out;

		/*
		 * The CM event handling code will set status to
		 * SRP_PORT_REDIRECT if we get a port redirect REJ
		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
		 * redirect REJ back.
		 */
		ret = ch->status;
		switch (ret) {
		case 0:
			ch->connected = true;
			goto out;

		case SRP_PORT_REDIRECT:
			ret = srp_lookup_path(ch);
			if (ret)
				goto out;
			break;

		case SRP_DLID_REDIRECT:
			break;

		case SRP_STALE_CONN:
			shost_printk(KERN_ERR, target->scsi_host, PFX
				     "giving up on stale connection\n");
			ret = -ECONNRESET;
			goto out;

		default:
			goto out;
		}
	}

out:
	return ret <= 0 ? ret : -ENODEV;
}

static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "INV RKEY");
}

static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
		u32 rkey)
{
	struct ib_send_wr *bad_wr;
	struct ib_send_wr wr = {
		.opcode		    = IB_WR_LOCAL_INV,
		.next		    = NULL,
		.num_sge	    = 0,
		.send_flags	    = 0,
		.ex.invalidate_rkey = rkey,
	};

	wr.wr_cqe = &req->reg_cqe;
	req->reg_cqe.done = srp_inv_rkey_err_done;
	return ib_post_send(ch->qp, &wr, &bad_wr);
}

static void srp_unmap_data(struct scsi_cmnd *scmnd,
			   struct srp_rdma_ch *ch,
			   struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	int i, res;

	if (!scsi_sglist(scmnd) ||
	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
		return;

	if (dev->use_fast_reg) {
		struct srp_fr_desc **pfr;

		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
			if (res < 0) {
				shost_printk(KERN_ERR, target->scsi_host, PFX
					     "Queueing INV WR for rkey %#x failed (%d)\n",
					     (*pfr)->mr->rkey, res);
				queue_work(system_long_wq,
					   &target->tl_err_work);
			}
		}
		if (req->nmdesc)
			srp_fr_pool_put(ch->fr_pool, req->fr_list,
					req->nmdesc);
	} else if (dev->use_fmr) {
		struct ib_pool_fmr **pfmr;

		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
			ib_fmr_pool_unmap(*pfmr);
	}

	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
			scmnd->sc_data_direction);
}

/**
 * srp_claim_req - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership for this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
 *
 * Return value:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
				       struct srp_request *req,
				       struct scsi_device *sdev,
				       struct scsi_cmnd *scmnd)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	if (req->scmnd &&
	    (!sdev || req->scmnd->device == sdev) &&
	    (!scmnd || req->scmnd == scmnd)) {
		scmnd = req->scmnd;
		req->scmnd = NULL;
	} else {
		scmnd = NULL;
	}
	spin_unlock_irqrestore(&ch->lock, flags);

	return scmnd;
}

/**
 * srp_free_req() - Unmap data and adjust ch->req_lim.
 * @ch:     SRP RDMA channel.
 * @req:    Request to be freed.
 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @target->req_lim.
 */
static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
	unsigned long flags;

	srp_unmap_data(scmnd, ch, req);

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_lim_delta;
	spin_unlock_irqrestore(&ch->lock, flags);
}

static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
			   struct scsi_device *sdev, int result)
{
	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);

	if (scmnd) {
		srp_free_req(ch, req, scmnd, 0);
		scmnd->result = result;
		scmnd->scsi_done(scmnd);
	}
}

static void srp_terminate_io(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	struct Scsi_Host *shost = target->scsi_host;
	struct scsi_device *sdev;
	int i, j;

	/*
	 * Invoking srp_terminate_io() while srp_queuecommand() is running
	 * is not safe. Hence the warning statement below.
	 */
	shost_for_each_device(sdev, shost)
		WARN_ON_ONCE(sdev->request_queue->request_fn_active);

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];

		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL,
				       DID_TRANSPORT_FAILFAST << 16);
		}
	}
}

/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to ensure that is not to call this function
 * directly but to call srp_reconnect_rport() instead, since that function
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
static int srp_rport_reconnect(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	int i, j, ret = 0;
	bool multich = false;

	srp_disconnect_target(target);

	if (target->state == SRP_TARGET_SCANNING)
		return -ENODEV;

	/*
	 * Now get a new local CM ID so that we avoid confusing the target in
	 * case things are really fouled up. Doing so also ensures that all CM
	 * callbacks will have finished before a new QP is allocated.
	 */
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ret += srp_new_cm_id(ch);
	}
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL, DID_RESET << 16);
		}
	}
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		/*
		 * Whether or not creating a new CM ID succeeded, create a new
		 * QP. This guarantees that all completion callback function
		 * invocations have finished before request resetting starts.
		 */
		ret += srp_create_ch_ib(ch);

		INIT_LIST_HEAD(&ch->free_tx);
		for (j = 0; j < target->queue_size; ++j)
			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
	}

	target->qp_in_error = false;

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		if (ret)
			break;
		ret = srp_connect_ch(ch, multich);
		multich = true;
	}

	if (ret == 0)
		shost_printk(KERN_INFO, target->scsi_host,
			     PFX "reconnect succeeded\n");

	return ret;
}

static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
			 unsigned int dma_len, u32 rkey)
{
	struct srp_direct_buf *desc = state->desc;

	WARN_ON_ONCE(!dma_len);

	desc->va = cpu_to_be64(dma_addr);
	desc->key = cpu_to_be32(rkey);
	desc->len = cpu_to_be32(dma_len);

	state->total_len += dma_len;
	state->desc++;
	state->ndesc++;
}

static int srp_map_finish_fmr(struct srp_map_state *state,
			      struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_pool_fmr *fmr;
	u64 io_addr = 0;

	if (state->fmr.next >= state->fmr.end)
		return -ENOMEM;

	WARN_ON_ONCE(!dev->use_fmr);

	if (state->npages == 0)
		return 0;

	if (state->npages == 1 && target->global_mr) {
		srp_map_desc(state, state->base_dma_addr, state->dma_len,
			     target->global_mr->rkey);
		goto reset_state;
	}

	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
				   state->npages, io_addr);
	if (IS_ERR(fmr))
		return PTR_ERR(fmr);

	*state->fmr.next++ = fmr;
	state->nmdesc++;

	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
		     state->dma_len, fmr->fmr->rkey);

reset_state:
	state->npages = 0;
	state->dma_len = 0;

	return 0;
}

static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}

/*
 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
 * where to start in the first element. If sg_offset_p != NULL then
 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
 * byte that has not yet been mapped.
 */
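
/*
 * Example (illustrative values): a call with sg_nents = 3 and
 * *sg_offset_p = 512 starts mapping 512 bytes into the first S/G element.
 * If the memory region can only cover the first two elements, the function
 * returns 2 and sets *sg_offset_p to the offset in state->sg[2] of the
 * first byte that still has to be mapped (0 if that element was not
 * entered at all).
 */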
static int srp_map_finish_fr(struct srp_map_state *state,
			     struct srp_request *req,
			     struct srp_rdma_ch *ch, int sg_nents,
			     unsigned int *sg_offset_p)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_send_wr *bad_wr;
	struct ib_reg_wr wr;
	struct srp_fr_desc *desc;
	u32 rkey;
	int n, err;

	if (state->fr.next >= state->fr.end)
		return -ENOMEM;

	WARN_ON_ONCE(!dev->use_fast_reg);

	if (sg_nents == 1 && target->global_mr) {
		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;

		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
			     sg_dma_len(state->sg) - sg_offset,
			     target->global_mr->rkey);
		if (sg_offset_p)
			*sg_offset_p = 0;
		return 1;
	}

	desc = srp_fr_pool_get(ch->fr_pool);
	if (!desc)
		return -ENOMEM;

	rkey = ib_inc_rkey(desc->mr->rkey);
	ib_update_fast_reg_key(desc->mr, rkey);

	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
			 dev->mr_page_size);
	if (unlikely(n < 0)) {
		srp_fr_pool_put(ch->fr_pool, &desc, 1);
		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
			 sg_offset_p ? *sg_offset_p : -1, n);
		return n;
	}

	WARN_ON_ONCE(desc->mr->length == 0);

	req->reg_cqe.done = srp_reg_mr_err_done;

	wr.wr.next = NULL;
	wr.wr.opcode = IB_WR_REG_MR;
	wr.wr.wr_cqe = &req->reg_cqe;
	wr.wr.num_sge = 0;
	wr.wr.send_flags = 0;
	wr.mr = desc->mr;
	wr.key = desc->mr->rkey;
	wr.access = (IB_ACCESS_LOCAL_WRITE |
		     IB_ACCESS_REMOTE_READ |
		     IB_ACCESS_REMOTE_WRITE);

	*state->fr.next++ = desc;
	state->nmdesc++;

	srp_map_desc(state, desc->mr->iova,
		     desc->mr->length, desc->mr->rkey);

	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
	if (unlikely(err)) {
		WARN_ON_ONCE(err == -ENOMEM);
		return err;
	}

	return n;
}

static int srp_map_sg_entry(struct srp_map_state *state,
			    struct srp_rdma_ch *ch,
			    struct scatterlist *sg, int sg_index)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
	unsigned int len = 0;
	int ret;

	WARN_ON_ONCE(!dma_len);

	while (dma_len) {
		unsigned offset = dma_addr & ~dev->mr_page_mask;

		if (state->npages == dev->max_pages_per_mr || offset != 0) {
			ret = srp_map_finish_fmr(state, ch);
			if (ret)
				return ret;
		}

		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);

		if (!state->npages)
			state->base_dma_addr = dma_addr;
		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
		state->dma_len += len;
		dma_addr += len;
		dma_len -= len;
	}

	/*
	 * If the last entry of the MR wasn't a full page, then we need to
	 * close it out and start a new one -- we can only merge at page
	 * boundaries.
	 */
	ret = 0;
	if (len != dev->mr_page_size)
		ret = srp_map_finish_fmr(state, ch);
	return ret;
}

static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
			  struct srp_request *req, struct scatterlist *scat,
			  int count)
{
	struct scatterlist *sg;
	int i, ret;

	state->pages = req->map_page;
	state->fmr.next = req->fmr_list;
	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;

	for_each_sg(scat, sg, count, i) {
		ret = srp_map_sg_entry(state, ch, sg, i);
		if (ret)
			return ret;
	}

	ret = srp_map_finish_fmr(state, ch);
	if (ret)
		return ret;

	return 0;
}

static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
			 struct srp_request *req, struct scatterlist *scat,
			 int count)
{
	unsigned int sg_offset = 0;

	state->fr.next = req->fr_list;
	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
	state->sg = scat;

	if (count == 0)
		return 0;

	while (count) {
		int i, n;

		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
		if (unlikely(n < 0))
			return n;

		count -= n;
		for (i = 0; i < n; i++)
			state->sg = sg_next(state->sg);
	}

	return 0;
}

static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
			  struct srp_request *req, struct scatterlist *scat,
			  int count)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct scatterlist *sg;
	int i;

	for_each_sg(scat, sg, count, i) {
		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
			     ib_sg_dma_len(dev->dev, sg),
			     target->global_mr->rkey);
	}

	return 0;
}

/*
 * Register the indirect data buffer descriptor with the HCA.
 *
 * Note: since the indirect data buffer descriptor has been allocated with
 * kmalloc() it is guaranteed that this buffer is a physically contiguous
 * memory block.
 */
static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
		       void **next_mr, void **end_mr, u32 idb_len,
		       __be32 *idb_rkey)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct srp_map_state state;
	struct srp_direct_buf idb_desc;
	u64 idb_pages[1];
	struct scatterlist idb_sg[1];
	int ret;

	memset(&state, 0, sizeof(state));
	memset(&idb_desc, 0, sizeof(idb_desc));
	state.gen.next = next_mr;
	state.gen.end = end_mr;
	state.desc = &idb_desc;
	state.base_dma_addr = req->indirect_dma_addr;
	state.dma_len = idb_len;

	if (dev->use_fast_reg) {
		state.sg = idb_sg;
		sg_init_one(idb_sg, req->indirect_desc, idb_len);
		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
#endif
		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
		if (ret < 0)
			return ret;
		WARN_ON_ONCE(ret < 1);
	} else if (dev->use_fmr) {
		state.pages = idb_pages;
		state.pages[0] = (req->indirect_dma_addr &
				  dev->mr_page_mask);
		state.npages = 1;
		ret = srp_map_finish_fmr(&state, ch);
		if (ret < 0)
			return ret;
	} else {
		return -EINVAL;
	}

	*idb_rkey = idb_desc.key;

	return 0;
}

#if defined(CONFIG_DYNAMIC_DEBUG)
static void srp_check_mapping(struct srp_map_state *state,
			      struct srp_rdma_ch *ch, struct srp_request *req,
			      struct scatterlist *scat, int count)
{
	struct srp_device *dev = ch->target->srp_host->srp_dev;
	struct srp_fr_desc **pfr;
	u64 desc_len = 0, mr_len = 0;
	int i;

	for (i = 0; i < state->ndesc; i++)
		desc_len += be32_to_cpu(req->indirect_desc[i].len);
	if (dev->use_fast_reg)
		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
			mr_len += (*pfr)->mr->length;
	else if (dev->use_fmr)
		for (i = 0; i < state->nmdesc; i++)
			mr_len += be32_to_cpu(req->indirect_desc[i].len);
	if (desc_len != scsi_bufflen(req->scmnd) ||
	    mr_len > scsi_bufflen(req->scmnd))
		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
		       scsi_bufflen(req->scmnd), desc_len, mr_len,
		       state->ndesc, state->nmdesc);
}
#endif

/**
 * srp_map_data() - map SCSI data buffer onto an SRP request
 * @scmnd: SCSI command to map
 * @ch: SRP RDMA channel
 * @req: SRP request
 *
 * Returns the length in bytes of the SRP_CMD IU or a negative value if
 * mapping failed.
 */
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
			struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct scatterlist *scat;
	struct srp_cmd *cmd = req->cmd->buf;
	int len, nents, count, ret;
	struct srp_device *dev;
	struct ib_device *ibdev;
	struct srp_map_state state;
	struct srp_indirect_buf *indirect_hdr;
	u32 idb_len, table_len;
	__be32 idb_rkey;
	u8 fmt;

	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
		return sizeof (struct srp_cmd);

	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled data direction %d\n",
			     scmnd->sc_data_direction);
		return -EINVAL;
	}

	nents = scsi_sg_count(scmnd);
	scat  = scsi_sglist(scmnd);

	dev = target->srp_host->srp_dev;
	ibdev = dev->dev;

	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
	if (unlikely(count == 0))
		return -EIO;

	fmt = SRP_DATA_DESC_DIRECT;
	len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);

	if (count == 1 && target->global_mr) {
		/*
		 * The midlayer only generated a single gather/scatter
		 * entry, or DMA mapping coalesced everything to a
		 * single entry.  So a direct descriptor along with
		 * the DMA MR suffices.
		 */
		struct srp_direct_buf *buf = (void *) cmd->add_data;

		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
		buf->key = cpu_to_be32(target->global_mr->rkey);
		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));

		req->nmdesc = 0;
		goto map_complete;
	}

	/*
	 * We have more than one scatter/gather entry, so build our indirect
	 * descriptor table, trying to merge as many entries as we can.
	 */
	indirect_hdr = (void *) cmd->add_data;

	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
				   target->indirect_size, DMA_TO_DEVICE);

	memset(&state, 0, sizeof(state));
	state.desc = req->indirect_desc;
	if (dev->use_fast_reg)
		ret = srp_map_sg_fr(&state, ch, req, scat, count);
	else if (dev->use_fmr)
		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
	else
		ret = srp_map_sg_dma(&state, ch, req, scat, count);
	req->nmdesc = state.nmdesc;
	if (ret < 0)
		goto unmap;

#if defined(CONFIG_DYNAMIC_DEBUG)
	{
		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
			"Memory mapping consistency check");
		if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
			srp_check_mapping(&state, ch, req, scat, count);
	}
#endif

	/*
	 * We've mapped the request, now pull as much of the indirect
	 * descriptor table as we can into the command buffer. If this
	 * target is not using an external indirect table, we are
	 * guaranteed to fit into the command, as the SCSI layer won't
	 * give us more S/G entries than we allow.
	 */
	if (state.ndesc == 1) {
		/*
		 * Memory registration collapsed the sg-list into one entry,
		 * so use a direct descriptor.
		 */
		struct srp_direct_buf *buf = (void *) cmd->add_data;

		*buf = req->indirect_desc[0];
		goto map_complete;
	}

	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
		     !target->allow_ext_sg)) {
		shost_printk(KERN_ERR, target->scsi_host,
			     "Could not fit S/G list into SRP_CMD\n");
		ret = -EIO;
		goto unmap;
	}

	count = min(state.ndesc, target->cmd_sg_cnt);
	table_len = state.ndesc * sizeof (struct srp_direct_buf);
	idb_len = sizeof(struct srp_indirect_buf) + table_len;

	fmt = SRP_DATA_DESC_INDIRECT;
	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
	len += count * sizeof (struct srp_direct_buf);

	memcpy(indirect_hdr->desc_list, req->indirect_desc,
	       count * sizeof (struct srp_direct_buf));

	if (!target->global_mr) {
		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
				  idb_len, &idb_rkey);
		if (ret < 0)
			goto unmap;
		req->nmdesc++;
	} else {
		idb_rkey = cpu_to_be32(target->global_mr->rkey);
	}

	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
	indirect_hdr->table_desc.key = idb_rkey;
	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
	indirect_hdr->len = cpu_to_be32(state.total_len);

	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
		cmd->data_out_desc_cnt = count;
	else
		cmd->data_in_desc_cnt = count;

	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
				      DMA_TO_DEVICE);

map_complete:
	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
		cmd->buf_fmt = fmt << 4;
	else
		cmd->buf_fmt = fmt;

	return len;

unmap:
	srp_unmap_data(scmnd, ch, req);
	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
		ret = -E2BIG;
	return ret;
}

/*
 * Return an IU and possible credit to the free pool
 */
static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
			  enum srp_iu_type iu_type)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	list_add(&iu->list, &ch->free_tx);
	if (iu_type != SRP_IU_RSP)
		++ch->req_lim;
	spin_unlock_irqrestore(&ch->lock, flags);
}

/*
 * Must be called with ch->lock held to protect req_lim and free_tx.
 * If IU is not sent, it must be returned using srp_put_tx_iu().
 *
 * Note:
 * An upper limit for the number of allocated information units for each
 * request type is:
 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
 *   more than Scsi_Host.can_queue requests.
 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
 *   one unanswered SRP request to an initiator.
 */
static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
				      enum srp_iu_type iu_type)
{
	struct srp_target_port *target = ch->target;
	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
	struct srp_iu *iu;

	ib_process_cq_direct(ch->send_cq, -1);

	if (list_empty(&ch->free_tx))
		return NULL;

	/* Initiator responses to target requests do not consume credits */
	if (iu_type != SRP_IU_RSP) {
		if (ch->req_lim <= rsv) {
			++target->zero_req_lim;
			return NULL;
		}

		--ch->req_lim;
	}

	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
	list_del(&iu->list);
	return iu;
}

static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
	struct srp_rdma_ch *ch = cq->cq_context;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		srp_handle_qp_err(cq, wc, "SEND");
		return;
	}

	list_add(&iu->list, &ch->free_tx);
}

static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
	struct srp_target_port *target = ch->target;
	struct ib_sge list;
	struct ib_send_wr wr, *bad_wr;

	list.addr   = iu->dma;
	list.length = len;
	list.lkey   = target->lkey;

	iu->cqe.done = srp_send_done;

	wr.next       = NULL;
	wr.wr_cqe     = &iu->cqe;
	wr.sg_list    = &list;
	wr.num_sge    = 1;
	wr.opcode     = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	return ib_post_send(ch->qp, &wr, &bad_wr);
}

static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
{
	struct srp_target_port *target = ch->target;
	struct ib_recv_wr wr, *bad_wr;
	struct ib_sge list;

	list.addr   = iu->dma;
	list.length = iu->size;
	list.lkey   = target->lkey;

	iu->cqe.done = srp_recv_done;

	wr.next     = NULL;
	wr.wr_cqe   = &iu->cqe;
	wr.sg_list  = &list;
	wr.num_sge  = 1;

	return ib_post_recv(ch->qp, &wr, &bad_wr);
}

static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
{
	struct srp_target_port *target = ch->target;
	struct srp_request *req;
	struct scsi_cmnd *scmnd;
	unsigned long flags;

	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
		spin_lock_irqsave(&ch->lock, flags);
		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
		spin_unlock_irqrestore(&ch->lock, flags);

		ch->tsk_mgmt_status = -1;
		if (be32_to_cpu(rsp->resp_data_len) >= 4)
			ch->tsk_mgmt_status = rsp->data[3];
		complete(&ch->tsk_mgmt_done);
	} else {
		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
		if (scmnd) {
			req = (void *)scmnd->host_scribble;
			scmnd = srp_claim_req(ch, req, NULL, scmnd);
		}
		if (!scmnd) {
			shost_printk(KERN_ERR, target->scsi_host,
				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
				     rsp->tag, ch - target->ch, ch->qp->qp_num);

			spin_lock_irqsave(&ch->lock, flags);
			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
			spin_unlock_irqrestore(&ch->lock, flags);

			return;
		}
		scmnd->result = rsp->status;

		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
			memcpy(scmnd->sense_buffer, rsp->data +
			       be32_to_cpu(rsp->resp_data_len),
			       min_t(int, be32_to_cpu(rsp->sense_data_len),
				     SCSI_SENSE_BUFFERSIZE));
		}

		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));

		srp_free_req(ch, req, scmnd,
			     be32_to_cpu(rsp->req_lim_delta));

		scmnd->host_scribble = NULL;
		scmnd->scsi_done(scmnd);
	}
}

static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
			       void *rsp, int len)
{
	struct srp_target_port *target = ch->target;
	struct ib_device *dev = target->srp_host->srp_dev->dev;
	unsigned long flags;
	struct srp_iu *iu;
	int err;

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_delta;
	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
	spin_unlock_irqrestore(&ch->lock, flags);

	if (!iu) {
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "no IU available to send response\n");
		return 1;
	}

	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
	memcpy(iu->buf, rsp, len);
	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);

	err = srp_post_send(ch, iu, len);
	if (err) {
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "unable to post response: %d\n", err);
		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
	}

	return err;
}

static void srp_process_cred_req(struct srp_rdma_ch *ch,
				 struct srp_cred_req *req)
{
	struct srp_cred_rsp rsp = {
		.opcode = SRP_CRED_RSP,
		.tag = req->tag,
	};
	s32 delta = be32_to_cpu(req->req_lim_delta);

	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
			     "problems processing SRP_CRED_REQ\n");
}

static void srp_process_aer_req(struct srp_rdma_ch *ch,
				struct srp_aer_req *req)
{
	struct srp_target_port *target = ch->target;
	struct srp_aer_rsp rsp = {
		.opcode = SRP_AER_RSP,
		.tag = req->tag,
	};
	s32 delta = be32_to_cpu(req->req_lim_delta);

	shost_printk(KERN_ERR, target->scsi_host, PFX
		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));

	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "problems processing SRP_AER_REQ\n");
}

static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
	struct srp_rdma_ch *ch = cq->cq_context;
	struct srp_target_port *target = ch->target;
	struct ib_device *dev = target->srp_host->srp_dev->dev;
	int res;
	u8 opcode;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		srp_handle_qp_err(cq, wc, "RECV");
		return;
	}

	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
				   DMA_FROM_DEVICE);

	opcode = *(u8 *) iu->buf;

	if (0) {
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "recv completion, opcode 0x%02x\n", opcode);
		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
			       iu->buf, wc->byte_len, true);
	}

	switch (opcode) {
	case SRP_RSP:
		srp_process_rsp(ch, iu->buf);
		break;

	case SRP_CRED_REQ:
		srp_process_cred_req(ch, iu->buf);
		break;

	case SRP_AER_REQ:
		srp_process_aer_req(ch, iu->buf);
		break;

	case SRP_T_LOGOUT:
		/* XXX Handle target logout */
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Got target logout request\n");
		break;

	default:
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
		break;
	}

	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
				      DMA_FROM_DEVICE);

	res = srp_post_recv(ch, iu);
	if (res != 0)
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Recv failed with error code %d\n", res);
}

/**
 * srp_tl_err_work() - handle a transport layer error
 * @work: Work structure embedded in an SRP target port.
 *
 * Note: This function may get invoked before the rport has been created,
 * hence the target->rport test.
 */
static void srp_tl_err_work(struct work_struct *work)
{
	struct srp_target_port *target;

	target = container_of(work, struct srp_target_port, tl_err_work);
	if (target->rport)
		srp_start_tl_fail_timers(target->rport);
}

static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
		const char *opname)
{
	struct srp_rdma_ch *ch = cq->cq_context;
	struct srp_target_port *target = ch->target;

	if (ch->connected && !target->qp_in_error) {
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "failed %s status %s (%d) for CQE %p\n",
			     opname, ib_wc_status_msg(wc->status), wc->status,
			     wc->wr_cqe);
		queue_work(system_long_wq, &target->tl_err_work);
	}
	target->qp_in_error = true;
}

static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
	struct srp_target_port *target = host_to_target(shost);
	struct srp_rport *rport = target->rport;
	struct srp_rdma_ch *ch;
	struct srp_request *req;
	struct srp_iu *iu;
	struct srp_cmd *cmd;
	struct ib_device *dev;
	unsigned long flags;
	u32 tag;
	u16 idx;
	int len, ret;
	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;

	/*
	 * The SCSI EH thread is the only context from which srp_queuecommand()
	 * can get invoked for blocked devices (SDEV_BLOCK /
	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
	 * locking the rport mutex if invoked from inside the SCSI EH.
	 */
	if (in_scsi_eh)
		mutex_lock(&rport->mutex);

	scmnd->result = srp_chkready(target->rport);
	if (unlikely(scmnd->result))
		goto err;

	WARN_ON_ONCE(scmnd->request->tag < 0);
	tag = blk_mq_unique_tag(scmnd->request);
	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
	idx = blk_mq_unique_tag_to_tag(tag);
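	/*
	 * Illustrative example: blk_mq_unique_tag() encodes the hardware
	 * queue index in the upper 16 bits and the per-queue tag in the
	 * lower 16 bits, so a unique tag of 0x00010005 selects RDMA
	 * channel 1 and request ring slot 5.
	 */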
	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
		  dev_name(&shost->shost_gendev), tag, idx,
		  target->req_ring_size);

	spin_lock_irqsave(&ch->lock, flags);
	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
	spin_unlock_irqrestore(&ch->lock, flags);

	if (!iu)
		goto err;

	req = &ch->req_ring[idx];
	dev = target->srp_host->srp_dev->dev;
	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
				   DMA_TO_DEVICE);

	scmnd->host_scribble = (void *) req;

	cmd = iu->buf;
	memset(cmd, 0, sizeof *cmd);

	cmd->opcode = SRP_CMD;
	int_to_scsilun(scmnd->device->lun, &cmd->lun);
	cmd->tag    = tag;
	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);

	req->scmnd    = scmnd;
	req->cmd      = iu;

	len = srp_map_data(scmnd, ch, req);
	if (len < 0) {
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Failed to map data (%d)\n", len);
		/*
		 * If we ran out of memory descriptors (-ENOMEM) because an
		 * application is queuing many requests with more than
		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
		 * to reduce queue depth temporarily.
		 */
		scmnd->result = len == -ENOMEM ?
			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
		goto err_iu;
	}

	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
				      DMA_TO_DEVICE);

	if (srp_post_send(ch, iu, len)) {
		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
		goto err_unmap;
	}

	ret = 0;

unlock_rport:
	if (in_scsi_eh)
		mutex_unlock(&rport->mutex);

	return ret;

err_unmap:
	srp_unmap_data(scmnd, ch, req);

err_iu:
	srp_put_tx_iu(ch, iu, SRP_IU_CMD);

	/*
	 * Prevent the loops that iterate over the request ring from
	 * encountering a dangling SCSI command pointer.
	 */
	req->scmnd = NULL;

err:
	if (scmnd->result) {
		scmnd->scsi_done(scmnd);
		ret = 0;
	} else {
		ret = SCSI_MLQUEUE_HOST_BUSY;
	}

	goto unlock_rport;
}

/*
 * Note: the resources allocated in this function are freed in
 * srp_free_ch_ib().
 */
static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	int i;

	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
			      GFP_KERNEL);
	if (!ch->rx_ring)
		goto err_no_ring;
	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
			      GFP_KERNEL);
	if (!ch->tx_ring)
		goto err_no_ring;

	for (i = 0; i < target->queue_size; ++i) {
		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
					      ch->max_ti_iu_len,
					      GFP_KERNEL, DMA_FROM_DEVICE);
		if (!ch->rx_ring[i])
			goto err;
	}

	for (i = 0; i < target->queue_size; ++i) {
		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
					      target->max_iu_len,
					      GFP_KERNEL, DMA_TO_DEVICE);
		if (!ch->tx_ring[i])
			goto err;

		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
	}

	return 0;

err:
	for (i = 0; i < target->queue_size; ++i) {
		srp_free_iu(target->srp_host, ch->rx_ring[i]);
		srp_free_iu(target->srp_host, ch->tx_ring[i]);
	}

err_no_ring:
	kfree(ch->tx_ring);
	ch->tx_ring = NULL;
	kfree(ch->rx_ring);
	ch->rx_ring = NULL;

	return -ENOMEM;
}

static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
{
	uint64_t T_tr_ns, max_compl_time_ms;
	uint32_t rq_tmo_jiffies;

	/*
	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
	 * table 91), both the QP timeout and the retry count have to be set
	 * for RC QP's during the RTR to RTS transition.
	 */
	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));

	/*
	 * Set target->rq_tmo_jiffies to one second more than the largest time
	 * it can take before an error completion is generated. See also
	 * C9-140..142 in the IBTA spec for more information about how to
	 * convert the QP Local ACK Timeout value to nanoseconds.
	 */
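	/*
	 * Worked example (illustrative values): with qp_attr->timeout = 19
	 * and qp_attr->retry_cnt = 7, T_tr = 4096 ns * 2^19 ~= 2.15 s, the
	 * largest completion time is 7 * 4 * 2.15 s ~= 60 s, and the
	 * resulting timeout is roughly 61 seconds worth of jiffies.
	 */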
	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
	do_div(max_compl_time_ms, NSEC_PER_MSEC);
	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);

	return rq_tmo_jiffies;
}

static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
			       const struct srp_login_rsp *lrsp,
			       struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct ib_qp_attr *qp_attr = NULL;
	int attr_mask = 0;
	int ret;
	int i;

	if (lrsp->opcode == SRP_LOGIN_RSP) {
		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);

		/*
		 * Reserve credits for task management so we don't
		 * bounce requests back to the SCSI mid-layer.
		 */
		target->scsi_host->can_queue
			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
			      target->scsi_host->can_queue);
		target->scsi_host->cmd_per_lun
			= min_t(int, target->scsi_host->can_queue,
				target->scsi_host->cmd_per_lun);
	} else {
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
		ret = -ECONNRESET;
		goto error;
	}

	if (!ch->rx_ring) {
		ret = srp_alloc_iu_bufs(ch);
		if (ret)
			goto error;
	}

	ret = -ENOMEM;
	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
	if (!qp_attr)
		goto error;

	qp_attr->qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
	if (ret)
		goto error_free;

	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
	if (ret)
		goto error_free;

	for (i = 0; i < target->queue_size; i++) {
		struct srp_iu *iu = ch->rx_ring[i];

		ret = srp_post_recv(ch, iu);
		if (ret)
			goto error_free;
	}

	qp_attr->qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
	if (ret)
		goto error_free;

	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);

	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
	if (ret)
		goto error_free;

	ret = ib_send_cm_rtu(cm_id, NULL, 0);

error_free:
	kfree(qp_attr);

error:
	ch->status = ret;
}

static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
			       struct ib_cm_event *event,
			       struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct Scsi_Host *shost = target->scsi_host;
	struct ib_class_port_info *cpi;
	int opcode;

	switch (event->param.rej_rcvd.reason) {
	case IB_CM_REJ_PORT_CM_REDIRECT:
		cpi = event->param.rej_rcvd.ari;
		ch->path.dlid = cpi->redirect_lid;
		ch->path.pkey = cpi->redirect_pkey;
		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);

		ch->status = ch->path.dlid ?
			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
		break;

	case IB_CM_REJ_PORT_REDIRECT:
		if (srp_target_is_topspin(target)) {
			/*
			 * Topspin/Cisco SRP gateways incorrectly send
			 * reject reason code 25 when they mean 24
			 * (port redirect).
			 */
			memcpy(ch->path.dgid.raw,
			       event->param.rej_rcvd.ari, 16);

			shost_printk(KERN_DEBUG, shost,
				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
				     be64_to_cpu(ch->path.dgid.global.interface_id));

			ch->status = SRP_PORT_REDIRECT;
		} else {
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
			ch->status = -ECONNRESET;
		}
		break;

	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
		shost_printk(KERN_WARNING, shost,
			     "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REJ_CONSUMER_DEFINED:
		opcode = *(u8 *) event->private_data;
		if (opcode == SRP_LOGIN_REJ) {
			struct srp_login_rej *rej = event->private_data;
			u32 reason = be32_to_cpu(rej->reason);

			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
				shost_printk(KERN_WARNING, shost,
					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
			else
				shost_printk(KERN_WARNING, shost, PFX
					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
					     target->sgid.raw,
					     target->orig_dgid.raw, reason);
		} else
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
				     " opcode 0x%02x\n", opcode);
		ch->status = -ECONNRESET;
		break;

	case IB_CM_REJ_STALE_CONN:
		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
		ch->status = SRP_STALE_CONN;
		break;

	default:
		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
			     event->param.rej_rcvd.reason);
		ch->status = -ECONNRESET;
	}
}
2435 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2437 struct srp_rdma_ch *ch = cm_id->context;
2438 struct srp_target_port *target = ch->target;
2441 switch (event->event) {
2442 case IB_CM_REQ_ERROR:
2443 shost_printk(KERN_DEBUG, target->scsi_host,
2444 PFX "Sending CM REQ failed\n");
2446 ch->status = -ECONNRESET;
2449 case IB_CM_REP_RECEIVED:
2451 srp_cm_rep_handler(cm_id, event->private_data, ch);
2454 case IB_CM_REJ_RECEIVED:
2455 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2458 srp_cm_rej_handler(cm_id, event, ch);
2461 case IB_CM_DREQ_RECEIVED:
2462 shost_printk(KERN_WARNING, target->scsi_host,
2463 PFX "DREQ received - connection closed\n");
2464 ch->connected = false;
2465 if (ib_send_cm_drep(cm_id, NULL, 0))
2466 shost_printk(KERN_ERR, target->scsi_host,
2467 PFX "Sending CM DREP failed\n");
2468 queue_work(system_long_wq, &target->tl_err_work);
2471 case IB_CM_TIMEWAIT_EXIT:
2472 shost_printk(KERN_ERR, target->scsi_host,
2473 PFX "connection closed\n");
2479 case IB_CM_MRA_RECEIVED:
2480 case IB_CM_DREQ_ERROR:
2481 case IB_CM_DREP_RECEIVED:
2485 shost_printk(KERN_WARNING, target->scsi_host,
2486 PFX "Unhandled CM event %d\n", event->event);
2491 complete(&ch->done);
2497 * srp_change_queue_depth - set the device queue depth
2498 * @sdev: SCSI device struct
2499 * @qdepth: requested queue depth
2501 * Returns the new queue depth.
2504 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2506 if (!sdev->tagged_supported)
2507 qdepth = 1;
2508 return scsi_change_queue_depth(sdev, qdepth);
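/*
 * Usage note for srp_change_queue_depth() above (illustrative): the
 * SCSI core invokes this callback when user space adjusts the
 * per-device queue depth, e.g.
 *
 *   echo 64 > /sys/block/sdX/device/queue_depth
 */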
2511 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2514 struct srp_target_port *target = ch->target;
2515 struct srp_rport *rport = target->rport;
2516 struct ib_device *dev = target->srp_host->srp_dev->dev;
2518 struct srp_tsk_mgmt *tsk_mgmt;
2520 if (!ch->connected || target->qp_in_error)
2521 return -1;
2523 init_completion(&ch->tsk_mgmt_done);
2526 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2527 * invoked while a task management function is being sent.
2529 mutex_lock(&rport->mutex);
2530 spin_lock_irq(&ch->lock);
2531 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2532 spin_unlock_irq(&ch->lock);
2534 if (!iu) {
2535 mutex_unlock(&rport->mutex);
2537 return -1;
2540 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2543 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2545 tsk_mgmt->opcode = SRP_TSK_MGMT;
2546 int_to_scsilun(lun, &tsk_mgmt->lun);
2547 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
2548 tsk_mgmt->tsk_mgmt_func = func;
2549 tsk_mgmt->task_tag = req_tag;
2551 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2553 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2554 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2555 mutex_unlock(&rport->mutex);
2557 return -1;
2559 mutex_unlock(&rport->mutex);
2561 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2562 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2563 return -1;
2565 return 0;
2568 static int srp_abort(struct scsi_cmnd *scmnd)
2570 struct srp_target_port *target = host_to_target(scmnd->device->host);
2571 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2574 struct srp_rdma_ch *ch;
2577 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2581 tag = blk_mq_unique_tag(scmnd->request);
2582 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2583 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2584 return SUCCESS;
2585 ch = &target->ch[ch_idx];
2586 if (!srp_claim_req(ch, req, NULL, scmnd))
2587 return SUCCESS;
2588 shost_printk(KERN_ERR, target->scsi_host,
2589 "Sending SRP abort for tag %#x\n", tag);
2590 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2591 SRP_TSK_ABORT_TASK) == 0)
2592 ret = SUCCESS;
2593 else if (target->rport->state == SRP_RPORT_LOST)
2594 ret = FAST_IO_FAIL;
2595 else
2596 ret = FAILED;
2597 srp_free_req(ch, req, scmnd, 0);
2598 scmnd->result = DID_ABORT << 16;
2599 scmnd->scsi_done(scmnd);
2604 static int srp_reset_device(struct scsi_cmnd *scmnd)
2606 struct srp_target_port *target = host_to_target(scmnd->device->host);
2607 struct srp_rdma_ch *ch;
2608 int i, j;
2610 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2612 ch = &target->ch[0];
2613 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2614 SRP_TSK_LUN_RESET))
2615 return FAILED;
2616 if (ch->tsk_mgmt_status)
2617 return FAILED;
2619 for (i = 0; i < target->ch_count; i++) {
2620 ch = &target->ch[i];
2621 for (j = 0; j < target->req_ring_size; ++j) {
2622 struct srp_request *req = &ch->req_ring[j];
2624 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2631 static int srp_reset_host(struct scsi_cmnd *scmnd)
2633 struct srp_target_port *target = host_to_target(scmnd->device->host);
2635 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2637 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2640 static int srp_slave_alloc(struct scsi_device *sdev)
2642 struct Scsi_Host *shost = sdev->host;
2643 struct srp_target_port *target = host_to_target(shost);
2644 struct srp_device *srp_dev = target->srp_host->srp_dev;
2645 struct ib_device *ibdev = srp_dev->dev;
2647 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
2648 blk_queue_virt_boundary(sdev->request_queue,
2649 ~srp_dev->mr_page_mask);
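/*
 * Illustrative example: with a 4 KiB MR page size, mr_page_mask is
 * ~0xfffULL, so the boundary passed to blk_queue_virt_boundary() is
 * 0xfff. The block layer then never builds a request with a gap
 * inside a 4 KiB page, which keeps every S/G element mappable with
 * one MR page entry.
 */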
2654 static int srp_slave_configure(struct scsi_device *sdev)
2656 struct Scsi_Host *shost = sdev->host;
2657 struct srp_target_port *target = host_to_target(shost);
2658 struct request_queue *q = sdev->request_queue;
2659 unsigned long timeout;
2661 if (sdev->type == TYPE_DISK) {
2662 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2663 blk_queue_rq_timeout(q, timeout);
2669 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2672 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2674 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2677 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2680 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2682 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2685 static ssize_t show_service_id(struct device *dev,
2686 struct device_attribute *attr, char *buf)
2688 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2690 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2693 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2696 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2698 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2701 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2704 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2706 return sprintf(buf, "%pI6\n", target->sgid.raw);
2709 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2712 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2713 struct srp_rdma_ch *ch = &target->ch[0];
2715 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2718 static ssize_t show_orig_dgid(struct device *dev,
2719 struct device_attribute *attr, char *buf)
2721 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2723 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2726 static ssize_t show_req_lim(struct device *dev,
2727 struct device_attribute *attr, char *buf)
2729 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2730 struct srp_rdma_ch *ch;
2731 int i, req_lim = INT_MAX;
2733 for (i = 0; i < target->ch_count; i++) {
2734 ch = &target->ch[i];
2735 req_lim = min(req_lim, ch->req_lim);
2737 return sprintf(buf, "%d\n", req_lim);
2740 static ssize_t show_zero_req_lim(struct device *dev,
2741 struct device_attribute *attr, char *buf)
2743 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2745 return sprintf(buf, "%d\n", target->zero_req_lim);
2748 static ssize_t show_local_ib_port(struct device *dev,
2749 struct device_attribute *attr, char *buf)
2751 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2753 return sprintf(buf, "%d\n", target->srp_host->port);
2756 static ssize_t show_local_ib_device(struct device *dev,
2757 struct device_attribute *attr, char *buf)
2759 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2761 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2764 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2767 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2769 return sprintf(buf, "%d\n", target->ch_count);
2772 static ssize_t show_comp_vector(struct device *dev,
2773 struct device_attribute *attr, char *buf)
2775 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2777 return sprintf(buf, "%d\n", target->comp_vector);
2780 static ssize_t show_tl_retry_count(struct device *dev,
2781 struct device_attribute *attr, char *buf)
2783 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2785 return sprintf(buf, "%d\n", target->tl_retry_count);
2788 static ssize_t show_cmd_sg_entries(struct device *dev,
2789 struct device_attribute *attr, char *buf)
2791 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2793 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2796 static ssize_t show_allow_ext_sg(struct device *dev,
2797 struct device_attribute *attr, char *buf)
2799 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2801 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2804 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2805 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2806 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2807 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2808 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2809 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2810 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2811 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2812 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2813 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2814 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2815 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2816 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2817 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2818 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2819 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
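/*
 * Example (illustrative): the attributes above appear under the SCSI
 * host device in sysfs, e.g.
 *
 *   cat /sys/class/scsi_host/host<n>/req_lim
 *   cat /sys/class/scsi_host/host<n>/dgid
 */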
2821 static struct device_attribute *srp_host_attrs[] = {
2824 &dev_attr_service_id,
2828 &dev_attr_orig_dgid,
2830 &dev_attr_zero_req_lim,
2831 &dev_attr_local_ib_port,
2832 &dev_attr_local_ib_device,
2834 &dev_attr_comp_vector,
2835 &dev_attr_tl_retry_count,
2836 &dev_attr_cmd_sg_entries,
2837 &dev_attr_allow_ext_sg,
2841 static struct scsi_host_template srp_template = {
2842 .module = THIS_MODULE,
2843 .name = "InfiniBand SRP initiator",
2844 .proc_name = DRV_NAME,
2845 .slave_alloc = srp_slave_alloc,
2846 .slave_configure = srp_slave_configure,
2847 .info = srp_target_info,
2848 .queuecommand = srp_queuecommand,
2849 .change_queue_depth = srp_change_queue_depth,
2850 .eh_abort_handler = srp_abort,
2851 .eh_device_reset_handler = srp_reset_device,
2852 .eh_host_reset_handler = srp_reset_host,
2853 .skip_settle_delay = true,
2854 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2855 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2857 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2858 .use_clustering = ENABLE_CLUSTERING,
2859 .shost_attrs = srp_host_attrs,
2860 .track_queue_depth = 1,
2863 static int srp_sdev_count(struct Scsi_Host *host)
2865 struct scsi_device *sdev;
2866 int c = 0;
2868 shost_for_each_device(sdev, host)
2869 c++;
2871 return c;
2875 * Return values:
2876 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2877 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2878 * removal has been scheduled.
2879 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2881 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2883 struct srp_rport_identifiers ids;
2884 struct srp_rport *rport;
2886 target->state = SRP_TARGET_SCANNING;
2887 sprintf(target->target_name, "SRP.T10:%016llX",
2888 be64_to_cpu(target->id_ext));
2890 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2891 return -ENODEV;
2893 memcpy(ids.port_id, &target->id_ext, 8);
2894 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2895 ids.roles = SRP_RPORT_ROLE_TARGET;
2896 rport = srp_rport_add(target->scsi_host, &ids);
2897 if (IS_ERR(rport)) {
2898 scsi_remove_host(target->scsi_host);
2899 return PTR_ERR(rport);
2902 rport->lld_data = target;
2903 target->rport = rport;
2905 spin_lock(&host->target_lock);
2906 list_add_tail(&target->list, &host->target_list);
2907 spin_unlock(&host->target_lock);
2909 scsi_scan_target(&target->scsi_host->shost_gendev,
2910 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2912 if (srp_connected_ch(target) < target->ch_count ||
2913 target->qp_in_error) {
2914 shost_printk(KERN_INFO, target->scsi_host,
2915 PFX "SCSI scan failed - removing SCSI host\n");
2916 srp_queue_remove_work(target);
2917 goto out;
2920 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2921 dev_name(&target->scsi_host->shost_gendev),
2922 srp_sdev_count(target->scsi_host));
2924 spin_lock_irq(&target->lock);
2925 if (target->state == SRP_TARGET_SCANNING)
2926 target->state = SRP_TARGET_LIVE;
2927 spin_unlock_irq(&target->lock);
2929 out:
2930 return 0;
2933 static void srp_release_dev(struct device *dev)
2935 struct srp_host *host =
2936 container_of(dev, struct srp_host, dev);
2938 complete(&host->released);
2941 static struct class srp_class = {
2942 .name = "infiniband_srp",
2943 .dev_release = srp_release_dev
2947 * srp_conn_unique() - check whether the connection to a target is unique
2948 * @host: SRP host.
2949 * @target: SRP target port.
2951 static bool srp_conn_unique(struct srp_host *host,
2952 struct srp_target_port *target)
2954 struct srp_target_port *t;
2955 bool ret = false;
2957 if (target->state == SRP_TARGET_REMOVED)
2958 goto out;
2960 ret = true;
2962 spin_lock(&host->target_lock);
2963 list_for_each_entry(t, &host->target_list, list) {
2964 if (t != target &&
2965 target->id_ext == t->id_ext &&
2966 target->ioc_guid == t->ioc_guid &&
2967 target->initiator_ext == t->initiator_ext) {
2968 ret = false;
2969 break;
2972 spin_unlock(&host->target_lock);
2974 out:
2975 return ret;
2979 * Target ports are added by writing
2981 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2982 * pkey=<P_Key>,service_id=<service ID>
2984 * to the add_target sysfs attribute.
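*
* Example with illustrative values, assuming this driver created
* /sys/class/infiniband_srp/srp-mlx4_0-1 for port 1 of HCA mlx4_0:
*
*   echo id_ext=200400a0b8114527,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=200400a0b8114527 > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target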
2988 SRP_OPT_ID_EXT = 1 << 0,
2989 SRP_OPT_IOC_GUID = 1 << 1,
2990 SRP_OPT_DGID = 1 << 2,
2991 SRP_OPT_PKEY = 1 << 3,
2992 SRP_OPT_SERVICE_ID = 1 << 4,
2993 SRP_OPT_MAX_SECT = 1 << 5,
2994 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
2995 SRP_OPT_IO_CLASS = 1 << 7,
2996 SRP_OPT_INITIATOR_EXT = 1 << 8,
2997 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
2998 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
2999 SRP_OPT_SG_TABLESIZE = 1 << 11,
3000 SRP_OPT_COMP_VECTOR = 1 << 12,
3001 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3002 SRP_OPT_QUEUE_SIZE = 1 << 14,
3003 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
3007 SRP_OPT_SERVICE_ID),
3010 static const match_table_t srp_opt_tokens = {
3011 { SRP_OPT_ID_EXT, "id_ext=%s" },
3012 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3013 { SRP_OPT_DGID, "dgid=%s" },
3014 { SRP_OPT_PKEY, "pkey=%x" },
3015 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3016 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3017 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3018 { SRP_OPT_IO_CLASS, "io_class=%x" },
3019 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3020 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3021 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3022 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3023 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3024 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3025 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3026 { SRP_OPT_ERR, NULL }
3029 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3031 char *options, *sep_opt;
3034 substring_t args[MAX_OPT_ARGS];
3040 options = kstrdup(buf, GFP_KERNEL);
3045 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3049 token = match_token(p, srp_opt_tokens, args);
3053 case SRP_OPT_ID_EXT:
3054 p = match_strdup(args);
3059 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3063 case SRP_OPT_IOC_GUID:
3064 p = match_strdup(args);
3069 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3073 case SRP_OPT_DGID:
3074 p = match_strdup(args);
3079 if (strlen(p) != 32) {
3080 pr_warn("bad dest GID parameter '%s'\n", p);
3085 for (i = 0; i < 16; ++i) {
3086 strlcpy(dgid, p + i * 2, sizeof(dgid));
3087 if (sscanf(dgid, "%hhx",
3088 &target->orig_dgid.raw[i]) < 1) {
3097 case SRP_OPT_PKEY:
3098 if (match_hex(args, &token)) {
3099 pr_warn("bad P_Key parameter '%s'\n", p);
3102 target->pkey = cpu_to_be16(token);
3105 case SRP_OPT_SERVICE_ID:
3106 p = match_strdup(args);
3111 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3115 case SRP_OPT_MAX_SECT:
3116 if (match_int(args, &token)) {
3117 pr_warn("bad max sect parameter '%s'\n", p);
3120 target->scsi_host->max_sectors = token;
3123 case SRP_OPT_QUEUE_SIZE:
3124 if (match_int(args, &token) || token < 1) {
3125 pr_warn("bad queue_size parameter '%s'\n", p);
3128 target->scsi_host->can_queue = token;
3129 target->queue_size = token + SRP_RSP_SQ_SIZE +
3130 SRP_TSK_MGMT_SQ_SIZE;
3131 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3132 target->scsi_host->cmd_per_lun = token;
3135 case SRP_OPT_MAX_CMD_PER_LUN:
3136 if (match_int(args, &token) || token < 1) {
3137 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3141 target->scsi_host->cmd_per_lun = token;
3144 case SRP_OPT_IO_CLASS:
3145 if (match_hex(args, &token)) {
3146 pr_warn("bad IO class parameter '%s'\n", p);
3149 if (token != SRP_REV10_IB_IO_CLASS &&
3150 token != SRP_REV16A_IB_IO_CLASS) {
3151 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3152 token, SRP_REV10_IB_IO_CLASS,
3153 SRP_REV16A_IB_IO_CLASS);
3156 target->io_class = token;
3159 case SRP_OPT_INITIATOR_EXT:
3160 p = match_strdup(args);
3165 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3169 case SRP_OPT_CMD_SG_ENTRIES:
3170 if (match_int(args, &token) || token < 1 || token > 255) {
3171 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3175 target->cmd_sg_cnt = token;
3178 case SRP_OPT_ALLOW_EXT_SG:
3179 if (match_int(args, &token)) {
3180 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3183 target->allow_ext_sg = !!token;
3186 case SRP_OPT_SG_TABLESIZE:
3187 if (match_int(args, &token) || token < 1 ||
3188 token > SG_MAX_SEGMENTS) {
3189 pr_warn("bad max sg_tablesize parameter '%s'\n",
3193 target->sg_tablesize = token;
3196 case SRP_OPT_COMP_VECTOR:
3197 if (match_int(args, &token) || token < 0) {
3198 pr_warn("bad comp_vector parameter '%s'\n", p);
3201 target->comp_vector = token;
3204 case SRP_OPT_TL_RETRY_COUNT:
3205 if (match_int(args, &token) || token < 2 || token > 7) {
3206 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3210 target->tl_retry_count = token;
3214 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3220 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3221 ret = 0;
3222 else
3223 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3224 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3225 !(srp_opt_tokens[i].token & opt_mask))
3226 pr_warn("target creation request is missing parameter '%s'\n",
3227 srp_opt_tokens[i].pattern);
3229 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3230 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3231 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3232 target->scsi_host->cmd_per_lun,
3233 target->scsi_host->can_queue);
3240 static ssize_t srp_create_target(struct device *dev,
3241 struct device_attribute *attr,
3242 const char *buf, size_t count)
3244 struct srp_host *host =
3245 container_of(dev, struct srp_host, dev);
3246 struct Scsi_Host *target_host;
3247 struct srp_target_port *target;
3248 struct srp_rdma_ch *ch;
3249 struct srp_device *srp_dev = host->srp_dev;
3250 struct ib_device *ibdev = srp_dev->dev;
3251 int ret, node_idx, node, cpu, i;
3252 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3253 bool multich = false;
3255 target_host = scsi_host_alloc(&srp_template,
3256 sizeof (struct srp_target_port));
3260 target_host->transportt = ib_srp_transport_template;
3261 target_host->max_channel = 0;
3262 target_host->max_id = 1;
3263 target_host->max_lun = -1LL;
3264 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3266 target = host_to_target(target_host);
3268 target->io_class = SRP_REV16A_IB_IO_CLASS;
3269 target->scsi_host = target_host;
3270 target->srp_host = host;
3271 target->lkey = host->srp_dev->pd->local_dma_lkey;
3272 target->global_mr = host->srp_dev->global_mr;
3273 target->cmd_sg_cnt = cmd_sg_entries;
3274 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3275 target->allow_ext_sg = allow_ext_sg;
3276 target->tl_retry_count = 7;
3277 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3280 * Prevent the SCSI host from being removed by srp_remove_target()
3281 * before this function returns.
3283 scsi_host_get(target->scsi_host);
3285 mutex_lock(&host->add_target_mutex);
3287 ret = srp_parse_options(buf, target);
3291 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3293 if (!srp_conn_unique(target->srp_host, target)) {
3294 shost_printk(KERN_INFO, target->scsi_host,
3295 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3296 be64_to_cpu(target->id_ext),
3297 be64_to_cpu(target->ioc_guid),
3298 be64_to_cpu(target->initiator_ext));
3303 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3304 target->cmd_sg_cnt < target->sg_tablesize) {
3305 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3306 target->sg_tablesize = target->cmd_sg_cnt;
3309 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3311 * FR and FMR can only map one HCA page per entry. If the
3312 * start address is not aligned on an HCA page boundary, two
3313 * entries will be used for the head and the tail, although
3314 * these two entries combined contain at most one HCA page of
3315 * data. Hence the "+ 1" in the calculation below.
3317 * The indirect data buffer descriptor is contiguous so the
3318 * memory for that buffer will only be registered if
3319 * register_always is true. Hence add one to mr_per_cmd if
3320 * register_always has been set.
3322 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3323 (ilog2(srp_dev->mr_page_size) - 9);
3324 mr_per_cmd = register_always +
3325 (target->scsi_host->max_sectors + 1 +
3326 max_sectors_per_mr - 1) / max_sectors_per_mr;
3327 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3328 target->scsi_host->max_sectors,
3329 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3330 max_sectors_per_mr, mr_per_cmd);
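/*
 * Worked example (illustrative numbers): with max_sectors = 1024,
 * max_pages_per_mr = 512 and mr_page_size = 4096,
 * max_sectors_per_mr = 512 << (12 - 9) = 4096 sectors, so with
 * register_always enabled mr_per_cmd = 1 + (1024 + 1 + 4095) / 4096
 * = 1 + 1 = 2 memory regions per command.
 */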
3333 target_host->sg_tablesize = target->sg_tablesize;
3334 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3335 target->mr_per_cmd = mr_per_cmd;
3336 target->indirect_size = target->sg_tablesize *
3337 sizeof (struct srp_direct_buf);
3338 target->max_iu_len = sizeof (struct srp_cmd) +
3339 sizeof (struct srp_indirect_buf) +
3340 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3342 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3343 INIT_WORK(&target->remove_work, srp_remove_work);
3344 spin_lock_init(&target->lock);
3345 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3350 target->ch_count = max_t(unsigned, num_online_nodes(),
3352 min(4 * num_online_nodes(),
3353 ibdev->num_comp_vectors),
3354 num_online_cpus()));
3355 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3361 for_each_online_node(node) {
3362 const int ch_start = (node_idx * target->ch_count /
3363 num_online_nodes());
3364 const int ch_end = ((node_idx + 1) * target->ch_count /
3365 num_online_nodes());
3366 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3367 num_online_nodes() + target->comp_vector)
3368 % ibdev->num_comp_vectors;
3369 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3370 num_online_nodes() + target->comp_vector)
3371 % ibdev->num_comp_vectors;
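/*
 * Illustrative example: with two online NUMA nodes, ch_count = 4,
 * 8 completion vectors and comp_vector = 0, node 0 is assigned
 * channels [0, 2) spread over vectors 0..3 and node 1 channels
 * [2, 4) spread over vectors 4..7.
 */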
3374 for_each_online_cpu(cpu) {
3375 if (cpu_to_node(cpu) != node)
3376 continue;
3377 if (ch_start + cpu_idx >= ch_end)
3378 break;
3379 ch = &target->ch[ch_start + cpu_idx];
3380 ch->target = target;
3381 ch->comp_vector = cv_start == cv_end ? cv_start :
3382 cv_start + cpu_idx % (cv_end - cv_start);
3383 spin_lock_init(&ch->lock);
3384 INIT_LIST_HEAD(&ch->free_tx);
3385 ret = srp_new_cm_id(ch);
3386 if (ret)
3387 goto err_disconnect;
3389 ret = srp_create_ch_ib(ch);
3390 if (ret)
3391 goto err_disconnect;
3393 ret = srp_alloc_req_data(ch);
3394 if (ret)
3395 goto err_disconnect;
3397 ret = srp_connect_ch(ch, multich);
3398 if (ret) {
3399 shost_printk(KERN_ERR, target->scsi_host,
3400 PFX "Connection %d/%d failed\n",
3403 if (node_idx == 0 && cpu_idx == 0) {
3404 goto err_disconnect;
3405 } else {
3406 srp_free_ch_ib(target, ch);
3407 srp_free_req_data(target, ch);
3408 target->ch_count = ch - target->ch;
3409 goto connected;
3419 connected:
3420 target->scsi_host->nr_hw_queues = target->ch_count;
3422 ret = srp_add_target(host, target);
3423 if (ret)
3424 goto err_disconnect;
3426 if (target->state != SRP_TARGET_REMOVED) {
3427 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3428 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3429 be64_to_cpu(target->id_ext),
3430 be64_to_cpu(target->ioc_guid),
3431 be16_to_cpu(target->pkey),
3432 be64_to_cpu(target->service_id),
3433 target->sgid.raw, target->orig_dgid.raw);
3436 ret = count;
3438 out:
3439 mutex_unlock(&host->add_target_mutex);
3441 scsi_host_put(target->scsi_host);
3442 if (ret < 0)
3443 scsi_host_put(target->scsi_host);
3445 return ret;
3447 err_disconnect:
3448 srp_disconnect_target(target);
3450 for (i = 0; i < target->ch_count; i++) {
3451 ch = &target->ch[i];
3452 srp_free_ch_ib(target, ch);
3453 srp_free_req_data(target, ch);
3456 kfree(target->ch);
3457 goto out;
3460 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3462 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3465 struct srp_host *host = container_of(dev, struct srp_host, dev);
3467 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3470 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3472 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3475 struct srp_host *host = container_of(dev, struct srp_host, dev);
3477 return sprintf(buf, "%d\n", host->port);
3480 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3482 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3484 struct srp_host *host;
3486 host = kzalloc(sizeof *host, GFP_KERNEL);
3490 INIT_LIST_HEAD(&host->target_list);
3491 spin_lock_init(&host->target_lock);
3492 init_completion(&host->released);
3493 mutex_init(&host->add_target_mutex);
3494 host->srp_dev = device;
3497 host->dev.class = &srp_class;
3498 host->dev.parent = device->dev->dma_device;
3499 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3501 if (device_register(&host->dev))
3502 goto free_host;
3503 if (device_create_file(&host->dev, &dev_attr_add_target))
3504 goto err_class;
3505 if (device_create_file(&host->dev, &dev_attr_ibdev))
3506 goto err_class;
3507 if (device_create_file(&host->dev, &dev_attr_port))
3508 goto err_class;
3510 return host;
3512 err_class:
3513 device_unregister(&host->dev);
3515 free_host:
3516 kfree(host);
3518 return NULL;
3521 static void srp_add_one(struct ib_device *device)
3523 struct srp_device *srp_dev;
3524 struct srp_host *host;
3525 int mr_page_shift, p;
3526 u64 max_pages_per_mr;
3528 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3533 * Use the smallest page size supported by the HCA, down to a
3534 * minimum of 4096 bytes. We're unlikely to build large sglists
3535 * out of smaller entries.
3537 mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
3538 srp_dev->mr_page_size = 1 << mr_page_shift;
3539 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3540 max_pages_per_mr = device->attrs.max_mr_size;
3541 do_div(max_pages_per_mr, srp_dev->mr_page_size);
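/*
 * Worked example (illustrative): for an HCA whose lowest set bit in
 * page_size_cap is bit 12, mr_page_shift = 12, so mr_page_size = 4096
 * and mr_page_mask = ~0xfffULL. With max_mr_size = 2^32 this yields
 * max_pages_per_mr = 2^20 before the SRP_MAX_PAGES_PER_MR clamp below.
 */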
3542 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3543 device->attrs.max_mr_size, srp_dev->mr_page_size,
3544 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3545 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3548 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3549 device->map_phys_fmr && device->unmap_fmr);
3550 srp_dev->has_fr = (device->attrs.device_cap_flags &
3551 IB_DEVICE_MEM_MGT_EXTENSIONS);
3552 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3553 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3554 } else if (!never_register &&
3555 device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
3556 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3557 (!srp_dev->has_fmr || prefer_fr));
3558 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3561 if (srp_dev->use_fast_reg) {
3562 srp_dev->max_pages_per_mr =
3563 min_t(u32, srp_dev->max_pages_per_mr,
3564 device->attrs.max_fast_reg_page_list_len);
3566 srp_dev->mr_max_size = srp_dev->mr_page_size *
3567 srp_dev->max_pages_per_mr;
3568 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3569 device->name, mr_page_shift, device->attrs.max_mr_size,
3570 device->attrs.max_fast_reg_page_list_len,
3571 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3573 INIT_LIST_HEAD(&srp_dev->dev_list);
3575 srp_dev->dev = device;
3576 srp_dev->pd = ib_alloc_pd(device);
3577 if (IS_ERR(srp_dev->pd))
3578 goto free_dev;
3580 if (never_register || !register_always ||
3581 (!srp_dev->has_fmr && !srp_dev->has_fr)) {
3582 srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd,
3583 IB_ACCESS_LOCAL_WRITE |
3584 IB_ACCESS_REMOTE_READ |
3585 IB_ACCESS_REMOTE_WRITE);
3586 if (IS_ERR(srp_dev->global_mr))
3587 goto err_pd;
3588 } else {
3589 srp_dev->global_mr = NULL;
3590 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3591 host = srp_add_port(srp_dev, p);
3592 if (host)
3593 list_add_tail(&host->list, &srp_dev->dev_list);
3596 ib_set_client_data(device, &srp_client, srp_dev);
3597 return;
3599 err_pd:
3600 ib_dealloc_pd(srp_dev->pd);
3602 free_dev:
3603 kfree(srp_dev);
3606 static void srp_remove_one(struct ib_device *device, void *client_data)
3608 struct srp_device *srp_dev;
3609 struct srp_host *host, *tmp_host;
3610 struct srp_target_port *target;
3612 srp_dev = client_data;
3613 if (!srp_dev)
3614 return;
3616 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3617 device_unregister(&host->dev);
3619 * Wait for the sysfs entry to go away, so that no new
3620 * target ports can be created.
3622 wait_for_completion(&host->released);
3625 * Remove all target ports.
3627 spin_lock(&host->target_lock);
3628 list_for_each_entry(target, &host->target_list, list)
3629 srp_queue_remove_work(target);
3630 spin_unlock(&host->target_lock);
3633 * Wait for tl_err and target port removal tasks.
3635 flush_workqueue(system_long_wq);
3636 flush_workqueue(srp_remove_wq);
3641 if (srp_dev->global_mr)
3642 ib_dereg_mr(srp_dev->global_mr);
3643 ib_dealloc_pd(srp_dev->pd);
3648 static struct srp_function_template ib_srp_transport_functions = {
3649 .has_rport_state = true,
3650 .reset_timer_if_blocked = true,
3651 .reconnect_delay = &srp_reconnect_delay,
3652 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3653 .dev_loss_tmo = &srp_dev_loss_tmo,
3654 .reconnect = srp_rport_reconnect,
3655 .rport_delete = srp_rport_delete,
3656 .terminate_rport_io = srp_terminate_io,
3659 static int __init srp_init_module(void)
3661 int ret;
3663 if (srp_sg_tablesize) {
3664 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3665 if (!cmd_sg_entries)
3666 cmd_sg_entries = srp_sg_tablesize;
3669 if (!cmd_sg_entries)
3670 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3672 if (cmd_sg_entries > 255) {
3673 pr_warn("Clamping cmd_sg_entries to 255\n");
3674 cmd_sg_entries = 255;
3677 if (!indirect_sg_entries)
3678 indirect_sg_entries = cmd_sg_entries;
3679 else if (indirect_sg_entries < cmd_sg_entries) {
3680 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3682 indirect_sg_entries = cmd_sg_entries;
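/*
 * Example (illustrative): loading the module with explicit S/G limits,
 * which the checks above validate and clamp:
 *
 *   modprobe ib_srp cmd_sg_entries=64 indirect_sg_entries=512
 */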
3685 srp_remove_wq = create_workqueue("srp_remove");
3686 if (!srp_remove_wq) {
3687 ret = -ENOMEM;
3688 goto out;
3692 ib_srp_transport_template =
3693 srp_attach_transport(&ib_srp_transport_functions);
3694 if (!ib_srp_transport_template)
3695 goto destroy_wq;
3697 ret = class_register(&srp_class);
3698 if (ret) {
3699 pr_err("couldn't register class infiniband_srp\n");
3700 goto release_tr;
3703 ib_sa_register_client(&srp_sa_client);
3705 ret = ib_register_client(&srp_client);
3706 if (ret) {
3707 pr_err("couldn't register IB client\n");
3708 goto unreg_sa;
3711 return 0;
3714 unreg_sa:
3715 ib_sa_unregister_client(&srp_sa_client);
3716 class_unregister(&srp_class);
3718 release_tr:
3719 srp_release_transport(ib_srp_transport_template);
3721 destroy_wq:
3722 destroy_workqueue(srp_remove_wq);
3724 out:
3725 return ret;
3726 static void __exit srp_cleanup_module(void)
3728 ib_unregister_client(&srp_client);
3729 ib_sa_unregister_client(&srp_sa_client);
3730 class_unregister(&srp_class);
3731 srp_release_transport(ib_srp_transport_template);
3732 destroy_workqueue(srp_remove_wq);
3735 module_init(srp_init_module);
3736 module_exit(srp_cleanup_module);