2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
45 #include <linux/atomic.h>
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
52 #include <scsi/scsi_transport_srp.h>
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
67 #if !defined(CONFIG_DYNAMIC_DEBUG)
68 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
69 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
72 static unsigned int srp_sg_tablesize;
73 static unsigned int cmd_sg_entries;
74 static unsigned int indirect_sg_entries;
75 static bool allow_ext_sg;
76 static bool prefer_fr = true;
77 static bool register_always = true;
78 static bool never_register;
79 static int topspin_workarounds = 1;
81 module_param(srp_sg_tablesize, uint, 0444);
82 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
84 module_param(cmd_sg_entries, uint, 0444);
85 MODULE_PARM_DESC(cmd_sg_entries,
86 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
88 module_param(indirect_sg_entries, uint, 0444);
89 MODULE_PARM_DESC(indirect_sg_entries,
90 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
92 module_param(allow_ext_sg, bool, 0444);
93 MODULE_PARM_DESC(allow_ext_sg,
94 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
96 module_param(topspin_workarounds, int, 0444);
97 MODULE_PARM_DESC(topspin_workarounds,
98 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
100 module_param(prefer_fr, bool, 0444);
101 MODULE_PARM_DESC(prefer_fr,
102 "Whether to use fast registration if both FMR and fast registration are supported");
104 module_param(register_always, bool, 0444);
105 MODULE_PARM_DESC(register_always,
106 "Use memory registration even for contiguous memory regions");
108 module_param(never_register, bool, 0444);
109 MODULE_PARM_DESC(never_register, "Never register memory");
111 static const struct kernel_param_ops srp_tmo_ops;
113 static int srp_reconnect_delay = 10;
114 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
116 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
118 static int srp_fast_io_fail_tmo = 15;
119 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
121 MODULE_PARM_DESC(fast_io_fail_tmo,
122 "Number of seconds between the observation of a transport"
123 " layer error and failing all I/O. \"off\" means that this"
124 " functionality is disabled.");
126 static int srp_dev_loss_tmo = 600;
127 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
129 MODULE_PARM_DESC(dev_loss_tmo,
130 		 "Maximum number of seconds that the SRP transport should insulate"
131 		 " the SCSI host from transport layer errors. After this time has been"
132 " exceeded the SCSI host is removed. Should be"
133 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
134 " if fast_io_fail_tmo has not been set. \"off\" means that"
135 " this functionality is disabled.");
137 static unsigned ch_count;
138 module_param(ch_count, uint, 0444);
139 MODULE_PARM_DESC(ch_count,
140 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
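/*
 * Editorial example of how the parameters declared above can be used; the
 * parameter names are the ones registered in this file, the values are
 * illustrative only:
 *
 *   modprobe ib_srp cmd_sg_entries=32 ch_count=2
 *
 * Assuming the timeout parameters were registered writable, they can also be
 * changed at runtime through sysfs, e.g.:
 *
 *   echo 5   > /sys/module/ib_srp/parameters/reconnect_delay
 *   echo off > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 */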
142 static void srp_add_one(struct ib_device *device);
143 static void srp_remove_one(struct ib_device *device, void *client_data);
144 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
145 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
147 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
149 static struct scsi_transport_template *ib_srp_transport_template;
150 static struct workqueue_struct *srp_remove_wq;
152 static struct ib_client srp_client = {
155 .remove = srp_remove_one
158 static struct ib_sa_client srp_sa_client;
160 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
162 int tmo = *(int *)kp->arg;
165 return sprintf(buffer, "%d", tmo);
167 return sprintf(buffer, "off");
170 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
174 res = srp_parse_tmo(&tmo, val);
178 if (kp->arg == &srp_reconnect_delay)
179 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
181 else if (kp->arg == &srp_fast_io_fail_tmo)
182 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
184 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
188 *(int *)kp->arg = tmo;
194 static const struct kernel_param_ops srp_tmo_ops = {
199 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
201 return (struct srp_target_port *) host->hostdata;
204 static const char *srp_target_info(struct Scsi_Host *host)
206 return host_to_target(host)->target_name;
209 static int srp_target_is_topspin(struct srp_target_port *target)
211 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
212 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
214 return topspin_workarounds &&
215 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
216 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
219 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
221 enum dma_data_direction direction)
225 iu = kmalloc(sizeof *iu, gfp_mask);
229 iu->buf = kzalloc(size, gfp_mask);
233 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
235 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
239 iu->direction = direction;
251 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
256 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
262 static void srp_qp_event(struct ib_event *event, void *context)
264 pr_debug("QP event %s (%d)\n",
265 ib_event_msg(event->event), event->event);
268 static int srp_init_qp(struct srp_target_port *target,
271 struct ib_qp_attr *attr;
274 attr = kmalloc(sizeof *attr, GFP_KERNEL);
278 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
279 target->srp_host->port,
280 be16_to_cpu(target->pkey),
285 attr->qp_state = IB_QPS_INIT;
286 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
287 IB_ACCESS_REMOTE_WRITE);
288 attr->port_num = target->srp_host->port;
290 ret = ib_modify_qp(qp, attr,
301 static int srp_new_cm_id(struct srp_rdma_ch *ch)
303 struct srp_target_port *target = ch->target;
304 struct ib_cm_id *new_cm_id;
306 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
308 if (IS_ERR(new_cm_id))
309 return PTR_ERR(new_cm_id);
312 ib_destroy_cm_id(ch->cm_id);
313 ch->cm_id = new_cm_id;
314 ch->path.sgid = target->sgid;
315 ch->path.dgid = target->orig_dgid;
316 ch->path.pkey = target->pkey;
317 ch->path.service_id = target->service_id;
322 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
324 struct srp_device *dev = target->srp_host->srp_dev;
325 struct ib_fmr_pool_param fmr_param;
327 memset(&fmr_param, 0, sizeof(fmr_param));
328 fmr_param.pool_size = target->mr_pool_size;
329 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
331 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
332 fmr_param.page_shift = ilog2(dev->mr_page_size);
333 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
334 IB_ACCESS_REMOTE_WRITE |
335 IB_ACCESS_REMOTE_READ);
337 return ib_create_fmr_pool(dev->pd, &fmr_param);
341 * srp_destroy_fr_pool() - free the resources owned by a pool
342 * @pool: Fast registration pool to be destroyed.
344 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
347 struct srp_fr_desc *d;
352 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
360 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
361 * @device: IB device to allocate fast registration descriptors for.
362 * @pd: Protection domain associated with the FR descriptors.
363 * @pool_size: Number of descriptors to allocate.
364 * @max_page_list_len: Maximum fast registration work request page list length.
366 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
367 struct ib_pd *pd, int pool_size,
368 int max_page_list_len)
370 struct srp_fr_pool *pool;
371 struct srp_fr_desc *d;
373 int i, ret = -EINVAL;
378 pool = kzalloc(sizeof(struct srp_fr_pool) +
379 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
382 pool->size = pool_size;
383 pool->max_page_list_len = max_page_list_len;
384 spin_lock_init(&pool->lock);
385 INIT_LIST_HEAD(&pool->free_list);
387 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
388 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
393 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
394 dev_name(&device->dev));
398 list_add_tail(&d->entry, &pool->free_list);
405 srp_destroy_fr_pool(pool);
413 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
414 * @pool: Pool to obtain descriptor from.
416 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
418 struct srp_fr_desc *d = NULL;
421 spin_lock_irqsave(&pool->lock, flags);
422 if (!list_empty(&pool->free_list)) {
423 d = list_first_entry(&pool->free_list, typeof(*d), entry);
426 spin_unlock_irqrestore(&pool->lock, flags);
432 * srp_fr_pool_put() - put an FR descriptor back in the free list
433 * @pool: Pool the descriptor was allocated from.
434 * @desc: Pointer to an array of fast registration descriptor pointers.
435 * @n: Number of descriptors to put back.
437 * Note: The caller must already have queued an invalidation request for
438 * desc->mr->rkey before calling this function.
440 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
446 spin_lock_irqsave(&pool->lock, flags);
447 for (i = 0; i < n; i++)
448 list_add(&desc[i]->entry, &pool->free_list);
449 spin_unlock_irqrestore(&pool->lock, flags);
452 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
454 struct srp_device *dev = target->srp_host->srp_dev;
456 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
457 dev->max_pages_per_mr);
461 * srp_destroy_qp() - destroy an RDMA queue pair
462 * @qp: RDMA queue pair.
464 * Drain the qp before destroying it. This prevents the receive
465 * completion handler from accessing the queue pair while it is
468 static void srp_destroy_qp(struct ib_qp *qp)
474 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
476 struct srp_target_port *target = ch->target;
477 struct srp_device *dev = target->srp_host->srp_dev;
478 struct ib_qp_init_attr *init_attr;
479 struct ib_cq *recv_cq, *send_cq;
481 struct ib_fmr_pool *fmr_pool = NULL;
482 struct srp_fr_pool *fr_pool = NULL;
483 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
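	/*
	 * Sketch of the reasoning behind the multiplier computed above
	 * (editorial note, not taken from the original source): each SRP
	 * command consumes one send WR for the SRP_CMD IU itself and, when
	 * fast registration is used, up to mr_per_cmd IB_WR_REG_MR WRs plus
	 * up to mr_per_cmd IB_WR_LOCAL_INV WRs, hence
	 * 1 + use_fast_reg * mr_per_cmd * 2 send WRs per command slot.
	 */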
486 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
490 /* queue_size + 1 for ib_drain_rq() */
491 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
492 ch->comp_vector, IB_POLL_SOFTIRQ);
493 if (IS_ERR(recv_cq)) {
494 ret = PTR_ERR(recv_cq);
498 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
499 ch->comp_vector, IB_POLL_DIRECT);
500 if (IS_ERR(send_cq)) {
501 ret = PTR_ERR(send_cq);
505 init_attr->event_handler = srp_qp_event;
506 init_attr->cap.max_send_wr = m * target->queue_size;
507 init_attr->cap.max_recv_wr = target->queue_size + 1;
508 init_attr->cap.max_recv_sge = 1;
509 init_attr->cap.max_send_sge = 1;
510 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
511 init_attr->qp_type = IB_QPT_RC;
512 init_attr->send_cq = send_cq;
513 init_attr->recv_cq = recv_cq;
515 qp = ib_create_qp(dev->pd, init_attr);
521 ret = srp_init_qp(target, qp);
525 if (dev->use_fast_reg) {
526 fr_pool = srp_alloc_fr_pool(target);
527 if (IS_ERR(fr_pool)) {
528 ret = PTR_ERR(fr_pool);
529 shost_printk(KERN_WARNING, target->scsi_host, PFX
530 "FR pool allocation failed (%d)\n", ret);
533 } else if (dev->use_fmr) {
534 fmr_pool = srp_alloc_fmr_pool(target);
535 if (IS_ERR(fmr_pool)) {
536 ret = PTR_ERR(fmr_pool);
537 shost_printk(KERN_WARNING, target->scsi_host, PFX
538 "FMR pool allocation failed (%d)\n", ret);
544 srp_destroy_qp(ch->qp);
546 ib_free_cq(ch->recv_cq);
548 ib_free_cq(ch->send_cq);
551 ch->recv_cq = recv_cq;
552 ch->send_cq = send_cq;
554 if (dev->use_fast_reg) {
556 srp_destroy_fr_pool(ch->fr_pool);
557 ch->fr_pool = fr_pool;
558 } else if (dev->use_fmr) {
560 ib_destroy_fmr_pool(ch->fmr_pool);
561 ch->fmr_pool = fmr_pool;
582 * Note: this function may be called without srp_alloc_iu_bufs() having been
583 * invoked. Hence the ch->[rt]x_ring checks.
585 static void srp_free_ch_ib(struct srp_target_port *target,
586 struct srp_rdma_ch *ch)
588 struct srp_device *dev = target->srp_host->srp_dev;
595 ib_destroy_cm_id(ch->cm_id);
599 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() failed, return. */
603 if (dev->use_fast_reg) {
605 srp_destroy_fr_pool(ch->fr_pool);
606 } else if (dev->use_fmr) {
608 ib_destroy_fmr_pool(ch->fmr_pool);
611 srp_destroy_qp(ch->qp);
612 ib_free_cq(ch->send_cq);
613 ib_free_cq(ch->recv_cq);
616 * Prevent the SCSI error handler from using this channel after it has
617 * been freed: the error handler can continue trying to perform recovery
618 * actions even after scsi_remove_host()
624 ch->send_cq = ch->recv_cq = NULL;
627 for (i = 0; i < target->queue_size; ++i)
628 srp_free_iu(target->srp_host, ch->rx_ring[i]);
633 for (i = 0; i < target->queue_size; ++i)
634 srp_free_iu(target->srp_host, ch->tx_ring[i]);
640 static void srp_path_rec_completion(int status,
641 struct ib_sa_path_rec *pathrec,
644 struct srp_rdma_ch *ch = ch_ptr;
645 struct srp_target_port *target = ch->target;
649 shost_printk(KERN_ERR, target->scsi_host,
650 PFX "Got failed path rec status %d\n", status);
656 static int srp_lookup_path(struct srp_rdma_ch *ch)
658 struct srp_target_port *target = ch->target;
661 ch->path.numb_path = 1;
663 init_completion(&ch->done);
665 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
666 target->srp_host->srp_dev->dev,
667 target->srp_host->port,
669 IB_SA_PATH_REC_SERVICE_ID |
670 IB_SA_PATH_REC_DGID |
671 IB_SA_PATH_REC_SGID |
672 IB_SA_PATH_REC_NUMB_PATH |
674 SRP_PATH_REC_TIMEOUT_MS,
676 srp_path_rec_completion,
677 ch, &ch->path_query);
678 if (ch->path_query_id < 0)
679 return ch->path_query_id;
681 ret = wait_for_completion_interruptible(&ch->done);
686 shost_printk(KERN_WARNING, target->scsi_host,
687 PFX "Path record query failed\n");
692 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
694 struct srp_target_port *target = ch->target;
696 struct ib_cm_req_param param;
697 struct srp_login_req priv;
701 req = kzalloc(sizeof *req, GFP_KERNEL);
705 req->param.primary_path = &ch->path;
706 req->param.alternate_path = NULL;
707 req->param.service_id = target->service_id;
708 req->param.qp_num = ch->qp->qp_num;
709 req->param.qp_type = ch->qp->qp_type;
710 req->param.private_data = &req->priv;
711 req->param.private_data_len = sizeof req->priv;
712 req->param.flow_control = 1;
714 get_random_bytes(&req->param.starting_psn, 4);
715 req->param.starting_psn &= 0xffffff;
718 * Pick some arbitrary defaults here; we could make these
719 * module parameters if anyone cared about setting them.
721 req->param.responder_resources = 4;
722 req->param.remote_cm_response_timeout = 20;
723 req->param.local_cm_response_timeout = 20;
724 req->param.retry_count = target->tl_retry_count;
725 req->param.rnr_retry_count = 7;
726 req->param.max_cm_retries = 15;
728 req->priv.opcode = SRP_LOGIN_REQ;
730 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
731 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
732 SRP_BUF_FORMAT_INDIRECT);
733 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
734 SRP_MULTICHAN_SINGLE);
736 * In the published SRP specification (draft rev. 16a), the
737 * port identifier format is 8 bytes of ID extension followed
738 * by 8 bytes of GUID. Older drafts put the two halves in the
739 * opposite order, so that the GUID comes first.
741 * Targets conforming to these obsolete drafts can be
742 * recognized by the I/O Class they report.
744 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
745 memcpy(req->priv.initiator_port_id,
746 &target->sgid.global.interface_id, 8);
747 memcpy(req->priv.initiator_port_id + 8,
748 &target->initiator_ext, 8);
749 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
750 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
752 memcpy(req->priv.initiator_port_id,
753 &target->initiator_ext, 8);
754 memcpy(req->priv.initiator_port_id + 8,
755 &target->sgid.global.interface_id, 8);
756 memcpy(req->priv.target_port_id, &target->id_ext, 8);
757 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
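	/*
	 * Layout summary derived from the assignments above (editorial
	 * annotation, not part of the original source). SRP draft rev. 16a:
	 *
	 *   initiator port ID = initiator extension (8 bytes) | port GUID (8 bytes)
	 *   target port ID    = identifier extension (8 bytes) | IOC GUID (8 bytes)
	 *
	 * Targets that report SRP_REV10_IB_IO_CLASS expect the two 8-byte
	 * halves of each identifier in the opposite order.
	 */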
761 * Topspin/Cisco SRP targets will reject our login unless we
762 * zero out the first 8 bytes of our initiator port ID and set
763 * the second 8 bytes to the local node GUID.
765 if (srp_target_is_topspin(target)) {
766 shost_printk(KERN_DEBUG, target->scsi_host,
767 PFX "Topspin/Cisco initiator port ID workaround "
768 "activated for target GUID %016llx\n",
769 be64_to_cpu(target->ioc_guid));
770 memset(req->priv.initiator_port_id, 0, 8);
771 memcpy(req->priv.initiator_port_id + 8,
772 &target->srp_host->srp_dev->dev->node_guid, 8);
775 status = ib_send_cm_req(ch->cm_id, &req->param);
782 static bool srp_queue_remove_work(struct srp_target_port *target)
784 bool changed = false;
786 spin_lock_irq(&target->lock);
787 if (target->state != SRP_TARGET_REMOVED) {
788 target->state = SRP_TARGET_REMOVED;
791 spin_unlock_irq(&target->lock);
794 queue_work(srp_remove_wq, &target->remove_work);
799 static void srp_disconnect_target(struct srp_target_port *target)
801 struct srp_rdma_ch *ch;
804 /* XXX should send SRP_I_LOGOUT request */
806 for (i = 0; i < target->ch_count; i++) {
808 ch->connected = false;
809 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
810 shost_printk(KERN_DEBUG, target->scsi_host,
811 PFX "Sending CM DREQ failed\n");
816 static void srp_free_req_data(struct srp_target_port *target,
817 struct srp_rdma_ch *ch)
819 struct srp_device *dev = target->srp_host->srp_dev;
820 struct ib_device *ibdev = dev->dev;
821 struct srp_request *req;
827 for (i = 0; i < target->req_ring_size; ++i) {
828 req = &ch->req_ring[i];
829 if (dev->use_fast_reg) {
832 kfree(req->fmr_list);
833 kfree(req->map_page);
835 if (req->indirect_dma_addr) {
836 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
837 target->indirect_size,
840 kfree(req->indirect_desc);
847 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
849 struct srp_target_port *target = ch->target;
850 struct srp_device *srp_dev = target->srp_host->srp_dev;
851 struct ib_device *ibdev = srp_dev->dev;
852 struct srp_request *req;
855 int i, ret = -ENOMEM;
857 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
862 for (i = 0; i < target->req_ring_size; ++i) {
863 req = &ch->req_ring[i];
864 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
868 if (srp_dev->use_fast_reg) {
869 req->fr_list = mr_list;
871 req->fmr_list = mr_list;
872 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
873 sizeof(void *), GFP_KERNEL);
877 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
878 if (!req->indirect_desc)
881 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
882 target->indirect_size,
884 if (ib_dma_mapping_error(ibdev, dma_addr))
887 req->indirect_dma_addr = dma_addr;
896 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
897 * @shost: SCSI host whose attributes to remove from sysfs.
899 * Note: Any attributes defined in the host template that did not exist
900 * before this function was invoked will be ignored.
902 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
904 struct device_attribute **attr;
906 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
907 device_remove_file(&shost->shost_dev, *attr);
910 static void srp_remove_target(struct srp_target_port *target)
912 struct srp_rdma_ch *ch;
915 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
917 srp_del_scsi_host_attr(target->scsi_host);
918 srp_rport_get(target->rport);
919 srp_remove_host(target->scsi_host);
920 scsi_remove_host(target->scsi_host);
921 srp_stop_rport_timers(target->rport);
922 srp_disconnect_target(target);
923 for (i = 0; i < target->ch_count; i++) {
925 srp_free_ch_ib(target, ch);
927 cancel_work_sync(&target->tl_err_work);
928 srp_rport_put(target->rport);
929 for (i = 0; i < target->ch_count; i++) {
931 srp_free_req_data(target, ch);
936 spin_lock(&target->srp_host->target_lock);
937 list_del(&target->list);
938 spin_unlock(&target->srp_host->target_lock);
940 scsi_host_put(target->scsi_host);
943 static void srp_remove_work(struct work_struct *work)
945 struct srp_target_port *target =
946 container_of(work, struct srp_target_port, remove_work);
948 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
950 srp_remove_target(target);
953 static void srp_rport_delete(struct srp_rport *rport)
955 struct srp_target_port *target = rport->lld_data;
957 srp_queue_remove_work(target);
961 * srp_connected_ch() - number of connected channels
962 * @target: SRP target port.
964 static int srp_connected_ch(struct srp_target_port *target)
968 for (i = 0; i < target->ch_count; i++)
969 c += target->ch[i].connected;
974 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
976 struct srp_target_port *target = ch->target;
979 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
981 ret = srp_lookup_path(ch);
986 init_completion(&ch->done);
987 ret = srp_send_req(ch, multich);
990 ret = wait_for_completion_interruptible(&ch->done);
995 * The CM event handling code will set status to
996 * SRP_PORT_REDIRECT if we get a port redirect REJ
997 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1003 ch->connected = true;
1006 case SRP_PORT_REDIRECT:
1007 ret = srp_lookup_path(ch);
1012 case SRP_DLID_REDIRECT:
1015 case SRP_STALE_CONN:
1016 shost_printk(KERN_ERR, target->scsi_host, PFX
1017 "giving up on stale connection\n");
1027 return ret <= 0 ? ret : -ENODEV;
1030 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1032 srp_handle_qp_err(cq, wc, "INV RKEY");
1035 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1038 struct ib_send_wr *bad_wr;
1039 struct ib_send_wr wr = {
1040 .opcode = IB_WR_LOCAL_INV,
1044 .ex.invalidate_rkey = rkey,
1047 wr.wr_cqe = &req->reg_cqe;
1048 req->reg_cqe.done = srp_inv_rkey_err_done;
1049 return ib_post_send(ch->qp, &wr, &bad_wr);
1052 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1053 struct srp_rdma_ch *ch,
1054 struct srp_request *req)
1056 struct srp_target_port *target = ch->target;
1057 struct srp_device *dev = target->srp_host->srp_dev;
1058 struct ib_device *ibdev = dev->dev;
1061 if (!scsi_sglist(scmnd) ||
1062 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1063 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1066 if (dev->use_fast_reg) {
1067 struct srp_fr_desc **pfr;
1069 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1070 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1072 shost_printk(KERN_ERR, target->scsi_host, PFX
1073 "Queueing INV WR for rkey %#x failed (%d)\n",
1074 (*pfr)->mr->rkey, res);
1075 queue_work(system_long_wq,
1076 &target->tl_err_work);
1080 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1082 } else if (dev->use_fmr) {
1083 struct ib_pool_fmr **pfmr;
1085 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1086 ib_fmr_pool_unmap(*pfmr);
1089 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1090 scmnd->sc_data_direction);
1094 * srp_claim_req - Take ownership of the scmnd associated with a request.
1095 * @ch: SRP RDMA channel.
1096 * @req: SRP request.
1097 * @sdev: If not NULL, only take ownership for this SCSI device.
1098 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1099 * ownership of @req->scmnd if it equals @scmnd.
1102 * Either NULL or a pointer to the SCSI command the caller became owner of.
1104 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1105 struct srp_request *req,
1106 struct scsi_device *sdev,
1107 struct scsi_cmnd *scmnd)
1109 unsigned long flags;
1111 spin_lock_irqsave(&ch->lock, flags);
1113 (!sdev || req->scmnd->device == sdev) &&
1114 (!scmnd || req->scmnd == scmnd)) {
1120 spin_unlock_irqrestore(&ch->lock, flags);
1126 * srp_free_req() - Unmap data and adjust ch->req_lim.
1127 * @ch: SRP RDMA channel.
1128 * @req: Request to be freed.
1129 * @scmnd: SCSI command associated with @req.
1130 * @req_lim_delta: Amount to be added to @target->req_lim.
1132 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1133 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1135 unsigned long flags;
1137 srp_unmap_data(scmnd, ch, req);
1139 spin_lock_irqsave(&ch->lock, flags);
1140 ch->req_lim += req_lim_delta;
1141 spin_unlock_irqrestore(&ch->lock, flags);
1144 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1145 struct scsi_device *sdev, int result)
1147 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1150 srp_free_req(ch, req, scmnd, 0);
1151 scmnd->result = result;
1152 scmnd->scsi_done(scmnd);
1156 static void srp_terminate_io(struct srp_rport *rport)
1158 struct srp_target_port *target = rport->lld_data;
1159 struct srp_rdma_ch *ch;
1160 struct Scsi_Host *shost = target->scsi_host;
1161 struct scsi_device *sdev;
1165 * Invoking srp_terminate_io() while srp_queuecommand() is running
1166 * is not safe. Hence the warning statement below.
1168 shost_for_each_device(sdev, shost)
1169 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1171 for (i = 0; i < target->ch_count; i++) {
1172 ch = &target->ch[i];
1174 for (j = 0; j < target->req_ring_size; ++j) {
1175 struct srp_request *req = &ch->req_ring[j];
1177 srp_finish_req(ch, req, NULL,
1178 DID_TRANSPORT_FAILFAST << 16);
1184 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1185 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1186 * srp_reset_device() or srp_reset_host() calls will occur while this function
1187 * is in progress. One way to achieve this is not to call this function
1188 * directly but to call srp_reconnect_rport() instead, since that function
1189 * serializes calls of this function via rport->mutex and also blocks
1190 * srp_queuecommand() calls before invoking this function.
1192 static int srp_rport_reconnect(struct srp_rport *rport)
1194 struct srp_target_port *target = rport->lld_data;
1195 struct srp_rdma_ch *ch;
1197 bool multich = false;
1199 srp_disconnect_target(target);
1201 if (target->state == SRP_TARGET_SCANNING)
1205 * Now get a new local CM ID so that we avoid confusing the target in
1206 * case things are really fouled up. Doing so also ensures that all CM
1207 * callbacks will have finished before a new QP is allocated.
1209 for (i = 0; i < target->ch_count; i++) {
1210 ch = &target->ch[i];
1211 ret += srp_new_cm_id(ch);
1213 for (i = 0; i < target->ch_count; i++) {
1214 ch = &target->ch[i];
1215 for (j = 0; j < target->req_ring_size; ++j) {
1216 struct srp_request *req = &ch->req_ring[j];
1218 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1221 for (i = 0; i < target->ch_count; i++) {
1222 ch = &target->ch[i];
1224 * Whether or not creating a new CM ID succeeded, create a new
1225 * QP. This guarantees that all completion callback function
1226 * invocations have finished before request resetting starts.
1228 ret += srp_create_ch_ib(ch);
1230 INIT_LIST_HEAD(&ch->free_tx);
1231 for (j = 0; j < target->queue_size; ++j)
1232 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1235 target->qp_in_error = false;
1237 for (i = 0; i < target->ch_count; i++) {
1238 ch = &target->ch[i];
1241 ret = srp_connect_ch(ch, multich);
1246 shost_printk(KERN_INFO, target->scsi_host,
1247 PFX "reconnect succeeded\n");
1252 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1253 unsigned int dma_len, u32 rkey)
1255 struct srp_direct_buf *desc = state->desc;
1257 WARN_ON_ONCE(!dma_len);
1259 desc->va = cpu_to_be64(dma_addr);
1260 desc->key = cpu_to_be32(rkey);
1261 desc->len = cpu_to_be32(dma_len);
1263 state->total_len += dma_len;
1268 static int srp_map_finish_fmr(struct srp_map_state *state,
1269 struct srp_rdma_ch *ch)
1271 struct srp_target_port *target = ch->target;
1272 struct srp_device *dev = target->srp_host->srp_dev;
1273 struct ib_pd *pd = target->pd;
1274 struct ib_pool_fmr *fmr;
1277 if (state->fmr.next >= state->fmr.end) {
1278 shost_printk(KERN_ERR, ch->target->scsi_host,
1279 PFX "Out of MRs (mr_per_cmd = %d)\n",
1280 ch->target->mr_per_cmd);
1284 WARN_ON_ONCE(!dev->use_fmr);
1286 if (state->npages == 0)
1289 if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1290 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1291 pd->unsafe_global_rkey);
1295 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1296 state->npages, io_addr);
1298 return PTR_ERR(fmr);
1300 *state->fmr.next++ = fmr;
1303 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1304 state->dma_len, fmr->fmr->rkey);
1313 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1315 srp_handle_qp_err(cq, wc, "FAST REG");
1319 * Map up to sg_nents elements of state->sg, where *sg_offset_p is the offset
1320 * at which to start in the first element. If sg_offset_p != NULL then
1321 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1322 * byte that has not yet been mapped.
1324 static int srp_map_finish_fr(struct srp_map_state *state,
1325 struct srp_request *req,
1326 struct srp_rdma_ch *ch, int sg_nents,
1327 unsigned int *sg_offset_p)
1329 struct srp_target_port *target = ch->target;
1330 struct srp_device *dev = target->srp_host->srp_dev;
1331 struct ib_pd *pd = target->pd;
1332 struct ib_send_wr *bad_wr;
1333 struct ib_reg_wr wr;
1334 struct srp_fr_desc *desc;
1338 if (state->fr.next >= state->fr.end) {
1339 shost_printk(KERN_ERR, ch->target->scsi_host,
1340 PFX "Out of MRs (mr_per_cmd = %d)\n",
1341 ch->target->mr_per_cmd);
1345 WARN_ON_ONCE(!dev->use_fast_reg);
1347 if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1348 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1350 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1351 sg_dma_len(state->sg) - sg_offset,
1352 pd->unsafe_global_rkey);
1358 desc = srp_fr_pool_get(ch->fr_pool);
1362 rkey = ib_inc_rkey(desc->mr->rkey);
1363 ib_update_fast_reg_key(desc->mr, rkey);
1365 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1367 if (unlikely(n < 0)) {
1368 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1369 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1370 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1371 sg_offset_p ? *sg_offset_p : -1, n);
1375 WARN_ON_ONCE(desc->mr->length == 0);
1377 req->reg_cqe.done = srp_reg_mr_err_done;
1380 wr.wr.opcode = IB_WR_REG_MR;
1381 wr.wr.wr_cqe = &req->reg_cqe;
1383 wr.wr.send_flags = 0;
1385 wr.key = desc->mr->rkey;
1386 wr.access = (IB_ACCESS_LOCAL_WRITE |
1387 IB_ACCESS_REMOTE_READ |
1388 IB_ACCESS_REMOTE_WRITE);
1390 *state->fr.next++ = desc;
1393 srp_map_desc(state, desc->mr->iova,
1394 desc->mr->length, desc->mr->rkey);
1396 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1397 if (unlikely(err)) {
1398 WARN_ON_ONCE(err == -ENOMEM);
1405 static int srp_map_sg_entry(struct srp_map_state *state,
1406 struct srp_rdma_ch *ch,
1407 struct scatterlist *sg)
1409 struct srp_target_port *target = ch->target;
1410 struct srp_device *dev = target->srp_host->srp_dev;
1411 struct ib_device *ibdev = dev->dev;
1412 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1413 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1414 unsigned int len = 0;
1417 WARN_ON_ONCE(!dma_len);
1420 unsigned offset = dma_addr & ~dev->mr_page_mask;
1422 if (state->npages == dev->max_pages_per_mr ||
1423 (state->npages > 0 && offset != 0)) {
1424 ret = srp_map_finish_fmr(state, ch);
1429 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1432 state->base_dma_addr = dma_addr;
1433 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1434 state->dma_len += len;
1440 * If the end of the MR is not on a page boundary then we need to
1441 * close it out and start a new one -- we can only merge at page
1445 if ((dma_addr & ~dev->mr_page_mask) != 0)
1446 ret = srp_map_finish_fmr(state, ch);
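/*
 * Worked example for the page-splitting logic above (editorial sketch; the
 * 4 KiB value is an assumption, since mr_page_size depends on the HCA, and it
 * assumes dev->mr_page_mask masks off the low intra-page bits, i.e. ~0xfff
 * for 4 KiB pages): for an S/G element with dma_addr = 0x12345200 and
 * dma_len = 0x1000, offset = 0x200, so the first pass maps
 * min(0x1000, 0x1000 - 0x200) = 0xe00 bytes against page 0x12345000, and the
 * remaining 0x200 bytes start a new page entry at 0x12346000.
 */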
1450 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1451 struct srp_request *req, struct scatterlist *scat,
1454 struct scatterlist *sg;
1457 state->pages = req->map_page;
1458 state->fmr.next = req->fmr_list;
1459 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1461 for_each_sg(scat, sg, count, i) {
1462 ret = srp_map_sg_entry(state, ch, sg);
1467 ret = srp_map_finish_fmr(state, ch);
1474 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1475 struct srp_request *req, struct scatterlist *scat,
1478 unsigned int sg_offset = 0;
1480 state->fr.next = req->fr_list;
1481 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1490 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1491 if (unlikely(n < 0))
1495 for (i = 0; i < n; i++)
1496 state->sg = sg_next(state->sg);
1502 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1503 struct srp_request *req, struct scatterlist *scat,
1506 struct srp_target_port *target = ch->target;
1507 struct srp_device *dev = target->srp_host->srp_dev;
1508 struct scatterlist *sg;
1511 for_each_sg(scat, sg, count, i) {
1512 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1513 ib_sg_dma_len(dev->dev, sg),
1514 target->pd->unsafe_global_rkey);
1521 * Register the indirect data buffer descriptor with the HCA.
1523 * Note: since the indirect data buffer descriptor has been allocated with
1524 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1527 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1528 void **next_mr, void **end_mr, u32 idb_len,
1531 struct srp_target_port *target = ch->target;
1532 struct srp_device *dev = target->srp_host->srp_dev;
1533 struct srp_map_state state;
1534 struct srp_direct_buf idb_desc;
1536 struct scatterlist idb_sg[1];
1539 memset(&state, 0, sizeof(state));
1540 memset(&idb_desc, 0, sizeof(idb_desc));
1541 state.gen.next = next_mr;
1542 state.gen.end = end_mr;
1543 state.desc = &idb_desc;
1544 state.base_dma_addr = req->indirect_dma_addr;
1545 state.dma_len = idb_len;
1547 if (dev->use_fast_reg) {
1549 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1550 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1551 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1552 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1554 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1557 WARN_ON_ONCE(ret < 1);
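		/*
		 * Editorial note on the "hack" comments above: ib_map_mr_sg()
		 * operates on a DMA-mapped scatterlist, but the indirect
		 * descriptor buffer was mapped with ib_dma_map_single(), so
		 * the sg entry's dma_address (and, when
		 * CONFIG_NEED_SG_DMA_LENGTH is set, dma_length) are filled in
		 * by hand instead of going through ib_dma_map_sg().
		 */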
1558 } else if (dev->use_fmr) {
1559 state.pages = idb_pages;
1560 state.pages[0] = (req->indirect_dma_addr &
1563 ret = srp_map_finish_fmr(&state, ch);
1570 *idb_rkey = idb_desc.key;
1575 static void srp_check_mapping(struct srp_map_state *state,
1576 struct srp_rdma_ch *ch, struct srp_request *req,
1577 struct scatterlist *scat, int count)
1579 struct srp_device *dev = ch->target->srp_host->srp_dev;
1580 struct srp_fr_desc **pfr;
1581 u64 desc_len = 0, mr_len = 0;
1584 for (i = 0; i < state->ndesc; i++)
1585 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1586 if (dev->use_fast_reg)
1587 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1588 mr_len += (*pfr)->mr->length;
1589 else if (dev->use_fmr)
1590 for (i = 0; i < state->nmdesc; i++)
1591 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1592 if (desc_len != scsi_bufflen(req->scmnd) ||
1593 mr_len > scsi_bufflen(req->scmnd))
1594 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1595 scsi_bufflen(req->scmnd), desc_len, mr_len,
1596 state->ndesc, state->nmdesc);
1600 * srp_map_data() - map SCSI data buffer onto an SRP request
1601 * @scmnd: SCSI command to map
1602 * @ch: SRP RDMA channel
1605 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1608 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1609 struct srp_request *req)
1611 struct srp_target_port *target = ch->target;
1612 struct ib_pd *pd = target->pd;
1613 struct scatterlist *scat;
1614 struct srp_cmd *cmd = req->cmd->buf;
1615 int len, nents, count, ret;
1616 struct srp_device *dev;
1617 struct ib_device *ibdev;
1618 struct srp_map_state state;
1619 struct srp_indirect_buf *indirect_hdr;
1620 u32 idb_len, table_len;
1624 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1625 return sizeof (struct srp_cmd);
1627 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1628 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1629 shost_printk(KERN_WARNING, target->scsi_host,
1630 PFX "Unhandled data direction %d\n",
1631 scmnd->sc_data_direction);
1635 nents = scsi_sg_count(scmnd);
1636 scat = scsi_sglist(scmnd);
1638 dev = target->srp_host->srp_dev;
1641 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1642 if (unlikely(count == 0))
1645 fmt = SRP_DATA_DESC_DIRECT;
1646 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1648 if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1650 * The midlayer only generated a single gather/scatter
1651 * entry, or DMA mapping coalesced everything to a
1652 * single entry. So a direct descriptor along with
1653 * the DMA MR suffices.
1655 struct srp_direct_buf *buf = (void *) cmd->add_data;
1657 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1658 buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1659 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1666 * We have more than one scatter/gather entry, so build our indirect
1667 * descriptor table, trying to merge as many entries as we can.
1669 indirect_hdr = (void *) cmd->add_data;
1671 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1672 target->indirect_size, DMA_TO_DEVICE);
1674 memset(&state, 0, sizeof(state));
1675 state.desc = req->indirect_desc;
1676 if (dev->use_fast_reg)
1677 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1678 else if (dev->use_fmr)
1679 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1681 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1682 req->nmdesc = state.nmdesc;
1687 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1688 "Memory mapping consistency check");
1689 if (DYNAMIC_DEBUG_BRANCH(ddm))
1690 srp_check_mapping(&state, ch, req, scat, count);
1693 /* We've mapped the request, now pull as much of the indirect
1694 * descriptor table as we can into the command buffer. If this
1695 * target is not using an external indirect table, we are
1696 * guaranteed to fit into the command, as the SCSI layer won't
1697 * give us more S/G entries than we allow.
1699 if (state.ndesc == 1) {
1701 * Memory registration collapsed the sg-list into one entry,
1702 * so use a direct descriptor.
1704 struct srp_direct_buf *buf = (void *) cmd->add_data;
1706 *buf = req->indirect_desc[0];
1710 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1711 !target->allow_ext_sg)) {
1712 shost_printk(KERN_ERR, target->scsi_host,
1713 "Could not fit S/G list into SRP_CMD\n");
1718 count = min(state.ndesc, target->cmd_sg_cnt);
1719 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1720 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1722 fmt = SRP_DATA_DESC_INDIRECT;
1723 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1724 len += count * sizeof (struct srp_direct_buf);
1726 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1727 count * sizeof (struct srp_direct_buf));
1729 if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1730 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1731 idb_len, &idb_rkey);
1736 idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1739 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1740 indirect_hdr->table_desc.key = idb_rkey;
1741 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1742 indirect_hdr->len = cpu_to_be32(state.total_len);
1744 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1745 cmd->data_out_desc_cnt = count;
1747 cmd->data_in_desc_cnt = count;
1749 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1753 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1754 cmd->buf_fmt = fmt << 4;
1761 srp_unmap_data(scmnd, ch, req);
1762 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1768 * Return an IU and, if applicable, a credit to the free pool
1770 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1771 enum srp_iu_type iu_type)
1773 unsigned long flags;
1775 spin_lock_irqsave(&ch->lock, flags);
1776 list_add(&iu->list, &ch->free_tx);
1777 if (iu_type != SRP_IU_RSP)
1779 spin_unlock_irqrestore(&ch->lock, flags);
1783 * Must be called with ch->lock held to protect req_lim and free_tx.
1784 * If IU is not sent, it must be returned using srp_put_tx_iu().
1787 * An upper limit for the number of allocated information units for each
1789 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1790 * more than Scsi_Host.can_queue requests.
1791 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1792 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1793 * one unanswered SRP request to an initiator.
1795 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1796 enum srp_iu_type iu_type)
1798 struct srp_target_port *target = ch->target;
1799 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1802 ib_process_cq_direct(ch->send_cq, -1);
1804 if (list_empty(&ch->free_tx))
1807 /* Initiator responses to target requests do not consume credits */
1808 if (iu_type != SRP_IU_RSP) {
1809 if (ch->req_lim <= rsv) {
1810 ++target->zero_req_lim;
1817 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1818 list_del(&iu->list);
1822 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1824 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1825 struct srp_rdma_ch *ch = cq->cq_context;
1827 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1828 srp_handle_qp_err(cq, wc, "SEND");
1832 list_add(&iu->list, &ch->free_tx);
1835 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1837 struct srp_target_port *target = ch->target;
1839 struct ib_send_wr wr, *bad_wr;
1841 list.addr = iu->dma;
1843 list.lkey = target->lkey;
1845 iu->cqe.done = srp_send_done;
1848 wr.wr_cqe = &iu->cqe;
1851 wr.opcode = IB_WR_SEND;
1852 wr.send_flags = IB_SEND_SIGNALED;
1854 return ib_post_send(ch->qp, &wr, &bad_wr);
1857 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1859 struct srp_target_port *target = ch->target;
1860 struct ib_recv_wr wr, *bad_wr;
1863 list.addr = iu->dma;
1864 list.length = iu->size;
1865 list.lkey = target->lkey;
1867 iu->cqe.done = srp_recv_done;
1870 wr.wr_cqe = &iu->cqe;
1874 return ib_post_recv(ch->qp, &wr, &bad_wr);
1877 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1879 struct srp_target_port *target = ch->target;
1880 struct srp_request *req;
1881 struct scsi_cmnd *scmnd;
1882 unsigned long flags;
1884 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1885 spin_lock_irqsave(&ch->lock, flags);
1886 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1887 if (rsp->tag == ch->tsk_mgmt_tag) {
1888 ch->tsk_mgmt_status = -1;
1889 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1890 ch->tsk_mgmt_status = rsp->data[3];
1891 complete(&ch->tsk_mgmt_done);
1893 shost_printk(KERN_ERR, target->scsi_host,
1894 "Received tsk mgmt response too late for tag %#llx\n",
1897 spin_unlock_irqrestore(&ch->lock, flags);
1899 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1900 if (scmnd && scmnd->host_scribble) {
1901 req = (void *)scmnd->host_scribble;
1902 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1907 shost_printk(KERN_ERR, target->scsi_host,
1908 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1909 rsp->tag, ch - target->ch, ch->qp->qp_num);
1911 spin_lock_irqsave(&ch->lock, flags);
1912 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1913 spin_unlock_irqrestore(&ch->lock, flags);
1917 scmnd->result = rsp->status;
1919 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1920 memcpy(scmnd->sense_buffer, rsp->data +
1921 be32_to_cpu(rsp->resp_data_len),
1922 min_t(int, be32_to_cpu(rsp->sense_data_len),
1923 SCSI_SENSE_BUFFERSIZE));
1926 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1927 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1928 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1929 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1930 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1931 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1932 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1933 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1935 srp_free_req(ch, req, scmnd,
1936 be32_to_cpu(rsp->req_lim_delta));
1938 scmnd->host_scribble = NULL;
1939 scmnd->scsi_done(scmnd);
1943 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1946 struct srp_target_port *target = ch->target;
1947 struct ib_device *dev = target->srp_host->srp_dev->dev;
1948 unsigned long flags;
1952 spin_lock_irqsave(&ch->lock, flags);
1953 ch->req_lim += req_delta;
1954 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1955 spin_unlock_irqrestore(&ch->lock, flags);
1958 shost_printk(KERN_ERR, target->scsi_host, PFX
1959 "no IU available to send response\n");
1963 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1964 memcpy(iu->buf, rsp, len);
1965 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1967 err = srp_post_send(ch, iu, len);
1969 shost_printk(KERN_ERR, target->scsi_host, PFX
1970 "unable to post response: %d\n", err);
1971 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1977 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1978 struct srp_cred_req *req)
1980 struct srp_cred_rsp rsp = {
1981 .opcode = SRP_CRED_RSP,
1984 s32 delta = be32_to_cpu(req->req_lim_delta);
1986 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1987 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1988 "problems processing SRP_CRED_REQ\n");
1991 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1992 struct srp_aer_req *req)
1994 struct srp_target_port *target = ch->target;
1995 struct srp_aer_rsp rsp = {
1996 .opcode = SRP_AER_RSP,
1999 s32 delta = be32_to_cpu(req->req_lim_delta);
2001 shost_printk(KERN_ERR, target->scsi_host, PFX
2002 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2004 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2005 shost_printk(KERN_ERR, target->scsi_host, PFX
2006 "problems processing SRP_AER_REQ\n");
2009 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2011 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2012 struct srp_rdma_ch *ch = cq->cq_context;
2013 struct srp_target_port *target = ch->target;
2014 struct ib_device *dev = target->srp_host->srp_dev->dev;
2018 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2019 srp_handle_qp_err(cq, wc, "RECV");
2023 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2026 opcode = *(u8 *) iu->buf;
2029 shost_printk(KERN_ERR, target->scsi_host,
2030 PFX "recv completion, opcode 0x%02x\n", opcode);
2031 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2032 iu->buf, wc->byte_len, true);
2037 srp_process_rsp(ch, iu->buf);
2041 srp_process_cred_req(ch, iu->buf);
2045 srp_process_aer_req(ch, iu->buf);
2049 /* XXX Handle target logout */
2050 shost_printk(KERN_WARNING, target->scsi_host,
2051 PFX "Got target logout request\n");
2055 shost_printk(KERN_WARNING, target->scsi_host,
2056 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2060 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2063 res = srp_post_recv(ch, iu);
2065 shost_printk(KERN_ERR, target->scsi_host,
2066 PFX "Recv failed with error code %d\n", res);
2070 * srp_tl_err_work() - handle a transport layer error
2071 * @work: Work structure embedded in an SRP target port.
2073 * Note: This function may get invoked before the rport has been created,
2074 * hence the target->rport test.
2076 static void srp_tl_err_work(struct work_struct *work)
2078 struct srp_target_port *target;
2080 target = container_of(work, struct srp_target_port, tl_err_work);
2082 srp_start_tl_fail_timers(target->rport);
2085 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2088 struct srp_rdma_ch *ch = cq->cq_context;
2089 struct srp_target_port *target = ch->target;
2091 if (ch->connected && !target->qp_in_error) {
2092 shost_printk(KERN_ERR, target->scsi_host,
2093 PFX "failed %s status %s (%d) for CQE %p\n",
2094 opname, ib_wc_status_msg(wc->status), wc->status,
2096 queue_work(system_long_wq, &target->tl_err_work);
2098 target->qp_in_error = true;
2101 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2103 struct srp_target_port *target = host_to_target(shost);
2104 struct srp_rport *rport = target->rport;
2105 struct srp_rdma_ch *ch;
2106 struct srp_request *req;
2108 struct srp_cmd *cmd;
2109 struct ib_device *dev;
2110 unsigned long flags;
2114 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2117 * The SCSI EH thread is the only context from which srp_queuecommand()
2118 * can get invoked for blocked devices (SDEV_BLOCK /
2119 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2120 * locking the rport mutex if invoked from inside the SCSI EH.
2123 mutex_lock(&rport->mutex);
2125 scmnd->result = srp_chkready(target->rport);
2126 if (unlikely(scmnd->result))
2129 WARN_ON_ONCE(scmnd->request->tag < 0);
2130 tag = blk_mq_unique_tag(scmnd->request);
2131 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2132 idx = blk_mq_unique_tag_to_tag(tag);
2133 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2134 dev_name(&shost->shost_gendev), tag, idx,
2135 target->req_ring_size);
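	/*
	 * Editorial note on the tag handling above: blk_mq_unique_tag()
	 * encodes the hardware queue number in the upper 16 bits and the
	 * per-queue tag in the lower 16 bits, so blk_mq_unique_tag_to_hwq()
	 * selects the RDMA channel and blk_mq_unique_tag_to_tag() yields the
	 * index into that channel's request ring.
	 */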
2137 spin_lock_irqsave(&ch->lock, flags);
2138 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2139 spin_unlock_irqrestore(&ch->lock, flags);
2144 req = &ch->req_ring[idx];
2145 dev = target->srp_host->srp_dev->dev;
2146 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2149 scmnd->host_scribble = (void *) req;
2152 memset(cmd, 0, sizeof *cmd);
2154 cmd->opcode = SRP_CMD;
2155 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2157 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2162 len = srp_map_data(scmnd, ch, req);
2164 shost_printk(KERN_ERR, target->scsi_host,
2165 PFX "Failed to map data (%d)\n", len);
2167 * If we ran out of memory descriptors (-ENOMEM) because an
2168 * application is queuing many requests with more than
2169 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2170 * to reduce queue depth temporarily.
2172 scmnd->result = len == -ENOMEM ?
2173 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2177 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2180 if (srp_post_send(ch, iu, len)) {
2181 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2189 mutex_unlock(&rport->mutex);
2194 srp_unmap_data(scmnd, ch, req);
2197 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2200 * Prevent the loops that iterate over the request ring from
2201 * encountering a dangling SCSI command pointer.
2206 if (scmnd->result) {
2207 scmnd->scsi_done(scmnd);
2210 ret = SCSI_MLQUEUE_HOST_BUSY;
2217 * Note: the resources allocated in this function are freed in
2220 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2222 struct srp_target_port *target = ch->target;
2225 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2229 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2234 for (i = 0; i < target->queue_size; ++i) {
2235 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2237 GFP_KERNEL, DMA_FROM_DEVICE);
2238 if (!ch->rx_ring[i])
2242 for (i = 0; i < target->queue_size; ++i) {
2243 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2245 GFP_KERNEL, DMA_TO_DEVICE);
2246 if (!ch->tx_ring[i])
2249 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2255 for (i = 0; i < target->queue_size; ++i) {
2256 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2257 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2270 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2272 uint64_t T_tr_ns, max_compl_time_ms;
2273 uint32_t rq_tmo_jiffies;
2276 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2277 * table 91), both the QP timeout and the retry count have to be set
2278 * for RC QP's during the RTR to RTS transition.
2280 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2281 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2284 * Set target->rq_tmo_jiffies to one second more than the largest time
2285 * it can take before an error completion is generated. See also
2286 * C9-140..142 in the IBTA spec for more information about how to
2287 * convert the QP Local ACK Timeout value to nanoseconds.
2289 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2290 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2291 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2292 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2294 return rq_tmo_jiffies;
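/*
 * Worked example for the computation above (editorial sketch; the timeout and
 * retry_cnt values are arbitrary): with qp_attr->timeout = 14 the local ACK
 * timeout is T_tr_ns = 4096 * 2^14 ns ~= 67 ms, with retry_cnt = 7 the
 * worst-case completion time is 7 * 4 * 67 ms ~= 1.9 s, so the returned
 * rq_tmo_jiffies corresponds to roughly 2.9 seconds.
 */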
2297 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2298 const struct srp_login_rsp *lrsp,
2299 struct srp_rdma_ch *ch)
2301 struct srp_target_port *target = ch->target;
2302 struct ib_qp_attr *qp_attr = NULL;
2307 if (lrsp->opcode == SRP_LOGIN_RSP) {
2308 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2309 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2312 * Reserve credits for task management so we don't
2313 * bounce requests back to the SCSI mid-layer.
2315 target->scsi_host->can_queue
2316 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2317 target->scsi_host->can_queue);
2318 target->scsi_host->cmd_per_lun
2319 = min_t(int, target->scsi_host->can_queue,
2320 target->scsi_host->cmd_per_lun);
2322 shost_printk(KERN_WARNING, target->scsi_host,
2323 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2329 ret = srp_alloc_iu_bufs(ch);
2335 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2339 qp_attr->qp_state = IB_QPS_RTR;
2340 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2344 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2348 for (i = 0; i < target->queue_size; i++) {
2349 struct srp_iu *iu = ch->rx_ring[i];
2351 ret = srp_post_recv(ch, iu);
2356 qp_attr->qp_state = IB_QPS_RTS;
2357 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2361 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2363 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2367 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2376 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2377 struct ib_cm_event *event,
2378 struct srp_rdma_ch *ch)
2380 struct srp_target_port *target = ch->target;
2381 struct Scsi_Host *shost = target->scsi_host;
2382 struct ib_class_port_info *cpi;
2385 switch (event->param.rej_rcvd.reason) {
2386 case IB_CM_REJ_PORT_CM_REDIRECT:
2387 cpi = event->param.rej_rcvd.ari;
2388 ch->path.dlid = cpi->redirect_lid;
2389 ch->path.pkey = cpi->redirect_pkey;
2390 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2391 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2393 ch->status = ch->path.dlid ?
2394 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2397 case IB_CM_REJ_PORT_REDIRECT:
2398 if (srp_target_is_topspin(target)) {
2400 * Topspin/Cisco SRP gateways incorrectly send
2401 * reject reason code 25 when they mean 24 (port redirect).
2404 memcpy(ch->path.dgid.raw,
2405 event->param.rej_rcvd.ari, 16);
2407 shost_printk(KERN_DEBUG, shost,
2408 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2409 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2410 be64_to_cpu(ch->path.dgid.global.interface_id));
2412 ch->status = SRP_PORT_REDIRECT;
2414 shost_printk(KERN_WARNING, shost,
2415 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2416 ch->status = -ECONNRESET;
2420 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2421 shost_printk(KERN_WARNING, shost,
2422 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2423 ch->status = -ECONNRESET;
2426 case IB_CM_REJ_CONSUMER_DEFINED:
2427 opcode = *(u8 *) event->private_data;
2428 if (opcode == SRP_LOGIN_REJ) {
2429 struct srp_login_rej *rej = event->private_data;
2430 u32 reason = be32_to_cpu(rej->reason);
2432 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2433 shost_printk(KERN_WARNING, shost,
2434 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2436 shost_printk(KERN_WARNING, shost, PFX
2437 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2439 target->orig_dgid.raw, reason);
2441 shost_printk(KERN_WARNING, shost,
2442 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2443 " opcode 0x%02x\n", opcode);
2444 ch->status = -ECONNRESET;
2447 case IB_CM_REJ_STALE_CONN:
2448 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2449 ch->status = SRP_STALE_CONN;
2453 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2454 event->param.rej_rcvd.reason);
2455 ch->status = -ECONNRESET;
2459 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2461 struct srp_rdma_ch *ch = cm_id->context;
2462 struct srp_target_port *target = ch->target;
2465 switch (event->event) {
2466 case IB_CM_REQ_ERROR:
2467 shost_printk(KERN_DEBUG, target->scsi_host,
2468 PFX "Sending CM REQ failed\n");
2470 ch->status = -ECONNRESET;
2473 case IB_CM_REP_RECEIVED:
2475 srp_cm_rep_handler(cm_id, event->private_data, ch);
2478 case IB_CM_REJ_RECEIVED:
2479 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2482 srp_cm_rej_handler(cm_id, event, ch);
2485 case IB_CM_DREQ_RECEIVED:
2486 shost_printk(KERN_WARNING, target->scsi_host,
2487 PFX "DREQ received - connection closed\n");
2488 ch->connected = false;
2489 if (ib_send_cm_drep(cm_id, NULL, 0))
2490 shost_printk(KERN_ERR, target->scsi_host,
2491 PFX "Sending CM DREP failed\n");
2492 queue_work(system_long_wq, &target->tl_err_work);
2495 case IB_CM_TIMEWAIT_EXIT:
2496 shost_printk(KERN_ERR, target->scsi_host,
2497 PFX "connection closed\n");
2503 case IB_CM_MRA_RECEIVED:
2504 case IB_CM_DREQ_ERROR:
2505 case IB_CM_DREP_RECEIVED:
2509 shost_printk(KERN_WARNING, target->scsi_host,
2510 PFX "Unhandled CM event %d\n", event->event);
2515 complete(&ch->done);
2521 * srp_change_queue_depth - set the device queue depth
2522 * @sdev: scsi device struct
2523 * @qdepth: requested queue depth
2525 * Returns queue depth.
2528 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2530 if (!sdev->tagged_supported)
2532 return scsi_change_queue_depth(sdev, qdepth);
2535 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2536 u8 func, u8 *status)
2538 struct srp_target_port *target = ch->target;
2539 struct srp_rport *rport = target->rport;
2540 struct ib_device *dev = target->srp_host->srp_dev->dev;
2542 struct srp_tsk_mgmt *tsk_mgmt;
2545 if (!ch->connected || target->qp_in_error)
2549 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2550 * invoked while a task management function is being sent.
2552 mutex_lock(&rport->mutex);
2553 spin_lock_irq(&ch->lock);
2554 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2555 spin_unlock_irq(&ch->lock);
2558 mutex_unlock(&rport->mutex);
2563 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2566 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2568 tsk_mgmt->opcode = SRP_TSK_MGMT;
2569 int_to_scsilun(lun, &tsk_mgmt->lun);
2570 tsk_mgmt->tsk_mgmt_func = func;
2571 tsk_mgmt->task_tag = req_tag;
2573 spin_lock_irq(&ch->lock);
2574 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2575 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2576 spin_unlock_irq(&ch->lock);
2578 init_completion(&ch->tsk_mgmt_done);
2580 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2582 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2583 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2584 mutex_unlock(&rport->mutex);
2588 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2589 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2590 if (res > 0 && status)
2591 *status = ch->tsk_mgmt_status;
2592 mutex_unlock(&rport->mutex);
2594 WARN_ON_ONCE(res < 0);
2596 return res > 0 ? 0 : -1;
2599 static int srp_abort(struct scsi_cmnd *scmnd)
2601 struct srp_target_port *target = host_to_target(scmnd->device->host);
2602 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2605 struct srp_rdma_ch *ch;
2608 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2612 tag = blk_mq_unique_tag(scmnd->request);
2613 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2614 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2616 ch = &target->ch[ch_idx];
2617 if (!srp_claim_req(ch, req, NULL, scmnd))
2619 shost_printk(KERN_ERR, target->scsi_host,
2620 "Sending SRP abort for tag %#x\n", tag);
2621 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2622 SRP_TSK_ABORT_TASK, NULL) == 0)
2624 else if (target->rport->state == SRP_RPORT_LOST)
2628 srp_free_req(ch, req, scmnd, 0);
2629 scmnd->result = DID_ABORT << 16;
2630 scmnd->scsi_done(scmnd);
2635 static int srp_reset_device(struct scsi_cmnd *scmnd)
2637 struct srp_target_port *target = host_to_target(scmnd->device->host);
2638 struct srp_rdma_ch *ch;
2639 int i, j;
2642 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2644 ch = &target->ch[0];
2645 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2646 SRP_TSK_LUN_RESET, &status))
2651 for (i = 0; i < target->ch_count; i++) {
2652 ch = &target->ch[i];
2653 for (j = 0; j < target->req_ring_size; ++j) {
2654 struct srp_request *req = &ch->req_ring[j];
2656 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2663 static int srp_reset_host(struct scsi_cmnd *scmnd)
2665 struct srp_target_port *target = host_to_target(scmnd->device->host);
2667 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2669 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2672 static int srp_slave_alloc(struct scsi_device *sdev)
2674 struct Scsi_Host *shost = sdev->host;
2675 struct srp_target_port *target = host_to_target(shost);
2676 struct srp_device *srp_dev = target->srp_host->srp_dev;
2679 blk_queue_virt_boundary(sdev->request_queue,
2680 ~srp_dev->mr_page_mask);
2685 static int srp_slave_configure(struct scsi_device *sdev)
2687 struct Scsi_Host *shost = sdev->host;
2688 struct srp_target_port *target = host_to_target(shost);
2689 struct request_queue *q = sdev->request_queue;
2690 unsigned long timeout;
2692 if (sdev->type == TYPE_DISK) {
2693 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2694 blk_queue_rq_timeout(q, timeout);
2700 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2703 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2705 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2708 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2711 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2713 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2716 static ssize_t show_service_id(struct device *dev,
2717 struct device_attribute *attr, char *buf)
2719 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2721 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2724 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2727 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2729 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2732 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2735 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2737 return sprintf(buf, "%pI6\n", target->sgid.raw);
2740 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2743 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2744 struct srp_rdma_ch *ch = &target->ch[0];
2746 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2749 static ssize_t show_orig_dgid(struct device *dev,
2750 struct device_attribute *attr, char *buf)
2752 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2754 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2757 static ssize_t show_req_lim(struct device *dev,
2758 struct device_attribute *attr, char *buf)
2760 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2761 struct srp_rdma_ch *ch;
2762 int i, req_lim = INT_MAX;
2764 for (i = 0; i < target->ch_count; i++) {
2765 ch = &target->ch[i];
2766 req_lim = min(req_lim, ch->req_lim);
2768 return sprintf(buf, "%d\n", req_lim);
2771 static ssize_t show_zero_req_lim(struct device *dev,
2772 struct device_attribute *attr, char *buf)
2774 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2776 return sprintf(buf, "%d\n", target->zero_req_lim);
2779 static ssize_t show_local_ib_port(struct device *dev,
2780 struct device_attribute *attr, char *buf)
2782 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2784 return sprintf(buf, "%d\n", target->srp_host->port);
2787 static ssize_t show_local_ib_device(struct device *dev,
2788 struct device_attribute *attr, char *buf)
2790 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2792 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2795 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2798 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2800 return sprintf(buf, "%d\n", target->ch_count);
2803 static ssize_t show_comp_vector(struct device *dev,
2804 struct device_attribute *attr, char *buf)
2806 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2808 return sprintf(buf, "%d\n", target->comp_vector);
2811 static ssize_t show_tl_retry_count(struct device *dev,
2812 struct device_attribute *attr, char *buf)
2814 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2816 return sprintf(buf, "%d\n", target->tl_retry_count);
2819 static ssize_t show_cmd_sg_entries(struct device *dev,
2820 struct device_attribute *attr, char *buf)
2822 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2824 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2827 static ssize_t show_allow_ext_sg(struct device *dev,
2828 struct device_attribute *attr, char *buf)
2830 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2832 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2835 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2836 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2837 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2838 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2839 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2840 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2841 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2842 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2843 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2844 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2845 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2846 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2847 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2848 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2849 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2850 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2852 static struct device_attribute *srp_host_attrs[] = {
2855 &dev_attr_service_id,
2859 &dev_attr_orig_dgid,
2861 &dev_attr_zero_req_lim,
2862 &dev_attr_local_ib_port,
2863 &dev_attr_local_ib_device,
2865 &dev_attr_comp_vector,
2866 &dev_attr_tl_retry_count,
2867 &dev_attr_cmd_sg_entries,
2868 &dev_attr_allow_ext_sg,
2872 static struct scsi_host_template srp_template = {
2873 .module = THIS_MODULE,
2874 .name = "InfiniBand SRP initiator",
2875 .proc_name = DRV_NAME,
2876 .slave_alloc = srp_slave_alloc,
2877 .slave_configure = srp_slave_configure,
2878 .info = srp_target_info,
2879 .queuecommand = srp_queuecommand,
2880 .change_queue_depth = srp_change_queue_depth,
2881 .eh_abort_handler = srp_abort,
2882 .eh_device_reset_handler = srp_reset_device,
2883 .eh_host_reset_handler = srp_reset_host,
2884 .skip_settle_delay = true,
2885 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2886 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2888 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2889 .use_clustering = ENABLE_CLUSTERING,
2890 .shost_attrs = srp_host_attrs,
2891 .track_queue_depth = 1,
2894 static int srp_sdev_count(struct Scsi_Host *host)
2896 struct scsi_device *sdev;
2899 shost_for_each_device(sdev, host)
2907 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2908 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2909 * removal has been scheduled.
2910 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2912 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2914 struct srp_rport_identifiers ids;
2915 struct srp_rport *rport;
2917 target->state = SRP_TARGET_SCANNING;
2918 sprintf(target->target_name, "SRP.T10:%016llX",
2919 be64_to_cpu(target->id_ext));
2921 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2924 memcpy(ids.port_id, &target->id_ext, 8);
2925 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2926 ids.roles = SRP_RPORT_ROLE_TARGET;
2927 rport = srp_rport_add(target->scsi_host, &ids);
2928 if (IS_ERR(rport)) {
2929 scsi_remove_host(target->scsi_host);
2930 return PTR_ERR(rport);
2933 rport->lld_data = target;
2934 target->rport = rport;
2936 spin_lock(&host->target_lock);
2937 list_add_tail(&target->list, &host->target_list);
2938 spin_unlock(&host->target_lock);
2940 scsi_scan_target(&target->scsi_host->shost_gendev,
2941 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2943 if (srp_connected_ch(target) < target->ch_count ||
2944 target->qp_in_error) {
2945 shost_printk(KERN_INFO, target->scsi_host,
2946 PFX "SCSI scan failed - removing SCSI host\n");
2947 srp_queue_remove_work(target);
2951 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2952 dev_name(&target->scsi_host->shost_gendev),
2953 srp_sdev_count(target->scsi_host));
2955 spin_lock_irq(&target->lock);
2956 if (target->state == SRP_TARGET_SCANNING)
2957 target->state = SRP_TARGET_LIVE;
2958 spin_unlock_irq(&target->lock);
2964 static void srp_release_dev(struct device *dev)
2966 struct srp_host *host =
2967 container_of(dev, struct srp_host, dev);
2969 complete(&host->released);
2972 static struct class srp_class = {
2973 .name = "infiniband_srp",
2974 .dev_release = srp_release_dev
2978 * srp_conn_unique() - check whether the connection to a target is unique
2980 * @target: SRP target port.
2982 static bool srp_conn_unique(struct srp_host *host,
2983 struct srp_target_port *target)
2985 struct srp_target_port *t;
2988 if (target->state == SRP_TARGET_REMOVED)
2993 spin_lock(&host->target_lock);
2994 list_for_each_entry(t, &host->target_list, list) {
2996 target->id_ext == t->id_ext &&
2997 target->ioc_guid == t->ioc_guid &&
2998 target->initiator_ext == t->initiator_ext) {
3003 spin_unlock(&host->target_lock);
3010 * Target ports are added by writing
3012 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3013 * pkey=<P_Key>,service_id=<service ID>
3015 * to the add_target sysfs attribute.
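 *
 * Illustrative usage sketch (all identifiers below are placeholders, not a
 * real target; the sysfs path follows the "infiniband_srp" class and the
 * "srp-<ibdev>-<port>" device naming used later in this file):
 *
 *   echo "id_ext=200100e08b000000,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=0002c90200402bd4" > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target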
3019 SRP_OPT_ID_EXT = 1 << 0,
3020 SRP_OPT_IOC_GUID = 1 << 1,
3021 SRP_OPT_DGID = 1 << 2,
3022 SRP_OPT_PKEY = 1 << 3,
3023 SRP_OPT_SERVICE_ID = 1 << 4,
3024 SRP_OPT_MAX_SECT = 1 << 5,
3025 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3026 SRP_OPT_IO_CLASS = 1 << 7,
3027 SRP_OPT_INITIATOR_EXT = 1 << 8,
3028 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3029 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3030 SRP_OPT_SG_TABLESIZE = 1 << 11,
3031 SRP_OPT_COMP_VECTOR = 1 << 12,
3032 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3033 SRP_OPT_QUEUE_SIZE = 1 << 14,
3034 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
3038 SRP_OPT_SERVICE_ID),
3041 static const match_table_t srp_opt_tokens = {
3042 { SRP_OPT_ID_EXT, "id_ext=%s" },
3043 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3044 { SRP_OPT_DGID, "dgid=%s" },
3045 { SRP_OPT_PKEY, "pkey=%x" },
3046 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3047 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3048 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3049 { SRP_OPT_IO_CLASS, "io_class=%x" },
3050 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3051 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3052 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3053 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3054 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3055 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3056 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3057 { SRP_OPT_ERR, NULL }
3060 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3062 char *options, *sep_opt;
3065 substring_t args[MAX_OPT_ARGS];
3071 options = kstrdup(buf, GFP_KERNEL);
3076 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3080 token = match_token(p, srp_opt_tokens, args);
3084 case SRP_OPT_ID_EXT:
3085 p = match_strdup(args);
3090 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3094 case SRP_OPT_IOC_GUID:
3095 p = match_strdup(args);
3100 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3105 p = match_strdup(args);
3110 if (strlen(p) != 32) {
3111 pr_warn("bad dest GID parameter '%s'\n", p);
3116 for (i = 0; i < 16; ++i) {
3117 strlcpy(dgid, p + i * 2, sizeof(dgid));
3118 if (sscanf(dgid, "%hhx",
3119 &target->orig_dgid.raw[i]) < 1) {
3129 if (match_hex(args, &token)) {
3130 pr_warn("bad P_Key parameter '%s'\n", p);
3133 target->pkey = cpu_to_be16(token);
3136 case SRP_OPT_SERVICE_ID:
3137 p = match_strdup(args);
3142 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3146 case SRP_OPT_MAX_SECT:
3147 if (match_int(args, &token)) {
3148 pr_warn("bad max sect parameter '%s'\n", p);
3151 target->scsi_host->max_sectors = token;
3154 case SRP_OPT_QUEUE_SIZE:
3155 if (match_int(args, &token) || token < 1) {
3156 pr_warn("bad queue_size parameter '%s'\n", p);
3159 target->scsi_host->can_queue = token;
3160 target->queue_size = token + SRP_RSP_SQ_SIZE +
3161 SRP_TSK_MGMT_SQ_SIZE;
3162 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3163 target->scsi_host->cmd_per_lun = token;
3166 case SRP_OPT_MAX_CMD_PER_LUN:
3167 if (match_int(args, &token) || token < 1) {
3168 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3172 target->scsi_host->cmd_per_lun = token;
3175 case SRP_OPT_IO_CLASS:
3176 if (match_hex(args, &token)) {
3177 pr_warn("bad IO class parameter '%s'\n", p);
3180 if (token != SRP_REV10_IB_IO_CLASS &&
3181 token != SRP_REV16A_IB_IO_CLASS) {
3182 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3183 token, SRP_REV10_IB_IO_CLASS,
3184 SRP_REV16A_IB_IO_CLASS);
3187 target->io_class = token;
3190 case SRP_OPT_INITIATOR_EXT:
3191 p = match_strdup(args);
3196 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3200 case SRP_OPT_CMD_SG_ENTRIES:
3201 if (match_int(args, &token) || token < 1 || token > 255) {
3202 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3206 target->cmd_sg_cnt = token;
3209 case SRP_OPT_ALLOW_EXT_SG:
3210 if (match_int(args, &token)) {
3211 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3214 target->allow_ext_sg = !!token;
3217 case SRP_OPT_SG_TABLESIZE:
3218 if (match_int(args, &token) || token < 1 ||
3219 token > SG_MAX_SEGMENTS) {
3220 pr_warn("bad max sg_tablesize parameter '%s'\n",
3224 target->sg_tablesize = token;
3227 case SRP_OPT_COMP_VECTOR:
3228 if (match_int(args, &token) || token < 0) {
3229 pr_warn("bad comp_vector parameter '%s'\n", p);
3232 target->comp_vector = token;
3235 case SRP_OPT_TL_RETRY_COUNT:
3236 if (match_int(args, &token) || token < 2 || token > 7) {
3237 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3241 target->tl_retry_count = token;
3245 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3251 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3254 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3255 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3256 !(srp_opt_tokens[i].token & opt_mask))
3257 pr_warn("target creation request is missing parameter '%s'\n",
3258 srp_opt_tokens[i].pattern);
3260 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3261 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3262 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3263 target->scsi_host->cmd_per_lun,
3264 target->scsi_host->can_queue);
3271 static ssize_t srp_create_target(struct device *dev,
3272 struct device_attribute *attr,
3273 const char *buf, size_t count)
3275 struct srp_host *host =
3276 container_of(dev, struct srp_host, dev);
3277 struct Scsi_Host *target_host;
3278 struct srp_target_port *target;
3279 struct srp_rdma_ch *ch;
3280 struct srp_device *srp_dev = host->srp_dev;
3281 struct ib_device *ibdev = srp_dev->dev;
3282 int ret, node_idx, node, cpu, i;
3283 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3284 bool multich = false;
3286 target_host = scsi_host_alloc(&srp_template,
3287 sizeof (struct srp_target_port));
3291 target_host->transportt = ib_srp_transport_template;
3292 target_host->max_channel = 0;
3293 target_host->max_id = 1;
3294 target_host->max_lun = -1LL;
3295 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3297 target = host_to_target(target_host);
3299 target->io_class = SRP_REV16A_IB_IO_CLASS;
3300 target->scsi_host = target_host;
3301 target->srp_host = host;
3302 target->pd = host->srp_dev->pd;
3303 target->lkey = host->srp_dev->pd->local_dma_lkey;
3304 target->cmd_sg_cnt = cmd_sg_entries;
3305 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3306 target->allow_ext_sg = allow_ext_sg;
3307 target->tl_retry_count = 7;
3308 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3311 * Prevent the SCSI host from being removed by srp_remove_target()
3312 * before this function returns.
3314 scsi_host_get(target->scsi_host);
3316 ret = mutex_lock_interruptible(&host->add_target_mutex);
3320 ret = srp_parse_options(buf, target);
3324 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3326 if (!srp_conn_unique(target->srp_host, target)) {
3327 shost_printk(KERN_INFO, target->scsi_host,
3328 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3329 be64_to_cpu(target->id_ext),
3330 be64_to_cpu(target->ioc_guid),
3331 be64_to_cpu(target->initiator_ext));
3336 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3337 target->cmd_sg_cnt < target->sg_tablesize) {
3338 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3339 target->sg_tablesize = target->cmd_sg_cnt;
3342 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3344 * FR and FMR can only map one HCA page per entry. If the
3345 * start address is not aligned on an HCA page boundary, two
3346 * entries will be used for the head and the tail although
3347 * these two entries combined contain at most one HCA page of
3348 * data. Hence the "+ 1" in the calculation below.
3350 * The indirect data buffer descriptor is contiguous so the
3351 * memory for that buffer will only be registered if
3352 * register_always is true. Hence add one to mr_per_cmd if
3353 * register_always has been set.
3355 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3356 (ilog2(srp_dev->mr_page_size) - 9);
3357 mr_per_cmd = register_always +
3358 (target->scsi_host->max_sectors + 1 +
3359 max_sectors_per_mr - 1) / max_sectors_per_mr;
3360 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3361 target->scsi_host->max_sectors,
3362 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3363 max_sectors_per_mr, mr_per_cmd);
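/*
 * Illustrative example of the calculation above (numbers are assumptions):
 * with mr_page_size = 4096 and max_pages_per_mr = 256 a single MR covers
 * max_sectors_per_mr = 256 << (12 - 9) = 2048 sectors (1 MiB), so for
 * max_sectors = 1024 and register_always enabled
 * mr_per_cmd = 1 + (1024 + 1 + 2047) / 2048 = 2.
 */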
3366 target_host->sg_tablesize = target->sg_tablesize;
3367 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3368 target->mr_per_cmd = mr_per_cmd;
3369 target->indirect_size = target->sg_tablesize *
3370 sizeof (struct srp_direct_buf);
3371 target->max_iu_len = sizeof (struct srp_cmd) +
3372 sizeof (struct srp_indirect_buf) +
3373 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
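/*
 * Illustrative sizing of max_iu_len (assuming the usual SRP wire sizes of
 * 48 bytes for struct srp_cmd, 20 bytes for struct srp_indirect_buf and
 * 16 bytes per struct srp_direct_buf): with the default cmd_sg_cnt of 12
 * the request IU is 48 + 20 + 12 * 16 = 260 bytes.
 */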
3375 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3376 INIT_WORK(&target->remove_work, srp_remove_work);
3377 spin_lock_init(&target->lock);
3378 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3383 target->ch_count = max_t(unsigned, num_online_nodes(),
3385 min(4 * num_online_nodes(),
3386 ibdev->num_comp_vectors),
3387 num_online_cpus()));
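/*
 * The loop below distributes the channels evenly over the online NUMA nodes
 * and gives each node a node-local slice of the HCA's completion vectors;
 * this comment is a descriptive summary of the ch_start/ch_end and
 * cv_start/cv_end arithmetic that follows.
 */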
3388 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3394 for_each_online_node(node) {
3395 const int ch_start = (node_idx * target->ch_count /
3396 num_online_nodes());
3397 const int ch_end = ((node_idx + 1) * target->ch_count /
3398 num_online_nodes());
3399 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3400 num_online_nodes() + target->comp_vector)
3401 % ibdev->num_comp_vectors;
3402 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3403 num_online_nodes() + target->comp_vector)
3404 % ibdev->num_comp_vectors;
3407 for_each_online_cpu(cpu) {
3408 if (cpu_to_node(cpu) != node)
3410 if (ch_start + cpu_idx >= ch_end)
3412 ch = &target->ch[ch_start + cpu_idx];
3413 ch->target = target;
3414 ch->comp_vector = cv_start == cv_end ? cv_start :
3415 cv_start + cpu_idx % (cv_end - cv_start);
3416 spin_lock_init(&ch->lock);
3417 INIT_LIST_HEAD(&ch->free_tx);
3418 ret = srp_new_cm_id(ch);
3420 goto err_disconnect;
3422 ret = srp_create_ch_ib(ch);
3424 goto err_disconnect;
3426 ret = srp_alloc_req_data(ch);
3428 goto err_disconnect;
3430 ret = srp_connect_ch(ch, multich);
3432 shost_printk(KERN_ERR, target->scsi_host,
3433 PFX "Connection %d/%d failed\n",
3436 if (node_idx == 0 && cpu_idx == 0) {
3437 goto err_disconnect;
3439 srp_free_ch_ib(target, ch);
3440 srp_free_req_data(target, ch);
3441 target->ch_count = ch - target->ch;
3453 target->scsi_host->nr_hw_queues = target->ch_count;
3455 ret = srp_add_target(host, target);
3457 goto err_disconnect;
3459 if (target->state != SRP_TARGET_REMOVED) {
3460 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3461 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3462 be64_to_cpu(target->id_ext),
3463 be64_to_cpu(target->ioc_guid),
3464 be16_to_cpu(target->pkey),
3465 be64_to_cpu(target->service_id),
3466 target->sgid.raw, target->orig_dgid.raw);
3472 mutex_unlock(&host->add_target_mutex);
3475 scsi_host_put(target->scsi_host);
3477 scsi_host_put(target->scsi_host);
3482 srp_disconnect_target(target);
3484 for (i = 0; i < target->ch_count; i++) {
3485 ch = &target->ch[i];
3486 srp_free_ch_ib(target, ch);
3487 srp_free_req_data(target, ch);
3494 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3496 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3499 struct srp_host *host = container_of(dev, struct srp_host, dev);
3501 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3504 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3506 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3509 struct srp_host *host = container_of(dev, struct srp_host, dev);
3511 return sprintf(buf, "%d\n", host->port);
3514 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3516 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3518 struct srp_host *host;
3520 host = kzalloc(sizeof *host, GFP_KERNEL);
3524 INIT_LIST_HEAD(&host->target_list);
3525 spin_lock_init(&host->target_lock);
3526 init_completion(&host->released);
3527 mutex_init(&host->add_target_mutex);
3528 host->srp_dev = device;
3531 host->dev.class = &srp_class;
3532 host->dev.parent = device->dev->dma_device;
3533 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3535 if (device_register(&host->dev))
3537 if (device_create_file(&host->dev, &dev_attr_add_target))
3539 if (device_create_file(&host->dev, &dev_attr_ibdev))
3541 if (device_create_file(&host->dev, &dev_attr_port))
3547 device_unregister(&host->dev);
3555 static void srp_add_one(struct ib_device *device)
3557 struct srp_device *srp_dev;
3558 struct ib_device_attr *attr = &device->attrs;
3559 struct srp_host *host;
3560 int mr_page_shift, p;
3561 u64 max_pages_per_mr;
3562 unsigned int flags = 0;
3564 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3569 * Use the smallest page size supported by the HCA, down to a
3570 * minimum of 4096 bytes. We're unlikely to build large sglists
3571 * out of smaller entries.
3573 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
3574 srp_dev->mr_page_size = 1 << mr_page_shift;
3575 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3576 max_pages_per_mr = attr->max_mr_size;
3577 do_div(max_pages_per_mr, srp_dev->mr_page_size);
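/*
 * Illustrative example (values are assumptions): a device reporting
 * page_size_cap = 0xfffff000 supports 4 KB pages and larger, so
 * mr_page_shift = max(12, ffs(0xfffff000) - 1) = 12 and mr_page_size = 4096;
 * with max_mr_size = 4 GB this yields max_pages_per_mr = 2^32 / 4096 = 2^20
 * before the value is clamped to SRP_MAX_PAGES_PER_MR below.
 */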
3578 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3579 attr->max_mr_size, srp_dev->mr_page_size,
3580 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3581 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3584 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3585 device->map_phys_fmr && device->unmap_fmr);
3586 srp_dev->has_fr = (attr->device_cap_flags &
3587 IB_DEVICE_MEM_MGT_EXTENSIONS);
3588 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3589 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3590 } else if (!never_register &&
3591 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
3592 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3593 (!srp_dev->has_fmr || prefer_fr));
3594 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3597 if (never_register || !register_always ||
3598 (!srp_dev->has_fmr && !srp_dev->has_fr))
3599 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
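/*
 * Note (descriptive summary of the condition above): when memory
 * registration is disabled or not supported, data buffers are accessed
 * through the PD's global unregistered rkey, which is why the PD is
 * allocated with IB_PD_UNSAFE_GLOBAL_RKEY in those cases.
 */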
3601 if (srp_dev->use_fast_reg) {
3602 srp_dev->max_pages_per_mr =
3603 min_t(u32, srp_dev->max_pages_per_mr,
3604 attr->max_fast_reg_page_list_len);
3606 srp_dev->mr_max_size = srp_dev->mr_page_size *
3607 srp_dev->max_pages_per_mr;
3608 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3609 device->name, mr_page_shift, attr->max_mr_size,
3610 attr->max_fast_reg_page_list_len,
3611 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3613 INIT_LIST_HEAD(&srp_dev->dev_list);
3615 srp_dev->dev = device;
3616 srp_dev->pd = ib_alloc_pd(device, flags);
3617 if (IS_ERR(srp_dev->pd))
3621 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3622 host = srp_add_port(srp_dev, p);
3624 list_add_tail(&host->list, &srp_dev->dev_list);
3627 ib_set_client_data(device, &srp_client, srp_dev);
3634 static void srp_remove_one(struct ib_device *device, void *client_data)
3636 struct srp_device *srp_dev;
3637 struct srp_host *host, *tmp_host;
3638 struct srp_target_port *target;
3640 srp_dev = client_data;
3644 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3645 device_unregister(&host->dev);
3647 * Wait for the sysfs entry to go away, so that no new
3648 * target ports can be created.
3650 wait_for_completion(&host->released);
3653 * Remove all target ports.
3655 spin_lock(&host->target_lock);
3656 list_for_each_entry(target, &host->target_list, list)
3657 srp_queue_remove_work(target);
3658 spin_unlock(&host->target_lock);
3661 * Wait for tl_err and target port removal tasks.
3663 flush_workqueue(system_long_wq);
3664 flush_workqueue(srp_remove_wq);
3669 ib_dealloc_pd(srp_dev->pd);
3674 static struct srp_function_template ib_srp_transport_functions = {
3675 .has_rport_state = true,
3676 .reset_timer_if_blocked = true,
3677 .reconnect_delay = &srp_reconnect_delay,
3678 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3679 .dev_loss_tmo = &srp_dev_loss_tmo,
3680 .reconnect = srp_rport_reconnect,
3681 .rport_delete = srp_rport_delete,
3682 .terminate_rport_io = srp_terminate_io,
3685 static int __init srp_init_module(void)
3689 if (srp_sg_tablesize) {
3690 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3691 if (!cmd_sg_entries)
3692 cmd_sg_entries = srp_sg_tablesize;
3695 if (!cmd_sg_entries)
3696 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3698 if (cmd_sg_entries > 255) {
3699 pr_warn("Clamping cmd_sg_entries to 255\n");
3700 cmd_sg_entries = 255;
3703 if (!indirect_sg_entries)
3704 indirect_sg_entries = cmd_sg_entries;
3705 else if (indirect_sg_entries < cmd_sg_entries) {
3706 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3708 indirect_sg_entries = cmd_sg_entries;
3711 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
3712 pr_warn("Clamping indirect_sg_entries to %u\n",
3714 indirect_sg_entries = SG_MAX_SEGMENTS;
3717 srp_remove_wq = create_workqueue("srp_remove");
3718 if (!srp_remove_wq) {
3724 ib_srp_transport_template =
3725 srp_attach_transport(&ib_srp_transport_functions);
3726 if (!ib_srp_transport_template)
3729 ret = class_register(&srp_class);
3731 pr_err("couldn't register class infiniband_srp\n");
3735 ib_sa_register_client(&srp_sa_client);
3737 ret = ib_register_client(&srp_client);
3739 pr_err("couldn't register IB client\n");
3747 ib_sa_unregister_client(&srp_sa_client);
3748 class_unregister(&srp_class);
3751 srp_release_transport(ib_srp_transport_template);
3754 destroy_workqueue(srp_remove_wq);
3758 static void __exit srp_cleanup_module(void)
3760 ib_unregister_client(&srp_client);
3761 ib_sa_unregister_client(&srp_sa_client);
3762 class_unregister(&srp_class);
3763 srp_release_transport(ib_srp_transport_template);
3764 destroy_workqueue(srp_remove_wq);
3767 module_init(srp_init_module);
3768 module_exit(srp_cleanup_module);