asedeno.scripts.mit.edu Git - linux.git/blobdiff - net/sunrpc/xprtrdma/verbs.c
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
[linux.git] / net / sunrpc / xprtrdma / verbs.c
index afd504375a9a9afd001fda7b749ac25fad7bb1b8..682996779970c6ccae749c9de566f06a9b205c80 100644 (file)
@@ -52,6 +52,7 @@
 #include <linux/prefetch.h>
 #include <linux/sunrpc/addr.h>
 #include <asm/bitops.h>
+#include <linux/module.h> /* try_module_get()/module_put() */
 
 #include "xprt_rdma.h"
 
@@ -414,6 +415,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
        return 0;
 }
 
+static void rpcrdma_destroy_id(struct rdma_cm_id *id) /* drop module ref, then destroy @id */
+{
+       if (id) { /* a NULL id is silently ignored */
+               module_put(id->device->owner); /* pairs with try_module_get() in rpcrdma_create_id() */
+               rdma_destroy_id(id);
+       }
+}
+
 static struct rdma_cm_id *
 rpcrdma_create_id(struct rpcrdma_xprt *xprt,
                        struct rpcrdma_ia *ia, struct sockaddr *addr)
@@ -440,6 +449,17 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
        }
        wait_for_completion_interruptible_timeout(&ia->ri_done,
                                msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
+
+       /* FIXME:
+        * Until xprtrdma supports DEVICE_REMOVAL, the provider must
+        * be pinned while there are active NFS/RDMA mounts to prevent
+        * hangs and crashes at umount time.
+        */
+       if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
+               dprintk("RPC:       %s: Failed to get device module\n",
+                       __func__);
+               ia->ri_async_rc = -ENODEV;
+       }
        rc = ia->ri_async_rc;
        if (rc)
                goto out;
@@ -449,16 +469,17 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
        if (rc) {
                dprintk("RPC:       %s: rdma_resolve_route() failed %i\n",
                        __func__, rc);
-               goto out;
+               goto put;
        }
        wait_for_completion_interruptible_timeout(&ia->ri_done,
                                msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
        rc = ia->ri_async_rc;
        if (rc)
-               goto out;
+               goto put;
 
        return id;
-
+put:
+       module_put(id->device->owner);
 out:
        rdma_destroy_id(id);
        return ERR_PTR(rc);
@@ -493,9 +514,11 @@ rpcrdma_clean_cq(struct ib_cq *cq)
 int
 rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 {
-       int rc, mem_priv;
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        struct ib_device_attr *devattr = &ia->ri_devattr;
+       int rc;
+
+       ia->ri_dma_mr = NULL;
 
        ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
        if (IS_ERR(ia->ri_id)) {
@@ -519,11 +542,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                goto out3;
        }
 
-       if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
-               ia->ri_have_dma_lkey = 1;
-               ia->ri_dma_lkey = ia->ri_device->local_dma_lkey;
-       }
-
        if (memreg == RPCRDMA_FRMR) {
                /* Requires both frmr reg and local dma lkey */
                if (((devattr->device_cap_flags &
@@ -539,42 +557,19 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                if (!ia->ri_device->alloc_fmr) {
                        dprintk("RPC:       %s: MTHCAFMR registration "
                                "not supported by HCA\n", __func__);
-                       memreg = RPCRDMA_ALLPHYSICAL;
+                       goto out3;
                }
        }
 
-       /*
-        * Optionally obtain an underlying physical identity mapping in
-        * order to do a memory window-based bind. This base registration
-        * is protected from remote access - that is enabled only by binding
-        * for the specific bytes targeted during each RPC operation, and
-        * revoked after the corresponding completion similar to a storage
-        * adapter.
-        */
        switch (memreg) {
        case RPCRDMA_FRMR:
                ia->ri_ops = &rpcrdma_frwr_memreg_ops;
                break;
        case RPCRDMA_ALLPHYSICAL:
                ia->ri_ops = &rpcrdma_physical_memreg_ops;
-               mem_priv = IB_ACCESS_LOCAL_WRITE |
-                               IB_ACCESS_REMOTE_WRITE |
-                               IB_ACCESS_REMOTE_READ;
-               goto register_setup;
+               break;
        case RPCRDMA_MTHCAFMR:
                ia->ri_ops = &rpcrdma_fmr_memreg_ops;
-               if (ia->ri_have_dma_lkey)
-                       break;
-               mem_priv = IB_ACCESS_LOCAL_WRITE;
-       register_setup:
-               ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
-               if (IS_ERR(ia->ri_bind_mem)) {
-                       printk(KERN_ALERT "%s: ib_get_dma_mr for "
-                               "phys register failed with %lX\n",
-                               __func__, PTR_ERR(ia->ri_bind_mem));
-                       rc = -ENOMEM;
-                       goto out3;
-               }
                break;
        default:
                printk(KERN_ERR "RPC: Unsupported memory "
@@ -592,7 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
        ib_dealloc_pd(ia->ri_pd);
        ia->ri_pd = NULL;
 out2:
-       rdma_destroy_id(ia->ri_id);
+       rpcrdma_destroy_id(ia->ri_id);
        ia->ri_id = NULL;
 out1:
        return rc;
@@ -606,19 +601,11 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 void
 rpcrdma_ia_close(struct rpcrdma_ia *ia)
 {
-       int rc;
-
        dprintk("RPC:       %s: entering\n", __func__);
-       if (ia->ri_bind_mem != NULL) {
-               rc = ib_dereg_mr(ia->ri_bind_mem);
-               dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
-                       __func__, rc);
-       }
-
        if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
                if (ia->ri_id->qp)
                        rdma_destroy_qp(ia->ri_id);
-               rdma_destroy_id(ia->ri_id);
+               rpcrdma_destroy_id(ia->ri_id);
                ia->ri_id = NULL;
        }
 
@@ -639,6 +626,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        struct ib_cq_init_attr cq_attr = {};
        int rc, err;
 
+       if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
+               dprintk("RPC:       %s: insufficient sge's available\n",
+                       __func__);
+               return -ENOMEM;
+       }
+
        /* check provider's send/recv wr limits */
        if (cdata->max_requests > devattr->max_qp_wr)
                cdata->max_requests = devattr->max_qp_wr;
@@ -651,21 +644,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
        if (rc)
                return rc;
        ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
-       ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
+       ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS;
        ep->rep_attr.cap.max_recv_sge = 1;
        ep->rep_attr.cap.max_inline_data = 0;
        ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        ep->rep_attr.qp_type = IB_QPT_RC;
        ep->rep_attr.port_num = ~0;
 
-       if (cdata->padding) {
-               ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding,
-                                                     GFP_KERNEL);
-               if (IS_ERR(ep->rep_padbuf))
-                       return PTR_ERR(ep->rep_padbuf);
-       } else
-               ep->rep_padbuf = NULL;
-
        dprintk("RPC:       %s: requested max: dtos: send %d recv %d; "
                "iovs: send %d recv %d\n",
                __func__,
@@ -748,7 +733,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
                        __func__, err);
 out1:
-       rpcrdma_free_regbuf(ia, ep->rep_padbuf);
+       if (ia->ri_dma_mr)
+               ib_dereg_mr(ia->ri_dma_mr);
        return rc;
 }
 
@@ -775,8 +761,6 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
                ia->ri_id->qp = NULL;
        }
 
-       rpcrdma_free_regbuf(ia, ep->rep_padbuf);
-
        rpcrdma_clean_cq(ep->rep_attr.recv_cq);
        rc = ib_destroy_cq(ep->rep_attr.recv_cq);
        if (rc)
@@ -788,6 +772,12 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
        if (rc)
                dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
                        __func__, rc);
+
+       if (ia->ri_dma_mr) {
+               rc = ib_dereg_mr(ia->ri_dma_mr);
+               dprintk("RPC:       %s: ib_dereg_mr returned %i\n",
+                       __func__, rc);
+       }
 }
 
 /*
@@ -825,7 +815,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
                if (ia->ri_device != id->device) {
                        printk("RPC:       %s: can't reconnect on "
                                "different device!\n", __func__);
-                       rdma_destroy_id(id);
+                       rpcrdma_destroy_id(id);
                        rc = -ENETUNREACH;
                        goto out;
                }
@@ -834,7 +824,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
                if (rc) {
                        dprintk("RPC:       %s: rdma_create_qp failed %i\n",
                                __func__, rc);
-                       rdma_destroy_id(id);
+                       rpcrdma_destroy_id(id);
                        rc = -ENETUNREACH;
                        goto out;
                }
@@ -845,7 +835,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
                write_unlock(&ia->ri_qplock);
 
                rdma_destroy_qp(old);
-               rdma_destroy_id(old);
+               rpcrdma_destroy_id(old);
        } else {
                dprintk("RPC:       %s: connecting...\n", __func__);
                rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
@@ -1229,75 +1219,6 @@ rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
                (unsigned long long)seg->mr_dma, seg->mr_dmalen);
 }
 
-static int
-rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
-                               struct ib_mr **mrp, struct ib_sge *iov)
-{
-       struct ib_phys_buf ipb;
-       struct ib_mr *mr;
-       int rc;
-
-       /*
-        * All memory passed here was kmalloc'ed, therefore phys-contiguous.
-        */
-       iov->addr = ib_dma_map_single(ia->ri_device,
-                       va, len, DMA_BIDIRECTIONAL);
-       if (ib_dma_mapping_error(ia->ri_device, iov->addr))
-               return -ENOMEM;
-
-       iov->length = len;
-
-       if (ia->ri_have_dma_lkey) {
-               *mrp = NULL;
-               iov->lkey = ia->ri_dma_lkey;
-               return 0;
-       } else if (ia->ri_bind_mem != NULL) {
-               *mrp = NULL;
-               iov->lkey = ia->ri_bind_mem->lkey;
-               return 0;
-       }
-
-       ipb.addr = iov->addr;
-       ipb.size = iov->length;
-       mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1,
-                       IB_ACCESS_LOCAL_WRITE, &iov->addr);
-
-       dprintk("RPC:       %s: phys convert: 0x%llx "
-                       "registered 0x%llx length %d\n",
-                       __func__, (unsigned long long)ipb.addr,
-                       (unsigned long long)iov->addr, len);
-
-       if (IS_ERR(mr)) {
-               *mrp = NULL;
-               rc = PTR_ERR(mr);
-               dprintk("RPC:       %s: failed with %i\n", __func__, rc);
-       } else {
-               *mrp = mr;
-               iov->lkey = mr->lkey;
-               rc = 0;
-       }
-
-       return rc;
-}
-
-static int
-rpcrdma_deregister_internal(struct rpcrdma_ia *ia,
-                               struct ib_mr *mr, struct ib_sge *iov)
-{
-       int rc;
-
-       ib_dma_unmap_single(ia->ri_device,
-                           iov->addr, iov->length, DMA_BIDIRECTIONAL);
-
-       if (NULL == mr)
-               return 0;
-
-       rc = ib_dereg_mr(mr);
-       if (rc)
-               dprintk("RPC:       %s: ib_dereg_mr failed %i\n", __func__, rc);
-       return rc;
-}
-
 /**
  * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers
  * @ia: controlling rpcrdma_ia
@@ -1317,26 +1238,29 @@ struct rpcrdma_regbuf *
 rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
 {
        struct rpcrdma_regbuf *rb;
-       int rc;
+       struct ib_sge *iov; /* shorthand for the sge embedded in the regbuf (rb->rg_iov) */
 
-       rc = -ENOMEM;
        rb = kmalloc(sizeof(*rb) + size, flags); /* struct plus @size extra bytes in one allocation */
        if (rb == NULL)
                goto out;
 
-       rb->rg_size = size;
-       rb->rg_owner = NULL;
-       rc = rpcrdma_register_internal(ia, rb->rg_base, size,
-                                      &rb->rg_mr, &rb->rg_iov);
-       if (rc)
+       iov = &rb->rg_iov;
+       iov->addr = ib_dma_map_single(ia->ri_device,
+                                     (void *)rb->rg_base, size,
+                                     DMA_BIDIRECTIONAL);
+       if (ib_dma_mapping_error(ia->ri_device, iov->addr)) /* on mapping failure: free rb, return ERR_PTR(-ENOMEM) */
                goto out_free;
 
+       iov->length = size;
+       iov->lkey = ia->ri_dma_lkey; /* NOTE(review): no assignment of ri_dma_lkey is visible in these hunks - confirm it is set elsewhere */
+       rb->rg_size = size;
+       rb->rg_owner = NULL;
        return rb;
 
 out_free:
        kfree(rb);
 out:
-       return ERR_PTR(rc);
+       return ERR_PTR(-ENOMEM); /* both failure paths (kmalloc, DMA mapping) report -ENOMEM */
 }
 
 /**
@@ -1347,10 +1271,15 @@ rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags)
 void
 rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
 {
-       if (rb) {
-               rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov);
-               kfree(rb);
-       }
+       struct ib_sge *iov;
+
+       if (!rb) /* NULL is tolerated, matching the removed "if (rb)" guard */
+               return;
+
+       iov = &rb->rg_iov;
+       ib_dma_unmap_single(ia->ri_device, /* unmap using the address/length stored in rg_iov, then free */
+                           iov->addr, iov->length, DMA_BIDIRECTIONAL);
+       kfree(rb);
 }
 
 /*
@@ -1363,9 +1292,11 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
                struct rpcrdma_ep *ep,
                struct rpcrdma_req *req)
 {
+       struct ib_device *device = ia->ri_device;
        struct ib_send_wr send_wr, *send_wr_fail;
        struct rpcrdma_rep *rep = req->rl_reply;
-       int rc;
+       struct ib_sge *iov = req->rl_send_iov;
+       int i, rc;
 
        if (rep) {
                rc = rpcrdma_ep_post_recv(ia, ep, rep);
@@ -1376,22 +1307,15 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
 
        send_wr.next = NULL;
        send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
-       send_wr.sg_list = req->rl_send_iov;
+       send_wr.sg_list = iov;
        send_wr.num_sge = req->rl_niovs;
        send_wr.opcode = IB_WR_SEND;
-       if (send_wr.num_sge == 4)       /* no need to sync any pad (constant) */
-               ib_dma_sync_single_for_device(ia->ri_device,
-                                             req->rl_send_iov[3].addr,
-                                             req->rl_send_iov[3].length,
-                                             DMA_TO_DEVICE);
-       ib_dma_sync_single_for_device(ia->ri_device,
-                                     req->rl_send_iov[1].addr,
-                                     req->rl_send_iov[1].length,
-                                     DMA_TO_DEVICE);
-       ib_dma_sync_single_for_device(ia->ri_device,
-                                     req->rl_send_iov[0].addr,
-                                     req->rl_send_iov[0].length,
-                                     DMA_TO_DEVICE);
+
+       for (i = 0; i < send_wr.num_sge; i++)
+               ib_dma_sync_single_for_device(device, iov[i].addr,
+                                             iov[i].length, DMA_TO_DEVICE);
+       dprintk("RPC:       %s: posting %d s/g entries\n",
+               __func__, send_wr.num_sge);
 
        if (DECR_CQCOUNT(ep) > 0)
                send_wr.send_flags = 0;