svcrdma: Optimize the logic that selects the R_key to invalidate
author    Chuck Lever <chuck.lever@oracle.com>
          Tue, 27 Nov 2018 16:11:35 +0000 (11:11 -0500)
committer J. Bruce Fields <bfields@redhat.com>
          Wed, 28 Nov 2018 23:36:03 +0000 (18:36 -0500)
o Select the R_key to invalidate while the CPU cache still contains
  the received RPC Call transport header, rather than waiting until
  we're about to send the RPC Reply.

o Choose Send With Invalidate if there is exactly one distinct R_key
  in the received transport header. If there is more than one, the
  client will have to perform local invalidation after it has
  already waited for remote invalidation. (A standalone sketch of
  this selection rule follows below.)
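
The selection rule in isolation: a minimal, self-contained sketch,
not the kernel code. pick_inv_rkey() is a hypothetical name, and the
candidate R_keys are assumed to have already been collected from the
Read list, Write list, and Reply chunk.

	#include <stdint.h>
	#include <stdio.h>

	/* Return the R_key when every candidate matches; 0 forces a
	 * plain Send (the client then invalidates locally). */
	static uint32_t pick_inv_rkey(const uint32_t *rkeys,
				      unsigned int count)
	{
		uint32_t rkey = 0;
		unsigned int i;

		for (i = 0; i < count; i++) {
			if (!rkey)
				rkey = rkeys[i];
			else if (rkey != rkeys[i])
				return 0; /* more than one distinct R_key */
		}
		return rkey;
	}

	int main(void)
	{
		uint32_t one[]  = { 0xabcd, 0xabcd }; /* one distinct key */
		uint32_t many[] = { 0xabcd, 0x1234 }; /* two distinct keys */

		printf("%x\n", pick_inv_rkey(one, 2));  /* abcd: Send With Invalidate */
		printf("%x\n", pick_inv_rkey(many, 2)); /* 0: plain Send */
		return 0;
	}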

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
include/linux/sunrpc/svc_rdma.h
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index e6e26918504c1ee6644b52604bd25c223303a3ce..7e22681333d02c4f62b727bdc73c877d5a232eaf 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -135,6 +135,7 @@ struct svc_rdma_recv_ctxt {
        u32                     rc_byte_len;
        unsigned int            rc_page_count;
        unsigned int            rc_hdr_count;
+       u32                     rc_inv_rkey;
        struct page             *rc_pages[RPCSVC_MAXPAGES];
 };
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index b24d5b8f2feeda5a2d5d66d755694475a6003cf7..828b149eaaefe004a23374fdb555886d976c5b05 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -485,6 +485,68 @@ static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
        return p;
 }
 
+/* RPC-over-RDMA Version One private extension: Remote Invalidation.
+ * Responder's choice: requester signals it can handle Send With
+ * Invalidate, and responder chooses one R_key to invalidate.
+ *
+ * If there is exactly one distinct R_key in the received transport
+ * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
+ *
+ * Perform this operation while the received transport header is
+ * still in the CPU cache.
+ */
+static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
+                                 struct svc_rdma_recv_ctxt *ctxt)
+{
+       __be32 inv_rkey, *p;
+       u32 i, segcount;
+
+       ctxt->rc_inv_rkey = 0;
+
+       if (!rdma->sc_snd_w_inv)
+               return;
+
+       inv_rkey = xdr_zero;
+       p = ctxt->rc_recv_buf;
+       p += rpcrdma_fixed_maxsz;
+
+       /* Read list */
+       while (*p++ != xdr_zero) {
+               p++;    /* position */
+               if (inv_rkey == xdr_zero)
+                       inv_rkey = *p;
+               else if (inv_rkey != *p)
+                       return;
+               p += 4;
+       }
+
+       /* Write list */
+       while (*p++ != xdr_zero) {
+               segcount = be32_to_cpup(p++);
+               for (i = 0; i < segcount; i++) {
+                       if (inv_rkey == xdr_zero)
+                               inv_rkey = *p;
+                       else if (inv_rkey != *p)
+                               return;
+                       p += 4;
+               }
+       }
+
+       /* Reply chunk */
+       if (*p++ != xdr_zero) {
+               segcount = be32_to_cpup(p++);
+               for (i = 0; i < segcount; i++) {
+                       if (inv_rkey == xdr_zero)
+                               inv_rkey = *p;
+                       else if (inv_rkey != *p)
+                               return;
+                       p += 4;
+               }
+       }
+
+       ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
+}
+
 /* On entry, xdr->head[0].iov_base points to first byte in the
  * RPC-over-RDMA header.
  *
@@ -746,6 +808,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
                svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
                return ret;
        }
+       svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
 
        p += rpcrdma_fixed_maxsz;
        if (*p != xdr_zero)
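
To see the strides the parser above takes through the XDR stream,
here is a small user-space demo. It is purely illustrative and not
part of the patch: it hand-builds a header with one Read segment, an
empty Write list, and no Reply chunk, then scans the Read list with
the same pointer arithmetic as svc_rdma_get_inv_rkey().

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t hdr[32];
		uint32_t *p = hdr;
		uint32_t inv = 0;

		/* Fixed header: xid, version, credits, proc (4 words,
		 * i.e. rpcrdma_fixed_maxsz; contents unused here) */
		*p++ = htonl(7); *p++ = htonl(1);
		*p++ = htonl(32); *p++ = htonl(0);

		/* Read list: one entry */
		*p++ = htonl(1);	/* item-present discriminator */
		*p++ = htonl(0);	/* position */
		*p++ = htonl(0x1234);	/* handle (R_key) */
		*p++ = htonl(4096);	/* length */
		*p++ = htonl(0); *p++ = htonl(0); /* 64-bit offset */
		*p++ = htonl(0);	/* read list terminator */

		*p++ = htonl(0);	/* empty write list */
		*p++ = htonl(0);	/* no reply chunk */

		/* Walk it: skip the fixed words, scan the read list */
		p = hdr + 4;
		while (ntohl(*p++) != 0) {
			p++;				/* skip position */
			if (!inv) {
				inv = ntohl(*p);	/* first handle seen */
			} else if (inv != ntohl(*p)) {
				inv = 0;	/* multiple distinct R_keys */
				break;
			}
			p += 4;	/* handle + length + 2-word offset */
		}
		/* (write list / reply chunk scans omitted: empty here) */

		printf("rc_inv_rkey = %x\n", inv);	/* 1234 */
		return 0;
	}
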
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 8602a5f1b5156049bc780919dd613c889824647a..d48bc6dd7b96ee21941d924c7fd6e5cb870b9891 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -484,32 +484,6 @@ static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
                *reply = NULL;
 }
 
-/* RPC-over-RDMA Version One private extension: Remote Invalidation.
- * Responder's choice: requester signals it can handle Send With
- * Invalidate, and responder chooses one rkey to invalidate.
- *
- * Find a candidate rkey to invalidate when sending a reply.  Picks the
- * first R_key it finds in the chunk lists.
- *
- * Returns zero if RPC's chunk lists are empty.
- */
-static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
-                                __be32 *wr_lst, __be32 *rp_ch)
-{
-       __be32 *p;
-
-       p = rdma_argp + rpcrdma_fixed_maxsz;
-       if (*p != xdr_zero)
-               p += 2;
-       else if (wr_lst && be32_to_cpup(wr_lst + 1))
-               p = wr_lst + 2;
-       else if (rp_ch && be32_to_cpup(rp_ch + 1))
-               p = rp_ch + 2;
-       else
-               return 0;
-       return be32_to_cpup(p);
-}
-
 static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
                                 struct svc_rdma_send_ctxt *ctxt,
                                 struct page *page,
@@ -672,7 +646,7 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
  *
  * RDMA Send is the last step of transmitting an RPC reply. Pages
  * involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the ctxt's page array. These pages are
+ * of the rqstp and into the sctxt's page array. These pages are
  * DMA unmapped by each Write completion, but the subsequent Send
  * completion finally releases these pages.
  *
@@ -680,32 +654,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
  * - The Reply's transport header will never be larger than a page.
  */
 static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
-                                  struct svc_rdma_send_ctxt *ctxt,
-                                  __be32 *rdma_argp,
+                                  struct svc_rdma_send_ctxt *sctxt,
+                                  struct svc_rdma_recv_ctxt *rctxt,
                                   struct svc_rqst *rqstp,
                                   __be32 *wr_lst, __be32 *rp_ch)
 {
        int ret;
 
        if (!rp_ch) {
-               ret = svc_rdma_map_reply_msg(rdma, ctxt,
+               ret = svc_rdma_map_reply_msg(rdma, sctxt,
                                             &rqstp->rq_res, wr_lst);
                if (ret < 0)
                        return ret;
        }
 
-       svc_rdma_save_io_pages(rqstp, ctxt);
+       svc_rdma_save_io_pages(rqstp, sctxt);
 
-       ctxt->sc_send_wr.opcode = IB_WR_SEND;
-       if (rdma->sc_snd_w_inv) {
-               ctxt->sc_send_wr.ex.invalidate_rkey =
-                       svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
-               if (ctxt->sc_send_wr.ex.invalidate_rkey)
-                       ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+       if (rctxt->rc_inv_rkey) {
+               sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
+               sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+       } else {
+               sctxt->sc_send_wr.opcode = IB_WR_SEND;
        }
        dprintk("svcrdma: posting Send WR with %u sge(s)\n",
-               ctxt->sc_send_wr.num_sge);
-       return svc_rdma_send(rdma, &ctxt->sc_send_wr);
+               sctxt->sc_send_wr.num_sge);
+       return svc_rdma_send(rdma, &sctxt->sc_send_wr);
 }
 
 /* Given the client-provided Write and Reply chunks, the server was not
@@ -809,7 +782,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
        }
 
        svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
-       ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
+       ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp,
                                      wr_lst, rp_ch);
        if (ret < 0)
                goto err1;
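
For context, here is how the receive-time choice plays out at Send
time: a minimal sketch, assuming a built SGE array and a connected
QP, of forming the Send WR the way the new code in
svc_rdma_send_reply_msg() does. post_reply_send() is a hypothetical
helper; the ib_verbs fields and calls are the real kernel API.

	#include <rdma/ib_verbs.h>

	static int post_reply_send(struct ib_qp *qp, struct ib_sge *sge,
				   int num_sge, u32 inv_rkey)
	{
		struct ib_send_wr wr = {
			.sg_list	= sge,
			.num_sge	= num_sge,
			.send_flags	= IB_SEND_SIGNALED,
		};

		if (inv_rkey) {
			/* Exactly one distinct R_key was seen at receive
			 * time: ask the HCA to invalidate it as part of
			 * delivering the Reply. */
			wr.opcode = IB_WR_SEND_WITH_INV;
			wr.ex.invalidate_rkey = inv_rkey;
		} else {
			/* Zero or several R_keys: plain Send; the client
			 * performs local invalidation itself. */
			wr.opcode = IB_WR_SEND;
		}

		return ib_post_send(qp, &wr, NULL);
	}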