]> asedeno.scripts.mit.edu Git - linux.git/blobdiff - net/rds/rdma.c
Merge tag 'trace-v5.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
[linux.git] / net / rds / rdma.c
index 916f5ec373d8eac066b871f56901d041d0cb189b..3341eee87bf9b3c8bad89b88ca0e8fcd74186539 100644 (file)
@@ -156,11 +156,13 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
 static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
                        struct page **pages, int write)
 {
+       unsigned int gup_flags = FOLL_LONGTERM;
        int ret;
 
-       ret = get_user_pages_fast(user_addr, nr_pages, write ? FOLL_WRITE : 0,
-                                 pages);
+       if (write)
+               gup_flags |= FOLL_WRITE;
 
+       ret = get_user_pages_fast(user_addr, nr_pages, gup_flags, pages);
        if (ret >= 0 && ret < nr_pages) {
                while (ret--)
                        put_page(pages[ret]);
@@ -175,13 +177,14 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
                          struct rds_conn_path *cp)
 {
        struct rds_mr *mr = NULL, *found;
+       struct scatterlist *sg = NULL;
        unsigned int nr_pages;
        struct page **pages = NULL;
-       struct scatterlist *sg;
        void *trans_private;
        unsigned long flags;
        rds_rdma_cookie_t cookie;
-       unsigned int nents;
+       unsigned int nents = 0;
+       int need_odp = 0;
        long i;
        int ret;
 
@@ -195,6 +198,21 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
                goto out;
        }
 
+       /* If the combination of the addr and size requested for this memory
+        * region causes an integer overflow, return error.
+        */
+       if (((args->vec.addr + args->vec.bytes) < args->vec.addr) ||
+           PAGE_ALIGN(args->vec.addr + args->vec.bytes) <
+                   (args->vec.addr + args->vec.bytes)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (!can_do_mlock()) {
+               ret = -EPERM;
+               goto out;
+       }
+
        nr_pages = rds_pages_in_vec(&args->vec);
        if (nr_pages == 0) {
                ret = -EINVAL;
@@ -248,36 +266,44 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
         * the zero page.
         */
        ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
-       if (ret < 0)
-               goto out;
-
-       nents = ret;
-       sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
-       if (!sg) {
-               ret = -ENOMEM;
+       if (ret == -EOPNOTSUPP) {
+               need_odp = 1;
+       } else if (ret <= 0) {
                goto out;
-       }
-       WARN_ON(!nents);
-       sg_init_table(sg, nents);
-
-       /* Stick all pages into the scatterlist */
-       for (i = 0 ; i < nents; i++)
-               sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
+       } else {
+               nents = ret;
+               sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
+               if (!sg) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               WARN_ON(!nents);
+               sg_init_table(sg, nents);
 
-       rdsdebug("RDS: trans_private nents is %u\n", nents);
+               /* Stick all pages into the scatterlist */
+               for (i = 0 ; i < nents; i++)
+                       sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
 
+               rdsdebug("RDS: trans_private nents is %u\n", nents);
+       }
        /* Obtain a transport specific MR. If this succeeds, the
         * s/g list is now owned by the MR.
         * Note that dma_map() implies that pending writes are
         * flushed to RAM, so no dma_sync is needed here. */
-       trans_private = rs->rs_transport->get_mr(sg, nents, rs,
-                                                &mr->r_key,
-                                                cp ? cp->cp_conn : NULL);
+       trans_private = rs->rs_transport->get_mr(
+               sg, nents, rs, &mr->r_key, cp ? cp->cp_conn : NULL,
+               args->vec.addr, args->vec.bytes,
+               need_odp ? ODP_ZEROBASED : ODP_NOT_NEEDED);
 
        if (IS_ERR(trans_private)) {
-               for (i = 0 ; i < nents; i++)
-                       put_page(sg_page(&sg[i]));
-               kfree(sg);
+               /* In ODP case, we don't GUP pages, so don't need
+                * to release anything.
+                */
+               if (!need_odp) {
+                       for (i = 0 ; i < nents; i++)
+                               put_page(sg_page(&sg[i]));
+                       kfree(sg);
+               }
                ret = PTR_ERR(trans_private);
                goto out;
        }
@@ -291,7 +317,11 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
         * map page aligned regions. So we keep the offset, and build
         * a 64bit cookie containing <R_Key, offset> and pass that
         * around. */
-       cookie = rds_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGE_MASK);
+       if (need_odp)
+               cookie = rds_rdma_make_cookie(mr->r_key, 0);
+       else
+               cookie = rds_rdma_make_cookie(mr->r_key,
+                                             args->vec.addr & ~PAGE_MASK);
        if (cookie_ret)
                *cookie_ret = cookie;
 
@@ -456,22 +486,26 @@ void rds_rdma_free_op(struct rm_rdma_op *ro)
 {
        unsigned int i;
 
-       for (i = 0; i < ro->op_nents; i++) {
-               struct page *page = sg_page(&ro->op_sg[i]);
-
-               /* Mark page dirty if it was possibly modified, which
-                * is the case for a RDMA_READ which copies from remote
-                * to local memory */
-               if (!ro->op_write) {
-                       WARN_ON(!page->mapping && irqs_disabled());
-                       set_page_dirty(page);
+       if (ro->op_odp_mr) {
+               rds_mr_put(ro->op_odp_mr);
+       } else {
+               for (i = 0; i < ro->op_nents; i++) {
+                       struct page *page = sg_page(&ro->op_sg[i]);
+
+                       /* Mark page dirty if it was possibly modified, which
+                        * is the case for a RDMA_READ which copies from remote
+                        * to local memory
+                        */
+                       if (!ro->op_write)
+                               set_page_dirty(page);
+                       put_page(page);
                }
-               put_page(page);
        }
 
        kfree(ro->op_notifier);
        ro->op_notifier = NULL;
        ro->op_active = 0;
+       ro->op_odp_mr = NULL;
 }
 
 void rds_atomic_free_op(struct rm_atomic_op *ao)
@@ -581,6 +615,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
        struct rds_iovec *iovs;
        unsigned int i, j;
        int ret = 0;
+       bool odp_supported = true;
 
        if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
            || rm->rdma.op_active)
@@ -602,6 +637,9 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
                ret = -EINVAL;
                goto out_ret;
        }
+       /* odp-mr is not supported for multiple requests within one message */
+       if (args->nr_local != 1)
+               odp_supported = false;
 
        iovs = vec->iov;
 
@@ -623,6 +661,8 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
        op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
        op->op_active = 1;
        op->op_recverr = rs->rs_recverr;
+       op->op_odp_mr = NULL;
+
        WARN_ON(!nr_pages);
        op->op_sg = rds_message_alloc_sgs(rm, nr_pages, &ret);
        if (!op->op_sg)
@@ -672,10 +712,44 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
                 * If it's a READ operation, we need to pin the pages for writing.
                 */
                ret = rds_pin_pages(iov->addr, nr, pages, !op->op_write);
-               if (ret < 0)
+               if ((!odp_supported && ret <= 0) ||
+                   (odp_supported && ret <= 0 && ret != -EOPNOTSUPP))
                        goto out_pages;
-               else
-                       ret = 0;
+
+               if (ret == -EOPNOTSUPP) {
+                       struct rds_mr *local_odp_mr;
+
+                       if (!rs->rs_transport->get_mr) {
+                               ret = -EOPNOTSUPP;
+                               goto out_pages;
+                       }
+                       local_odp_mr =
+                               kzalloc(sizeof(*local_odp_mr), GFP_KERNEL);
+                       if (!local_odp_mr) {
+                               ret = -ENOMEM;
+                               goto out_pages;
+                       }
+                       RB_CLEAR_NODE(&local_odp_mr->r_rb_node);
+                       refcount_set(&local_odp_mr->r_refcount, 1);
+                       local_odp_mr->r_trans = rs->rs_transport;
+                       local_odp_mr->r_sock = rs;
+                       local_odp_mr->r_trans_private =
+                               rs->rs_transport->get_mr(
+                                       NULL, 0, rs, &local_odp_mr->r_key, NULL,
+                                       iov->addr, iov->bytes, ODP_VIRTUAL);
+                       if (IS_ERR(local_odp_mr->r_trans_private)) {
+                               ret = IS_ERR(local_odp_mr->r_trans_private);
+                               rdsdebug("get_mr ret %d %p\"", ret,
+                                        local_odp_mr->r_trans_private);
+                               kfree(local_odp_mr);
+                               ret = -EOPNOTSUPP;
+                               goto out_pages;
+                       }
+                       rdsdebug("Need odp; local_odp_mr %p trans_private %p\n",
+                                local_odp_mr, local_odp_mr->r_trans_private);
+                       op->op_odp_mr = local_odp_mr;
+                       op->op_odp_addr = iov->addr;
+               }
 
                rdsdebug("RDS: nr_bytes %u nr %u iov->bytes %llu iov->addr %llx\n",
                         nr_bytes, nr, iov->bytes, iov->addr);
@@ -691,6 +765,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
                                        min_t(unsigned int, iov->bytes, PAGE_SIZE - offset),
                                        offset);
 
+                       sg_dma_len(sg) = sg->length;
                        rdsdebug("RDS: sg->offset %x sg->len %x iov->addr %llx iov->bytes %llu\n",
                               sg->offset, sg->length, iov->addr, iov->bytes);
 
@@ -709,6 +784,7 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
                goto out_pages;
        }
        op->op_bytes = nr_bytes;
+       ret = 0;
 
 out_pages:
        kfree(pages);
@@ -755,7 +831,8 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
        spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
        if (mr) {
-               mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
+               mr->r_trans->sync_mr(mr->r_trans_private,
+                                    DMA_TO_DEVICE);
                rm->rdma.op_rdma_mr = mr;
        }
        return err;